Move CSV handling to corelibs-csv
This commit is contained in:
29
README.md
29
README.md
@@ -47,10 +47,35 @@ This is a pip package that can be installed into any project and covers the foll
|
||||
|
||||
All content in this module will move to stand-alone libraries. As of now, the following entries have moved and will emit deprecation warnings if used:
|
||||
|
||||
- check_handling.regex_constants_compiled: corelibs-regex-checks
|
||||
- check_handling.regex_constants: corelibs-regex-checks
|
||||
- csv_handling.csv_interface: corelibs-csv
|
||||
- datetime_handling.datetime_helpers: corelibs-datetime
|
||||
- datetime_handling.timestamp_convert: corelibs-datetime
|
||||
- datetime_handling.timestamp_strings: corelibs-datetime
|
||||
- debug_handling.debug_helpers: corelibs-stack-trace
|
||||
- debug_handling.dump_data: corelibs-dump-data
|
||||
- debug_handling.profiling: corelibs-debug
|
||||
- debug_handling.timer: corelibs-debug
|
||||
- debug_handling.writeline: corelibs-debug
|
||||
- encryption_handling.symmetrix_encryption: corelibs-encryption
|
||||
- exceptions.csv_exceptions: corelibs-csv
|
||||
- file_handling.file_bom_encoding: corelibs-file
|
||||
- file_handling.file_crc: corelibs-file
|
||||
- file_handling.file_handling: corelibs-file
|
||||
- iterator_handling.data_search: corelibs-search
|
||||
- iterator_handling.dict_helpers: corelibs-iterator
|
||||
- iterator_handling.dict_mask: corelibs-dump-data
|
||||
- iterator_handling.fingerprint: corelibs-hash
|
||||
- iterator_handling.list_helpers: corelibs-iterator
|
||||
- json_handling.jmespath_helper: corelibs-search
|
||||
- json_handling.json_helper: corelibs-json
|
||||
- math_handling.math_helpers: python.math
|
||||
- requests_handling.auth_helpers: corelibs-requests
|
||||
- requests_handling.caller: corelibs-requests
|
||||
- string_handling.text_colors: corelibs-text-colors
|
||||
- var_handling.enum_base: corelibs-enum-base
|
||||
- var_handling.var_helpers: corelibs-var
|
||||
- datetime_handling: corelibs-datetime
|
||||
- string_handling.text_colors: corelibs-text-colors
|
||||
|
||||
## UV setup
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ description = "Collection of utils for Python scripts"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"corelibs-csv>=1.0.0",
|
||||
"corelibs-datetime>=1.0.1",
|
||||
"corelibs-debug>=1.0.0",
|
||||
"corelibs-dump-data>=1.0.0",
|
||||
|
||||
@@ -3,168 +3,37 @@ Write to CSV file
|
||||
- each class set is one file write with one header set
|
||||
"""
|
||||
|
||||
from typing import Any, Sequence
|
||||
from pathlib import Path
|
||||
from collections import Counter
|
||||
import csv
|
||||
from corelibs.file_handling.file_bom_encoding import is_bom_encoded, is_bom_encoded_info
|
||||
from corelibs.exceptions.csv_exceptions import (
|
||||
NoCsvReader, CompulsoryCsvHeaderCheckFailed, CsvHeaderDataMissing
|
||||
from warnings import warn
|
||||
from corelibs_csv.csv_interface import (
|
||||
CsvReader as CoreLibsCsvReader, CsvWriter as CoreLibsCsvWriter,
|
||||
ENCODING as CoreLibsEncoding,
|
||||
ENCODING_UTF8_SIG as CoreLibsEncodingUtf8Sig,
|
||||
DELIMITER as CoreLibsDelimiter,
|
||||
QUOTECHAR as CoreLibsQuotechar,
|
||||
QUOTING as CoreLibsQuoting
|
||||
)
|
||||
|
||||
ENCODING = 'utf-8'
|
||||
ENCODING_UTF8_SIG = 'utf-8-sig'
|
||||
DELIMITER = ","
|
||||
QUOTECHAR = '"'
|
||||
|
||||
ENCODING = CoreLibsEncoding
|
||||
ENCODING_UTF8_SIG = CoreLibsEncodingUtf8Sig
|
||||
DELIMITER = CoreLibsDelimiter
|
||||
QUOTECHAR = CoreLibsQuotechar
|
||||
# type: _QuotingType
|
||||
QUOTING = csv.QUOTE_MINIMAL
|
||||
QUOTING = CoreLibsQuoting
|
||||
|
||||
|
||||
class CsvWriter:
|
||||
class CsvWriter(CoreLibsCsvWriter):
|
||||
"""
|
||||
write to a CSV file
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
file_name: Path,
|
||||
header_mapping: dict[str, str],
|
||||
header_order: list[str] | None = None,
|
||||
encoding: str = ENCODING,
|
||||
delimiter: str = DELIMITER,
|
||||
quotechar: str = QUOTECHAR,
|
||||
quoting: Any = QUOTING,
|
||||
):
|
||||
self.__file_name = file_name
|
||||
# Key: index for write for the line dict, Values: header entries
|
||||
self.header_mapping = header_mapping
|
||||
self.header: Sequence[str] = list(header_mapping.values())
|
||||
self.__delimiter = delimiter
|
||||
self.__quotechar = quotechar
|
||||
self.__quoting = quoting
|
||||
self.__encoding = encoding
|
||||
self.csv_file_writer = self.__open_csv(header_order)
|
||||
|
||||
def __open_csv(self, header_order: list[str] | None) -> csv.DictWriter[str]:
|
||||
"""
|
||||
open csv file for writing, write headers
|
||||
|
||||
Note that if there is no header_order set we use the order in header dictionary
|
||||
|
||||
Arguments:
|
||||
line {list[str] | None} -- optional dedicated header order
|
||||
|
||||
Returns:
|
||||
csv.DictWriter[str] | None: _description_
|
||||
"""
|
||||
# if header order is set, make sure all header value fields exist
|
||||
if not self.header:
|
||||
raise CsvHeaderDataMissing("No header data available to write CSV file")
|
||||
header_values = self.header
|
||||
if header_order is not None:
|
||||
if Counter(header_values) != Counter(header_order):
|
||||
raise CompulsoryCsvHeaderCheckFailed(
|
||||
"header order does not match header values: "
|
||||
f"{', '.join(header_values)} != {', '.join(header_order)}"
|
||||
)
|
||||
header_values = header_order
|
||||
# no duplicates
|
||||
if len(header_values) != len(set(header_values)):
|
||||
raise CompulsoryCsvHeaderCheckFailed(f"Header must have unique values only: {', '.join(header_values)}")
|
||||
try:
|
||||
fp = open(
|
||||
self.__file_name,
|
||||
"w",
|
||||
encoding=self.__encoding
|
||||
)
|
||||
csv_file_writer = csv.DictWriter(
|
||||
fp,
|
||||
fieldnames=header_values,
|
||||
delimiter=self.__delimiter,
|
||||
quotechar=self.__quotechar,
|
||||
quoting=self.__quoting,
|
||||
)
|
||||
csv_file_writer.writeheader()
|
||||
return csv_file_writer
|
||||
except OSError as err:
|
||||
raise NoCsvReader(f"Could not open CSV file for writing: {err}") from err
|
||||
|
||||
def write_csv(self, line: dict[str, str]) -> None:
|
||||
"""
|
||||
write member csv line
|
||||
|
||||
Arguments:
|
||||
line {dict[str, str]} -- _description_
|
||||
|
||||
Returns:
|
||||
bool -- _description_
|
||||
"""
|
||||
csv_row: dict[str, Any] = {}
|
||||
# only write entries that are in the header list
|
||||
for key, value in self.header_mapping.items():
|
||||
csv_row[value] = line[key]
|
||||
self.csv_file_writer.writerow(csv_row)
|
||||
|
||||
|
||||
class CsvReader:
|
||||
class CsvReader(CoreLibsCsvReader):
|
||||
"""
|
||||
read from a CSV file
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
file_name: Path,
|
||||
header_check: Sequence[str] | None = None,
|
||||
encoding: str = ENCODING,
|
||||
delimiter: str = DELIMITER,
|
||||
quotechar: str = QUOTECHAR,
|
||||
quoting: Any = QUOTING,
|
||||
):
|
||||
self.__file_name = file_name
|
||||
self.__header_check = header_check
|
||||
self.__delimiter = delimiter
|
||||
self.__quotechar = quotechar
|
||||
self.__quoting = quoting
|
||||
self.__encoding = encoding
|
||||
self.header: Sequence[str] | None = None
|
||||
self.csv_file_reader = self.__open_csv()
|
||||
|
||||
def __open_csv(self) -> csv.DictReader[str]:
|
||||
"""
|
||||
open csv file for reading
|
||||
|
||||
Returns:
|
||||
csv.DictReader | None: _description_
|
||||
"""
|
||||
try:
|
||||
# if UTF style check if this is BOM
|
||||
if self.__encoding.lower().startswith('utf-') and is_bom_encoded(self.__file_name):
|
||||
bom_info = is_bom_encoded_info(self.__file_name)
|
||||
if bom_info['encoding'] == 'utf-8':
|
||||
self.__encoding = ENCODING_UTF8_SIG
|
||||
else:
|
||||
self.__encoding = bom_info['encoding'] or self.__encoding
|
||||
fp = open(
|
||||
self.__file_name,
|
||||
"r", encoding=self.__encoding
|
||||
)
|
||||
csv_file_reader = csv.DictReader(
|
||||
fp,
|
||||
delimiter=self.__delimiter,
|
||||
quotechar=self.__quotechar,
|
||||
quoting=self.__quoting,
|
||||
)
|
||||
self.header = csv_file_reader.fieldnames
|
||||
if not self.header:
|
||||
raise CsvHeaderDataMissing("No header data available in CSV file")
|
||||
if self.__header_check is not None:
|
||||
header_diff = set(self.__header_check).difference(set(self.header or []))
|
||||
if header_diff:
|
||||
raise CompulsoryCsvHeaderCheckFailed(
|
||||
f"CSV header does not match expected header: {', '.join(header_diff)} missing"
|
||||
)
|
||||
return csv_file_reader
|
||||
except OSError as err:
|
||||
raise NoCsvReader(f"Could not open CSV file for reading: {err}") from err
|
||||
warn("Use corelibs_csv.csv_interface instead", DeprecationWarning, stacklevel=2)
|
||||
|
||||
# __END__
|
||||
|
||||
@@ -2,22 +2,39 @@
|
||||
Exceptions for csv file reading and processing
|
||||
"""
|
||||
|
||||
from warnings import warn
|
||||
from corelibs_csv.csv_exceptions import (
|
||||
NoCsvReader as CoreLibsNoCsvReader,
|
||||
CompulsoryCsvHeaderCheckFailed as CoreLibsCompulsoryCsvHeaderCheckFailed,
|
||||
CsvHeaderDataMissing as CoreLibsCsvHeaderDataMissing,
|
||||
CsvRowDataKeysNotMatching as CoreLibsCsvRowDataKeysNotMatching
|
||||
)
|
||||
|
||||
class NoCsvReader(Exception):
|
||||
|
||||
class NoCsvReader(CoreLibsNoCsvReader):
|
||||
"""
|
||||
CSV reader is none
|
||||
"""
|
||||
|
||||
|
||||
class CsvHeaderDataMissing(Exception):
|
||||
class CsvHeaderDataMissing(CoreLibsCsvHeaderDataMissing):
|
||||
"""
|
||||
The csv reader returned None as headers, the header column in the csv file is missing
|
||||
"""
|
||||
|
||||
|
||||
class CompulsoryCsvHeaderCheckFailed(Exception):
|
||||
class CompulsoryCsvHeaderCheckFailed(CoreLibsCompulsoryCsvHeaderCheckFailed):
|
||||
"""
|
||||
raise if the header does not match the expected values
|
||||
"""
|
||||
|
||||
|
||||
class CsvRowDataKeysNotMatching(CoreLibsCsvRowDataKeysNotMatching):
|
||||
"""
|
||||
raise if the row data keys do not match the expected header keys
|
||||
"""
|
||||
|
||||
|
||||
warn("Use corelibs_csv.csv_exceptions instead", DeprecationWarning, stacklevel=2)
|
||||
|
||||
# __END__
|
||||
|
||||
Reference in New Issue
Block a user