Move CSV handling to corelibs-csv

This commit is contained in:
Clemens Schwaighofer
2026-02-05 10:48:57 +09:00
parent f265b55ef8
commit 8bb4a202cd
4 changed files with 65 additions and 153 deletions

View File

@@ -47,10 +47,35 @@ This is a pip package that can be installed into any project and covers the foll
All content in this module will move to standalone libraries. As of now, the following entries have moved and will raise deprecation warnings if used:
- check_handling.regex_constants_compiled: corelibs-regex-checks
- check_handling.regex_constants: corelibs-regex-checks
- csv_handling.csv_interface: corelibs-csv
- datetime_handling.datetime_helpers: corelibs-datetime
- datetime_handling.timestamp_convert: corelibs-datetime
- datetime_handling.timestamp_strings: corelibs-datetime
- debug_handling.debug_helpers: corelibs-stack-trace
- debug_handling.dump_data: corelibs-dump-data
- debug_handling.profiling: corelibs-debug
- debug_handling.timer: corelibs-debug
- debug_handling.writeline: corelibs-debug
- encryption_handling.symmetrix_encryption: corelibs-encryption
- exceptions.csv_exceptions: corelibs-csv
- file_handling.file_bom_encoding: corelibs-file
- file_handling.file_crc: corelibs-file
- file_handling.file_handling: corelibs-file
- iterator_handling.data_search: corelibs-search
- iterator_handling.dict_helpers: corelibs-iterator
- iterator_handling.dict_mask: corelibs-dump-data
- iterator_handling.fingerprint: corelibs-hash
- iterator_handling.list_helpers: corelibs-iterator
- json_handling.jmespath_helper: corelibs-search
- json_handling.json_helper: corelibs-json
- math_handling.math_helpers: python.math
- requests_handling.auth_helpers: corelibs-requests
- requests_handling.caller: corelibs-requests
- string_handling.text_colors: corelibs-text-colors
- var_handling.enum_base: corelibs-enum-base
- var_handling.var_helpers: corelibs-var
- datetime_handling: corelibs-datetime
- string_handling.text_colors: corelibs-text-colors
## UV setup

View File

@@ -6,6 +6,7 @@ description = "Collection of utils for Python scripts"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"corelibs-csv>=1.0.0",
"corelibs-datetime>=1.0.1",
"corelibs-debug>=1.0.0",
"corelibs-dump-data>=1.0.0",

View File

@@ -3,168 +3,37 @@ Write to CSV file
- each class set is one file write with one header set
"""
from typing import Any, Sequence
from pathlib import Path
from collections import Counter
import csv
from corelibs.file_handling.file_bom_encoding import is_bom_encoded, is_bom_encoded_info
from corelibs.exceptions.csv_exceptions import (
NoCsvReader, CompulsoryCsvHeaderCheckFailed, CsvHeaderDataMissing
from warnings import warn
from corelibs_csv.csv_interface import (
CsvReader as CoreLibsCsvReader, CsvWriter as CoreLibsCsvWriter,
ENCODING as CoreLibsEncoding,
ENCODING_UTF8_SIG as CoreLibsEncodingUtf8Sig,
DELIMITER as CoreLibsDelimiter,
QUOTECHAR as CoreLibsQuotechar,
QUOTING as CoreLibsQuoting
)
ENCODING = 'utf-8'
ENCODING_UTF8_SIG = 'utf-8-sig'
DELIMITER = ","
QUOTECHAR = '"'
ENCODING = CoreLibsEncoding
ENCODING_UTF8_SIG = CoreLibsEncodingUtf8Sig
DELIMITER = CoreLibsDelimiter
QUOTECHAR = CoreLibsQuotechar
# type: _QuotingType
QUOTING = csv.QUOTE_MINIMAL
QUOTING = CoreLibsQuoting
class CsvWriter:
class CsvWriter(CoreLibsCsvWriter):
"""
write to a CSV file
"""
def __init__(
self,
file_name: Path,
header_mapping: dict[str, str],
header_order: list[str] | None = None,
encoding: str = ENCODING,
delimiter: str = DELIMITER,
quotechar: str = QUOTECHAR,
quoting: Any = QUOTING,
):
self.__file_name = file_name
# Key: index for write for the line dict, Values: header entries
self.header_mapping = header_mapping
self.header: Sequence[str] = list(header_mapping.values())
self.__delimiter = delimiter
self.__quotechar = quotechar
self.__quoting = quoting
self.__encoding = encoding
self.csv_file_writer = self.__open_csv(header_order)
def __open_csv(self, header_order: list[str] | None) -> csv.DictWriter[str]:
"""
open csv file for writing, write headers
Note that if there is no header_order set we use the order in header dictionary
Arguments:
line {list[str] | None} -- optional dedicated header order
Returns:
csv.DictWriter[str] | None: _description_
"""
# if header order is set, make sure all header value fields exist
if not self.header:
raise CsvHeaderDataMissing("No header data available to write CSV file")
header_values = self.header
if header_order is not None:
if Counter(header_values) != Counter(header_order):
raise CompulsoryCsvHeaderCheckFailed(
"header order does not match header values: "
f"{', '.join(header_values)} != {', '.join(header_order)}"
)
header_values = header_order
# no duplicates
if len(header_values) != len(set(header_values)):
raise CompulsoryCsvHeaderCheckFailed(f"Header must have unique values only: {', '.join(header_values)}")
try:
fp = open(
self.__file_name,
"w",
encoding=self.__encoding
)
csv_file_writer = csv.DictWriter(
fp,
fieldnames=header_values,
delimiter=self.__delimiter,
quotechar=self.__quotechar,
quoting=self.__quoting,
)
csv_file_writer.writeheader()
return csv_file_writer
except OSError as err:
raise NoCsvReader(f"Could not open CSV file for writing: {err}") from err
def write_csv(self, line: dict[str, str]) -> None:
"""
write member csv line
Arguments:
line {dict[str, str]} -- _description_
Returns:
bool -- _description_
"""
csv_row: dict[str, Any] = {}
# only write entries that are in the header list
for key, value in self.header_mapping.items():
csv_row[value] = line[key]
self.csv_file_writer.writerow(csv_row)
class CsvReader:
class CsvReader(CoreLibsCsvReader):
"""
read from a CSV file
"""
def __init__(
self,
file_name: Path,
header_check: Sequence[str] | None = None,
encoding: str = ENCODING,
delimiter: str = DELIMITER,
quotechar: str = QUOTECHAR,
quoting: Any = QUOTING,
):
self.__file_name = file_name
self.__header_check = header_check
self.__delimiter = delimiter
self.__quotechar = quotechar
self.__quoting = quoting
self.__encoding = encoding
self.header: Sequence[str] | None = None
self.csv_file_reader = self.__open_csv()
def __open_csv(self) -> csv.DictReader[str]:
"""
open csv file for reading
Returns:
csv.DictReader | None: _description_
"""
try:
# if UTF style check if this is BOM
if self.__encoding.lower().startswith('utf-') and is_bom_encoded(self.__file_name):
bom_info = is_bom_encoded_info(self.__file_name)
if bom_info['encoding'] == 'utf-8':
self.__encoding = ENCODING_UTF8_SIG
else:
self.__encoding = bom_info['encoding'] or self.__encoding
fp = open(
self.__file_name,
"r", encoding=self.__encoding
)
csv_file_reader = csv.DictReader(
fp,
delimiter=self.__delimiter,
quotechar=self.__quotechar,
quoting=self.__quoting,
)
self.header = csv_file_reader.fieldnames
if not self.header:
raise CsvHeaderDataMissing("No header data available in CSV file")
if self.__header_check is not None:
header_diff = set(self.__header_check).difference(set(self.header or []))
if header_diff:
raise CompulsoryCsvHeaderCheckFailed(
f"CSV header does not match expected header: {', '.join(header_diff)} missing"
)
return csv_file_reader
except OSError as err:
raise NoCsvReader(f"Could not open CSV file for reading: {err}") from err
warn("Use corelibs_csv.csv_interface instead", DeprecationWarning, stacklevel=2)
# __END__

View File

@@ -2,22 +2,39 @@
Exceptions for csv file reading and processing
"""
from warnings import warn
from corelibs_csv.csv_exceptions import (
NoCsvReader as CoreLibsNoCsvReader,
CompulsoryCsvHeaderCheckFailed as CoreLibsCompulsoryCsvHeaderCheckFailed,
CsvHeaderDataMissing as CoreLibsCsvHeaderDataMissing,
CsvRowDataKeysNotMatching as CoreLibsCsvRowDataKeysNotMatching
)
class NoCsvReader(Exception):
class NoCsvReader(CoreLibsNoCsvReader):
"""
CSV reader is none
"""
class CsvHeaderDataMissing(Exception):
class CsvHeaderDataMissing(CoreLibsCsvHeaderDataMissing):
"""
The csv reader returned None as headers, the header column in the csv file is missing
"""
class CompulsoryCsvHeaderCheckFailed(Exception):
class CompulsoryCsvHeaderCheckFailed(CoreLibsCompulsoryCsvHeaderCheckFailed):
"""
raise if the header does not match the expected values
"""
class CsvRowDataKeysNotMatching(CoreLibsCsvRowDataKeysNotMatching):
"""
raise if the row data keys do not match the expected header keys
"""
warn("Use corelibs_csv.csv_exceptions instead", DeprecationWarning, stacklevel=2)
# __END__