From b3616269bc2e20c6ff3d56c67061b0570468848f Mon Sep 17 00:00:00 2001 From: Clemens Schwaighofer Date: Fri, 24 Oct 2025 14:43:29 +0900 Subject: [PATCH] csv writer to csv interface with reader class But this is more for reference and should not be considered final Missing things are like - all values to private - reader interface to parts - value check for delimiter, quotechar, etc --- src/corelibs/csv_handling/csv_interface.py | 148 +++++++++++++++++++++ src/corelibs/csv_handling/csv_writer.py | 93 ------------- 2 files changed, 148 insertions(+), 93 deletions(-) create mode 100644 src/corelibs/csv_handling/csv_interface.py delete mode 100644 src/corelibs/csv_handling/csv_writer.py diff --git a/src/corelibs/csv_handling/csv_interface.py b/src/corelibs/csv_handling/csv_interface.py new file mode 100644 index 0000000..cafecdb --- /dev/null +++ b/src/corelibs/csv_handling/csv_interface.py @@ -0,0 +1,148 @@ +""" +Write to CSV file +- each class set is one file write with one header set +""" + +from typing import Any, Sequence +from pathlib import Path +from collections import Counter +import csv + +DELIMITER = "," +QUOTECHAR = '"' +# type: _QuotingType +QUOTING = csv.QUOTE_MINIMAL + + +class CsvWriter: + """ + write to a CSV file + """ + + def __init__( + self, + file_name: Path, + header_mapping: dict[str, str], + header_order: list[str] | None = None, + delimiter: str = DELIMITER, + quotechar: str = QUOTECHAR, + quoting: Any = QUOTING, + ): + self.__file_name = file_name + # Key: index for write for the line dict, Values: header entries + self.header_mapping = header_mapping + self.header: Sequence[str] = list(header_mapping.values()) + self.__delimiter = delimiter + self.__quotechar = quotechar + self.__quoting = quoting + self.csv_file_writer = self.__open_csv(header_order) + + def __open_csv(self, header_order: list[str] | None) -> csv.DictWriter[str]: + """ + open csv file for writing, write headers + + Note that if there is no header_order set we use the order in header dictionary + + Arguments: + line {list[str] | None} -- optional dedicated header order + + Returns: + csv.DictWriter[str] | None: _description_ + """ + # if header order is set, make sure all header value fields exist + header_values = self.header + if header_order is not None: + if Counter(header_values) != Counter(header_order): + raise ValueError( + "header order does not match header values: " + f"{', '.join(header_values)} != {', '.join(header_order)}" + ) + header_values = header_order + # no duplicates + if len(header_values) != len(set(header_values)): + raise ValueError(f"Header must have unique values only: {', '.join(header_values)}") + try: + fp = open( + self.__file_name, + "w", encoding="utf-8" + ) + csv_file_writer = csv.DictWriter( + fp, + fieldnames=header_values, + delimiter=self.__delimiter, + quotechar=self.__quotechar, + quoting=self.__quoting, + ) + csv_file_writer.writeheader() + return csv_file_writer + except OSError as err: + raise ValueError(f"Could not open CSV file for writing: {err}") from err + + def write_csv(self, line: dict[str, str]) -> None: + """ + write member csv line + + Arguments: + line {dict[str, str]} -- _description_ + + Returns: + bool -- _description_ + """ + csv_row: dict[str, Any] = {} + # only write entries that are in the header list + for key, value in self.header_mapping.items(): + csv_row[value] = line[key] + self.csv_file_writer.writerow(csv_row) + + +class CsvReader: + """ + read from a CSV file + """ + + def __init__( + self, + file_name: Path, + header_check: Sequence[str] | None = None, + delimiter: str = DELIMITER, + quotechar: str = QUOTECHAR, + quoting: Any = QUOTING, + ): + self.__file_name = file_name + self.__header_check = header_check + self.__delimiter = delimiter + self.__quotechar = quotechar + self.__quoting = quoting + self.header: Sequence[str] | None = None + self.csv_file_reader = self.__open_csv() + + def __open_csv(self) -> csv.DictReader[str]: + """ + open csv file for reading + + Returns: + csv.DictReader | None: _description_ + """ + try: + fp = open( + self.__file_name, + "r", encoding="utf-8" + ) + csv_file_reader = csv.DictReader( + fp, + delimiter=self.__delimiter, + quotechar=self.__quotechar, + quoting=self.__quoting, + ) + self.header = csv_file_reader.fieldnames + if self.__header_check is not None: + header_diff = set(self.__header_check).difference(set(self.header or [])) + if header_diff: + raise ValueError( + f"CSV header does not match expected header: {', '.join(header_diff)} missing" + ) + return csv_file_reader + except OSError as err: + raise ValueError(f"Could not open CSV file for reading: {err}") from err + +# __END__ diff --git a/src/corelibs/csv_handling/csv_writer.py b/src/corelibs/csv_handling/csv_writer.py deleted file mode 100644 index 3ca9bdd..0000000 --- a/src/corelibs/csv_handling/csv_writer.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -Write to CSV file -- each class set is one file write with one header set -""" - -from typing import Any -from pathlib import Path -from collections import Counter -import csv - - -class CsvWriter: - """ - write to a CSV file - """ - - def __init__( - self, - path: Path, - file_name: str, - header: dict[str, str], - header_order: list[str] | None = None - ): - self.path = path - self.file_name = file_name - # Key: index for write for the line dict, Values: header entries - self.header = header - self.csv_file_writer = self.__open_csv(header_order) - - def __open_csv(self, header_order: list[str] | None) -> 'csv.DictWriter[str] | None': - """ - open csv file for writing, write headers - - Note that if there is no header_order set we use the order in header dictionary - - Arguments: - line {list[str] | None} -- optional dedicated header order - - Returns: - csv.DictWriter[str] | None: _description_ - """ - # if header order is set, make sure all header value fields exist - header_values = self.header.values() - if header_order is not None: - if Counter(header_values) != Counter(header_order): - print( - "header order does not match header values: " - f"{', '.join(header_values)} != {', '.join(header_order)}" - ) - return None - header_values = header_order - # no duplicates - if len(header_values) != len(set(header_values)): - print(f"Header must have unique values only: {', '.join(header_values)}") - return None - try: - fp = open( - self.path.joinpath(self.file_name), - "w", encoding="utf-8" - ) - csv_file_writer = csv.DictWriter( - fp, - fieldnames=header_values, - delimiter=",", - quotechar='"', - quoting=csv.QUOTE_MINIMAL, - ) - csv_file_writer.writeheader() - return csv_file_writer - except OSError as err: - print("OS error:", err) - return None - - def write_csv(self, line: dict[str, str]) -> bool: - """ - write member csv line - - Arguments: - line {dict[str, str]} -- _description_ - - Returns: - bool -- _description_ - """ - if self.csv_file_writer is None: - return False - csv_row: dict[str, Any] = {} - # only write entries that are in the header list - for key, value in self.header.items(): - csv_row[value] = line[key] - self.csv_file_writer.writerow(csv_row) - return True - -# __END__