csv writer to csv interface with reader class

But this is more for reference and should not be considered final
Missing things are like
- all values to private
- reader interface to parts
- value check for delimiter, quotechar, etc
This commit is contained in:
Clemens Schwaighofer
2025-10-24 14:43:29 +09:00
parent 4fa22813ce
commit b3616269bc
2 changed files with 148 additions and 93 deletions

View File

@@ -0,0 +1,148 @@
"""
Write to CSV file
- each class set is one file write with one header set
"""
from typing import Any, Sequence
from pathlib import Path
from collections import Counter
import csv
DELIMITER = ","
QUOTECHAR = '"'
# type: _QuotingType
QUOTING = csv.QUOTE_MINIMAL
class CsvWriter:
"""
write to a CSV file
"""
def __init__(
self,
file_name: Path,
header_mapping: dict[str, str],
header_order: list[str] | None = None,
delimiter: str = DELIMITER,
quotechar: str = QUOTECHAR,
quoting: Any = QUOTING,
):
self.__file_name = file_name
# Key: index for write for the line dict, Values: header entries
self.header_mapping = header_mapping
self.header: Sequence[str] = list(header_mapping.values())
self.__delimiter = delimiter
self.__quotechar = quotechar
self.__quoting = quoting
self.csv_file_writer = self.__open_csv(header_order)
def __open_csv(self, header_order: list[str] | None) -> csv.DictWriter[str]:
"""
open csv file for writing, write headers
Note that if there is no header_order set we use the order in header dictionary
Arguments:
line {list[str] | None} -- optional dedicated header order
Returns:
csv.DictWriter[str] | None: _description_
"""
# if header order is set, make sure all header value fields exist
header_values = self.header
if header_order is not None:
if Counter(header_values) != Counter(header_order):
raise ValueError(
"header order does not match header values: "
f"{', '.join(header_values)} != {', '.join(header_order)}"
)
header_values = header_order
# no duplicates
if len(header_values) != len(set(header_values)):
raise ValueError(f"Header must have unique values only: {', '.join(header_values)}")
try:
fp = open(
self.__file_name,
"w", encoding="utf-8"
)
csv_file_writer = csv.DictWriter(
fp,
fieldnames=header_values,
delimiter=self.__delimiter,
quotechar=self.__quotechar,
quoting=self.__quoting,
)
csv_file_writer.writeheader()
return csv_file_writer
except OSError as err:
raise ValueError(f"Could not open CSV file for writing: {err}") from err
def write_csv(self, line: dict[str, str]) -> None:
"""
write member csv line
Arguments:
line {dict[str, str]} -- _description_
Returns:
bool -- _description_
"""
csv_row: dict[str, Any] = {}
# only write entries that are in the header list
for key, value in self.header_mapping.items():
csv_row[value] = line[key]
self.csv_file_writer.writerow(csv_row)
class CsvReader:
"""
read from a CSV file
"""
def __init__(
self,
file_name: Path,
header_check: Sequence[str] | None = None,
delimiter: str = DELIMITER,
quotechar: str = QUOTECHAR,
quoting: Any = QUOTING,
):
self.__file_name = file_name
self.__header_check = header_check
self.__delimiter = delimiter
self.__quotechar = quotechar
self.__quoting = quoting
self.header: Sequence[str] | None = None
self.csv_file_reader = self.__open_csv()
def __open_csv(self) -> csv.DictReader[str]:
"""
open csv file for reading
Returns:
csv.DictReader | None: _description_
"""
try:
fp = open(
self.__file_name,
"r", encoding="utf-8"
)
csv_file_reader = csv.DictReader(
fp,
delimiter=self.__delimiter,
quotechar=self.__quotechar,
quoting=self.__quoting,
)
self.header = csv_file_reader.fieldnames
if self.__header_check is not None:
header_diff = set(self.__header_check).difference(set(self.header or []))
if header_diff:
raise ValueError(
f"CSV header does not match expected header: {', '.join(header_diff)} missing"
)
return csv_file_reader
except OSError as err:
raise ValueError(f"Could not open CSV file for reading: {err}") from err
# __END__

View File

@@ -1,93 +0,0 @@
"""
Write to CSV file
- each class set is one file write with one header set
"""
from typing import Any
from pathlib import Path
from collections import Counter
import csv
class CsvWriter:
"""
write to a CSV file
"""
def __init__(
self,
path: Path,
file_name: str,
header: dict[str, str],
header_order: list[str] | None = None
):
self.path = path
self.file_name = file_name
# Key: index for write for the line dict, Values: header entries
self.header = header
self.csv_file_writer = self.__open_csv(header_order)
def __open_csv(self, header_order: list[str] | None) -> 'csv.DictWriter[str] | None':
"""
open csv file for writing, write headers
Note that if there is no header_order set we use the order in header dictionary
Arguments:
line {list[str] | None} -- optional dedicated header order
Returns:
csv.DictWriter[str] | None: _description_
"""
# if header order is set, make sure all header value fields exist
header_values = self.header.values()
if header_order is not None:
if Counter(header_values) != Counter(header_order):
print(
"header order does not match header values: "
f"{', '.join(header_values)} != {', '.join(header_order)}"
)
return None
header_values = header_order
# no duplicates
if len(header_values) != len(set(header_values)):
print(f"Header must have unique values only: {', '.join(header_values)}")
return None
try:
fp = open(
self.path.joinpath(self.file_name),
"w", encoding="utf-8"
)
csv_file_writer = csv.DictWriter(
fp,
fieldnames=header_values,
delimiter=",",
quotechar='"',
quoting=csv.QUOTE_MINIMAL,
)
csv_file_writer.writeheader()
return csv_file_writer
except OSError as err:
print("OS error:", err)
return None
def write_csv(self, line: dict[str, str]) -> bool:
"""
write member csv line
Arguments:
line {dict[str, str]} -- _description_
Returns:
bool -- _description_
"""
if self.csv_file_writer is None:
return False
csv_row: dict[str, Any] = {}
# only write entries that are in the header list
for key, value in self.header.items():
csv_row[value] = line[key]
self.csv_file_writer.writerow(csv_row)
return True
# __END__