Update on the CSV helper class with UTF detection for BOM reading
This commit is contained in:
@@ -7,10 +7,13 @@ from typing import Any, Sequence
|
||||
from pathlib import Path
|
||||
from collections import Counter
|
||||
import csv
|
||||
from corelibs.file_handling.file_bom_encoding import is_bom_encoded, is_bom_encoded_info
|
||||
from corelibs.exceptions.csv_exceptions import (
|
||||
NoCsvReader, CompulsoryCsvHeaderCheckFailed, CsvHeaderDataMissing
|
||||
)
|
||||
|
||||
ENCODING = 'utf-8'
|
||||
ENCODING_UTF8_SIG = 'utf-8-sig'
|
||||
DELIMITER = ","
|
||||
QUOTECHAR = '"'
|
||||
# type: _QuotingType
|
||||
@@ -27,6 +30,7 @@ class CsvWriter:
|
||||
file_name: Path,
|
||||
header_mapping: dict[str, str],
|
||||
header_order: list[str] | None = None,
|
||||
encoding: str = ENCODING,
|
||||
delimiter: str = DELIMITER,
|
||||
quotechar: str = QUOTECHAR,
|
||||
quoting: Any = QUOTING,
|
||||
@@ -38,6 +42,7 @@ class CsvWriter:
|
||||
self.__delimiter = delimiter
|
||||
self.__quotechar = quotechar
|
||||
self.__quoting = quoting
|
||||
self.__encoding = encoding
|
||||
self.csv_file_writer = self.__open_csv(header_order)
|
||||
|
||||
def __open_csv(self, header_order: list[str] | None) -> csv.DictWriter[str]:
|
||||
@@ -69,7 +74,8 @@ class CsvWriter:
|
||||
try:
|
||||
fp = open(
|
||||
self.__file_name,
|
||||
"w", encoding="utf-8"
|
||||
"w",
|
||||
encoding=self.__encoding
|
||||
)
|
||||
csv_file_writer = csv.DictWriter(
|
||||
fp,
|
||||
@@ -109,6 +115,7 @@ class CsvReader:
|
||||
self,
|
||||
file_name: Path,
|
||||
header_check: Sequence[str] | None = None,
|
||||
encoding: str = ENCODING,
|
||||
delimiter: str = DELIMITER,
|
||||
quotechar: str = QUOTECHAR,
|
||||
quoting: Any = QUOTING,
|
||||
@@ -118,6 +125,7 @@ class CsvReader:
|
||||
self.__delimiter = delimiter
|
||||
self.__quotechar = quotechar
|
||||
self.__quoting = quoting
|
||||
self.__encoding = encoding
|
||||
self.header: Sequence[str] | None = None
|
||||
self.csv_file_reader = self.__open_csv()
|
||||
|
||||
@@ -129,9 +137,16 @@ class CsvReader:
|
||||
csv.DictReader | None: _description_
|
||||
"""
|
||||
try:
|
||||
# if UTF style check if this is BOM
|
||||
if self.__encoding.lower().startswith('utf-') and is_bom_encoded(self.__file_name):
|
||||
bom_info = is_bom_encoded_info(self.__file_name)
|
||||
if bom_info['encoding'] == 'utf-8':
|
||||
self.__encoding = ENCODING_UTF8_SIG
|
||||
else:
|
||||
self.__encoding = bom_info['encoding'] or self.__encoding
|
||||
fp = open(
|
||||
self.__file_name,
|
||||
"r", encoding="utf-8"
|
||||
"r", encoding=self.__encoding
|
||||
)
|
||||
csv_file_reader = csv.DictReader(
|
||||
fp,
|
||||
|
||||
Reference in New Issue
Block a user