Move list_dict_handling to iterator_handling, add settings parser, add list helpers, add some string helpers

list_helpers: convert to list (any input, output is always a list); compare two lists and check which elements from A are not in B (type safe). string helpers: add is_int and is_float checkers; add string to bool converter for true/True/false/False strings. config reader with parsing and checking: the simple config reader is now in the corelibs with the basic content check, convert to list for entries, convert to value for entries, etc. log updates: add Log type Enum for better log level checks and conversion; add a get int for the requested log level, which returns the default if not found; make the validate log level a static function. Add tests for list helpers and new string helpers.
2025-07-11 10:58:35 +09:00
parent 19d7e9b5ed
commit c559a6bafb
21 changed files with 1332 additions and 6 deletions
--- a/src/corelibs/list_dict_handling/init.py
+++ b/src/corelibs/list_dict_handling/init.py
--- a/src/corelibs/config_handling/settings_loader.py
+++ b/src/corelibs/config_handling/settings_loader.py
@@ -0,0 +1,507 @@
+"""
+Load settings file for a certain group
+Check data for existing and valid
+Additional check for override settings as arguments
+"""
+
+import re
+import sys
+import configparser
+from typing import Any, Tuple, Sequence, cast
+from pathlib import Path
+from corelibs.logging_handling.log import Log
+from corelibs.iterator_handling.list_helpers import convert_to_list, is_list_in_list
+from corelibs.string_handling.string_helpers import is_int, is_float, str_to_bool
+from corelibs.config_handling.settings_loader_handling.settings_loader_check import SettingsLoaderCheck
+
+
+class SettingsLoader:
+    """
+    Settings Loader with Argument parser
+    """
+
+    # split char
+    DEFAULT_ELEMENT_SPLIT_CHAR: str = ','
+
+    CONVERT_TO_LIST: list[str] = ['str', 'int', 'float', 'bool', 'auto']
+
+    def __init__(
+        self,
+        args: dict[str, Any],
+        config_file: Path,
+        log: 'Log | None' = None,
+        always_print: bool = False
+    ) -> None:
+        """
+        Set up the settings loader and read the config file if it exists
+
+        Args:
+            args (dict): Script arguments
+            config_file (Path): config file including path
+            log (Log | None): Log class, if set messages are written to this
+            always_print (bool): Set to true to always print messages, even if Log is available
+
+        Raises:
+            ValueError: if the folder of the config file does not exist
+        """
+        # the folder of the config file must exist, a missing file only leaves the parser unset
+        config_folder = config_file.parent
+        if not config_folder.is_dir():
+            raise ValueError(f"Cannot find the config folder: {config_folder}")
+        self.args = args
+        self.config_file = config_file
+        self.log = log
+        self.always_print = always_print
+        # split character per entry that has to be split
+        self.entry_split_char: dict[str, str] = {}
+        # convert type per entry
+        self.entry_convert: dict[str, str] = {}
+        # all settings
+        self.settings: dict[str, dict[str, Any]] | None = None
+        # config parser, None if the config file does not exist
+        self.config_parser: configparser.ConfigParser | None = self.__load_config_file()
+
+    def load_settings(self, config_id: str, config_validate: dict[str, list[str]]) -> dict[str, Any]:
+        """
+        neutral settings loader
+
+        Reads the [config_id] block from the config file, lets set command line arguments
+        override single entries, validates all entries and converts the values
+
+        The settings values on the right side are only turned into a list if a "split:" check
+        is set for the key (see DEFAULT_ELEMENT_SPLIT_CHAR for the fallback split character)
+
+        for the allowed entries set, each set is "key => checks", check set is "check type:settings"
+        key: the key name in the settings file
+        check: check set with the following allowed entries on the left side for type
+        - mandatory: must be set as "mandatory:yes", if the key entry is missing or empty throws error
+        - check: see __check_settings for the settings currently available
+        - matching: a | list of entries where the value has to match too
+        - in: the right side is another KEY value from the settings where this value must be inside
+        - length: string length as n (equal), n- (minimum), -m (maximum) or n-m (from/to)
+        - range: number range, same notation as length
+        - split: character to split entries with, empty falls back to the default split character
+        - convert: one of CONVERT_TO_LIST, string values that fit are converted, others are left as is
+
+        Args:
+            config_id (str): what block to load
+            config_validate (dict[str, list[str]]): key => list of checks
+
+        Returns:
+            dict[str, Any]: key = value list, values are strings, lists or converted values
+
+        Raises:
+            ValueError: on invalid check setup, unreadable block or missing/incorrect settings data
+        """
+        settings: dict[str, dict[str, Any]] = {
+            config_id: {},
+        }
+        if self.config_parser is not None:
+            try:
+                # load all data as is, validation is done afterwards
+                settings[config_id] = dict(self.config_parser[config_id])
+            except KeyError as e:
+                self.__print(
+                    f"[!] Cannot read [{config_id}] block in the {self.config_file}: {e}",
+                    'CRITICAL', raise_exception=True
+                )
+                # never reached, __print raises; kept as in the rest of the class
+                sys.exit(1)
+            for key, checks in config_validate.items():
+                split_char = self.__read_entry_setup(config_id, key, checks)
+                # only entries with a split setup that exist in the block are turned into lists,
+                # missing entries are handled by the mandatory check below
+                if split_char is None or key not in settings[config_id]:
+                    continue
+                settings[config_id][key] = [
+                    value.replace(" ", "")
+                    for value in settings[config_id][key].split(split_char)
+                ]
+        elif not self.__check_arguments(config_validate, True):
+            # without a config file all mandatory entries have to be set as arguments
+            self.__print(f"[!] Cannot find file: {self.config_file}", 'CRITICAL', raise_exception=True)
+            sys.exit(1)
+        # make sure all are set
+        # if we have arguments set, this override config settings
+        error: bool = False
+        for entry, validate in config_validate.items():
+            # if we have command line option set, this one overrides config
+            if self.__get_arg(entry):
+                self.__print(f"[*] Command line option override for: {entry}", 'WARNING')
+                settings[config_id][entry] = self.args.get(entry)
+            # validate checks
+            for check in validate:
+                # CHECKS
+                # - mandatory
+                # - check: regex check (see SettingsLoaderCheck class for entries)
+                # - matching: entry in given list
+                # - in: entry in other setting entry list
+                # - length: for string length
+                # - range: for int/float range check
+                # mandatory check
+                if check == "mandatory:yes" and not settings[config_id].get(entry):
+                    error = True
+                    self.__print(f"[!] Missing content entry for: {entry}", 'ERROR')
+                # skip if empty none
+                if settings[config_id].get(entry) is None:
+                    continue
+                if check.startswith("check:"):
+                    # replace the value with the cleaned up value from the check
+                    settings[config_id][entry] = self.__check_settings(
+                        check, entry, settings[config_id][entry]
+                    )
+                elif check.startswith("matching:"):
+                    allowed = check.removeprefix("matching:").split("|")
+                    if not_matching := is_list_in_list(convert_to_list(settings[config_id][entry]), list(allowed)):
+                        error = True
+                        self.__print(f"[!] [{entry}] '{not_matching}' not matching {allowed}", 'ERROR')
+                elif check.startswith("in:"):
+                    target = check.removeprefix("in:")
+                    # skip if the target entry does not exist, and set error
+                    if settings[config_id].get(target) is None:
+                        error = True
+                        self.__print(f"[!] [{entry}] '{target}' target does not exist", 'ERROR')
+                        continue
+                    # all values of the entry must be in the target entry values
+                    target_values = convert_to_list(settings[config_id][target])
+                    if not_in := is_list_in_list(convert_to_list(settings[config_id][entry]), target_values):
+                        self.__print(
+                            f"[!] [{entry}] '{not_in}' must be in the '{target_values}' values list", 'ERROR'
+                        )
+                        error = True
+                elif check.startswith('length:'):
+                    # length can be: n, n-, n-m, -m
+                    # as: equal, >= >=< =<
+                    if not self.__length_range_validate(
+                        entry,
+                        'length',
+                        cast(list[str], convert_to_list(settings[config_id][entry])),
+                        self.__build_from_to_equal(entry, check.removeprefix("length:"), convert_to_int=True)
+                    ):
+                        error = True
+                elif check.startswith('range:'):
+                    if not self.__length_range_validate(
+                        entry,
+                        'range',
+                        cast(list[str], convert_to_list(settings[config_id][entry])),
+                        self.__build_from_to_equal(entry, check.removeprefix("range:"))
+                    ):
+                        error = True
+        if error is True:
+            self.__print("[!] Missing or incorrect settings data. Cannot proceed", 'CRITICAL', raise_exception=True)
+            sys.exit(1)
+        # Convert input, entry_convert can hold keys from earlier calls, so only use current ones
+        for entry, convert_type in self.entry_convert.items():
+            if entry not in config_validate or settings[config_id].get(entry) is None:
+                continue
+            settings[config_id][entry] = self.__convert_value(settings[config_id][entry], convert_type)
+
+        return settings[config_id]
+
+    def __read_entry_setup(self, config_id: str, key: str, checks: list[str]) -> str | None:
+        """
+        read the "convert:" and "split:" setup entries for one key
+
+        The convert type is stored in entry_convert, the split character in entry_split_char
+
+        Args:
+            config_id (str): block name, for messages only
+            key (str): settings key the checks belong to
+            checks (list[str]): all checks for this key
+
+        Returns:
+            str | None: split character if the entry has to be split, else None
+
+        Raises:
+            ValueError: if a convert or split setup is malformed or the convert type is unknown
+        """
+        split_char: str | None = None
+        for check in checks:
+            if check.startswith("convert:"):
+                try:
+                    [_, convert_to] = check.split(":")
+                except ValueError as e:
+                    self.__print(
+                        f"[!] In [{config_id}] the convert type setup for entry failed: {check}: {e}",
+                        'CRITICAL',
+                        raise_exception=True
+                    )
+                    sys.exit(1)
+                # checked outside of the try so this error is not caught and reported twice
+                if convert_to not in self.CONVERT_TO_LIST:
+                    self.__print(
+                        f"[!] In [{config_id}] the convert type is invalid {check}: {convert_to}",
+                        'CRITICAL',
+                        raise_exception=True
+                    )
+                self.entry_convert[key] = convert_to
+            # split char, the first one in the checks list wins
+            elif check.startswith("split:") and split_char is None:
+                try:
+                    [_, split_char] = check.split(":")
+                except ValueError as e:
+                    self.__print(
+                        f"[!] In [{config_id}] the split character setup for entry failed: {check}: {e}",
+                        'CRITICAL',
+                        raise_exception=True
+                    )
+                    sys.exit(1)
+                if not split_char:
+                    self.__print(
+                        (
+                            f"[*] In [{config_id}] the [{key}] split char character is empty, "
+                            f"fallback to: {self.DEFAULT_ELEMENT_SPLIT_CHAR}"
+                        ),
+                        "WARNING"
+                    )
+                    split_char = self.DEFAULT_ELEMENT_SPLIT_CHAR
+                self.entry_split_char[key] = split_char
+        return split_char
+
+    def __convert_value(self, value: Any, convert_type: str) -> Any:
+        """
+        convert a settings value to the requested type
+
+        Args:
+            value (Any): settings value, lists are converted element by element
+            convert_type (str): one of CONVERT_TO_LIST
+
+        Returns:
+            Any: converted value, or the value as is if it does not fit the convert type
+        """
+        if isinstance(value, list):
+            return [self.__convert_value(item, convert_type) for item in value]
+        # only strings are converted, already typed values (eg from arguments) stay as they are
+        if not isinstance(value, str):
+            return value
+        if convert_type in ("int", "auto") and is_int(value):
+            return int(value)
+        if convert_type in ("float", "auto") and is_float(value):
+            return float(value)
+        if convert_type in ("bool", "auto") and value in ("true", "True", "false", "False"):
+            return str_to_bool(value)
+        # string is always string
+        return value
+
+    def __build_from_to_equal(
+        self, entry: str, check: str, convert_to_int: bool = False
+    ) -> Tuple[float | None, float | None, float | None]:
+        """
+        split out the "n-m" part to get the from/to/equal limits
+
+        Allowed notations are n (equal), n- (from only), -m (to only) and n-m (from and to)
+        As "-" is the separator, negative numbers cannot be used as limits
+
+        Arguments:
+            entry {str} -- settings key, for messages only
+            check {str} -- limit definition without the check type prefix
+
+        Keyword Arguments:
+            convert_to_int {bool} -- limits must be int and are returned as int (default: {False})
+
+        Returns:
+            Tuple[float | None, float | None, float | None] -- (from, to, equal), not set parts are None
+
+        Throws:
+            ValueError if range/length entries are not numbers
+        """
+        # with convert_to_int the limits must be valid int strings, or int() fails after the check
+        is_valid = is_int if convert_to_int else is_float
+        convert = int if convert_to_int else float
+        parts = check.split('-')
+        if len(parts) == 2:
+            [raw_from, raw_to] = parts
+            # empty parts are open limits, filled parts must be numbers
+            if (raw_from and not is_valid(raw_from)) or (raw_to and not is_valid(raw_to)):
+                self.__print(
+                    f"[{entry}] Check value for length is not in: {check}",
+                    'CRITICAL', raise_exception=True
+                )
+                # never reached, __print raises
+                sys.exit(1)
+            return (
+                convert(raw_from) if raw_from else None,
+                convert(raw_to) if raw_to else None,
+                None
+            )
+        # anything that is not exactly "from-to" has to be a single number
+        if not is_valid(check):
+            self.__print(
+                f"[{entry}] Check value for length is not a valid integer: {check}",
+                'CRITICAL', raise_exception=True
+            )
+            sys.exit(1)
+        return (
+            None,
+            None,
+            convert(check)
+        )
+
+    def __length_range_validate(
+        self,
+        entry: str,
+        check_type: str,
+        values: Sequence[str | int | float],
+        check: Tuple[float | None, float | None, float | None],
+    ) -> bool:
+        """
+        validate the string length or the number range of all given values
+
+        Args:
+            entry (str): settings key, for messages only
+            check_type (str): 'length' compares the string length, 'range' the value as float
+            values (Sequence[str | int | float]): values to check
+            check (Tuple): (from, to, equal) limits, limits that are None are not checked
+
+        Returns:
+            bool: True if all values are inside the limits, False if at least one is not
+        """
+        (__from, __to, __equal) = check
+        valid = True
+        for value_raw in convert_to_list(values):
+            value: float = 0
+            error_mark = ''
+            if check_type == 'length':
+                error_mark = 'length'
+                value = len(str(value_raw))
+            elif check_type == 'range':
+                error_mark = 'range'
+                try:
+                    value = float(str(value_raw))
+                except ValueError:
+                    # a non numeric value is an invalid setting, report it like any other failed check
+                    self.__print(f"[!] [{entry}] '{value_raw}' {error_mark} check needs a number", 'ERROR')
+                    valid = False
+                    continue
+            if __equal is not None and value != __equal:
+                self.__print(f"[!] [{entry}] '{value_raw}' {error_mark} does not match {__equal}", 'ERROR')
+                valid = False
+                continue
+            if __from is not None and __to is None and value < __from:
+                self.__print(f"[!] [{entry}] '{value_raw}' {error_mark} smaller than minimum {__from}", 'ERROR')
+                valid = False
+                continue
+            if __from is None and __to is not None and value > __to:
+                self.__print(f"[!] [{entry}] '{value_raw}' {error_mark} larger than maximum {__to}", 'ERROR')
+                valid = False
+                continue
+            if __from is not None and __to is not None and (
+                value < __from or value > __to
+            ):
+                self.__print(
+                    f"[!] [{entry}] '{value_raw}' {error_mark} outside valid range {__from} to {__to}",
+                    'ERROR'
+                )
+                valid = False
+                continue
+        return valid
+
+    def __load_config_file(self) -> configparser.ConfigParser | None:
+        """
+        read the config file into a config parser
+
+        Returns:
+            configparser.ConfigParser | None: parser with the file content, None if config_file is not a file
+        """
+        if not self.config_file.is_file():
+            return None
+        parser = configparser.ConfigParser()
+        parser.read(self.config_file)
+        return parser
+
+    def __clean_invalid_setting(
+        self,
+        entry: str,
+        validate: str,
+        value: str,
+        regex: str,
+        regex_clean: str,
+        replace: str = "",
+        print_error: bool = True,
+    ) -> str:
+        """
+        check if a string is valid, if not write an error message and return the cleaned up string
+
+        Args:
+            entry (str): what entry key, for the message only
+            validate (str): validate type, for the message only
+            value (str): the value to check
+            regex (str): regex the value is searched with, no match means invalid
+            regex_clean (str): regex for the parts of an invalid value that get replaced
+            replace (str): replacement for the regex_clean matches. Defaults to ''
+            print_error (bool): print the error message. Defaults to True
+
+        Returns:
+            str: the value as is if valid, else the cleaned up value
+        """
+        # valid values are returned untouched
+        if re.search(regex, value):
+            return value
+        self.__print(
+            f"[!] Invalid content for '{entry}' with check '{validate}' and data: {value}",
+            'ERROR', print_error
+        )
+        # clean up
+        return re.sub(regex_clean, replace, value)
+
+    def __check_settings(
+        self,
+        check: str, entry: str, setting_value: list[str] | str
+    ) -> list[str] | str:
+        """
+        run one named check on a settings value and return the cleaned up value
+        The checks are defined in the SettingsLoaderCheck class
+
+        Args:
+            check (str): What check to run, as "check:<NAME>"
+            entry (str): Variable name, just for information message
+            setting_value (list[str] | str): settings value data
+
+        Returns:
+            list[str] | str: cleaned up settings value data, for lists empty results are dropped
+        """
+        check_name = check.replace("check:", "")
+        # get the check settings
+        rules = SettingsLoaderCheck.CHECK_SETTINGS.get(check_name)
+        if rules is None:
+            self.__print(
+                f"[{entry}] Cannot get SettingsLoaderCheck.CHECK_SETTINGS for {check_name}",
+                'CRITICAL', raise_exception=True
+            )
+            sys.exit(1)
+        # single value: either removes or replaces the invalid characters
+        if not isinstance(setting_value, list):
+            return self.__clean_invalid_setting(
+                entry, check_name, str(setting_value),
+                rules['regex'], rules['regex_clean'], rules['replace']
+            )
+        # list: clean each element and keep only the ones that are still filled
+        cleaned: list[str] = []
+        for item in setting_value:
+            result = self.__clean_invalid_setting(
+                entry, check_name, str(item),
+                rules['regex'], rules['regex_clean'], rules['replace']
+            )
+            if result:
+                cleaned.append(result)
+        return cleaned
+
+    def __check_arguments(self, arguments: dict[str, list[str]], all_set: bool = False) -> bool:
+        """
+        check if arguments are set for the given entries
+
+        Only entries with a "mandatory:yes" check are counted as arguments that have to be set
+
+        Args:
+            arguments (dict[str, list[str]]): key => list of checks
+            all_set (bool): if True all mandatory entries must be set as arguments
+
+        Returns:
+            bool: with all_set True only if all mandatory entries are set,
+                else True if any entry is not mandatory or any mandatory entry is set
+        """
+        mandatory_names = [
+            name for name, validate in arguments.items()
+            if any(check == "mandatory:yes" for check in validate)
+        ]
+        # for all set, True only if all are set
+        if all_set is True:
+            return all(self.__get_arg(name) for name in mandatory_names)
+        # a non mandatory entry never blocks
+        if len(mandatory_names) < len(arguments):
+            return True
+        return any(self.__get_arg(name) for name in mandatory_names)
+
+    def __get_arg(self, entry: str) -> Any:
+        """
+        get the value of an argument entry
+
+        Arguments:
+            entry {str} -- argument name
+
+        Returns:
+            Any -- value of the argument, None if the argument does not exist or is None
+        """
+        # dict.get already returns None for a missing entry
+        return self.args.get(entry)
+
+    def __print(self, msg: str, level: str, print_error: bool = True, raise_exception: bool = False):
+        """
+        write out a message, to the Log class if set, else (or with always_print) to stdout
+
+        Arguments:
+            msg {str} -- message text
+            level {str} -- log level name, levels that do not validate are logged as 'ERROR'
+
+        Keyword Arguments:
+            print_error {bool} -- if False nothing is printed to stdout (default: {True})
+            raise_exception {bool} -- raise a ValueError with the message at the end (default: {False})
+
+        Raises:
+            ValueError: if raise_exception is set
+        """
+        if self.log is not None:
+            log_level = level if Log.validate_log_level(level) else 'ERROR'
+            self.log.logger.log(Log.get_log_level_int(log_level), msg)
+        # stdout only without a Log class, or if printing is forced
+        if print_error and (self.log is None or self.always_print):
+            print(msg)
+        if raise_exception:
+            raise ValueError(msg)
+
+
+# __END__
--- a/src/corelibs/config_handling/settings_loader_handling/init.py
+++ b/src/corelibs/config_handling/settings_loader_handling/init.py
--- a/src/corelibs/config_handling/settings_loader_handling/settings_loader_check.py
+++ b/src/corelibs/config_handling/settings_loader_handling/settings_loader_check.py
@@ -0,0 +1,44 @@
+"""
+Class of checks that can be run on value entries
+"""
+
+from typing import TypedDict
+
+
+class SettingsLoaderCheckValue(TypedDict):
+    """Settings check entries, the definition of one named check"""
+    # regex the value is searched with, a value without a match counts as invalid
+    regex: str
+    # regex for the parts of an invalid value that get replaced
+    regex_clean: str
+    # replacement string for the regex_clean matches, empty removes them
+    replace: str
+
+
+class SettingsLoaderCheck:
+    """
+    Named checks for settings values, looked up by the <NAME> part of a check
+
+    check:<NAME> or check:list+<NAME>
+    NOTE(review): confirm the "list+" notation is still handled by the caller
+    """
+    # check name => regex to validate, regex to clean up and the replacement for the clean up
+    CHECK_SETTINGS: dict[str, SettingsLoaderCheckValue] = {
+        # digits only, everything else is removed
+        "int": {
+            "regex": r"^[0-9]+$",
+            "regex_clean": r"[^0-9]",
+            "replace": ""
+        },
+        # A-Z a-z 0-9 only, everything else is removed
+        "string.alphanumeric": {
+            "regex": r"^[a-zA-Z0-9]+$",
+            "regex_clean": r"[^a-zA-Z0-9]",
+            "replace": ""
+        },
+        # a-z 0-9 - only, everything else is removed
+        "string.alphanumeric.lower.dash": {
+            "regex": r"^[a-z0-9-]+$",
+            "regex_clean": r"[^a-z0-9-]",
+            "replace": ""
+        },
+        # A-Z a-z 0-9 _ - . ONLY
+        # This one does not remove, but replaces with _
+        "string.alphanumeric.extended.replace": {
+            "regex": r"^[_.a-zA-Z0-9-]+$",
+            "regex_clean": r"[^_.a-zA-Z0-9-]",
+            "replace": "_"
+        },
+    }
+
+# __END__
--- a/src/corelibs/iterator_handling/init.py
+++ b/src/corelibs/iterator_handling/init.py
--- a/src/corelibs/list_dict_handling/data_search.py
+++ b/src/corelibs/list_dict_handling/data_search.py
--- a/src/corelibs/list_dict_handling/dict_helpers.py
+++ b/src/corelibs/list_dict_handling/dict_helpers.py
--- a/src/corelibs/list_dict_handling/dump_data.py
+++ b/src/corelibs/list_dict_handling/dump_data.py
--- a/src/corelibs/list_dict_handling/fingerprint.py
+++ b/src/corelibs/list_dict_handling/fingerprint.py
--- a/src/corelibs/iterator_handling/list_helpers.py
+++ b/src/corelibs/iterator_handling/list_helpers.py
@@ -0,0 +1,47 @@
+"""
+List type helpers
+"""
+
+from typing import Any, Sequence
+
+
+def convert_to_list(
+    entry: str | int | float | bool | Sequence[str | int | float | bool | Sequence[Any]]
+) -> Sequence[str | int | float | bool | Sequence[Any]]:
+    """
+    Make sure the returned value is a list
+
+    A list is returned as is (same object), any other value is wrapped into a one element list
+
+    Arguments:
+        entry {str | int | float | bool | list[str  |  int  |  float  |  bool]} -- value to convert
+
+    Returns:
+        list[str | int | float | bool] -- the list itself or [entry]
+    """
+    # only real lists pass through, other sequences (str, tuple) are wrapped
+    return entry if isinstance(entry, list) else [entry]
+
+
+def is_list_in_list(
+    list_a: Sequence[str | int | float | bool | Sequence[Any]],
+    list_b: Sequence[str | int | float | bool | Sequence[Any]]
+) -> Sequence[str | int | float | bool | Sequence[Any]]:
+    """
+    Return entries from list_a that are not in list_b
+    Type safe compare: entries only match if value and type are the same (1 does not match "1" or True)
+
+    The result keeps the order of list_a and holds each missing entry only once
+    Entries that are not hashable (eg nested lists) are compared one by one
+
+    Arguments:
+        list_a {list[Any]} -- entries to look for
+        list_b {list[Any]} -- entries that are allowed
+
+    Returns:
+        list[Any] -- entries of list_a missing in list_b, empty list if all are found
+    """
+    # (value, type) tuples of all known hashable entries, fast lookup
+    known_hashed: set[tuple[Any, type]] = set()
+    # known entries that cannot be hashed, slow lookup
+    known_other: list[Any] = []
+
+    def _remember(item: Any) -> None:
+        """add an entry to the known entries"""
+        try:
+            known_hashed.add((item, type(item)))
+        except TypeError:
+            known_other.append(item)
+
+    def _is_known(item: Any) -> bool:
+        """check if an entry with the same value and type is already known"""
+        try:
+            return (item, type(item)) in known_hashed
+        except TypeError:
+            return any(type(other) is type(item) and other == item for other in known_other)
+
+    for item in list_b:
+        _remember(item)
+    missing: list[Any] = []
+    for item in list_a:
+        if _is_known(item):
+            continue
+        missing.append(item)
+        # remember it, so the same missing entry is not reported twice
+        _remember(item)
+    return missing
+
+# __END__
--- a/src/corelibs/list_dict_handling/manage_dict.py
+++ b/src/corelibs/list_dict_handling/manage_dict.py
--- a/src/corelibs/string_handling/string_helpers.py
+++ b/src/corelibs/string_handling/string_helpers.py
@@ -2,6 +2,7 @@
 String helpers
 """

+from typing import Any
 from decimal import Decimal, getcontext
 from textwrap import shorten

@@ -101,4 +102,62 @@ def format_number(number: float, precision: int = 0) -> str:
        "f}"
    ).format(_number)

+
+def is_int(string: Any) -> bool:
+    """
+    check if a value can be converted with int()
+
+    Arguments:
+        string {Any} -- value to check
+
+    Returns:
+        bool -- True if int() accepts the value, False if it raises a TypeError or ValueError
+    """
+    try:
+        int(string)
+    except (TypeError, ValueError):
+        return False
+    return True
+
+
+def is_float(string: Any) -> bool:
+    """
+    check if a value can be converted with float()
+
+    Arguments:
+        string {Any} -- value to check
+
+    Returns:
+        bool -- True if float() accepts the value, False if it raises a TypeError or ValueError
+    """
+    try:
+        float(string)
+    except (TypeError, ValueError):
+        return False
+    return True
+
+
+def str_to_bool(string: str):
+    """
+    convert a "True"/"true"/"False"/"false" string to bool
+
+    Arguments:
+        string {str} -- string to convert
+
+    Raises:
+        ValueError: if the string is not one of the four allowed strings
+
+    Returns:
+        bool -- True for "True"/"true", False for "False"/"false"
+    """
+    if string in ("True", "true"):
+        return True
+    if string in ("False", "false"):
+        return False
+    raise ValueError(f"Invalid boolean string: {string}")
+
 # __END__