From f900a6eab905f07fe112c78145f2aba93ec6bb44 Mon Sep 17 00:00:00 2001 From: Clemens Schwaighofer Date: Wed, 2 Jul 2025 18:34:53 +0900 Subject: [PATCH] Add double byte string format --- ReadMe.md | 10 +- pyproject.toml | 2 +- .../double_byte_string_format.py | 226 ++++++++++++++++++ .../double_byte_string_format.py | 66 +++++ tests/progress/progress_test.py | 4 +- uv.lock | 2 +- 6 files changed, 304 insertions(+), 6 deletions(-) create mode 100644 src/CoreLibs/string_handling/double_byte_string_format.py create mode 100644 tests/double_byte_string_format/double_byte_string_format.py diff --git a/ReadMe.md b/ReadMe.md index 6a03558..02a16f8 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -20,7 +20,7 @@ This is a pip package that can be installed into any project and covers the foll - logging_handling: extend log and also error message handling - requests_handling: requests wrapper for better calls with auth headers - script_handling: pid lock file handling, abort timer -- string_handling: byte format, datetime format, hashing, string formats for numbrers, etc +- string_handling: byte format, datetime format, hashing, string formats for numbrers, double byte string format, etc ## How to publish @@ -41,7 +41,7 @@ uv publish --index egra-gitea --token --native-tls ## Test package -We must set the full index URL here because we run with "--no-project2 +We must set the full index URL here because we run with "--no-project" ```sh uv run --with corelibs --index egra-gitea=https://git.egplusww.jp/api/packages/PyPI/pypi/simple/ --no-project --native-tls -- python -c "import corelibs" @@ -51,12 +51,16 @@ uv run --with corelibs --index egra-gitea=https://git.egplusww.jp/api/packages/P In the test folder other tests are located. 
-At the moment only a small test for the "progress" module is set +At the moment only a small test for the "progress" and the "double byte string format" module is set ```sh uv run --native-tls tests/progress/progress_test.py ``` +```sh +uv run --native-tls tests/double_byte_string_format/double_byte_string_format.py +``` + ## How to install in another project This will also add the index entry diff --git a/pyproject.toml b/pyproject.toml index 7082f02..f34a211 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ # MARK: Project info [project] name = "corelibs" -version = "0.3.1" +version = "0.4.0" description = "Collection of utils for Python scripts" readme = "ReadMe.md" requires-python = ">=3.13" diff --git a/src/CoreLibs/string_handling/double_byte_string_format.py b/src/CoreLibs/string_handling/double_byte_string_format.py new file mode 100644 index 0000000..dd8276a --- /dev/null +++ b/src/CoreLibs/string_handling/double_byte_string_format.py @@ -0,0 +1,226 @@ +""" +Format double byte strings to exact length +""" + +import unicodedata + + +class DoubleByteFormatString: + """ + Format a string to exact length + """ + + def __init__( + self, + string: str, + cut_length: int, + format_length: int | None = None, + placeholder: str = '..', + format_string: str = '{{:<{len}}}' + ): + """ + shorts a string to exact cut length and sets it to format length + + after "cut_length" cut the "placeholder" will be added, so that the new cut_length is never + larget than the cut_length given (".." is counted to cut_length) + if format_length if set and outside format_length will be set + the cut_length is adjusted to format_length if the format_length is shorter + + Example + + "Foo bar baz" 10 charcters -> 5 cut_length -> 10 format_length + "Foo.. 
" + + use class.get_string_short() for cut length shortend string + use class.get_string_short_formated() to get the shorted string to format length padding + + creates a class that shortens and sets the format length + to use with a print format run the format needs to be pre set in + the style of {{:<{len}}} style + self.get_string_short_formated() for the "len" parameter + + Args: + string (str): string to work with + cut_length (int): width to shorten to + format_length (int | None): format length. Defaults to None + placeholder (str, optional): placeholder to put after shortened string. Defaults to '..'. + format_string (str, optional): format string. Defaults to '{{:<{len}}}' + """ + # output variables + self.string_short: str = '' + self.string_width_value: int = 0 + self.string_short_width: int = 0 + self.format_length_value: int = 0 + # internal varaibles + self.placeholder: str = placeholder + # original string + self.string: str = '' + # width to cut string to + self.cut_length: int = 0 + # format length to set to + self.format_length: int = 0 + # main string + self.string = str(string) + + self.format_string: str = format_string + + # if width is > 0 set, else set width of string (fallback) + if cut_length > 0: + self.cut_length = cut_length + elif cut_length <= 0: + self.cut_length = self.__string_width_calc(self.string) + # format length set, if not set or smaller than 0, set to width of string + self.format_length = self.cut_length + if format_length is not None and format_length > 0: + self.format_length = format_length + # check that width is not larger then length if yes, set width to length + self.cut_length = min(self.cut_length, self.format_length) + + # process the string shorten and format length calculation + self.process() + + def process(self): + """ + runs all the class methods to set string length, the string shortened + and the format length + """ + # call the internal ones to set the data + if self.string: + self.__string_width() + 
self.__shorten_string() + if self.format_length: + self.__format_length() + + def get_string_short(self) -> str: + """ + get the shortend string + + Returns: + str -- _description_ + """ + return self.string_short + + def get_string_short_formated(self, format_string: str = '{{:<{len}}}') -> str: + """ + get the formatted string + + Keyword Arguments: + format_string {_type_} -- _description_ (default: {'{{:<{len}}}'}) + + Returns: + str -- _description_ + """ + if not format_string: + format_string = self.format_string + return format_string.format( + len=self.get_format_length() + ).format( + self.get_string_short() + ) + + def get_format_length(self) -> int: + """ + get the format length for outside length set + + Returns: + int -- _description_ + """ + return self.format_length_value + + def get_cut_length(self) -> int: + """ + get the actual cut length + + Returns: + int -- _description_ + """ + return self.cut_length + + def get_requested_cut_length(self) -> int: + """ + get the requested cut length + + Returns: + int -- _description_ + """ + return self.cut_length + + def get_requested_format_length(self) -> int: + """ + get the requested format length + + Returns: + int -- _description_ + """ + return self.format_length + + def __string_width_calc(self, string: str) -> int: + """ + does the actual string width calculation + + Args: + string (str): string to calculate from + + Returns: + int: stringth width + """ + return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string) + + def __string_width(self): + """ + calculates the string width based on the characters + this is an internal method and should not be called on itself + """ + # only run if string is set and is valid string + if self.string: + # calculate width. 
add +1 for each double byte character + self.string_width_value = self.__string_width_calc(self.string) + + def __format_length(self): + """ + set the format length based on the length for the format + and the shortend string + this is an internal method and should not be called on itself + """ + if not self.string_short: + self.__shorten_string() + # get correct format length based on string + if ( + self.string_short and + self.format_length > 0 and + self.string_short_width > 0 + ): + # length: format length wanted + # substract the width of the shortend string - the length of the shortend string + self.format_length_value = self.format_length - (self.string_short_width - len(self.string_short)) + else: + # if we have nothing to shorten the length, keep the old one + self.format_length_value = self.format_length + + def __shorten_string(self): + """ + shorten string down to set width + this is an internal method and should not be called on itself + """ + # set string width if not set + if not self.string_width_value: + self.__string_width() + # if the double byte string width is larger than the wanted width + if self.string_width_value > self.cut_length: + cur_len = 0 + self.string_short = '' + for char in str(self.string): + # set the current length if we add the character + cur_len += 2 if unicodedata.east_asian_width(char) in "WF" else 1 + # if the new length is smaller than the output length to shorten too add the char + if cur_len <= (self.cut_length - len(self.placeholder)): + self.string_short += char + self.string_short_width = cur_len + # return string with new width and placeholder + self.string_short = f"{self.string_short}{self.placeholder}" + self.string_short_width += len(self.placeholder) + else: + # if string is same saze just copy + self.string_short = self.string + +# __END__ diff --git a/tests/double_byte_string_format/double_byte_string_format.py b/tests/double_byte_string_format/double_byte_string_format.py new file mode 100644 index 
#!/usr/bin/env -S uv run --script

"""
Test for double byte format
"""

from corelibs.string_handling.double_byte_string_format import DoubleByteFormatString


def main():
    """
    Print the shortened and padded variants of a set of sample strings
    for several (cut length, format length) combinations
    """
    samples = [
        "Some string 123 other text",
        "Some string 日本語 other text",
        "日本語は string 123 other text",
        "あいうえおかきくけこさしすせそなにぬねのまみむめも〜",
        "あいうえおかきくけこさしす 1 other text",
        "Some string すせそなにぬねのまみむめも〜",
        "SOME OTHER STRING THAT IS LONGER THAN TWENTYSIX CHARACTERS",
        "日本語は string 123 other text Some string 日本語 other text"
    ]
    # two stage template: first fill in the width, then the string
    template = "{{:<{len}}}"
    # (cut length, format length) pairs, non positive values test the fallbacks
    length_pairs = [
        (26, 25),
        (26, 26),
        (26, 60),
        (26, 20),
        (26, -5),
        (-6, -5),
    ]

    for cut_length, format_length in length_pairs:
        print(f"========= Cut: {cut_length} | Format: {format_length} ==> ")
        for sample in samples:
            formatter = DoubleByteFormatString(sample, cut_length, format_length)
            # manual formatting with the getters, printed next to the
            # class' own formatted output for visual comparison
            width_template = template.format(len=formatter.get_format_length())
            manual = width_template.format(formatter.get_string_short())
            report = "".join([
                "* Shorten string: shorten length: ",
                f"Req: {formatter.get_requested_cut_length()} ({cut_length}) / ",
                f"Set: {formatter.get_cut_length()}, ",
                "format length: ",
                f"Req: {formatter.get_requested_format_length()} ({format_length}) / ",
                f"Set: {formatter.get_format_length()}",
                f"\nOrig: |{sample}|",
                f"\nGSS : |{formatter.get_string_short()}|",
                f"\nF : |{manual}|",
                f"\nGSSF: |{formatter.get_string_short_formated()}|",
            ])
            print(report)
            print("-------")


if __name__ == "__main__":
    main()

# __END__
+89,5 @@ def main(): if __name__ == '__main__': main() + +# __END__ diff --git a/uv.lock b/uv.lock index e57bce0..0a88823 100644 --- a/uv.lock +++ b/uv.lock @@ -35,7 +35,7 @@ wheels = [ [[package]] name = "corelibs" -version = "0.3.0" +version = "0.3.1" source = { editable = "." } dependencies = [ { name = "jmespath" },