""" various string helpers1 """ import unicodedata # this is for looking up if string is non latin letters # this is used by isLatin and onlyLatinChars cache_latin_letters = {} def shorten_string(string, width, placeholder=".."): """ shortens a string to width and attached placeholder Args: string(str): string to shorten width (int): length th shorten to placeholder (str, optional): optional string for removed shortend part. Defaults to '..'. Returns: string: shortened string """ # get the length with double byte charactes string_length_cjk = string_len_cjk(str(string)) # if double byte width is too big if string_length_cjk > width: # set current length and output string cur_len = 0 out_string = "" # loop through each character for char in str(string): # set the current length if we add the character cur_len += 2 if unicodedata.east_asian_width(char) in "WF" else 1 # if the new length is smaller than the output length to shorten too add the char if cur_len <= (width - len(placeholder)): out_string += char # return string with new width and placeholder return f"{out_string}{placeholder}" else: return str(string) def string_len_cjk(string): """ because len on string in python counts characters but we need the width count for formatting, we count two for a double byte characters Args: string (string): string to check length Returns: int: length including double count for double width characters """ # return string len including double count for double width characters return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string) def is_latin(uchr): """ checks via the unciode class if a character is LATIN char based from https://stackoverflow.com/a/3308844/7811993 Args: uchr (str): _description_ Returns: str: flagged LATIN or not char """ try: # if we found in the dictionary return return cache_latin_letters[uchr] except KeyError: # find LATIN in uncide type returned and set in dictionary for this character return cache_latin_letters.setdefault(uchr, "LATIN" in unicodedata.name(uchr)) def only_latin_chars(unistr): """ chekcs if a string is based on LATIN chars. No for any CJK, Cyrillic, Hebrew, etc from: https://stackoverflow.com/a/3308844/7811993 Args: unistr (str): string Returns: bool: True/False for if string is LATIN char based """ return all(is_latin(uchr) for uchr in unistr if uchr.isalpha()) def format_len(string, length): """ in case of CJK characters we need to adjust the format length dynamically calculate correct length based on string given Args: string (str): string length (int): format length Returns: int: adjusted format legnth """ # returns length udpated for string with double byte characters # get string length normal, get string length including double byte characters # then subtract that from the original length return length - (string_len_cjk(string) - len(string))