Move functions into modules
This commit is contained in:
106
bin/utils/string_helpers.py
Normal file
106
bin/utils/string_helpers.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""
|
||||
various string helpers
|
||||
"""
|
||||
|
||||
import unicodedata
|
||||
|
||||
# lookup cache for whether a character is a LATIN letter
|
||||
# this is used by is_latin and only_latin_chars
|
||||
# maps a single character to a bool: True when its Unicode name contains 'LATIN'
cache_latin_letters = {}
|
||||
|
||||
def shorten_string(string, width, placeholder='..'):
    """
    shortens a string to a display width and appends a placeholder

    Double width (East Asian wide/fullwidth) characters count as two columns.
    The placeholder is measured the same way, so the returned string including
    the placeholder does not exceed width as long as the placeholder itself
    fits into width.

    Args:
        string (str): string to shorten, any other value is converted with str()
        width (int): display width to shorten to
        placeholder (str, optional): string appended in place of the removed part. Defaults to '..'.

    Returns:
        str: the string unchanged if its display width fits into width,
        else the shortened string followed by the placeholder
    """
    # convert once, the input may be any object
    text = str(string)
    # nothing to shorten if the display width already fits
    if string_len_cjk(text) <= width:
        return text
    # columns left for the text itself; the placeholder is measured in display
    # columns so a double width placeholder does not push the result over width
    budget = width - string_len_cjk(placeholder)
    cur_len = 0
    kept = []
    for char in text:
        # width so far if this character is added
        cur_len += 2 if unicodedata.east_asian_width(char) in ("W", "F") else 1
        # the running width only grows, so once a character does not fit
        # no later character can fit either
        if cur_len > budget:
            break
        kept.append(char)
    shortened = "".join(kept)
    # return shortened string with placeholder attached
    return f"{shortened}{placeholder}"
|
||||
|
||||
def string_len_cjk(string):
    """
    returns the display width of a string

    len() counts characters, but for formatting the column width is needed,
    so every double width character (East Asian width W or F) counts as two

    Args:
        string (str): string to measure

    Returns:
        int: length with double width characters counted twice
    """
    columns = 0
    for char in string:
        # wide and fullwidth characters take two columns, all others one
        if unicodedata.east_asian_width(char) in "WF":
            columns += 2
        else:
            columns += 1
    return columns
|
||||
|
||||
def is_latin(uchr):
    """
    checks via the unicode character name if a character is LATIN based

    The result for each character is stored in cache_latin_letters so the
    unicode name lookup is only done once per character.

    from
    https://stackoverflow.com/a/3308844/7811993

    Args:
        uchr (str): single character to check

    Returns:
        bool: True if the unicode name of the character contains LATIN,
        False if it does not or if the character has no unicode name
    """
    try:
        # if we found it in the dictionary return the cached flag
        return cache_latin_letters[uchr]
    except KeyError:
        # unicodedata.name raises ValueError for characters without a name
        # (eg control characters) unless a default is given; the '' default
        # flags those characters as not LATIN instead of raising
        return cache_latin_letters.setdefault(uchr, 'LATIN' in unicodedata.name(uchr, ''))
|
||||
|
||||
def only_latin_chars(unistr):
    """
    checks if every letter in a string is a LATIN character

    Only alphabetic characters are checked, everything else (digits,
    punctuation, spaces) is skipped. A string without any letters is True.

    from:
    https://stackoverflow.com/a/3308844/7811993

    Args:
        unistr (str): string to check

    Returns:
        bool: True if all letters are LATIN based, False on the first letter that is not
    """
    for char in unistr:
        # stop at the first letter that is not LATIN
        if char.isalpha() and not is_latin(char):
            return False
    return True
|
||||
|
||||
def format_len(string, length):
    """
    adjusts a format length for strings with double width characters

    String formatting pads by character count, so each double width (CJK)
    character in the string has to be taken off the format length.

    Args:
        string (str): string that will be formatted
        length (int): wanted format length

    Returns:
        int: adjusted format length
    """
    # display width with double width characters counted twice
    display_width = string_len_cjk(string)
    # plain character count
    char_count = len(string)
    # the difference is the number of extra columns the string takes up
    extra_columns = display_width - char_count
    return length - extra_columns
|
||||
Reference in New Issue
Block a user