Fix double byte path format issues

When the path has double byte characters the length format does not work
and the path output will be too long for the space available.
We now use special double byte double count detection methods to adjust
the path length and output format
This commit is contained in:
2018-03-14 14:55:35 +09:00
parent b3ac5051d0
commit 4f4a0db7aa

View File

@@ -13,6 +13,7 @@ import argparse
import sqlite3
import requests
import configparser
import unicodedata
# import textwrap
import glob
import os
@@ -368,8 +369,8 @@ def shortenPath(path, length=30, file_only=False, path_only=False):
path = os.path.split(path)[1]
if path_only:
path = os.path.split(path)[0]
if len(path) > length:
path = "{} {}".format("..", path[len(path) - length:])
if stringLenCJK(path) > length:
path = "{} {}".format("..", path[stringLenCJK(path) - length:])
return path
@@ -378,13 +379,30 @@ def shortenPath(path, length=30, file_only=False, path_only=False):
# RETURN: shortened string
# DESC : shortens a string to width and attached placeholder
def shortenString(string, width, placeholder='..'):
if len(str(string)) > width:
width -= len(placeholder)
# get the length with double byte charactes
string_len_cjk = stringLenCJK(str(string))
# if double byte width is too big
if string_len_cjk > width:
# substract the place holder
# subtract difference between double byte and character lenght
width -= ((string_len_cjk - len(str(string))) + len(placeholder))
# return string width new width
return "{}{}".format(str(string)[:width], placeholder)
else:
return str(string)
# METHOD: stringLenCJK
# PARAMS: string
# RETURN: length including double count for double width characters
# DESC : because len on string in python counts characters but we need
# the width count for formatting, we count two for a double byte
# characters
def stringLenCJK(string):
""" return string len including double count for double width characters """
return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string)
# METHOD: printHeader
# PARAMS: header string, line counter, print header counter trigger
# RETURN: line counter +1
@@ -401,6 +419,18 @@ def printHeader(header, lines=0, header_line=0):
return lines
# METHOD: formatLen
# PARAMS: string, format length
# RETURN: returns adjusted format length
# DESC : in case of CJK characters we need to adjust the format length dynamically
# calculate correct length based on string given
def formatLen(string, length):
# returns length udpated for string with double byte characters
# get string length normal, get string length including double byte characters
# then subtract that from the original length
return length - (stringLenCJK(string) - len(string))
# METHOD: fileSortNumber
# PARAMS: file name
# RETURN: number found in the BK string or 0 for none
@@ -859,7 +889,9 @@ if args.read_only:
# current page number
page_no = 1
# the formatted line for the output
format_line = " {{filename:<{}}} | {{latitude:>{}}} | {{longitude:>{}}} | {{code:<{}}} | {{country:<{}}} | {{state:<{}}} | {{city:<{}}} | {{location:<{}}} | {{path:<{}}}".format(
# 4 {} => final replace: data (2 pre replaces)
# 1 {} => length replace here
format_line = " {{{{filename:<{}}}}} | {{{{latitude:>{}}}}} | {{{{longitude:>{}}}}} | {{{{code:<{}}}}} | {{{{country:<{}}}}} | {{{{state:<{}}}}} | {{{{city:<{}}}}} | {{{{location:<{}}}}} | {{{{path:<{}}}}}".format(
format_length['filename'],
format_length['latitude'],
format_length['longitude'],
@@ -868,7 +900,7 @@ if args.read_only:
format_length['state'],
format_length['city'],
format_length['location'],
format_length['path']
"{pathlen}" # set path len replacer variable
)
# header line format:
# blank line
@@ -878,7 +910,8 @@ if args.read_only:
{}
{}'''.format(
'> Page {page_no:,}/{page_all:,}', # can later be set to something else, eg page numbers
format_line.format( # the header title line
# pre replace path length before we add the header titles
format_line.format(pathlen=format_length['path']).format( # the header title line
filename='File'[:format_length['filename']],
latitude='Latitude'[:format_length['latitude']],
longitude='Longitude'[:format_length['longitude']],
@@ -959,16 +992,20 @@ for xmp_file in work_files:
count['read'] = printHeader(header_line.format(page_no=page_no, page_all=page_all), count['read'], header_repeat)
# the data content
print(format_line.format(
filename=shortenPath(xmp_file, format_length['filename'], file_only=True), # shorten from the left
latitude=str(convertDMStoLat(data_set['GPSLatitude']))[:format_length['latitude']], # cut off from the right
longitude=str(convertDMStoLong(data_set['GPSLongitude']))[:format_length['longitude']],
code=data_set['CountryCode'][:2].center(4), # is only 2 chars
country=shortenString(data_set['Country'], width=format_length['country']), # shorten from the right
state=shortenString(data_set['State'], width=format_length['state']),
city=shortenString(data_set['City'], width=format_length['city']),
location=shortenString(data_set['Location'], width=format_length['location']),
path=shortenPath(xmp_file, format_length['path'], path_only=True)
))
# we need to adjust the path length to the folder name if it has double byte characters inside
pathlen=formatLen(shortenPath(xmp_file, format_length['path'], path_only=True), format_length['path'])
).format(
filename=shortenPath(xmp_file, format_length['filename'], file_only=True), # shorten from the left
latitude=str(convertDMStoLat(data_set['GPSLatitude']))[:format_length['latitude']], # cut off from the right
longitude=str(convertDMStoLong(data_set['GPSLongitude']))[:format_length['longitude']],
code=data_set['CountryCode'][:2].center(4), # is only 2 chars
country=shortenString(data_set['Country'], width=format_length['country']), # shorten from the right
state=shortenString(data_set['State'], width=format_length['state']),
city=shortenString(data_set['City'], width=format_length['city']),
location=shortenString(data_set['Location'], width=format_length['location']),
path=shortenPath(xmp_file, format_length['path'], path_only=True)
)
)
count['listed'] += 1
else:
# create a duplicate copy for later checking if something changed