diff --git a/bin/reverse_geolocate.py b/bin/reverse_geolocate.py index f25dd77..5cee96a 100755 --- a/bin/reverse_geolocate.py +++ b/bin/reverse_geolocate.py @@ -27,6 +27,9 @@ from math import ceil, radians, sin, cos, atan2, sqrt import requests # Note XMPFiles does not work with sidecar files, need to read via XMPMeta from libxmp import XMPMeta, consts +from utils.long_lat import convert_dms_to_lat, convert_dms_to_long, convert_lat_to_dms, convert_long_to_dms, get_distance +from utils.reverse_geolocate import reverse_geolocate +from utils.string_helpers import string_len_cjk, shorten_string, format_len ############################################################## # FUNCTIONS @@ -121,389 +124,6 @@ class DistanceValues(argparse.Action): # MAIN FUNCTIONS -def reverse_geolocate(longitude, latitude, map_type, args): - """ - wrapper to call to either the google or openstreetmap - - Args: - longitude (float): latitude - latitude (float): longitue - map_type(str): map search target (google or openstreetmap) - args (_type_): _description_ - - Returns: - _type_: dict with all data (see below) - """ - # clean up long/lat - # they are stored with N/S/E/W if they come from an XMP - # format: Deg,Min.Sec[NSEW] - # NOTE: lat is N/S, long is E/W - # detect and convert - lat_long = long_lat_reg(longitude=longitude, latitude=latitude) - # which service to use - if map_type == 'google': - return reverse_geolocate_google(lat_long['longitude'], lat_long['latitude'], args) - elif map_type == 'openstreetmap': - return reverse_geolocate_open_street_map(lat_long['longitude'], lat_long['latitude'], args) - else: - return { - 'Country': '', - 'status': 'ERROR', - 'error': 'Map type not valid' - } - -def reverse_geolocate_init(longitude, latitude): - """ - inits the dictionary for return, and checks the lat/long on valid - returns geolocation dict with status = 'ERROR' if an error occurded - - Args: - longitude (float): longitude - latitude (float): latitude - - Returns: - _type_: empty geolocation dictionary, or error flag if lat/long is not valid - """ - # basic dict format - geolocation = { - 'CountryCode': '', - 'Country': '', - 'State': '', - 'City': '', - 'Location': '', - # below for error reports - 'status': '', - 'error_message': '' - } - # error if long/lat is not valid - latlong_re = re.compile(r'^\d+\.\d+$') - if not latlong_re.match(str(longitude)) or not latlong_re.match(str(latitude)): - geolocation['status'] = 'ERROR' - geolocation['error_message'] = f"Latitude {latitude} or Longitude {longitude} are not valid" - return geolocation - -def reverse_geolocate_open_street_map(longitude, latitude, args): - """ - OpenStreetMap reverse lookcation lookup - - sample: - https://nominatim.openstreetmap.org/reverse.php?format=jsonv2& - at=&lon=&zoom=21&accept-languge=en-US,en& - - Args: - longitude (float): longitude - latitude (float): latitude - args (_type_): _description_ - - Returns: - dictionary: dict with locaiton, city, state, country, country code - if not fillable, entry is empty - """ - # init - geolocation = reverse_geolocate_init(longitude, latitude) - if geolocation['status'] == 'ERROR': - return geolocation - # query format - query_format = 'jsonv2' - # language to return (english) - language = 'en-US,en' - # build query - base = 'https://nominatim.openstreetmap.org/reverse.php?' - # parameters - payload = { - 'format': query_format, - 'lat': latitude, - 'lon': longitude, - 'accept-language': language - } - # if we have an email, add it here - if args.email: - payload['email'] = args.email - url = f"{base}" - # timeout in seconds - timeout = 60 - response = requests.get(url, params=payload, timeout=timeout) - # debug output - if args.debug: - print(f"OpenStreetMap search for Lat: {latitude}, Long: {longitude}") - if args.debug and args.verbose >= 1: - print(f"OpenStreetMap response: {response} => JSON: {response.json()}") - # type map - # Country to Location and for each in order of priority - type_map = { - 'CountryCode': ['country_code'], - 'Country': ['country'], - 'State': ['state'], - 'City': ['city', 'city_district', 'state_district'], - 'Location': ['county', 'town', 'suburb', 'hamlet', 'neighbourhood', 'road'] - } - # if not error - if 'error' not in response.json(): - # get address block - addr = response.json()['address'] - # loop for locations - for loc_index, sub_index in type_map.items(): - for index in sub_index: - if index in addr and not geolocation[loc_index]: - geolocation[loc_index] = addr[index] - # for loc_index in type_map: - # for index in type_map[loc_index]: - # if index in addr and not geolocation[loc_index]: - # geolocation[loc_index] = addr[index] - else: - geolocation['status'] = 'ERROR' - geolocation['error_message'] = response.json()['error'] - print(f"Error in request: {geolocation['error']}") - # return - return geolocation - -def reverse_geolocate_google(longitude, latitude, args): - """ - Google Maps reverse location lookup - - sample: - http://maps.googleapis.com/maps/api/geocode/json?latlng=,&language= - &sensor=false&key= - - Args: - longitude (float): longitude - latitude (float): latitude - args (_type_): _description_ - - Returns: - dictionary: dict with location, city, state, country, country code - if not fillable, entry is empty - """ - # init - geolocation = reverse_geolocate_init(longitude, latitude) - temp_geolocation = geolocation.copy() - if geolocation['status'] == 'ERROR': - return geolocation - # sensor (why?) - sensor = 'false' - # language, so we get ascii en back - language = 'en' - # request to google - # if a google api key is used, the request has to be via https - protocol = 'https://' if args.google_api_key else 'http://' - base = "maps.googleapis.com/maps/api/geocode/json?" - # build the base params - payload = { - 'latlng': f"{latitude},{longitude}", - 'language': language, - 'sensor': sensor - } - # if we have a google api key, add it here - if args.google_api_key: - payload['key'] = args.google_api_key - # build the full url and send it to google - url = f"{protocol}{base}" - # timeout in seconds - timeout = 60 - response = requests.get(url, params=payload, timeout=timeout) - # debug output - if args.debug: - print(f"Google search for Lat: {latitude}, Long: {longitude} with {response.url}") - if args.debug and args.verbose >= 1: - print(f"Google response: {response} => JSON: {response.json()}") - # type map - # For automated return of correct data into set to return - type_map = { - 'CountryCode': ['country'], - 'Country': ['country'], - 'State': ['administrative_area_level_1', 'administrative_area_level_2'], - 'City': ['locality', 'administrative_area_level_3'], - 'Location': ['sublocality_level_1', 'sublocality_level_2', 'route'], - } - # print("Error: {}".format(response.json()['status'])) - if response.json()['status'] == 'OK': - # first entry for type = premise - for entry in response.json()['results']: - for sub_entry in entry: - if sub_entry == 'types' and ( - 'premise' in entry[sub_entry] or - 'route' in entry[sub_entry] or - 'street_address' in entry[sub_entry] or - 'sublocality' in entry[sub_entry] - ): - # print("Entry {}: {}".format(sub_entry, entry[sub_entry])) - # print("Address {}".format(entry['address_components'])) - # type - # -> country, - # -> administrative_area (1, 2), - # -> locality, - # -> sublocality (_level_1 or 2 first found, then route) - # so we get the data in the correct order - # for loc_index in type_map: - # for index in type_map[loc_index]: - for loc_index, sub_index in type_map.items(): - for index in sub_index: - # this is an array, so we need to loop through each - for addr in entry['address_components']: - # in types check that index is in there - # and the location is not yet set - # also check that entry is in LATIN based - # NOTE: fallback if all are non LATIN? - if index in addr['types'] and not geolocation[loc_index]: - # for country code we need to use short name, - # else we use long name - if loc_index == 'CountryCode': - if only_latin_chars(addr['short_name']): - geolocation[loc_index] = addr['short_name'] - elif not temp_geolocation[loc_index]: - temp_geolocation[loc_index] = addr['short_name'] - else: - if only_latin_chars(addr['long_name']): - geolocation[loc_index] = addr['long_name'] - elif not temp_geolocation[loc_index]: - temp_geolocation[loc_index] = addr['long_name'] - # check that all in geoloaction are filled and if not fille from temp_geolocation dictionary - for loc_index in type_map: - if not geolocation[loc_index] and temp_geolocation[loc_index]: - geolocation[loc_index] = temp_geolocation[loc_index] - # write OK status - geolocation['status'] = response.json()['status'] - else: - geolocation['error_message'] = response.json()['error_message'] - geolocation['status'] = response.json()['status'] - print(f"Error in request: {geolocation['status']} {geolocation['error_message']}") - # return - return geolocation - -def convert_lat_long_to_dms(lat_long, is_latitude=False, is_longitude=False): - """ - convert the LR format of N.N to the Exif GPS format - - Args: - lat_long(str): latLong in (-)N.N format - is_latitude (bool, optional): flag, else we can't set North/Sout. Defaults to False. - is_longitude (bool, optional): flag, else we can't set West/East. Defaults to False. - - Returns: - string: Deg,Min.Sec(NESW) format - """ - # minus part before . and then multiply rest by 60 - degree = int(abs(lat_long)) - minutes = round((float(abs(lat_long)) - int(abs(lat_long))) * 60, 10) - if is_latitude is True: - direction = 'S' if int(lat_long) < 0 else 'N' - elif is_longitude is True: - direction = 'W' if int(lat_long) < 0 else 'E' - else: - direction = '(INVALID)' - return f"{degree},{minutes}{direction}" - -def convert_lat_to_dms(lat_long): - """ - wrapper functions for Long/Lat calls: latitude - - Args: - lat_long(str): latLong in (-)N.N format - - Returns: - string: Deg,Min.Sec(NESW) format - """ - return convert_lat_long_to_dms(lat_long, is_latitude=True) - - -# wrapper for Long/Lat call: longitute -def convert_long_to_dms(lat_long): - """ - wrapper for Long/Lat call: longitute - - Args: - lat_long(str): latLong in (-)N.N format - - Returns: - string: Deg,Min.Sec(NESW) format - """ - return convert_lat_long_to_dms(lat_long, is_longitude=True) - -def long_lat_reg(longitude, latitude): - """ - converts the XMP/EXIF formatted GPS Long/Lat coordinates - from the , to the normal float - number used in google/lr internal - - Args: - longitude(str): n,n.nNSEW format - latitude(str): n,n.nNSEW format - - Returns: - dictionary: dict with converted lat/long - """ - # regex - latlong_re = re.compile(r'^(\d+),(\d+\.\d+)([NESW]{1})$') - # dict for loop - lat_long = { - 'longitude': longitude, - 'latitude': latitude - } - # for element in lat_long: - for index, element in lat_long.items(): - # match if it is exif GPS format - _match = latlong_re.match(element) - if _match is not None: - # convert from Degree, Min.Sec into float format - lat_long[index] = float(_match.group(1)) + (float(_match.group(2)) / 60) - # if S or W => inverse to negative - if _match.group(3) == 'S' or _match.group(3) == 'W': - lat_long[index] *= -1 - return lat_long - -def convert_dms_to_lat(lat_long): - """ - rapper calls for DMS to Lat/Long: latitude - - Args: - lat_long(str): n,n.nNSEW format - - Returns: - dict: dict with converted lat/long - """ - return long_lat_reg('0,0.0N', lat_long)['latitude'] - -def convert_dms_to_long(lat_long): - """ - wrapper calls for DMS to Lat/Long: longitude - - Args: - lat_long(str): n,n.nNSEW format - - Returns: - dict: dict with converted lat/long - """ - return long_lat_reg(lat_long, '0,0.0N')['longitude'] - -def get_distance(from_longitude, from_latitude, to_longitude, to_latitude): - """ - calculates the difference between two coordinates - - Args: - from_longitude(str): from longitude - from_latitude(str): from latitude - to_longitude(str): to longitude - to_latitude(str): to latitude - - Returns: - float: distance in meters - """ - # earth radius in meters - earth_radius = 6378137.0 - # convert all from radians with pre convert DMS to long and to float - from_longitude = radians(float(convert_dms_to_long(from_longitude))) - from_latitude = radians(float(convert_dms_to_lat(from_latitude))) - to_longitude = radians(float(convert_dms_to_long(to_longitude))) - to_latitude = radians(float(convert_dms_to_lat(to_latitude))) - # distance from - to - distance_longitude = from_longitude - to_longitude - distance_latitude = from_latitude - to_latitude - # main distance calculation - distance = sin(distance_latitude / 2)**2 + cos(from_latitude) * \ - cos(to_latitude) * sin(distance_longitude / 2)**2 - distance = 2 * atan2(sqrt(distance), sqrt(1 - distance)) - return earth_radius * distance - def check_overwrite(data, key, field_controls, args): """ checks with field control flags if given data for key should be written @@ -573,86 +193,6 @@ def shorten_path(path, length=30, file_only=False, path_only=False): path = f".. {path[string_len_cjk(path) - length:]}" return path -def shorten_string(string, width, placeholder='..'): - """ - shortens a string to width and attached placeholder - - Args: - string(str): string to shorten - width (int): length th shorten to - placeholder (str, optional): optional string for removed shortend part. Defaults to '..'. - - Returns: - string: shortened string - """ - # get the length with double byte charactes - string_length_cjk = string_len_cjk(str(string)) - # if double byte width is too big - if string_length_cjk > width: - # set current length and output string - cur_len = 0 - out_string = '' - # loop through each character - for char in str(string): - # set the current length if we add the character - cur_len += 2 if unicodedata.east_asian_width(char) in "WF" else 1 - # if the new length is smaller than the output length to shorten too add the char - if cur_len <= (width - len(placeholder)): - out_string += char - # return string with new width and placeholder - return f"{out_string}{placeholder}" - else: - return str(string) - -def string_len_cjk(string): - """ - because len on string in python counts characters but we need the width - count for formatting, we count two for a double byte characters - - Args: - string (string): string to check length - - Returns: - int: length including double count for double width characters - """ - # return string len including double count for double width characters - return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string) - -def is_latin(uchr): - """ - checks via the unciode class if a character is LATIN char based - - from - https://stackoverflow.com/a/3308844/7811993 - - Args: - uchr (str): _description_ - - Returns: - str: flagged LATIN or not char - """ - try: - # if we found in the dictionary return - return cache_latin_letters[uchr] - except KeyError: - # find LATIN in uncide type returned and set in dictionary for this character - return cache_latin_letters.setdefault(uchr, 'LATIN' in unicodedata.name(uchr)) - -def only_latin_chars(unistr): - """ - chekcs if a string is based on LATIN chars. No for any CJK, Cyrillic, Hebrew, etc - - from: - https://stackoverflow.com/a/3308844/7811993 - - Args: - unistr (str): string - - Returns: - bool: True/False for if string is LATIN char based - """ - return all(is_latin(uchr) for uchr in unistr if uchr.isalpha()) - # def print_header(header, lines=0, header_line=0): # """ # prints header line and header seperator line @@ -713,23 +253,6 @@ class ReadOnlyOutput: )) self.lines += 1 -def format_len(string, length): - """ - in case of CJK characters we need to adjust the format length dynamically - calculate correct length based on string given - - Args: - string (str): string - length (int): format length - - Returns: - int: adjusted format legnth - """ - # returns length udpated for string with double byte characters - # get string length normal, get string length including double byte characters - # then subtract that from the original length - return length - (string_len_cjk(string) - len(string)) - def file_sort_number(file): """ gets the BK number for sorting in the file list diff --git a/bin/utils/__init__.py b/bin/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bin/utils/long_lat.py b/bin/utils/long_lat.py new file mode 100644 index 0000000..31791d6 --- /dev/null +++ b/bin/utils/long_lat.py @@ -0,0 +1,140 @@ +""" +latitude/longitude functions +""" + +import re +from math import radians, sin, cos, atan2, sqrt + +def convert_lat_long_to_dms(lat_long, is_latitude=False, is_longitude=False): + """ + convert the LR format of N.N to the Exif GPS format + + Args: + lat_long(str): latLong in (-)N.N format + is_latitude (bool, optional): flag, else we can't set North/Sout. Defaults to False. + is_longitude (bool, optional): flag, else we can't set West/East. Defaults to False. + + Returns: + string: Deg,Min.Sec(NESW) format + """ + # minus part before . and then multiply rest by 60 + degree = int(abs(lat_long)) + minutes = round((float(abs(lat_long)) - int(abs(lat_long))) * 60, 10) + if is_latitude is True: + direction = 'S' if int(lat_long) < 0 else 'N' + elif is_longitude is True: + direction = 'W' if int(lat_long) < 0 else 'E' + else: + direction = '(INVALID)' + return f"{degree},{minutes}{direction}" + +def convert_lat_to_dms(lat_long): + """ + wrapper functions for Long/Lat calls: latitude + + Args: + lat_long(str): latLong in (-)N.N format + + Returns: + string: Deg,Min.Sec(NESW) format + """ + return convert_lat_long_to_dms(lat_long, is_latitude=True) + + +# wrapper for Long/Lat call: longitute +def convert_long_to_dms(lat_long): + """ + wrapper for Long/Lat call: longitute + + Args: + lat_long(str): latLong in (-)N.N format + + Returns: + string: Deg,Min.Sec(NESW) format + """ + return convert_lat_long_to_dms(lat_long, is_longitude=True) + +def long_lat_reg(longitude, latitude): + """ + converts the XMP/EXIF formatted GPS Long/Lat coordinates + from the , to the normal float + number used in google/lr internal + + Args: + longitude(str): n,n.nNSEW format + latitude(str): n,n.nNSEW format + + Returns: + dictionary: dict with converted lat/long + """ + # regex + latlong_re = re.compile(r'^(\d+),(\d+\.\d+)([NESW]{1})$') + # dict for loop + lat_long = { + 'longitude': longitude, + 'latitude': latitude + } + # for element in lat_long: + for index, element in lat_long.items(): + # match if it is exif GPS format + _match = latlong_re.match(element) + if _match is not None: + # convert from Degree, Min.Sec into float format + lat_long[index] = float(_match.group(1)) + (float(_match.group(2)) / 60) + # if S or W => inverse to negative + if _match.group(3) == 'S' or _match.group(3) == 'W': + lat_long[index] *= -1 + return lat_long + +def convert_dms_to_lat(lat_long): + """ + rapper calls for DMS to Lat/Long: latitude + + Args: + lat_long(str): n,n.nNSEW format + + Returns: + dict: dict with converted lat/long + """ + return long_lat_reg('0,0.0N', lat_long)['latitude'] + +def convert_dms_to_long(lat_long): + """ + wrapper calls for DMS to Lat/Long: longitude + + Args: + lat_long(str): n,n.nNSEW format + + Returns: + dict: dict with converted lat/long + """ + return long_lat_reg(lat_long, '0,0.0N')['longitude'] + +def get_distance(from_longitude, from_latitude, to_longitude, to_latitude): + """ + calculates the difference between two coordinates + + Args: + from_longitude(str): from longitude + from_latitude(str): from latitude + to_longitude(str): to longitude + to_latitude(str): to latitude + + Returns: + float: distance in meters + """ + # earth radius in meters + earth_radius = 6378137.0 + # convert all from radians with pre convert DMS to long and to float + from_longitude = radians(float(convert_dms_to_long(from_longitude))) + from_latitude = radians(float(convert_dms_to_lat(from_latitude))) + to_longitude = radians(float(convert_dms_to_long(to_longitude))) + to_latitude = radians(float(convert_dms_to_lat(to_latitude))) + # distance from - to + distance_longitude = from_longitude - to_longitude + distance_latitude = from_latitude - to_latitude + # main distance calculation + distance = sin(distance_latitude / 2)**2 + cos(from_latitude) * \ + cos(to_latitude) * sin(distance_longitude / 2)**2 + distance = 2 * atan2(sqrt(distance), sqrt(1 - distance)) + return earth_radius * distance diff --git a/bin/utils/reverse_geolocate.py b/bin/utils/reverse_geolocate.py new file mode 100644 index 0000000..06af211 --- /dev/null +++ b/bin/utils/reverse_geolocate.py @@ -0,0 +1,257 @@ +""" +reverse geolacte functions +""" + +import requests +import re +from long_lat import long_lat_reg +from string_helpers import only_latin_chars + +def reverse_geolocate(longitude, latitude, map_type, args): + """ + wrapper to call to either the google or openstreetmap + + Args: + longitude (float): latitude + latitude (float): longitue + map_type(str): map search target (google or openstreetmap) + args (_type_): _description_ + + Returns: + _type_: dict with all data (see below) + """ + # clean up long/lat + # they are stored with N/S/E/W if they come from an XMP + # format: Deg,Min.Sec[NSEW] + # NOTE: lat is N/S, long is E/W + # detect and convert + lat_long = long_lat_reg(longitude=longitude, latitude=latitude) + # which service to use + if map_type == 'google': + return reverse_geolocate_google(lat_long['longitude'], lat_long['latitude'], args) + elif map_type == 'openstreetmap': + return reverse_geolocate_open_street_map(lat_long['longitude'], lat_long['latitude'], args) + else: + return { + 'Country': '', + 'status': 'ERROR', + 'error': 'Map type not valid' + } + +def reverse_geolocate_init(longitude, latitude): + """ + inits the dictionary for return, and checks the lat/long on valid + returns geolocation dict with status = 'ERROR' if an error occurded + + Args: + longitude (float): longitude + latitude (float): latitude + + Returns: + _type_: empty geolocation dictionary, or error flag if lat/long is not valid + """ + # basic dict format + geolocation = { + 'CountryCode': '', + 'Country': '', + 'State': '', + 'City': '', + 'Location': '', + # below for error reports + 'status': '', + 'error_message': '' + } + # error if long/lat is not valid + latlong_re = re.compile(r'^\d+\.\d+$') + if not latlong_re.match(str(longitude)) or not latlong_re.match(str(latitude)): + geolocation['status'] = 'ERROR' + geolocation['error_message'] = f"Latitude {latitude} or Longitude {longitude} are not valid" + return geolocation + +def reverse_geolocate_open_street_map(longitude, latitude, args): + """ + OpenStreetMap reverse lookcation lookup + + sample: + https://nominatim.openstreetmap.org/reverse.php?format=jsonv2& + at=&lon=&zoom=21&accept-languge=en-US,en& + + Args: + longitude (float): longitude + latitude (float): latitude + args (_type_): _description_ + + Returns: + dictionary: dict with locaiton, city, state, country, country code + if not fillable, entry is empty + """ + # init + geolocation = reverse_geolocate_init(longitude, latitude) + if geolocation['status'] == 'ERROR': + return geolocation + # query format + query_format = 'jsonv2' + # language to return (english) + language = 'en-US,en' + # build query + base = 'https://nominatim.openstreetmap.org/reverse.php?' + # parameters + payload = { + 'format': query_format, + 'lat': latitude, + 'lon': longitude, + 'accept-language': language + } + # if we have an email, add it here + if args.email: + payload['email'] = args.email + url = f"{base}" + # timeout in seconds + timeout = 60 + response = requests.get(url, params=payload, timeout=timeout) + # debug output + if args.debug: + print(f"OpenStreetMap search for Lat: {latitude}, Long: {longitude}") + if args.debug and args.verbose >= 1: + print(f"OpenStreetMap response: {response} => JSON: {response.json()}") + # type map + # Country to Location and for each in order of priority + type_map = { + 'CountryCode': ['country_code'], + 'Country': ['country'], + 'State': ['state'], + 'City': ['city', 'city_district', 'state_district'], + 'Location': ['county', 'town', 'suburb', 'hamlet', 'neighbourhood', 'road'] + } + # if not error + if 'error' not in response.json(): + # get address block + addr = response.json()['address'] + # loop for locations + for loc_index, sub_index in type_map.items(): + for index in sub_index: + if index in addr and not geolocation[loc_index]: + geolocation[loc_index] = addr[index] + # for loc_index in type_map: + # for index in type_map[loc_index]: + # if index in addr and not geolocation[loc_index]: + # geolocation[loc_index] = addr[index] + else: + geolocation['status'] = 'ERROR' + geolocation['error_message'] = response.json()['error'] + print(f"Error in request: {geolocation['error']}") + # return + return geolocation + +def reverse_geolocate_google(longitude, latitude, args): + """ + Google Maps reverse location lookup + + sample: + http://maps.googleapis.com/maps/api/geocode/json?latlng=,&language= + &sensor=false&key= + + Args: + longitude (float): longitude + latitude (float): latitude + args (_type_): _description_ + + Returns: + dictionary: dict with location, city, state, country, country code + if not fillable, entry is empty + """ + # init + geolocation = reverse_geolocate_init(longitude, latitude) + temp_geolocation = geolocation.copy() + if geolocation['status'] == 'ERROR': + return geolocation + # sensor (why?) + sensor = 'false' + # language, so we get ascii en back + language = 'en' + # request to google + # if a google api key is used, the request has to be via https + protocol = 'https://' if args.google_api_key else 'http://' + base = "maps.googleapis.com/maps/api/geocode/json?" + # build the base params + payload = { + 'latlng': f"{latitude},{longitude}", + 'language': language, + 'sensor': sensor + } + # if we have a google api key, add it here + if args.google_api_key: + payload['key'] = args.google_api_key + # build the full url and send it to google + url = f"{protocol}{base}" + # timeout in seconds + timeout = 60 + response = requests.get(url, params=payload, timeout=timeout) + # debug output + if args.debug: + print(f"Google search for Lat: {latitude}, Long: {longitude} with {response.url}") + if args.debug and args.verbose >= 1: + print(f"Google response: {response} => JSON: {response.json()}") + # type map + # For automated return of correct data into set to return + type_map = { + 'CountryCode': ['country'], + 'Country': ['country'], + 'State': ['administrative_area_level_1', 'administrative_area_level_2'], + 'City': ['locality', 'administrative_area_level_3'], + 'Location': ['sublocality_level_1', 'sublocality_level_2', 'route'], + } + # print("Error: {}".format(response.json()['status'])) + if response.json()['status'] == 'OK': + # first entry for type = premise + for entry in response.json()['results']: + for sub_entry in entry: + if sub_entry == 'types' and ( + 'premise' in entry[sub_entry] or + 'route' in entry[sub_entry] or + 'street_address' in entry[sub_entry] or + 'sublocality' in entry[sub_entry] + ): + # print("Entry {}: {}".format(sub_entry, entry[sub_entry])) + # print("Address {}".format(entry['address_components'])) + # type + # -> country, + # -> administrative_area (1, 2), + # -> locality, + # -> sublocality (_level_1 or 2 first found, then route) + # so we get the data in the correct order + # for loc_index in type_map: + # for index in type_map[loc_index]: + for loc_index, sub_index in type_map.items(): + for index in sub_index: + # this is an array, so we need to loop through each + for addr in entry['address_components']: + # in types check that index is in there + # and the location is not yet set + # also check that entry is in LATIN based + # NOTE: fallback if all are non LATIN? + if index in addr['types'] and not geolocation[loc_index]: + # for country code we need to use short name, + # else we use long name + if loc_index == 'CountryCode': + if only_latin_chars(addr['short_name']): + geolocation[loc_index] = addr['short_name'] + elif not temp_geolocation[loc_index]: + temp_geolocation[loc_index] = addr['short_name'] + else: + if only_latin_chars(addr['long_name']): + geolocation[loc_index] = addr['long_name'] + elif not temp_geolocation[loc_index]: + temp_geolocation[loc_index] = addr['long_name'] + # check that all in geoloaction are filled and if not fille from temp_geolocation dictionary + for loc_index in type_map: + if not geolocation[loc_index] and temp_geolocation[loc_index]: + geolocation[loc_index] = temp_geolocation[loc_index] + # write OK status + geolocation['status'] = response.json()['status'] + else: + geolocation['error_message'] = response.json()['error_message'] + geolocation['status'] = response.json()['status'] + print(f"Error in request: {geolocation['status']} {geolocation['error_message']}") + # return + return geolocation diff --git a/bin/utils/string_helpers.py b/bin/utils/string_helpers.py new file mode 100644 index 0000000..6982ef9 --- /dev/null +++ b/bin/utils/string_helpers.py @@ -0,0 +1,106 @@ +""" +various string helpers1 +""" + +import unicodedata + +# this is for looking up if string is non latin letters +# this is used by isLatin and onlyLatinChars +cache_latin_letters = {} + +def shorten_string(string, width, placeholder='..'): + """ + shortens a string to width and attached placeholder + + Args: + string(str): string to shorten + width (int): length th shorten to + placeholder (str, optional): optional string for removed shortend part. Defaults to '..'. + + Returns: + string: shortened string + """ + # get the length with double byte charactes + string_length_cjk = string_len_cjk(str(string)) + # if double byte width is too big + if string_length_cjk > width: + # set current length and output string + cur_len = 0 + out_string = '' + # loop through each character + for char in str(string): + # set the current length if we add the character + cur_len += 2 if unicodedata.east_asian_width(char) in "WF" else 1 + # if the new length is smaller than the output length to shorten too add the char + if cur_len <= (width - len(placeholder)): + out_string += char + # return string with new width and placeholder + return f"{out_string}{placeholder}" + else: + return str(string) + +def string_len_cjk(string): + """ + because len on string in python counts characters but we need the width + count for formatting, we count two for a double byte characters + + Args: + string (string): string to check length + + Returns: + int: length including double count for double width characters + """ + # return string len including double count for double width characters + return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string) + +def is_latin(uchr): + """ + checks via the unciode class if a character is LATIN char based + + from + https://stackoverflow.com/a/3308844/7811993 + + Args: + uchr (str): _description_ + + Returns: + str: flagged LATIN or not char + """ + try: + # if we found in the dictionary return + return cache_latin_letters[uchr] + except KeyError: + # find LATIN in uncide type returned and set in dictionary for this character + return cache_latin_letters.setdefault(uchr, 'LATIN' in unicodedata.name(uchr)) + +def only_latin_chars(unistr): + """ + chekcs if a string is based on LATIN chars. No for any CJK, Cyrillic, Hebrew, etc + + from: + https://stackoverflow.com/a/3308844/7811993 + + Args: + unistr (str): string + + Returns: + bool: True/False for if string is LATIN char based + """ + return all(is_latin(uchr) for uchr in unistr if uchr.isalpha()) + +def format_len(string, length): + """ + in case of CJK characters we need to adjust the format length dynamically + calculate correct length based on string given + + Args: + string (str): string + length (int): format length + + Returns: + int: adjusted format legnth + """ + # returns length udpated for string with double byte characters + # get string length normal, get string length including double byte characters + # then subtract that from the original length + return length - (string_len_cjk(string) - len(string))