Fix non Latin based character return in google reverse geo lookup

On some occasions Google might return strings containing non-Latin
characters for reverse lookups.
Fixed with:
* add language=en to the request to force English as the primary return language
* check each returned entry and only use values that consist of Latin based
characters
This commit is contained in:
2018-05-07 12:37:36 +09:00
parent db4298b141
commit 3fd6282907

View File

@@ -28,6 +28,10 @@ from math import ceil
# FUNCTIONS # FUNCTIONS
############################################################## ##############################################################
# cache of per-character lookup results: character -> True/False (is Latin based)
# this is used by isLatin and onlyLatinChars
cache_latin_letters = {}
# ARGPARSE HELPERS # ARGPARSE HELPERS
@@ -182,7 +186,7 @@ def reverseGeolocateOpenStreetMap(longitude, latitude):
# RETURN: Google Maps reverse location lookup # RETURN: Google Maps reverse location lookup
# dict with location, city, state, country, country code # dict with location, city, state, country, country code
# if not fillable, entry is empty # if not fillable, entry is empty
# SAMPLE: http://maps.googleapis.com/maps/api/geocode/json?latlng=<latitude>,<longitude>&sensor=false&key=<api key> # SAMPLE: http://maps.googleapis.com/maps/api/geocode/json?latlng=<latitude>,<longitude>&language=<lang>&sensor=false&key=<api key>
def reverseGeolocateGoogle(longitude, latitude): def reverseGeolocateGoogle(longitude, latitude):
# init # init
geolocation = reverseGeolocateInit(longitude, latitude) geolocation = reverseGeolocateInit(longitude, latitude)
@@ -190,6 +194,8 @@ def reverseGeolocateGoogle(longitude, latitude):
return geolocation return geolocation
# sensor (why?) # sensor (why?)
sensor = 'false' sensor = 'false'
# language, so we get ascii en back
language = 'en'
# request to google # request to google
# if a google api key is used, the request has to be via https # if a google api key is used, the request has to be via https
protocol = 'https://' if args.google_api_key else 'http://' protocol = 'https://' if args.google_api_key else 'http://'
@@ -197,6 +203,7 @@ def reverseGeolocateGoogle(longitude, latitude):
# build the base params # build the base params
payload = { payload = {
'latlng': '{lat},{lon}'.format(lon=longitude, lat=latitude), 'latlng': '{lat},{lon}'.format(lon=longitude, lat=latitude),
'language': language,
'sensor': sensor 'sensor': sensor
} }
# if we have a google api key, add it here # if we have a google api key, add it here
@@ -223,9 +230,6 @@ def reverseGeolocateGoogle(longitude, latitude):
if response.json()['status'] == 'OK': if response.json()['status'] == 'OK':
# first entry for type = premise # first entry for type = premise
for entry in response.json()['results']: for entry in response.json()['results']:
# check here that in geometry the location type is "ROOFTOP" or "GEOMETRIC_CENTER"
if entry['geometry']['location_type'] == 'ROOFTOP' or entry['geometry']['location_type'] == 'GEOMETRIC_CENTER':
print("OK for {}".format(entry['geometry']['location_type']))
for sub_entry in entry: for sub_entry in entry:
if sub_entry == 'types' and ( if sub_entry == 'types' and (
'premise' in entry[sub_entry] or 'premise' in entry[sub_entry] or
@@ -250,9 +254,11 @@ def reverseGeolocateGoogle(longitude, latitude):
if index in addr['types'] and not geolocation[loc_index]: if index in addr['types'] and not geolocation[loc_index]:
# for country code we need to use short name, else we use long name # for country code we need to use short name, else we use long name
if loc_index == 'CountryCode': if loc_index == 'CountryCode':
geolocation[loc_index] = addr['short_name'] if onlyLatinChars(addr['short_name']):
geolocation[loc_index] = addr['short_name']
else: else:
geolocation[loc_index] = addr['long_name'] if onlyLatinChars(addr['long_name']):
geolocation[loc_index] = addr['long_name']
# write OK status # write OK status
geolocation['status'] = response.json()['status'] geolocation['status'] = response.json()['status']
else: else:
@@ -414,6 +420,29 @@ def stringLenCJK(string):
return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string) return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string)
# FROM: https://stackoverflow.com/a/3308844/7811993
# METHOD: isLatin
# PARAMS: character (single unicode character)
# RETURN: True if the character is LATIN based, else False
# DESC : checks via the unicode character name if a character is LATIN based
#        the result is stored per character in cache_latin_letters
#        characters without a unicode name (eg control characters) are
#        flagged as not LATIN instead of raising a ValueError
def isLatin(uchr):
    try:
        # if we already looked this character up, return the cached flag
        return cache_latin_letters[uchr]
    except KeyError:
        # find LATIN in the unicode name (eg "LATIN SMALL LETTER A") and store
        # the result in the dictionary for this character
        # the '' default avoids the ValueError unicodedata.name raises
        # for characters that have no name
        return cache_latin_letters.setdefault(uchr, 'LATIN' in unicodedata.name(uchr, ''))
# FROM: https://stackoverflow.com/a/3308844/7811993
# METHOD: onlyLatinChars
# PARAMS: string
# RETURN: True/False for if string is LATIN char based
# DESC : checks if a string is based on LATIN chars. Only alphabetic
#        characters are tested (per str.isalpha); all other characters are
#        skipped. False for any CJK, Cyrillic, Hebrew, etc
def onlyLatinChars(unistr):
    for letter in unistr:
        # non alphabetic characters never disqualify a string
        if not letter.isalpha():
            continue
        # first alphabetic character that is not LATIN ends the check
        if not isLatin(letter):
            return False
    # no non LATIN alphabetic character found (also true for an empty string)
    return True
# METHOD: printHeader # METHOD: printHeader
# PARAMS: header string, line counter, print header counter trigger # PARAMS: header string, line counter, print header counter trigger
# RETURN: line counter +1 # RETURN: line counter +1