Fix non-Latin-based character return in Google reverse geo lookup
On some occasions Google might return non-Latin-based character strings for reverse lookups. Fixed by: * adding language=en to force English as the primary return language * checking each returned set and only using entries that consist of Latin-based characters
This commit is contained in:
@@ -28,6 +28,10 @@ from math import ceil
|
|||||||
# FUNCTIONS
|
# FUNCTIONS
|
||||||
##############################################################
|
##############################################################
|
||||||
|
|
||||||
|
# this is for looking up whether a character is a Latin letter
|
||||||
|
# this is used by isLatin and onlyLatinChars
|
||||||
|
cache_latin_letters = {}
|
||||||
|
|
||||||
|
|
||||||
# ARGPARSE HELPERS
|
# ARGPARSE HELPERS
|
||||||
|
|
||||||
@@ -182,7 +186,7 @@ def reverseGeolocateOpenStreetMap(longitude, latitude):
|
|||||||
# RETURN: Google Maps reverse location lookup
|
# RETURN: Google Maps reverse location lookup
|
||||||
# dict with location, city, state, country, country code
|
# dict with location, city, state, country, country code
|
||||||
# if not fillable, entry is empty
|
# if not fillable, entry is empty
|
||||||
# SAMPLE: http://maps.googleapis.com/maps/api/geocode/json?latlng=<latitude>,<longitude>&sensor=false&key=<api key>
|
# SAMPLE: http://maps.googleapis.com/maps/api/geocode/json?latlng=<latitude>,<longitude>&language=<lang>&sensor=false&key=<api key>
|
||||||
def reverseGeolocateGoogle(longitude, latitude):
|
def reverseGeolocateGoogle(longitude, latitude):
|
||||||
# init
|
# init
|
||||||
geolocation = reverseGeolocateInit(longitude, latitude)
|
geolocation = reverseGeolocateInit(longitude, latitude)
|
||||||
@@ -190,6 +194,8 @@ def reverseGeolocateGoogle(longitude, latitude):
|
|||||||
return geolocation
|
return geolocation
|
||||||
# sensor (why?)
|
# sensor (why?)
|
||||||
sensor = 'false'
|
sensor = 'false'
|
||||||
|
# language, so we get English (Latin character) results back where possible
|
||||||
|
language = 'en'
|
||||||
# request to google
|
# request to google
|
||||||
# if a google api key is used, the request has to be via https
|
# if a google api key is used, the request has to be via https
|
||||||
protocol = 'https://' if args.google_api_key else 'http://'
|
protocol = 'https://' if args.google_api_key else 'http://'
|
||||||
@@ -197,6 +203,7 @@ def reverseGeolocateGoogle(longitude, latitude):
|
|||||||
# build the base params
|
# build the base params
|
||||||
payload = {
|
payload = {
|
||||||
'latlng': '{lat},{lon}'.format(lon=longitude, lat=latitude),
|
'latlng': '{lat},{lon}'.format(lon=longitude, lat=latitude),
|
||||||
|
'language': language,
|
||||||
'sensor': sensor
|
'sensor': sensor
|
||||||
}
|
}
|
||||||
# if we have a google api key, add it here
|
# if we have a google api key, add it here
|
||||||
@@ -223,9 +230,6 @@ def reverseGeolocateGoogle(longitude, latitude):
|
|||||||
if response.json()['status'] == 'OK':
|
if response.json()['status'] == 'OK':
|
||||||
# first entry for type = premise
|
# first entry for type = premise
|
||||||
for entry in response.json()['results']:
|
for entry in response.json()['results']:
|
||||||
# check here that in geometry the location type is "ROOFTOP" or "GEOMETRIC_CENTER"
|
|
||||||
if entry['geometry']['location_type'] == 'ROOFTOP' or entry['geometry']['location_type'] == 'GEOMETRIC_CENTER':
|
|
||||||
print("OK for {}".format(entry['geometry']['location_type']))
|
|
||||||
for sub_entry in entry:
|
for sub_entry in entry:
|
||||||
if sub_entry == 'types' and (
|
if sub_entry == 'types' and (
|
||||||
'premise' in entry[sub_entry] or
|
'premise' in entry[sub_entry] or
|
||||||
@@ -250,9 +254,11 @@ def reverseGeolocateGoogle(longitude, latitude):
|
|||||||
if index in addr['types'] and not geolocation[loc_index]:
|
if index in addr['types'] and not geolocation[loc_index]:
|
||||||
# for country code we need to use short name, else we use long name
|
# for country code we need to use short name, else we use long name
|
||||||
if loc_index == 'CountryCode':
|
if loc_index == 'CountryCode':
|
||||||
geolocation[loc_index] = addr['short_name']
|
if onlyLatinChars(addr['short_name']):
|
||||||
|
geolocation[loc_index] = addr['short_name']
|
||||||
else:
|
else:
|
||||||
geolocation[loc_index] = addr['long_name']
|
if onlyLatinChars(addr['long_name']):
|
||||||
|
geolocation[loc_index] = addr['long_name']
|
||||||
# write OK status
|
# write OK status
|
||||||
geolocation['status'] = response.json()['status']
|
geolocation['status'] = response.json()['status']
|
||||||
else:
|
else:
|
||||||
@@ -414,6 +420,29 @@ def stringLenCJK(string):
|
|||||||
return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string)
|
return sum(1 + (unicodedata.east_asian_width(c) in "WF") for c in string)
|
||||||
|
|
||||||
|
|
||||||
|
# FROM: https://stackoverflow.com/a/3308844/7811993
|
||||||
|
# METHOD: isLatin
|
||||||
|
# PARAMS: character
|
||||||
|
# RETURN: True if the character is LATIN based, False otherwise
|
||||||
|
# DESC : checks via the Unicode character name if a character is LATIN char based
|
||||||
|
def isLatin(uchr):
|
||||||
|
try:
|
||||||
|
# if we found in the dictionary return
|
||||||
|
return cache_latin_letters[uchr]
|
||||||
|
except KeyError:
|
||||||
|
# find LATIN in the Unicode name returned and set in dictionary for this character
|
||||||
|
return cache_latin_letters.setdefault(uchr, 'LATIN' in unicodedata.name(uchr))
|
||||||
|
|
||||||
|
|
||||||
|
# FROM: https://stackoverflow.com/a/3308844/7811993
|
||||||
|
# METHOD: onlyLatinChars
|
||||||
|
# PARAMS: string
|
||||||
|
# RETURN: True/False for if string is LATIN char based
|
||||||
|
# DESC : checks if a string is based on LATIN chars. False for any CJK, Cyrillic, Hebrew, etc
|
||||||
|
def onlyLatinChars(unistr):
|
||||||
|
return all(isLatin(uchr) for uchr in unistr if uchr.isalpha())
|
||||||
|
|
||||||
|
|
||||||
# METHOD: printHeader
|
# METHOD: printHeader
|
||||||
# PARAMS: header string, line counter, print header counter trigger
|
# PARAMS: header string, line counter, print header counter trigger
|
||||||
# RETURN: line counter +1
|
# RETURN: line counter +1
|
||||||
|
|||||||
Reference in New Issue
Block a user