2 Commits
v1.0 ... v1.1.0

Author SHA1 Message Date
a0a023859a README update with new fuzzy option info
Also fix footer separator width
2018-05-18 15:43:48 +09:00
87f8ad4dae Fuzzy cache lookup flow added
Besides the exact GPS coordinate match, an optional fuzzy match can be
used via the -d/--fuzzy-cache argument.

The default value is set to 10m and can be set in the format number
and m/km. Eg 100m, 1km

During the run it calculates the distance from all previous cached
entries and if one falls inside the given fuzzy distance this lookup
data will be used and also stored in the direct GPS cache for further
direct GPS matches
2018-05-18 15:36:08 +09:00
2 changed files with 124 additions and 34 deletions

View File

@@ -26,8 +26,9 @@ reverse_geolocate.py [-h] -i
[-x [EXCLUDE XMP SOURCE FOLDER [EXCLUDE XMP SOURCE FOLDER ...]]]
[-l LIGHTROOM FOLDER] [-s]
[-f <overwrite, location, city, state, country, countrycode>]
[-g GOOGLE API KEY] [-o] [-e EMIL ADDRESS] [-w]
[-r] [-u] [-a] [-c] [-n] [-v] [--debug] [--test]
[-d [FUZZY DISTANCE]] [-g GOOGLE API KEY] [-o]
[-e EMIL ADDRESS] [-w] [-r] [-u] [-a] [-c] [-n]
[-v] [--debug] [--test]
### Arguments
@@ -38,6 +39,7 @@ Argument | Argument Value | Description
-l, --lightroom | Lightroom DB base folder | The folder where the .lrcat file is located. Optional, if this is set, LR values are read before any Google maps connection is done. Fills the Latitude and Longitude and the location names. Lightroom data never overwrites data already set in the XMP sidecar file. It is recommended to have Lightroom write the XMP sidecar file before this script is run
-s, --strict | | Do strict check for Lightroom files and include the path into the check
-f, --field | Keyword: overwrite, location, city, state, country, countrycode | In the default no data is overwritten if it is already set. With the 'overwrite' flag all data is set new from the Google Maps location data. Other arguments are each of the location fields and if set only this field will be set. This can be combined with the 'overwrite' flag to overwrite already set data
-d, --fuzzy-cache | distance | Allow fuzzy cache lookup with either default value of 10m or an override value in m or km
-n, --nobackup | | Do not create a backup of XMP sidecar file when it is changed
-o, --openstreetmap | | Use OpenStreetMap instead of the default google maps
-e, --email | email address | For OpenStreetMap with a large number of access
@@ -87,6 +89,12 @@ openstreetmapemail = <email>
if no -g or -e flag is given the keys are read from the config file. If the -g or -e parameter is given it will override the one found in the config file. A new parameter can be written to this config file with -w parameter.
### Cache lookups ###
If the same GPS coordinate is detected, no further maps API call is done. With the -d/--fuzzy-cache argument this can be extended to GPS coordinates that lie within a certain distance of each other. The default value is 10m and can be overridden by passing a value to the argument.
Can be used to force cache on GPS coordinates that are very close to each other but not exactly the same.
### Google data priority
Based on the JSON return data, the following fields are set in order. If one cannot be found for a target set, the next one below is used
@@ -123,16 +131,17 @@ order | type | target set
After the script is done the following overview will be printed
```
=======================================
XMP Files found : 57
Updated : 3
Skipped : 54
New GeoLocation from Map : 2
GeoLocation from Cache : 1
Failed reverse GeoLocate : 0
GeoLocaction from Lightroom : 1
No Lightroom data found : 46
More than one found in LR : 0
========================================
XMP Files found : 57
Updated : 3
Skipped : 54
New GeoLocation from Map : 2
GeoLocation from Cache : 1
GeoLocation from Fuzzy Cache : 0
Failed reverse GeoLocate : 0
GeoLocaction from Lightroom : 1
No Lightroom data found : 46
More than one found in LR : 0
```
If there are problems with getting data from the Google Maps API the complete error string will be printed
@@ -148,11 +157,11 @@ Also the files that could not be updated will be printed at the end of the run u
```
...
------------------------------
----------------------------------------
Files that failed to update:
Photos/2017/02/some_file.xmp
```
### Tested OS
This script has only been tested on macOS
This script has only been tested on macOS

View File

@@ -22,7 +22,7 @@ import re
# Note XMPFiles does not work with sidecar files, need to read via XMPMeta
from libxmp import XMPMeta, consts
from shutil import copyfile, get_terminal_size
from math import ceil
from math import ceil, radians, sin, cos, atan2, sqrt
##############################################################
# FUNCTIONS
@@ -70,6 +70,20 @@ class readable_dir(argparse.Action):
raise argparse.ArgumentTypeError("readable_dir:{0} is not a readable dir".format(prospective_dir))
# check distance values are valid
class distance_values(argparse.Action):
    """argparse action that validates a distance and stores it in meters.

    The value must be a whole number followed by an optional single
    whitespace character and the unit ``m`` or ``km`` (for example ``100m``
    or ``1km``). It is converted to an integer number of meters and stored
    on the namespace under ``self.dest``.

    Raises:
        argparse.ArgumentError: if the value does not match the expected
            format. argparse reports this as a regular usage error instead
            of letting an uncaught exception produce a traceback.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # raw string: \d and \s must reach the regex engine untouched;
        # fullmatch also rejects a trailing newline that '$' would let through
        m = re.fullmatch(r'(\d+)\s?(m|km)', values)
        if m is None:
            # ArgumentError (not ArgumentTypeError) is what argparse catches
            # for errors raised while an action runs
            raise argparse.ArgumentError(self, "distance_values:{0} is not a valid argument".format(values))
        # convert to int in meters
        meters = int(m.group(1))
        if m.group(2) == 'km':
            meters *= 1000
        setattr(namespace, self.dest, meters)
# MAIN FUNCTIONS
# METHOD: reverseGeolocate
@@ -275,7 +289,6 @@ def reverseGeolocateGoogle(longitude, latitude):
geolocation['error_message'] = response.json()['error_message']
geolocation['status'] = response.json()['status']
print("Error in request: {} {}".format(geolocation['status'], geolocation['error_message']))
# return
return geolocation
@@ -343,6 +356,27 @@ def convertDMStoLong(lat_long):
return longLatReg(lat_long, '0,0.0N')['longitude']
# METHOD: getDistance
# PARAMS: from long/lat, to long/lat
#         (each value is passed through convertDMStoLong/convertDMStoLat and float() before use)
# RETURN: distance in meters
# DESC : calculates the great-circle distance between two coordinates
#        with the haversine formula
def getDistance(from_longitude, from_latitude, to_longitude, to_latitude):
    # earth radius in meters
    # NOTE(review): 6378137.0 looks like the equatorial radius rather than the mean radius, confirm this is intended
    earth_radius = 6378137.0
    # convert all to radians, with pre convert DMS to long/lat and to float
    from_longitude = radians(float(convertDMStoLong(from_longitude)))
    from_latitude = radians(float(convertDMStoLat(from_latitude)))
    to_longitude = radians(float(convertDMStoLong(to_longitude)))
    to_latitude = radians(float(convertDMStoLat(to_latitude)))
    # difference from - to
    delta_longitude = from_longitude - to_longitude
    delta_latitude = from_latitude - to_latitude
    # haversine term of the central angle
    haversine = sin(delta_latitude / 2)**2 + cos(from_latitude) * cos(to_latitude) * sin(delta_longitude / 2)**2
    # floating point rounding can push the term marginally above 1 for
    # (near) antipodal points, which would make sqrt(1 - haversine) raise
    # ValueError; only the upper bound is clamped so NaN still propagates
    if haversine > 1.0:
        haversine = 1.0
    central_angle = 2 * atan2(sqrt(haversine), sqrt(1 - haversine))
    return earth_radius * central_angle
# METHOD: checkOverwrite
# PARAMS: data: value field, key: XMP key, field_controls: array from args
# RETURN: true/false
@@ -660,6 +694,18 @@ parser.add_argument('-f', '--field',
'If with overwrite the field will be overwritten if already set, else it will be always skipped.'
)
# fuzzy cache lookup: optional distance, validated and converted to meters
# by the distance_values action
parser.add_argument(
    '-d', '--fuzzy-cache',
    dest='fuzzy_distance',
    metavar='FUZZY DISTANCE',
    action=distance_values,
    type=str.lower,
    nargs='?',
    # used when the flag is given without a distance: default is 10m
    const='10m',
    help=(
        'Allow fuzzy distance cache lookup. Optional distance can be given, '
        'if not set default of 10m is used. '
        'Allowed argument is in the format of 12m or 12km'
    )
)
# Google Maps API key to overcome restrictions
parser.add_argument('-g', '--google',
dest='google_api_key',
@@ -753,11 +799,12 @@ if not args.unset_only:
args.unset_only = 0
if args.debug:
print("### ARGUMENT VARS: I: {incl}, X: {excl}, L: {lr}, F: {fc}, M: {osm}, G: {gp}, E: {em}, R: {read}, U: {us}, A: {adj}, C: {cmp}, N: {nbk}, W: {wrc}, V: {v}, D: {d}, T: {t}".format(
print("### ARGUMENT VARS: I: {incl}, X: {excl}, L: {lr}, F: {fc}, D: {fdist}, M: {osm}, G: {gp}, E: {em}, R: {read}, U: {us}, A: {adj}, C: {cmp}, N: {nbk}, W: {wrc}, V: {v}, D: {d}, T: {t}".format(
incl=args.xmp_sources,
excl=args.exclude_sources,
lr=args.lightroom_folder,
fc=args.field_controls,
fdist=args.fuzzy_distance,
osm=args.use_openstreetmap,
gp=args.google_api_key,
em=args.email,
@@ -887,6 +934,7 @@ count = {
'read': 0,
'map': 0,
'cache': 0,
'fuzzy_cache': 0,
'lightroom': 0,
'changed': 0,
'failed': 0,
@@ -1145,14 +1193,46 @@ for xmp_file in work_files:
has_unset = True
if has_unset:
# check if lat/long is in cache
cache_key = '{}.#.{}'.format(data_set['GPSLatitude'], data_set['GPSLongitude'])
cache_key = '{}#{}'.format(data_set['GPSLongitude'], data_set['GPSLatitude'])
if args.debug:
print("### *** CACHE: {}: {}".format(cache_key, 'NO' if cache_key not in data_cache else 'YES'))
# main cache check = identical
# second cache level check is on distance:
# default distance is 10m, can be set via flag
# check distance to previous cache entries (reverse newest to oldest) and match before we do google lookup
if cache_key not in data_cache:
# get location from maps (google or openstreetmap)
maps_location = reverseGeolocate(latitude=data_set['GPSLatitude'], longitude=data_set['GPSLongitude'], map_type=map_type)
# cache data with Lat/Long
data_cache[cache_key] = maps_location
has_fuzzy_cache = False
if args.fuzzy_distance:
shortest_distance = args.fuzzy_distance
best_match_latlong = ''
# check if we have fuzzy distance, if no valid found do maps lookup
for _cache_key in data_cache:
# split up cache key so we can use in the distance calc method
to_lat_long = _cache_key.split('#')
# get the distance based on current set + cached set
# print("Lookup f-long {} f-lat {} t-long {} t-lat {}".format(data_set['GPSLongitude'], data_set['GPSLatitude'], to_lat_long[0], to_lat_long[1]))
distance = getDistance(from_longitude=data_set['GPSLongitude'], from_latitude=data_set['GPSLatitude'], to_longitude=to_lat_long[0], to_latitude=to_lat_long[1])
if args.debug:
print("### **= FUZZY CACHE: => distance: {} (m), shortest: {}".format(distance, shortest_distance))
if distance <= shortest_distance:
# set new distance and keep current best matching location
shortest_distance = distance
best_match_latlong = _cache_key
has_fuzzy_cache = True
if args.debug:
print("### ***= FUZZY CACHE: YES => Best match: {}".format(best_match_latlong))
if not has_fuzzy_cache:
# get location from maps (google or openstreetmap)
maps_location = reverseGeolocate(latitude=data_set['GPSLatitude'], longitude=data_set['GPSLongitude'], map_type=map_type)
# cache data with Lat/Long
data_cache[cache_key] = maps_location
else:
maps_location = data_cache[best_match_latlong]
# cache this one, because the next one will match this one too
# we don't need to loop search again for the same fuzzy location
data_cache[cache_key] = maps_location
count['cache'] += 1
count['fuzzy_cache'] += 1
else:
# load location from cache
maps_location = data_cache[cache_key]
@@ -1216,23 +1296,24 @@ if use_lightroom:
lrdb.close()
# end stats only if we write
print("{}".format('=' * 39))
print("XMP Files found : {:9,}".format(count['all']))
print("{}".format('=' * 40))
print("XMP Files found : {:9,}".format(count['all']))
if args.read_only:
print("XMP Files listed : {:9,}".format(count['listed']))
print("XMP Files listed : {:9,}".format(count['listed']))
if not args.read_only:
print("Updated : {:9,}".format(count['changed']))
print("Skipped : {:9,}".format(count['skipped']))
print("New GeoLocation from Map : {:9,}".format(count['map']))
print("GeoLocation from Cache : {:9,}".format(count['cache']))
print("Failed reverse GeoLocate : {:9,}".format(count['failed']))
print("Updated : {:9,}".format(count['changed']))
print("Skipped : {:9,}".format(count['skipped']))
print("New GeoLocation from Map : {:9,}".format(count['map']))
print("GeoLocation from Cache : {:9,}".format(count['cache']))
print("GeoLocation from Fuzzy Cache : {:9,}".format(count['fuzzy_cache']))
print("Failed reverse GeoLocate : {:9,}".format(count['failed']))
if use_lightroom:
print("GeoLocaction from Lightroom : {:9,}".format(count['lightroom']))
print("No Lightroom data found : {:9,}".format(count['not_found']))
print("More than one found in LR : {:9,}".format(count['many_found']))
print("GeoLocaction from Lightroom : {:9,}".format(count['lightroom']))
print("No Lightroom data found : {:9,}".format(count['not_found']))
print("More than one found in LR : {:9,}".format(count['many_found']))
# if we have failed data
if len(failed_files) > 0:
print("{}".format('-' * 39))
print("{}".format('-' * 40))
print("Files that failed to update:")
print("{}".format(', '.join(failed_files)))