10 Commits
v1.0 ... master

Author SHA1 Message Date
65e352e8a4 Merge branch 'master' of github.com:gullevek/reverse_geolocate 2022-11-11 22:36:30 +09:00
38083d8a46 Add .venv folder to gitignore 2022-11-11 22:34:48 +09:00
ce676dea72 Changed indent level for parser arguments
They are all now on the same level indented by one step
2022-11-11 22:32:55 +09:00
8548cc1f0f Add simple debug print, add 'r' for all regex calls 2022-08-13 15:50:24 +09:00
63202b53f1 add -p for only list unset GPS long/lat entries 2020-10-20 23:59:32 +09:00
2836a40616 Add another City location lookup for google API 2020-10-20 22:33:46 +09:00
19a8c2b997 Fix for [Bug with exempi 2.5.0 #1]
With exempi 2.5.0 when the get_property was called on an unset one, it
crashed with "unkown error".

Fix with checking if property exists before actually getting the
property
2019-04-01 20:58:45 +09:00
217cd87feb Update output info if update is from cache
on write change the output message from only "UPDATED" to also show if
it was read from cache by "UPDATED FROM CACHE"
2018-06-11 23:50:32 +09:00
a0a023859a README update with new fuzzy option info
Also fix footer separator width
2018-05-18 15:43:48 +09:00
87f8ad4dae Fuzzy cache lookup flow added
Besides the exact GPS coordinate match, an optional fuzzy match can be
used via the -d/--fuzzy-cache argument.

The default value is set to 10m and can be set in the format number
and m/km. Eg 100m, 1km

During the run it calculates the distance from all previous cached
entries and if one falls inside the given fuzzy distance this lookup
data will be used and also stored in the direct GPS cache for further
direct GPS matches
2018-05-18 15:36:08 +09:00
3 changed files with 280 additions and 135 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.venv/

View File

@@ -26,8 +26,9 @@ reverse_geolocate.py [-h] -i
[-x [EXCLUDE XMP SOURCE FOLDER [EXCLUDE XMP SOURCE FOLDER ...]]]
[-l LIGHTROOM FOLDER] [-s]
[-f <overwrite, location, city, state, country, countrycode>]
[-g GOOGLE API KEY] [-o] [-e EMIL ADDRESS] [-w]
[-r] [-u] [-a] [-c] [-n] [-v] [--debug] [--test]
[-d [FUZZY DISTANCE]] [-g GOOGLE API KEY] [-o]
[-e EMIL ADDRESS] [-w] [-r] [-u] [-a] [-c] [-n]
[-v] [--debug] [--test]
### Arguments
@@ -38,6 +39,7 @@ Argument | Argument Value | Description
-l, --lightroom | Lightroom DB base folder | The folder where the .lrcat file is located. Optional, if this is set, LR values are read before any Google maps connection is done. Fills the Latitude and Longitude and the location names. Lightroom data never overwrites data already set in the XMP sidecar file. It is recommended to have Lightroom write the XMP sidecar file before this script is run
-s, --strict | | Do strict check for Lightroom files and include the path into the check
-f, --field | Keyword: overwrite, location, city, state, country, countrycode | In the default no data is overwritten if it is already set. With the 'overwrite' flag all data is set new from the Google Maps location data. Other arguments are each of the location fields and if set only this field will be set. This can be combined with the 'overwrite' flag to overwrite already set data
-d, --fuzzy-cache | distance | Allow fuzzy cache lookup with either default value of 10m or an override value in m or km
-n, --nobackup | | Do not create a backup of XMP sidecar file when it is changed
-o, --openstreetmap | | Use OpenStreetMap instead of the default google maps
-e, --email | email address | For OpenStreetMap with a large number of access
@@ -87,6 +89,12 @@ openstreetmapemail = <email>
if no -g or -e flag is given the keys are read from the config file. If the -g or -e parameter is given it will override the one found in the config file. A new parameter can be written to this config file with -w parameter.
### Cache lookups ###
If the same GPS coordinate is detected, no further maps API call is done. With the fuzzy-cache argument (-d) this can be extended to GPS coordinates that lie within a certain distance of a previously looked-up coordinate. The default distance is 10m and can be overridden by passing a value to the argument.
Can be used to force cache on GPS coordinates that are very close to each other but not exactly the same.
### Google data priority
Based on the JSON return data the following fields are set in order. If one cannot be found for a target set, the next one below is used
@@ -123,12 +131,13 @@ order | type | target set
After the script is done the following overview will be printed
```
=======================================
========================================
XMP Files found : 57
Updated : 3
Skipped : 54
New GeoLocation from Map : 2
GeoLocation from Cache : 1
GeoLocation from Fuzzy Cache : 0
Failed reverse GeoLocate : 0
GeoLocaction from Lightroom : 1
No Lightroom data found : 46
@@ -148,7 +157,7 @@ Also the files that could not be updated will be printed at the end of the run u
```
...
------------------------------
----------------------------------------
Files that failed to update:
Photos/2017/02/some_file.xmp
```

View File

@@ -22,7 +22,7 @@ import re
# Note XMPFiles does not work with sidecar files, need to read via XMPMeta
from libxmp import XMPMeta, consts
from shutil import copyfile, get_terminal_size
from math import ceil
from math import ceil, radians, sin, cos, atan2, sqrt
##############################################################
# FUNCTIONS
@@ -70,6 +70,20 @@ class readable_dir(argparse.Action):
raise argparse.ArgumentTypeError("readable_dir:{0} is not a readable dir".format(prospective_dir))
# check distance values are valid
# argparse action for the fuzzy distance argument:
# accepts "<number>m" or "<number>km" (one optional whitespace between number
# and unit) and stores the distance as an int in meters on the namespace
class distance_values(argparse.Action):
    # PARAMS: standard argparse.Action call signature, values is the raw
    #         argument string
    # RETURN: none, sets namespace.<dest> to the distance in meters
    # RAISES: argparse.ArgumentError if the string is not a valid distance
    def __call__(self, parser, namespace, values, option_string=None):
        m = re.match(r'^(\d+)\s?(m|km)$', values)
        if not m:
            # inside an Action argparse only catches ArgumentError and turns
            # it into the normal usage/error exit; ArgumentTypeError is only
            # handled for type= callables and would surface as a traceback
            raise argparse.ArgumentError(
                self,
                "distance_values:{0} is not a valid argument".format(values)
            )
        # convert to int in meters
        meters = int(m.group(1))
        if m.group(2) == 'km':
            meters *= 1000
        setattr(namespace, self.dest, meters)
# MAIN FUNCTIONS
# METHOD: reverseGeolocate
@@ -114,7 +128,7 @@ def reverseGeolocateInit(longitude, latitude):
'error_message': ''
}
# error if long/lat is not valid
latlong_re = re.compile('^\d+\.\d+$')
latlong_re = re.compile(r'^\d+\.\d+$')
if not latlong_re.match(str(longitude)) or not latlong_re.match(str(latitude)):
geolocation['status'] = 'ERROR'
geolocation['error_message'] = 'Latitude {} or Longitude {} are not valid'.format(latitude, longitude)
@@ -187,7 +201,7 @@ def reverseGeolocateOpenStreetMap(longitude, latitude):
# dict with location, city, state, country, country code
# if not fillable, entry is empty
# SAMPLE: http://maps.googleapis.com/maps/api/geocode/json?latlng=<latitude>,<longitude>&language=<lang>&sensor=false&key=<api key>
def reverseGeolocateGoogle(longitude, latitude):
def reverseGeolocateGoogle(longitude, latitude): # noqa: C901
# init
geolocation = reverseGeolocateInit(longitude, latitude)
temp_geolocation = geolocation.copy()
@@ -224,7 +238,7 @@ def reverseGeolocateGoogle(longitude, latitude):
'CountryCode': ['country'],
'Country': ['country'],
'State': ['administrative_area_level_1', 'administrative_area_level_2'],
'City': ['locality'],
'City': ['locality', 'administrative_area_level_3'],
'Location': ['sublocality_level_1', 'sublocality_level_2', 'route'],
}
# print("Error: {}".format(response.json()['status']))
@@ -275,7 +289,6 @@ def reverseGeolocateGoogle(longitude, latitude):
geolocation['error_message'] = response.json()['error_message']
geolocation['status'] = response.json()['status']
print("Error in request: {} {}".format(geolocation['status'], geolocation['error_message']))
# return
return geolocation
@@ -315,7 +328,7 @@ def convertLongToDMS(lat_long):
# number used in google/lr internal
def longLatReg(longitude, latitude):
# regex
latlong_re = re.compile('^(\d+),(\d+\.\d+)([NESW]{1})$')
latlong_re = re.compile(r'^(\d+),(\d+\.\d+)([NESW]{1})$')
# dict for loop
lat_long = {
'longitude': longitude,
@@ -343,6 +356,27 @@ def convertDMStoLong(lat_long):
return longLatReg(lat_long, '0,0.0N')['longitude']
# METHOD: getDistance
# PARAMS: from long/lat, to long/lat
#         each value is first passed through convertDMStoLong/convertDMStoLat,
#         so presumably given in the DMS string notation those helpers
#         accept (confirm against the helpers)
# RETURN: distance in meters
# DESC : calculates the great-circle distance between two coordinates
#        with the haversine formula
def getDistance(from_longitude, from_latitude, to_longitude, to_latitude):
    # earth radius in meters
    earth_radius = 6378137.0
    # convert all to radians, with pre convert from DMS to long/lat and to float
    from_longitude = radians(float(convertDMStoLong(from_longitude)))
    from_latitude = radians(float(convertDMStoLat(from_latitude)))
    to_longitude = radians(float(convertDMStoLong(to_longitude)))
    to_latitude = radians(float(convertDMStoLat(to_latitude)))
    # difference from - to
    delta_longitude = from_longitude - to_longitude
    delta_latitude = from_latitude - to_latitude
    # main distance calculation (haversine term)
    haversine = sin(delta_latitude / 2)**2 + cos(from_latitude) * cos(to_latitude) * sin(delta_longitude / 2)**2
    # float rounding can push the term a hair above 1 for (near) antipodal
    # points, sqrt(1 - haversine) would then fail with a math domain error
    haversine = min(1.0, haversine)
    # central angle between the two points in radians
    central_angle = 2 * atan2(sqrt(haversine), sqrt(1 - haversine))
    return earth_radius * central_angle
# METHOD: checkOverwrite
# PARAMS: data: value field, key: XMP key, field_controls: array from args
# RETURN: true/false
@@ -486,7 +520,7 @@ def formatLen(string, length):
# RETURN: number found in the BK string or 0 for none
# DESC : gets the BK number for sorting in the file list
def fileSortNumber(file):
m = re.match('.*\.BK\.(\d+)\.xmp$', file)
m = re.match(r'.*\.BK\.(\d+)\.xmp$', file)
return int(m.group(1)) if m is not None else 0
@@ -610,45 +644,55 @@ parser = argparse.ArgumentParser(
# xmp folder (or folders), or file (or files)
# note that the target directory or file needs to be writeable
parser.add_argument('-i', '--include-source',
parser.add_argument(
'-i',
'--include-source',
required=True,
nargs='*',
action=writable_dir_folder,
dest='xmp_sources',
metavar='XMP SOURCE FOLDER',
help='The source folder or folders with the XMP files that need reverse geo encoding to be set. Single XMP files can be given here'
)
)
# exclude folders
parser.add_argument('-x', '--exclude-source',
parser.add_argument(
'-x',
'--exclude-source',
nargs='*',
action=writable_dir_folder,
dest='exclude_sources',
metavar='EXCLUDE XMP SOURCE FOLDER',
help='Folders and files that will be excluded.'
)
)
# LR database (base folder)
# get .lrcat file in this folder
parser.add_argument('-l', '--lightroom',
parser.add_argument(
'-l',
'--lightroom',
# required=True,
action=readable_dir,
dest='lightroom_folder',
metavar='LIGHTROOM FOLDER',
help='Lightroom catalogue base folder'
)
)
# strict LR check with base path next to the file base name
parser.add_argument('-s', '--strict',
parser.add_argument(
'-s',
'--strict',
dest='lightroom_strict',
action='store_true',
help='Do strict check for Lightroom files including Path in query'
)
)
# set behaviour override
# FLAG: default: only set not filled
# other: overwrite all or overwrite if one is missing, overwrite specifc field (as defined below)
# fields: Location, City, State, Country, CountryCode
parser.add_argument('-f', '--field',
parser.add_argument(
'-f',
'--field',
action='append',
type=str.lower, # make it lowercase for check
choices=['overwrite', 'location', 'city', 'state', 'country', 'countrycode'],
@@ -658,77 +702,120 @@ parser.add_argument('-f', '--field',
'Overwrite (write all new), Location, City, State, Country, CountryCode. '\
'Multiple can be given for combination overwrite certain fields only or set only certain fields. '\
'If with overwrite the field will be overwritten if already set, else it will be always skipped.'
)
)
# fuzzy cache distance
# if -d is given without a value the const '10m' is used; the distance_values
# action validates the "<number>m" / "<number>km" string and stores the
# distance in meters in fuzzy_distance
parser.add_argument(
'-d',
'--fuzzy-cache',
type=str.lower,
action=distance_values,
nargs='?',
const='10m', # default is 10m
dest='fuzzy_distance',
metavar='FUZZY DISTANCE',
help='Allow fuzzy distance cache lookup. Optional distance can be given, '\
'if not set default of 10m is used. '\
'Allowed argument is in the format of 12m or 12km'
)
# Google Maps API key to overcome restrictions
parser.add_argument('-g', '--google',
parser.add_argument(
'-g',
'--google',
dest='google_api_key',
metavar='GOOGLE API KEY',
help='Set a Google API Maps key to overcome the default lookup limitations'
)
)
# use open street maps
parser.add_argument('-o', '--openstreetmap',
parser.add_argument(
'-o',
'--openstreetmap',
dest='use_openstreetmap',
action='store_true',
help='Use openstreetmap instead of Google'
)
)
# email of open street maps requests
parser.add_argument('-e', '--email',
parser.add_argument(
'-e',
'--email',
dest='email',
metavar='EMIL ADDRESS',
help='An email address for OpenStreetMap'
)
)
# write api/email settings to config file
parser.add_argument('-w', '--write-settings',
parser.add_argument(
'-w',
'--write-settings',
dest='config_write',
action='store_true',
help='Write Google API or OpenStreetMap email to config file'
)
)
# only read data and print on screen, do not write anything
parser.add_argument('-r', '--read-only',
parser.add_argument(
'-r',
'--read-only',
dest='read_only',
action='store_true',
help='Read current values from the XMP file only, do not read from LR or lookup any data and write back'
)
)
# only list unset ones
parser.add_argument('-u', '--unset-only',
parser.add_argument(
'-u',
'--unset-only',
dest='unset_only',
action='store_true',
help='Only list unset XMP files'
)
)
# only list unset GPS codes
parser.add_argument(
'-p',
'--unset-gps-only',
dest='unset_gps_only',
action='store_true',
help='Only list unset XMP files for GPS fields'
)
# don't try to do auto adjust in list view
parser.add_argument('-a', '--no-autoadjust',
parser.add_argument(
'-a',
'--no-autoadjust',
dest='no_autoadjust',
action='store_true',
help='Don\'t try to auto adjust columns'
)
)
# compact view, compresses columns down to a minimum
parser.add_argument('-c', '--compact',
parser.add_argument(
'-c',
'--compact',
dest='compact_view',
action='store_true',
help='Very compact list view'
)
)
# Do not create backup files
parser.add_argument('-n', '--nobackup',
parser.add_argument(
'-n',
'--nobackup',
dest='no_xmp_backup',
action='store_true',
help='Do not create a backup from the XMP file'
)
)
# verbose args for more detailed output
parser.add_argument('-v', '--verbose',
parser.add_argument(
'-v',
'--verbose',
action='count',
dest='verbose',
help='Set verbose output level'
)
)
# debug flag
parser.add_argument('--debug', action='store_true', dest='debug', help='Set detailed debug output')
@@ -753,11 +840,12 @@ if not args.unset_only:
args.unset_only = 0
if args.debug:
print("### ARGUMENT VARS: I: {incl}, X: {excl}, L: {lr}, F: {fc}, M: {osm}, G: {gp}, E: {em}, R: {read}, U: {us}, A: {adj}, C: {cmp}, N: {nbk}, W: {wrc}, V: {v}, D: {d}, T: {t}".format(
print("### ARGUMENT VARS: I: {incl}, X: {excl}, L: {lr}, F: {fc}, D: {fdist}, M: {osm}, G: {gp}, E: {em}, R: {read}, U: {us}, A: {adj}, C: {cmp}, N: {nbk}, W: {wrc}, V: {v}, D: {d}, T: {t}".format(
incl=args.xmp_sources,
excl=args.exclude_sources,
lr=args.lightroom_folder,
fc=args.field_controls,
fdist=args.fuzzy_distance,
osm=args.use_openstreetmap,
gp=args.google_api_key,
em=args.email,
@@ -790,7 +878,7 @@ if args.email and not args.use_openstreetmap:
error = True
# if email and not basic valid email (@ .)
if args.email:
if not re.match('^.+@.+\.[A-Za-z]{1,}$', args.email):
if not re.match(r'^.+@.+\.[A-Za-z]{1,}$', args.email):
print("Not a valid email for OpenStreetMap: {}".format(args.email))
error = True
# on error exit here
@@ -887,6 +975,7 @@ count = {
'read': 0,
'map': 0,
'cache': 0,
'fuzzy_cache': 0,
'lightroom': 0,
'changed': 0,
'failed': 0,
@@ -932,6 +1021,8 @@ if args.lightroom_folder:
cur = lrdb.cursor()
# flag that we have Lightroom DB
use_lightroom = True
if args.debug:
print("### USE Lightroom {}".format(use_lightroom))
# on error exit here
if error:
@@ -1040,7 +1131,7 @@ if args.read_only:
# ### MAIN WORK LOOP
# now we just loop through each file and work on them
for xmp_file in work_files:
for xmp_file in work_files: # noqa: C901
if not args.read_only:
print("---> {}: ".format(xmp_file), end='')
@@ -1054,12 +1145,16 @@ for xmp_file in work_files:
# read fields from the XMP file and store in hash
xmp.parse_from_str(strbuffer)
for xmp_field in xmp_fields:
# need to check if the property exists or the exempi routine will fail
if xmp.does_property_exist(xmp_fields[xmp_field], xmp_field):
data_set[xmp_field] = xmp.get_property(xmp_fields[xmp_field], xmp_field)
else:
data_set[xmp_field] = ''
if args.debug:
print("### => XMP: {}:{} => {}".format(xmp_fields[xmp_field], xmp_field, data_set[xmp_field]))
if args.read_only:
# view only if list all or if data is unset
if not args.unset_only or (args.unset_only and '' in data_set.values()):
if (not args.unset_only and not args.unset_gps_only) or (args.unset_only and '' in data_set.values()) or (args.unset_gps_only and (not data_set['GPSLatitude'] or not data_set['GPSLongitude'])):
# for read only we print out the data formatted
# headline check, do we need to print that
count['read'] = printHeader(header_line.format(page_no=page_no, page_all=page_all), count['read'], header_repeat)
@@ -1140,23 +1235,59 @@ for xmp_file in work_files:
# run this through the overwrite checker to get unset if we have a forced overwrite
has_unset = False
failed = False
from_cache = False
for loc in data_set_loc:
if checkOverwrite(data_set[loc], loc, args.field_controls):
has_unset = True
if has_unset:
# check if lat/long is in cache
cache_key = '{}.#.{}'.format(data_set['GPSLatitude'], data_set['GPSLongitude'])
cache_key = '{}#{}'.format(data_set['GPSLongitude'], data_set['GPSLatitude'])
if args.debug:
print("### *** CACHE: {}: {}".format(cache_key, 'NO' if cache_key not in data_cache else 'YES'))
# main cache check = identical
# second cache level check is on distance:
# default distance is 10m, can be set via flag
# check distance to previous cache entries (reverse newest to oldest) and match before we do google lookup
if cache_key not in data_cache:
has_fuzzy_cache = False
if args.fuzzy_distance:
shortest_distance = args.fuzzy_distance
best_match_latlong = ''
# check if we have fuzzy distance, if no valid found do maps lookup
for _cache_key in data_cache:
# split up cache key so we can use in the distance calc method
to_lat_long = _cache_key.split('#')
# get the distance based on current set + cached set
# print("Lookup f-long {} f-lat {} t-long {} t-lat {}".format(data_set['GPSLongitude'], data_set['GPSLatitude'], to_lat_long[0], to_lat_long[1]))
distance = getDistance(from_longitude=data_set['GPSLongitude'], from_latitude=data_set['GPSLatitude'], to_longitude=to_lat_long[0], to_latitude=to_lat_long[1])
if args.debug:
print("### **= FUZZY CACHE: => distance: {} (m), shortest: {}".format(distance, shortest_distance))
if distance <= shortest_distance:
# set new distance and keep current best matching location
shortest_distance = distance
best_match_latlong = _cache_key
has_fuzzy_cache = True
if args.debug:
print("### ***= FUZZY CACHE: YES => Best match: {}".format(best_match_latlong))
if not has_fuzzy_cache:
# get location from maps (google or openstreetmap)
maps_location = reverseGeolocate(latitude=data_set['GPSLatitude'], longitude=data_set['GPSLongitude'], map_type=map_type)
# cache data with Lat/Long
data_cache[cache_key] = maps_location
from_cache = False
else:
maps_location = data_cache[best_match_latlong]
# cache this one, because the next one will match this one too
# we don't need to loop search again for the same fuzzy location
data_cache[cache_key] = maps_location
count['cache'] += 1
count['fuzzy_cache'] += 1
from_cache = True
else:
# load location from cache
maps_location = data_cache[cache_key]
count['cache'] += 1
from_cache = True
# overwrite sets (note options check here)
if args.debug:
print("### Map Location ({}): {}".format(map_type, maps_location))
@@ -1199,7 +1330,10 @@ for xmp_file in work_files:
with open(xmp_file, 'w') as fptr:
fptr.write(xmp.serialize_to_str(omit_packet_wrapper=True))
else:
print("[TEST] Would write {} ".format(data_set, xmp_file), end='')
print("[TEST] Would write {} {}".format(data_set, xmp_file), end='')
if from_cache:
print("[UPDATED FROM CACHE]")
else:
print("[UPDATED]")
count['changed'] += 1
elif failed:
@@ -1216,7 +1350,7 @@ if use_lightroom:
lrdb.close()
# end stats only if we write
print("{}".format('=' * 39))
print("{}".format('=' * 40))
print("XMP Files found : {:9,}".format(count['all']))
if args.read_only:
print("XMP Files listed : {:9,}".format(count['listed']))
@@ -1225,6 +1359,7 @@ if not args.read_only:
print("Skipped : {:9,}".format(count['skipped']))
print("New GeoLocation from Map : {:9,}".format(count['map']))
print("GeoLocation from Cache : {:9,}".format(count['cache']))
print("GeoLocation from Fuzzy Cache : {:9,}".format(count['fuzzy_cache']))
print("Failed reverse GeoLocate : {:9,}".format(count['failed']))
if use_lightroom:
print("GeoLocaction from Lightroom : {:9,}".format(count['lightroom']))
@@ -1232,7 +1367,7 @@ if not args.read_only:
print("More than one found in LR : {:9,}".format(count['many_found']))
# if we have failed data
if len(failed_files) > 0:
print("{}".format('-' * 39))
print("{}".format('-' * 40))
print("Files that failed to update:")
print("{}".format(', '.join(failed_files)))