#!/usr/bin/env bash

# download/get files.html
# find "(Events|Players|Openings) updated: Month YYYY"
# => store in last update
# => if one changed -> process
# in files.html
# div "Players"
#   => table below
#   => folder "players/"
# div "Openings"
#   h2 "Modern Queen Pawn", "Classical Queen Pawn", "Modern King Pawn", "Classical King Pawn", "Flank and Unorthodox"
#   => table below each h2
#   => folder "openings/"
# div "Events"
#   h2 "Tournaments", "Candidates and Interzonals", "World Championships"

# print the message (only if stdout is a terminal) and abort
function error() { if [ -t 1 ]; then echo "$1"; fi; exit 1; }

BASE_FOLDER=$(dirname "$(readlink -f "$0")")"/";
TEMP="${BASE_FOLDER}../temp/";
DATA="${BASE_FOLDER}../data/";
DOWNLOAD="${BASE_FOLDER}../download/";
last_download="${DATA}last_download.txt";
last_update="${DATA}last_update.txt";
url_base_file="https://www.pgnmentor.com/files.html";
# has to be YYYYMMDD
current_date="${1}";
# temp download
temp_files_dl="${TEMP}files.html";
# set the download targets for this run
download_target_file="${DOWNLOAD}${current_date}/master/files.html";
download_target_pgn="${DOWNLOAD}${current_date}/pgn/";
# flag if we do anything
do_download=0;

# must have curl installed (or wget, but I am too lazy to code for both)
type curl >/dev/null 2>&1 || error "curl not installed";

# date should be at least somewhat valid
if [ -z "${current_date}" ] || ! [[ "${current_date}" =~ ^[0-9]{8}$ ]]; then
    error "Current date must be set and in the format YYYYMMDD";
fi;

# 1) last_download does not exist -> download
# 2) otherwise download files.html to temp
# 3) compare its "updated: " lines against last_download
# 4) if different (or last_download missing) -> download
check_dl=0;
if [ ! -f "${last_download}" ]; then
    check_dl=1;
else
    curl -o "${temp_files_dl}" "${url_base_file}";
    # compare the "updated: " lines of the fresh temp copy against the last run
    grep "updated: " "${temp_files_dl}" > "${TEMP}cur_file_updated.txt";
    file_diff=$(diff "${last_download}" "${TEMP}cur_file_updated.txt");
    if [ -n "${file_diff}" ]; then
        check_dl=1;
    fi;
fi;
if [ "${check_dl}" = 0 ]; then
    error "Last downloaded data is not outdated";
fi;

# normal download
# does that download folder exist yet?
if [ ! -d "${DOWNLOAD}${current_date}" ]; then
    echo "Create new folders for ${current_date}";
    # build the basic folders
    mkdir -p "${DOWNLOAD}${current_date}/master";
    mkdir -p "${DOWNLOAD}${current_date}/pgn";
    mkdir -p "${DATA}${current_date}";
    # shift: current to last
    last_sym=$(readlink -f "${DATA}current");
    echo "$last_sym => ${DATA}last";
    rm -f "${DATA}last";
    ln -sf "${last_sym}" "${DATA}last";
    # shift: this to the new current
    rm -f "${DATA}current";
    ln -sf "${DATA}${current_date}" "${DATA}current";
fi;

# download master file
if [ ! -f "${download_target_file}" ]; then
    echo "Download master file for ${current_date}";
    curl -o "${download_target_file}" "${url_base_file}";
fi;

# diff current to last in download
# grep diffs and download to correct folders
# attach to main lists
# diff -u old new |grep "+" |grep "a href"
# (see the sketch below)
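# --- Sketch (not part of the original flow, never invoked): one possible way to do the
# diff-and-fetch step outlined in the comments above. The function name is hypothetical;
# it assumes the zip links on files.html are relative paths like "players/Adams.zip" and
# takes the path of the previously downloaded files.html as its first argument.
function download_new_archives() {
    local old_master="${1}";
    local new_master="${download_target_file}";
    # keep only added lines, pull out the zip links, and fetch each one
    diff -u "${old_master}" "${new_master}" \
        | grep '^+' \
        | grep -o 'href="[^"]*\.zip"' \
        | sed 's/^href="//;s/"$//' \
        | sort -u \
        | while read -r link; do
            echo "Download new archive: ${link}";
            # mirror the remote sub folder (players/, openings/, events/) below the pgn target
            mkdir -p "${download_target_pgn}$(dirname "${link}")";
            curl -o "${download_target_pgn}${link}" "https://www.pgnmentor.com/${link}";
        done;
}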
-f "${last_download}" ]; then # do_download=1; # # check master file last update date # grep "updated: " "${download_target_file}" > "${last_download}"; # echo "DL: ${dl_latest}"; # else # # now we need to check # echo "check last download is old enough"; # last_data=$(cat "${last_update}"); # echo "L: ${last_data}"; # # eg # fi; # if [ $do_download -eq 0 ]; then # error "No updated data found"; # fi; # start main processing # TODO: limited download # load previous set, so we do not download the same data twice # all files in this files will not be downloaded # CURRENT: # use DownThemAll to get all the data # extract data files # check they exist in pgn folder -> if not # __END__