#!/bin/bash
# SPDX-FileCopyrightText: 2024 Andreas Itzchak ("Izzy") Rehberg <izzysoft@qumran.org>
# SPDX-License-Identifier: AGPL-3.0-or-later
set -euo pipefail

# Determine the directory this file lives in. If $0 is identical to
# BASH_SOURCE[0] the file is being executed directly and the script path is
# canonicalized with readlink first; otherwise (the file is being sourced)
# the path recorded in BASH_SOURCE[0] is used as-is.
case "$0" in
  "${BASH_SOURCE[0]}")
    BASEDIR="$(realpath "$(dirname "$(readlink -mn "${0}")")")"
    ;;
  *)
    BASEDIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"
    ;;
esac

# -------------------------------------------=[ Config ]=---
SCRIPTDIR="$(dirname "$(dirname "${BASEDIR}")")/iod-stats-builder"    # assumes that iod-stats-builder and iodstats share the same parent dir (are checked out next to each other)
# Year and week are taken from ONE date call and use the ISO week-based year (%G)
# together with the ISO week number (%V). Combining %V with the calendar year (%Y)
# mislabels weeks around New Year: e.g. Saturday 2022-01-01 belongs to ISO week 52
# of 2021, so %Y-%V would yield "2022-52" – the same stamp as the last week of 2022.
STAMP="$(date --date='last Sat' +%G-%V)"                    # year-week of the last week (with Sat being its last day)
YEAR="${STAMP%-*}"                                          # ISO week-based year of the last week
WEEK="${STAMP#*-}"                                          # ISO number of the last week
STATSDIR="$(dirname "${BASEDIR}")/apk_downloads"            # where the JSON files should go to
STATSFILE="${STATSDIR}/${YEAR}/iod_downloads_${STAMP}.json" # last week's stats (which we now process)
WEBLOG="/var/log/apache2/apt/access_log.1"                  # Apache log from last week (post-rotation)
PARSER_CONFIG="${BASEDIR}/config.toml"                      # the config of the Python script
PARSER="${SCRIPTDIR}/iod_dl_parser.py"                      # the Python script that does the real parsing
INDEX_GENERATOR="${SCRIPTDIR}/iod_dl_index_generator.py"    # the Python script that generates the index.json
CANCEL_ON_DIRTY=0                                           # if greater than 0: abort the script should the git tree be dirty, and exit with this return code (with 'git config pull.rebase true', the pull will fail anyway on a dirty tree)
# ------------------------------------------=[ /Config ]=---


# Write all arguments as a single line to STDERR; the arguments are joined
# via "$*", i.e. with the first character of IFS (a space by default).
printerr() {
  printf '%s\n' "$*" 1>&2
}


# ----------------
# --    MAIN    --
# ----------------

# Work inside the stats directory; all relative paths and git commands below
# refer to it.
cd "$STATSDIR"
mkdir -p -- "${YEAR}"

# Check for unstaged changes. The porcelain output is captured first instead of
# being piped into 'grep -q': with 'pipefail' set, a grep that exits on its
# first match can make git die from SIGPIPE, which would turn the check into a
# false negative. Capturing also lets a failing 'git status' abort the script.
tree_state="$(git status --porcelain)"
# The non-empty guard is needed because a here-string always supplies one
# (possibly empty) line, which 'grep -v' would otherwise count as a match.
if [[ -n "$tree_state" ]] && grep -Eqv '^[MADR] ' <<<"$tree_state"; then  # entries other than purely staged ones exist
  echo
  printerr "Tree is dirty:"
  printf '%s\n' "$tree_state" >&2
  if [[ $CANCEL_ON_DIRTY -gt 0 ]]; then
    printerr "aborting."
    echo
    exit "$CANCEL_ON_DIRTY"
  fi
  echo
fi

# pull in potential changes from upstream
git pull

# do not overwrite the statsfile if it already exists and is not empty
if [[ -s "$STATSFILE" ]]; then
  printerr "ERROR: Statsfile '${STATSFILE}' already exists, aborting."
  exit 21
fi

# use grep to find log lines where APK files have been downloaded, and feed them to the parser.
# Should any stage of the pipeline fail (this includes grep finding no matching
# line), remove the incomplete output so that a later re-run is not blocked by
# the "already exists" check above, then exit with the pipeline's status.
grep -E '\.apk .* 200 ' "$WEBLOG" \
  | python3 "$PARSER" --config "$PARSER_CONFIG" > "$STATSFILE" \
  || {
    rc=$?
    rm -f -- "$STATSFILE"
    printerr "ERROR: Generating '${STATSFILE}' from '${WEBLOG}' failed (exit code ${rc}), aborting."
    exit "$rc"
  }

# generate an updated index.json in "$STATSDIR"
python3 "$INDEX_GENERATOR" --stats-dir "$STATSDIR"

# now add and commit the new stats file to the repo – but only if no other files are staged yet
# (first char is staging area, second char is working area; locally modified but
# not yet staged would be '^ M', staged and modified again '^MM')
tree_state="$(git status --porcelain)"
if grep -qE '^[MADR]' <<<"$tree_state"; then    # staged changes exist, we don't want to commit them alongside accidentally
  printerr "ERROR: There are already staged files in the repo, aborting."
  exit 22
fi

git add -- "${YEAR}/$(basename "$STATSFILE")"  # add new stats file
git add -- "${STATSDIR}/index.json"            # add updated index.json
git commit -q -m "adding download stats for week $STAMP"
git push
