#!/bin/bash

# Argument check: a URL is mandatory, and a first argument that starts with
# "-" is rejected. Either case prints the usage line to stderr and exits 2.
if (( $# < 1 )) || [[ "$1" == -* ]]; then
	printf >&2 'usage: %s url [html_filter arg ...]\n' "$(basename "$0")"
	exit 2
fi

# The script must run where a ".git" entry exists. If there is none, a
# directory in which `ls` prints nothing gets `git init`; any other directory
# is refused with a hint on stderr and exit status 2.
if [ ! -e ".git" ]; then
	if [ -n "$(ls)" ]; then
		echo >&2 "please run this at top level of a git repo for tracking changes, i.e. run: git init"
		exit 2
	fi
	git init
fi

# The first argument is the URL. Any remaining arguments form the command
# (plus its arguments) through which the text of HTML pages is piped.
url=$1
shift
if (( $# > 0 )); then
	html_filter=("$@")
else
	# No filter given: pass the text through unchanged.
	html_filter=(cat)
fi

# Map the URL to a path below the current directory: drop the scheme and, when
# the URL does not name a file, use index.html (the name wget saves it under).
file=${url#*://}
if [[ "$file" == */ ]]; then
	file=${file}index.html
elif [[ -n "$file" && "$file" != */* ]]; then
	# Bare host such as http://example.com: wget stores it as index.html, so
	# track it as <host>/index.html instead of a file named after the host.
	file=$file/index.html
fi
dir=$(dirname -- "$file")
mkdir -p -- "$dir"

# Download the URL into its directory. wget runs with -N (time-stamping) and
# -q (quiet). The subshell keeps the cd from affecting the rest of the script.
# A failed cd or download aborts the run instead of letting the later steps
# work on a missing or stale file.
(
	cd "$dir" || exit
	wget -N -q "$url"
) || {
	echo >&2 "$(basename "$0"): failed to download $url"
	exit 1
}

# The download counts as HTML when "<html" (any letter case) shows up in the
# part of the file that head prints.
is_html=
if <"$file" head | grep -qi "<html"; then
	is_html=1
fi

if [ -n "$is_html" ]; then
	# Convert the page with htmltext, pipe the result through the filter
	# command, and replace the download with that output.
	<"$file" htmltext | "${html_filter[@]}" >"$file.txt"
	htmltext_status=${PIPESTATUS[0]}
	if [ "$htmltext_status" -ne 0 ]; then
		# Never overwrite the download with the output of a failed conversion.
		# Only htmltext is checked: a filter such as grep may legitimately
		# return non-zero.
		rm -f -- "$file.txt"
		echo >&2 "$(basename "$0"): htmltext failed (exit $htmltext_status) on $file"
		exit 1
	fi
	mv -- "$file.txt" "$file"
fi

# Report what the working copy of the file adds relative to git's view of it.
# $added is also used for the commit message, so reset it first: otherwise a
# value inherited from the environment would be used when there is no diff.
added=
diff=$(git diff -- "$file")
if [ -n "$diff" ]; then
	# From the first hunk header (@@) onwards, keep only the lines starting
	# with "+" and strip that "+"; starting at @@ skips the +++ header line.
	added=$(
		printf "%s\n" "$diff" |
		perl -ne '/^@@/ .. 0 and s/^\+// and print'
	)
	printf "%s:\n%s\n\n" "$file" "$added"
fi

# Stage everything and commit from inside the file's directory. The commit
# message is the file path followed by the added text, or "new" when $added is
# empty or unset. Standard output of both git commands is discarded.
(
	cd "$dir"
	commit_message="$file: ${added:-new}"
	git add --all
	git commit --message="$commit_message"
) >/dev/null

# Always finish with status 0, whatever the commit step returned.
exit 0
