#!/bin/bash -e
# Usage: treebsdiff in1 in2 out
# Any further arguments (e.g. -u) are currently ignored: the diffopts handling
# below is commented out.

# If you want to merge changes by hand, try the "--embed" option.
# (NOTE: the --embed mode is not currently active in this script.)
# It does two things: firstly it embeds all changes in the files, the way CVS
# embeds conflicts; secondly it symlinks across unchanged files, prefixed with
# "0."
#
# You can remove all the prefixes with "treediff_remove_prefix", and copy the
# symlinked files with "unsymlinktree".
#
# If you use the --embed option, it preserves permissions on d.* files too.

# todo improve so it handles other types of nodes, not just files, and so it diffs inode metadata too....

# this does not attempt to identify files that have been renamed.
# One strategy to handle this might be to do a single bsdiff between two tars of the files that only appear in one archive.

# Run under errexit even when invoked as "bash treebsdiff ..." (which ignores
# the -e on the shebang line).
set -e

# Positional arguments: the two trees to compare and the output directory.
# Without this check a short argument list only made "shift" fail silently.
if [ "$#" -lt 3 ]; then
	echo "usage: ${0##*/} in1 in2 out" >&2
	exit 2
fi
base1="$1"
base2="$2"
out="$3"
shift 3
#diffopts="$*"  # DODGY!

# When stdout is a terminal, remind the user that the report is not being captured.
perl -e '-t STDOUT && print STDERR "^C if you want to use tee to capture the output\n"'
#[ -z "$diffopts" ] && echo "^C if you want to append the -u diff option or the --embed option!" >&2

# Scratch files: the sorted file list of each tree and the classified merge of
# the two. "temp" is an external helper; presumably it prints the path of a
# freshly created temporary file - confirm against its implementation.
tree1=$(temp)
tree2=$(temp)
comm=$(temp)
# Remove the scratch files on every exit path, including an aborted run.
trap 'rm -f -- "$tree1" "$tree2" "$comm"' EXIT

# List the regular files of each tree relative to its root, and make the root
# absolute so that symlinks created from inside "$out" still resolve.
pushd "$base1" >/dev/null
base1=$(pwd)
find . -type f | sort >| "$tree1"
popd >/dev/null
pushd "$base2" >/dev/null
base2=$(pwd)
find . -type f | sort >| "$tree2"
popd >/dev/null

# comm(1) marks its three columns with 0, 1 or 2 leading tabs; rewrite that
# prefix as "1\t" (only in tree 1), "2\t" (only in tree 2) or "3\t" (in both).
comm "$tree1" "$tree2" | perl -pe 's/^(\t*)/(1+length($1))."\t"/e;' >| "$comm"

# Everything after this point runs inside the output directory.
[ -d "$out" ] || mkdir "$out"
pushd "$out" >/dev/null
export base1 base2 diffopts

# Walk the classified path list in $comm (one "<type>\t<pathname>" per line)
# and populate the current directory, which is the output tree:
#   type 1 (only in tree 1)  -> symlink "1.<name>" to the file in tree 1
#   type 2 (only in tree 2)  -> symlink "2.<name>" to the file in tree 2
#   type 3 (in both trees)   -> bsdiff patch "d.<name>" if the contents differ
# One report line "<status>\t<pathname>" is printed per path. <status> is
# "1", "2", "." (compared equal), "d" (patch written), "12" (comparison or
# patch failed, both sides symlinked), or empty when the pair was assumed
# identical because size and mtime match.
# NOTE: the Perl program is wrapped in shell single quotes, so it must not
# contain any single-quote character, not even inside a comment.
< "$comm" perl -ne '
	use File::Path;
	use L; # for rmpath

	# The absolute tree roots are exported by the shell; fetch them once.
	BEGIN { ($base1, $base2) = @ENV{qw(base1 base2)}; }

	# Run a command (list form, no shell) and return its exit code. A
	# command that could not be started, or that was killed by a signal,
	# is reported as 255 so that it is never mistaken for success.
	sub exit_code {
		my $wait = system(@_);
		return 255 if $wait == -1 || ($wait & 127);
		return $wait >> 8;
	}

	# Symlink one side of a pair into the output tree as "<side>.<name>"
	# and print the side digit into the status column.
	sub link_side {
		my ($side, $target, $dir, $name) = @_;
		symlink $target, "$dir/$side.$name" or
			die "symlink $target to $dir/$side.$name failed: $!";
		print "$side";
	}

	chomp;
	my ($type, $pathname) = split /\t/, $_, 2;
	my ($dirname, $filename) = $pathname =~ m|(.*)/(.*)|;
	my $file1 = "$base1/$pathname";
	my $file2 = "$base2/$pathname";
	mkpath($dirname);
	if ($type == 1) {
		link_side(1, $file1, $dirname, $filename);
	} elsif ($type == 2) {
		link_side(2, $file2, $dirname, $filename);
	} else { # $type == 3
		# If size and mtime are the same we assume the content is the same
		# and skip the comparison to speed things up. A failed stat must
		# not count as a match, so both sizes have to be defined.
		my ($size1, $mtime1) = (stat($file1))[7,9];
		my ($size2, $mtime2) = (stat($file2))[7,9];
		my $assumed_same = defined $size1 && defined $size2 &&
			$size1 == $size2 && $mtime1 == $mtime2;
		unless ($assumed_same) {
			my $failure;
			# cmp -s: 0 = identical, 1 = different, anything else = trouble.
			my $cmp_status = exit_code("cmp", "-s", $file1, $file2);
			if ($cmp_status == 0) {
				print ".";
			} elsif ($cmp_status == 1) {
				# The files differ: write the binary patch. The bsdiff
				# status is kept apart from the cmp status, so a patch
				# that was written is reported as "d", not as ".".
				my $patch = "$dirname/d.$filename";
				my $bsdiff_status = exit_code("bsdiff", $file1, $file2, $patch);
				if ($bsdiff_status == 0) {
					print "d";
				} else {
					$failure = "bsdiff $file1 and $file2 failed with exit status $bsdiff_status";
					unlink $patch; # best effort: do not leave a partial patch behind
				}
			} else {
				$failure = "cmp $file1 and $file2 failed with exit status $cmp_status";
			}
			if (defined $failure) {
				# Could not compare or diff: expose both originals instead.
				print STDERR "$failure\n";
				link_side(1, $file1, $dirname, $filename);
				link_side(2, $file2, $dirname, $filename);
			}
		}
	}
	print "\t$pathname\n";
	# Just try to be tidy. rmpath comes from the local L module; presumably
	# it prunes directories that ended up empty, like "rmdir -p" without
	# the error noise - confirm against L.
	rmpath($dirname);
'

# Remove the scratch files. The names are quoted and preceded by "--" so that
# paths containing whitespace, glob characters or a leading dash are handled.
rm -- "$tree1" "$tree2" "$comm"
