#!/usr/bin/perl -w

use IO::File;
use strict;

# Command line: an optional leading "f" asks for a header line of field
# names; it must be followed by exactly two file names.
my $fields_out = (defined $ARGV[0] && $ARGV[0] eq "f");
shift @ARGV if $fields_out;

die "usage $0 [f] field_types table.dat > table\n"
	unless @ARGV == 2;

my $field_types = $ARGV[0];   # tab-separated field description file
my $msql_dat    = $ARGV[1];   # binary table data file

# load_field_types
# ------------------------------------------------------------
# @fields holds one [name, type, size] triple per column, in file order.
# The description file has one column per line, tab separated:
#   name <TAB> type [<TAB> size]
my @fields;
{
	# Explicit read mode keeps odd file names (leading '>', trailing '|')
	# from being interpreted as open modes.
	my $file = IO::File->new($field_types, "r")
		or die "cannot open `$field_types': $!\n";

	while (defined (my $line = <$file>)) {
		chomp $line;
		next if $line eq "";   # tolerate blank lines (e.g. at end of file)

		my ($field, $type, $size) = split /\t/, $line, -1;
		defined $type
			or die "malformed field definition at `$field_types' line $.\n";

		push @fields, [$field, $type, $size];
	}
}

# build unpacker
# ------------------------------------------------------------
# $unpacker accumulates the unpack() template for the column part of one
# row; $record_len is the number of bytes that template consumes.
my $unpacker = "";
my $record_len = 0;
{
	for my $fieldref (@fields) {
		my ($field, $type, $size) = @$fieldref;

		# Every column is preceded by a one-byte flag; the row decoder
		# treats a zero flag as NULL.
		$unpacker .= "c1";
		$record_len ++;

		if ($type eq "char") {
			# The size is interpolated into the template, so insist on a
			# plain non-negative integer (surrounding whitespace, such as
			# a stray CR, is tolerated).
			defined $size && $size =~ /^\s*(\d+)\s*$/
				or die "bad size for char field `$field'\n";
			$size = $1;

			$unpacker .= "a$size";
			$record_len += $size;
		} elsif ($type eq "int") {
			$unpacker .= "i";
			$record_len += 4;
		} elsif ($type eq "real") {
			# Exactly one value per column: the row decoder consumes
			# (flag, value) pairs, and the 8 bytes counted here match a
			# single native double. The previous "dc" template yielded an
			# extra value and consumed a ninth byte, misaligning every
			# following column.
			# NOTE(review): on-disk layout of reals not checked against
			# real data - confirm.
			$unpacker .= "d";
			$record_len += 8;
		} else {
			die "unknown field type `$type'\n";
		}
	}
}

# print fields if wanted
# ------------------------------------------------------------
# Optional header line: the field names, tab separated.
if ($fields_out) {
	my @names = map { $_->[0] } @fields;
	print join("\t", @names), "\n";
}

# convert the data
# ------------------------------------------------------------
# Each row on disk is: two ints (in-use flag, timestamp), the column data,
# then NUL padding. The block size is 8 + $record_len plus 1..8 bytes of
# padding, so that it is a multiple of 8.
my $record_block = (8 + $record_len + 8) & ~7;
$unpacker = "ii${unpacker}a*"; # a* collects the padding on the end

# Explicit read mode so the file name is never interpreted as an open
# mode; binmode because the table is binary data.
my $in = IO::File->new($msql_dat, "r")
	or die "cannot open `$msql_dat': $!\n";
binmode $in;

my $buf;

# The table starts with a 24 byte header.
{
	my $got = read $in, $buf, 24;
	defined $got && $got == 24
		or die "cannot read table header from `$msql_dat'\n";
}

my ($sig, $del_rows_minus_1, $total_rows, $active_rows, $unknown) = unpack "a4iiia8", $buf;

warn "unknown table signature at 0\n" if $sig ne "\x14\x00\x00\x00";
warn "unknown table signature at 16\n" if $unknown ne "\0\0\0\0\0\0\0\0";

# The following consistency check is known not to hold, so it stays disabled:
# warn "row counts do not add up!" unless $total_rows == $active_rows + $del_rows_minus_1 + 1;

# Tally of rows whose in-use flag was non-zero.
# NOTE(review): nothing consumes this tally yet; comparing it with the
# header's active-row count looks intended - confirm the header semantics
# before adding that check.
my $count_active_rows = 0;

# Decode $total_rows fixed-size row blocks and print every live row as one
# line of TSV.  NULL columns are written as a single NUL byte; backslash,
# tab and newline inside values are backslash-escaped.
for (my $i=0; $i<$total_rows; ++$i) {
	my $got = read $in, $buf, $record_block;
	defined $got
		or die "error reading row $i: $!\n";
	if ($got < $record_block) {
		# A short block cannot be decoded reliably; stop instead of
		# emitting a stream of uninitialized-value warnings.
		warn "table truncated: only $i of $total_rows rows present\n";
		last;
	}

	my @data = unpack $unpacker, $buf;

	my ($in_use, $time) = splice @data, 0, 2;
	next if $in_use == 0; # skip deleted rows

	$count_active_rows ++;

	$in_use == 1
		or warn "in-use flag is `$in_use' - should be 0 or 1\n";

	$time > 900000000
		or warn "bad timestamp `$time' on row\n";

	# What remains is (flag, value) pairs followed by the padding string.
	my @row;
	while (@data > 1) {
		my ($notnull, $data) = splice @data, 0, 2;
		undef($data) unless $notnull;
		push @row, $data;
	}

	if (@data == 1) {
		# \z rather than $ so that a trailing newline byte is not
		# mistaken for clean padding.
		$data[0] =~ /^\0+\z/
			or warn "row padding not zero";
	} else {
		warn "missing zeros at end of row\n";
	}
	@row == @fields
		or warn "wrong number of fields unpacked from row!!";

	# write the row in TSV
	for (@row) {
		if (defined $_) {
			# Cut at the first NUL to make up for the 'a' unpacker ('Z'
			# not in 5.004).  /s is required so that junk after the
			# terminator is removed even when it contains a newline.
			s/\0.*//s;
			s/\\/\\\\/g;
			s/\t/\\t/g;
			s/\n/\\n/g;
		} else {
			$_ = "\0";
		}
	}
	print join "\t", @row;
	print "\n";
}

# Whatever follows the last row should consist of NUL bytes only.  Scan to
# end of file in 1 MiB chunks and complain once about anything else
# (tr with /c counts the non-NUL bytes without modifying the buffer).
while (read $in, $buf, 1024*1024) {
	if ($buf =~ tr/\0//c) {
		warn "guff after rows in table\n";
		last;
	}
}
