# Table.pm
# table manipulation functions

package WWW::Extractor::Generic::Table;

use strict;

use WWW::Extractor::Generic::Predicates qw(EqWrap);

# convert a table to a string
use overload
    '""' => "as_Text";

# make a new, empty table, or bless an existing array of rows
sub new {
    my $class = shift;
    my $self;
    if ($#_<0) { $self = [] }                            # empty
    elsif ($#{$_[0]}<0 || ref $_[0] && ref $_[0]->[0]) { $self = shift } # args: [ [1a 1b] [2a 2b] ]
    else { $self = [@_] }                                # args: [1a 1b] [2a 2b]
    
    return bless $self, $class;
}

# make a new table, from TSV test
sub from_TSV {
    my $class = shift;

    my $self = $class->new([map {[split /\t/, $_]} split /\n/, $_[0]]);

    $self->trim;
    return $self;
}

# convert table to TSV format (of sorts)
sub as_TSV {
    my $self = shift;
    return map {(join "\t", map { defined $_ ? $_ : '' } @$_)."\n"} @$self;
}

# convert table to HTML format
sub as_HTML {
    my $self = shift;
    return "<table border='1'>\n". (join '', (map {"<tr>\n".(join '', map {"\t<td>".(defined $_ ? $_ : '')."</td>\n"} @$_).'</tr>'} @$self)) . "\n</table>\n";
}

# convert table to text format
sub as_Text {
    my $self = shift;
    # find max width of each column
    my @col_width = ();
    for my $row (@$self) {
	for my $i (0..$#$row) {
	    my $t = $row->[$i]; $t = '' unless defined $t;
	    my $l = length($t);
	    $col_width[$i] = $l if not defined $col_width[$i] or $l > $col_width[$i];
	}
    }
    # assemble table
    my $text = '';
    for my $row (@$self) {
	for my $i (0..$#$row) {
	    my $t = $row->[$i]; $t = '' unless defined $t;
	    $text .= $t . ' ' x ($col_width[$i] + 2 - length($t));
	}
	$text =~ s/ $/\n/;
    }
    return $text;
}

# trim whitespace from table data
#  - trim_html...?
sub trim {
    my $self = shift;
    for my $row (@$self) {
	for (@$row) {
	    $_ = '' unless defined $_;
	    s/^\s+//;
	    s/\s+$//;
	}
    }
}

# function to extract certain columns from a table - can use numbers (from 1)
# or tests for column headers (to match first row)
sub cut {
    my $self = shift;
    my @indices;
    if ($_[0] =~ /\D/) {
	# they gave us string indices, so look them up in the header (1st row)
	my $t;
	my $l = $#{$self->[0]};
	INDEX: while ($t = shift) {
	    $t = EqWrap($t);  # this creates a function that does an exact match to $t
	    for (my $i=0; $i<=$l; ++$i) {
		if (&$t($self->[0][$i])) { # we test each column head
		    push @indices, $i;
		    next INDEX;
		}
	    }
	    return; # failed to find the index
	}
    } else {
	# they gave us numerical indices - subtract one so columns are numbered from 1, not 0
	@indices = map {$_ - 1} @_;
    }
    @$self = map { [@{$_}[@indices]] } @$self;
    return 1;
}

sub total_column {
    my $self = shift;
    my ($index) = @_;
    if ($index =~ /\D/) {
	# they gave us a string index, so look it up in the header (1st row)
	my $l = $#{$self->[0]};
	my $t = EqWrap($index);  # this creates a function that does an exact match to $index
	for (my $i=0; $i<=$l; ++$i) {
	    if (&$t($self->[0][$i])) {
		$index = $i;
		last;
	    }
	}
	return if $index =~ /\D/;
    } else {
	$index -= 1;
    }
    my $total = 0;
    for my $r (1..$#$self) {
	$total += $self->[$r][$index];
    }
    return $total;
}

# get rid of header / footer rows of a table
sub drop_head_foot {
    my $self = shift;
    my ($header_lines, $footer_lines) = @_;
    $header_lines = 1 unless defined $header_lines;
    @$self = splice @$self, $header_lines, $#$self+1-($header_lines+$footer_lines);
    return 1;
}

sub field_names {
    my $self = shift;
    unshift @$self, @_;
}

# add a new row or rows of data to the table
# hardly worth having this method now
sub push_rows {
    my $self = shift;
    push @$self, @_;
}

1
