# XML::Cursor

package WWW::Extractor::XML::Cursor;

use strict;
use vars qw(@ISA);

use WWW::Extractor::Generic::Cursor;

@ISA = qw|WWW::Extractor::Generic::Cursor|;

sub new {
    my $class = shift;
    my $self = SUPER::new $class;
    $self->{stack} = [];
    return $self;
}

sub next {
    my ($self, $list) = @_;

    my $e = $list->item;
    if (defined $e && $e->type eq 'S' && $e->has_end) {
	# we are entering a container
	push @{$self->{stack}}, $e;
    }

    my $on = $self->SUPER::next($list);

    $e = $list->item;
    if (defined $e && $e->type eq 'E') {
	# we are leaving a container

	# check container integrity - examine stack
	my $stack = $self->{stack};
	# match end tag with its start tag...
	my $tag = $e->tag;
	my $tag_weight = $list->{doc_type}->tag_weight($tag);
	for (my $i=$#$stack; $i>=0; --$i) {
	    my $cmp = $$stack[$i];
	    my $tag_cmp = $cmp->tag;
	    if ($tag_cmp eq $tag) {
		# put ref to start tag in end tag
		$e->start($cmp);
		# notify the start tag that it has an end tag
		$cmp->has_end(1);
		# pop anything under this container off the stack
		$#$stack = $i;

		last;
	    } else {
		# notify this 'start' tag that it does not have an end tag
		$cmp->has_end(0);
	    }
	    # do not allow the document structure to be wrecked by a </font> tag!
	    if ($tag_weight < $list->{doc_type}->tag_weight($tag_cmp)) {
		$e->start(undef);
		last;
	    }
	}

	pop @{$self->{stack}} if $e->has_start;
    }

    $self->reset unless $on;
    return $on;
}

sub prev {
    my ($self, $list) = @_;

    my $e = $list->item;
    if (defined $e && $e->type eq 'E' && $e->has_start) {
	# we are entering a container backward
	push @{$self->{stack}}, $e->start;
    }

    my $on = $self->SUPER::prev($list);

    $e = $list->item;
    if (defined $e && $e->type eq 'S' && $e->has_end) {
	# we are leaving a container
	# assume stack integrity
	pop @{$self->{stack}};
    }

    $self->reset unless $on;
    return $on;
}

sub reset {
    my $self = shift;
    $self->SUPER::reset;
    @{$self->{stack}} = ();
}

sub as_string {
    my $self = shift;
    return join ' ', $self->SUPER::as_string, map {$_->tag} @{$self->{stack}};
}

1
