#!/usr/bin/perl -w

use strict;

sub compare_strings {
	my @strings = @_;

	my $longest = 0;
	my $second_longest = 0;

	for my $s (@strings) {
		my $l = length $s;
		if ($l > $longest) {
			$second_longest = $longest;
			$longest = $l;
		} elsif ($l > $second_longest) {
			$second_longest = $l;
		}
	}

	my %I_i_set_by_l_substr;
	my %l_by_I_i;

	my $I = 0;
	for my $s (@strings) {
		my $l = length $s;
		for (my $i=0; $i<$l; ++$i) {
			my $I_i = "${I}_$i";
			my $substr = substr $s, $i, 1;
			$I_i_set_by_l_substr{1}{$substr}{$I_i} = 1;
		}
		$I++;
	}

	my $not_last;
	my $l = 1;

	do {
		while (my ($substr, $I_i_set) = each %{$I_i_set_by_l_substr{$l}}) {
			$not_last = 0;
			if (keys %$I_i_set == 1) {
				delete $I_i_set_by_l_substr{$l}{$substr};
			} else {
				my $prev_set;
				$prev_set = $I_i_set_by_l_substr{$l-1}{substr $substr, 0, -1} if $l > 1;

				# sort the set by I then by i - so we can merge matches before we get to the ending
				my @keys_sorted = map {$_->[0]}
					sort { $a->[1] <=> $b->[1] || $a->[2] <=> $b->[2] }
						map {[$_, split /_/, $_]}
							keys %$I_i_set;

				for my $I_i ( @keys_sorted ) {
					next unless exists $I_i_set->{$I_i}; # might have been deleted! see below
					delete $prev_set->{$I_i} if $prev_set; # delete the pattern starting one shorter than this one

					my ($I, $i) = split /_/, $I_i;
					if ($l > 1) {
						# look for potential joins
						# could loop here... I mean get all joins at once - might make code more complex?
						my $end_i = $i + $l;
						my $end_I_i = $I . "_" . $end_i;
						if (my $end_l = delete $l_by_I_i{$end_I_i}) {
							# merge two strings
							$l_by_I_i{$I_i} = $l + $end_l;
							my $end_substr = substr $strings[$I], $i+$l, $end_l;
							$I_i_set_by_l_substr{$l + $end_l}{$substr . $end_substr}{$I_i} = 1;
							delete $I_i_set_by_l_substr{$l}{$substr}{$I_i};
							# when $end_l == $l, the next delete will delete something we haven't reached yet, because we sort
							delete $I_i_set_by_l_substr{$end_l}{$end_substr}{$end_I_i};
						}
					} else {
						$l_by_I_i{$I_i} = $l;
					}

					if (length $strings[$I] > $i+$l) {
						my $substr = substr $strings[$I], $i, $l+1;
						$I_i_set_by_l_substr{$l+1}{$substr}{$I_i} = 1;
					}
				}
				$not_last = 1;
			}
		}
	} until keys %{$I_i_set_by_l_substr{$l++}} == 0;

	my $n = $l;
	for (my $l=1; $l<$n; ++$l) {
		my @to_delete = ();
		while (my ($substr, $I_i_set) = each %{$I_i_set_by_l_substr{$l}}) {
			push @to_delete, $substr if keys %$I_i_set < 2;
		}
		delete @{$I_i_set_by_l_substr{$l}}{@to_delete};
		delete $I_i_set_by_l_substr{$l} if keys %{$I_i_set_by_l_substr{$l}} == 0;
	}

	return \%I_i_set_by_l_substr;
}

use Data::Dumper;
print Dumper compare_strings("Hello World!", "Hello", "Goodbye World");
