#!/bin/bash
#
# HTML-to-text filter: reads HTML on stdin, writes plain text on stdout.
#
# Depends on the external `htmlsplit` command being on PATH. It is not
# defined in this file; the first perl stage treats every line that starts
# with "<" as a tag, so htmlsplit presumably emits tags and text runs on
# separate lines (TODO: confirm against the htmlsplit documentation).

# Stage 1: drop carriage returns, then hand the markup to htmlsplit.
tr -d '\r' | htmlsplit |

# Stage 2: <p...> and <br...> tags become a line break, every other tag is
# discarded. Text lines lose their leading spaces and get their trailing
# spaces replaced by one space, so consecutive text runs end up joined on a
# single output line.
perl -ne '
  if (/^<(p|br)\b/) {
    print "\n";
  } elsif (!/^</) {
    chomp;
    s/ *$/ /;
    s/^ *//;
    print;
  }
' |

# Stage 3: trim each line and decode character references in ONE pass, so
# text produced by a decoded reference is never scanned again.
#   * &amp; and &nbsp; are deliberately left alone here and resolved
#     together in the last stage; decoding &amp; early is what used to turn
#     "&amp;lt;" into "<" and "&amp;nbsp;" into a space.
#   * A numeric reference to "&" (38) is re-emitted as "&amp;" for the same
#     reason.
#   * Code points above 255 are written as UTF-8 bytes explicitly, which
#     avoids the "Wide character" warning and keeps perl from re-encoding
#     the rest of the line. Code points 128-255 are still emitted as a
#     single byte, as before.
#   * References beyond U+10FFFF are left untouched.
# The trailing-whitespace substitution only puts the newline back when the
# line still has content in front of that whitespace, so lines that are
# empty after trimming are dropped.
perl -pe '
  BEGIN {
    %named = (quot => q("), lt => "<", gt => ">", apos => "\x27");
    sub numref {
      my ($cp, $raw) = @_;
      return $raw    if $cp > 0x10FFFF;
      return "&amp;" if $cp == 38;
      my $ch = chr $cp;
      utf8::encode($ch) if $cp > 255;
      return $ch;
    }
  }
  chomp;
  s/^\s+//;
  s/\s+$/\n/;
  s{&(?:(quot|lt|gt|apos)|#(\d+)|#[xX]([0-9a-fA-F]+));}
   {defined($1) ? $named{$1} : numref(defined($2) ? $2 : hex($3), $&)}ge;
' |

# Stage 4: collapse runs of spaces and runs of newlines. This happens before
# &nbsp; is expanded so that non-breaking spaces are never collapsed.
tr -s ' \n' |

# Stage 5: resolve the two references deferred by stage 3, again in one pass
# so that "&amp;nbsp;" yields the literal text "&nbsp;".
perl -pe 's/&(amp|nbsp);/$1 eq "amp" ? "&" : " "/ge'
