#!/bin/bash
# Reduce an HTML document read from stdin to plain text on stdout.
#
# Pipeline stages, in order:
#   1. Delete carriage returns.
#   2. Run the external `htmlsplit` tool (not defined in this file;
#      presumably it places each tag on its own line, which the following
#      stages rely on -- confirm against the tool).
#   3. Blank every line from one matching "<script" through the line
#      matching "</script", and likewise for "<style" ... "</style".
#   4. Drop lines that start with "<".
#   5. Trim leading whitespace and decode &quot; &lt; &gt; and decimal
#      numeric references.
#        - "&#38;" is rewritten to "&amp;" rather than "&" so that the
#          ampersand it yields is never re-scanned as the start of another
#          entity.
#        - Code points above 255 are emitted as UTF-8 bytes, which avoids
#          perl's "Wide character in print" warning and keeps raw high bytes
#          on the same line from being re-encoded. Values 0-255 still
#          produce the single byte chr() gives.
#   6. Squeeze runs of newlines and runs of spaces.
#   7. Decode &nbsp; (kept after the squeeze so the spaces it produces are
#      not collapsed) and finally &amp;. Decoding &amp; last means escaped
#      text such as "&amp;lt;" yields the literal "&lt;", not "<".
tr -d $'\r' |
  htmlsplit |
  perl -pe '$_ = "" if (/<script/i .. /<\/script/i or /<style/i .. /<\/style/i);' |
  grep -v '^<' |
  perl -pe '
    s/^\s+//;
    s/&quot;/"/g;
    s/&lt;/</g;
    s/&gt;/>/g;
    s/&#(\d+);/$1 == 38 ? "&amp;" : $1 > 255 ? do { my $c = chr($1); utf8::encode($c); $c } : chr($1)/ge;
  ' |
  tr -s $'\n' |
  tr -s ' ' |
  perl -pe 's/&nbsp;/ /g; s/&amp;/&/g;'
