blob: adce198038cab921441b8cc82573372de71670ba [file] [log] [blame] [edit]
#!/usr/bin/perl -n
# hevea-retarget-crossrefs
# Michael Ernst
# Last updated: May 20, 2012
# To use:
# hevea-retarget-crossrefs < orig.html > new.html
# This script replaces HTML cross-references of the form
# <a href="#htoc1">
# by cross-references to named labels, such as
# <a href="#introduction">
# It is required that the original .tex source file contained a \label
# command at the end of each \chapter or \[sub]section command, like so:
# \chapter{Introduction\label{introduction}}
# The given label will replace the "htoc" one in the .html file.
# Rationale:
# In the table of contents, Hevea creates HTML cross-references that use
# Hevea-generated labels of the form "htoc99", even when a \label already
# exists. This leads to users following a link from the table of contents,
# then bookmarking or mentioning that link. The "htoc99" link may point
# to a completely different section if the manual is reordered or even if a
# new section is added. So, it is better for webpages not to contain the
# easy-to-misuse "htoc99" cross-references.
# This script does not work with in-place editing (perl's -i argument).
# use strict;
# use English;
# $WARNING = 1;
# NOTE: because of the -n switch on the #! line, every statement outside the
# END block is the body of an implicit "while (<>) { ... }" loop and therefore
# runs once per input line.
# Debugging flag: when true, the END block traces its work on STDERR.
$debug = 0;
# $debug = 1;
# if (scalar(@ARGV) != 1) {
# die "Expected exactly 1 argument, got " . scalar(@ARGV);
# }
# my $filename = $ARGV[0];
# Buffer the current input line.  Nothing is printed here; all rewriting and
# output is deferred to the END block, which needs the whole file in @lines.
push @lines, $_;
# Runs once, after the implicit -n loop has buffered every input line in
# @lines.  Two passes are needed because the table of contents (which holds
# the "#htocN" links) comes before the headers that define the real labels:
#   pass 1: rewrite the headers and learn which label replaces each htocN;
#   pass 2: retarget the links, add ALT text to images, and print each line.
END {
    my %mapping;    # htocN => symbolic label taken from the header

    # Pass 1.  $line aliases the element of @lines, so assigning to it
    # updates the buffered text in place.
    foreach my $line (@lines) {
        my ($htoc, $label);
        ($line, $htoc, $label) = _relabel_header($line);
        next unless defined $htoc;
        $mapping{$htoc} = $label;
        if ($debug) { print STDERR "$htoc => $mapping{$htoc}\n"; }
    }

    # Pass 2.
    foreach my $line (@lines) {
        my ($retargeted, @unresolved) = _retarget_links($line, \%mapping);
        if ($debug && $retargeted ne $line) {
            print STDERR $line;
            print STDERR $retargeted;
        }
        foreach my $htoc (@unresolved) {
            print STDERR "No symbolic name for section $htoc\n";
        }
        # Give every <IMG> an ALT attribute equal to its SRC file name.
        $retargeted =~ s/(<IMG SRC="([^"]+\.[^".]+)")>/$1 ALT="$2">/g;
        print $retargeted;
    }
}

# Rewrite the anchors of one header line.
#   Argument: the line (not modified; a rewritten copy is returned).
#   Returns:  ($new_line, $htoc, $label).  $htoc and $label are undef unless
#             the line held an "htocN" anchor that was replaced by a label.
sub _relabel_header {
    my ($line) = @_;
    my ($htoc, $label);

    # Handle lines *with* htoc, substituting it by the first other anchor and
    # moving others forward.  Groups used: $1 htoc name, $2 section number,
    # $7 "</A>", $8 title text, $9 first label anchor, $10 its name,
    # $11 any further anchors, $13 the closing header tag.
    if ($line =~ s:<A NAME="(htoc[0-9]+)">(((Chapter&#XA0;)?([0-9]+|[A-Z]))(\.[0-9]+)*)(</A>)(.*?)(<A NAME="(.*?)">)</A>((<A NAME=".*"></A>)*)(</H[0-9]+>):$9$2$7$11$8$13:) {
        ($htoc, $label) = ($1, $10);
    }

    # Move around the "<A NAME=" for sections *without* htoc (anything not in
    # tocdepth, which is not in a table of contents).  If the anchor comes
    # within but at the end of a header, then when going to that URL, some
    # browsers will position the header off the top of the screen.  Putting
    # the anchor at the beginning of the header fixes this problem.
    $line =~ s:(<(H[345]) CLASS="((sub)*section|paragraph)">)(.*?)(<A NAME=".*">(</A><A NAME=".*">)*)(</A></\2>):$1$6$5$8:;

    return ($line, $htoc, $label);
}

# Retarget every <A HREF="#htocN"> link on one line.
#   Arguments: the line, and a reference to the htocN => label hash.
#   Returns:   ($new_line, @unresolved), where @unresolved lists, in order of
#              appearance, each htocN that has no entry in the hash (those
#              links are left exactly as they were).
# All links on the line are processed, not just the first one.
sub _retarget_links {
    my ($line, $mapping) = @_;
    my @unresolved;

    my $rewrite = sub {
        my ($open, $htoc, $close, $chapter) = @_;
        my $label = $mapping->{$htoc};
        # Also remove "Chapter" if present, for brevity
        return $open . $label . $close if defined $label;
        push @unresolved, $htoc;
        return $open . $htoc . $close . (defined $chapter ? $chapter : '');
    };
    $line =~ s{(<A HREF="#)(htoc[0-9]+)(">)(Chapter&#XA0;)?}{$rewrite->($1, $2, $3, $4)}ge;

    return ($line, @unresolved);
}
# Local Variables:
# time-stamp-start: "^# Last updated: "
# time-stamp-end: "\\.?$"
# time-stamp-format: "%:b %:d, %:y"
# time-stamp-line-limit: 10
# End: