#!/usr/local/bin/perl
#
# Arnaud Le Hors - lehors@w3.org
# $Id: htmlcat,v 1.4 2006-10-09 18:55:52 ihickson Exp $
#
# Concatenate several HTML files into a single HTML document.
#
# Usage: htmlcat output file1 file2 ...
#
# The first input file is copied up to (but not including) its </body> end
# tag, so its <head> becomes the head of the result.  Only the body content
# of every following file is appended.  Links and anchors are rewritten on
# the way (see fixup) so that they stay unique and keep pointing at the
# right place inside the combined document.

use strict;
use warnings;

my $PROGNAME = substr($0, rindex($0, "/") + 1);

# Fix up links & anchors.
#
#   $name - source file name; used as the prefix that makes anchors unique
#   $s    - HTML text taken from that file
#
# Returns the rewritten HTML text.
#
# The substitutions depend on their order: the first one drops the "#" of a
# local link and the fourth one puts it back (together with every other
# link that looks like a bare file name).
sub fixup($$) {
    my ($name, $s) = @_;

    # Make local links unique: href="#x" becomes href="NAME.x" here and is
    # turned into href="#NAME.x" by the "other chapters" rule further down.
    $s =~ s/(\bhref\s*=\s*["']?)#([^"'> ]+)(["']?)/$1$name.$2$3/ig;

    # Make anchors unique: id="x" / name="x" becomes id="NAME.x".
    $s =~ s/(\b(?:id|name)\s*=\s*["']?)([^"'> ]+)(["']?)/$1$name.$2$3/ig;

    # Give the first H1 an ID equal to its source file name.
    # NOTE(review): the markup in this substitution was missing from the
    # copy under review; "<h1>" -> "<h1 id=...>" is a reconstruction from
    # the comment above -- confirm against the upstream script.
    $s =~ s/<h1>/<h1 id="$name">/i;

    # Try to make links to other chapters into links to the file itself:
    #   href="chapter.html"     ->  href="#chapter.html"
    #   href="chapter.html#x"   ->  href="#chapter.html.x"
    # Values containing "/" or ":" (paths, absolute URLs) are left alone.
    $s =~ s/(\bhref\s*=\s*["'])([^#\/:"']+)(["'])/$1\#$2$3/ig;
    $s =~ s/(\bhref\s*=\s*["']?)([^\/:"'> ]+)#([^"'> ]+)(["']?)/$1\#$2.$3$4/ig;

    return $s;
}

# Copy a file into memory.
#
#   $_[0] - path of the file to read
#
# Returns the file content as one string ("" for an empty file).  When the
# file cannot be opened, an error is printed on STDERR and nothing (undef
# in scalar context) is returned.
sub readfile {
    my ($path) = @_;

    my $in;
    if (!open($in, '<', $path)) {
        print STDERR "$PROGNAME Error: Cannot open file: $path\n";
        return;
    }

    # Slurp the whole file in one read.
    my $buf = do { local $/; <$in> };
    close($in);

    return defined $buf ? $buf : "";
}

# Return what follows the opening body tag of $html, or what follows
# </head> when there is no body tag, or $html unchanged when neither is
# present.
# NOTE(review): the opening-tag pattern was missing from the copy under
# review; matching "<body ...>" with optional attributes is a
# reconstruction -- confirm against the upstream script.
sub body_content {
    my ($html) = @_;

    if ($html =~ /<body\b[^>]*>(.*)/si) {
        return $1;
    }
    if ($html =~ /<\/head>(.*)/si) {
        return $1;
    }
    return $html;
}

# Build the combined document.
#
#   @_ - the command line: output file name followed by the input files
#
# Returns the process exit status: 0 on success, 1 on a usage error or
# when the output file cannot be written.  Input files that cannot be read
# are reported by readfile and skipped.
sub main {
    my @args = @_;

    if (!$args[0]) {
        print "Usage: $PROGNAME output file1 file2 ...\n";
        return 1;
    }

    my ($output, @files) = @args;

    my $out;
    if (!open($out, '>', $output)) {
        print STDERR "$PROGNAME Error: Cannot open file: $output\n";
        return 1;
    }

    for my $i (0 .. $#files) {
        my $file = $files[$i];
        my $html = readfile($file);
        next if !defined $html;    # unreadable file: already reported

        if ($i == 0) {
            # 1st file: take everything up to the body end tag or the end
            # otherwise
            $html =~ s/<\/body>.*//si;
        }
        else {
            # then for each other given html file only take the body
            # content, ending at </body>, else at </html>, else at the end
            $html = body_content($html);
            $html =~ s/<\/body>.*//si
                or $html =~ s/<\/html>.*//si;
        }

        print {$out} fixup($file, $html);
    }

    # close body
    # NOTE(review): the end tags were missing from the copy under review
    # (only the newline was left); reconstructed -- confirm upstream.
    print {$out} "</body></html>\n";

    if (!close($out)) {
        print STDERR "$PROGNAME Error: Cannot write file: $output\n";
        return 1;
    }

    return 0;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without touching @ARGV or the file system.
exit main(@ARGV) unless caller;