#!/usr/local/bin/perl # Add heading anchors. # The files are rwritten. # Arnaud Le Hors - lehors@w3.org # Modifications by Bert Bos # $Id: addhanch,v 2.5 2006-10-09 18:55:52 ihickson Exp $ use DB_File; use Getopt::Std; use lib 'bin'; use utils; $PROG = substr($0, rindex($0, "/") + 1); $USAGE = "Usage: $PROG [-r realname] [-c config] headingDB [file [outfile]]\n"; my %anchors = (); my @counts; # item [0] is not used # $1 = starttag # $2 = level (1-6) # $3 = value of title # $4 = content # $5 = content of comment # $6 = endtag #$headingp = '()(.*?)(?:)?(]*>)'; #$cmthdr = '(?!\s*<\/h[1-6])'; #$anamep = ']*?name\s*=\s*(?:([^\s\">]+)|\"([^\"]+)\"|\'([^\']+)\')'; # Generate roman numeral for 1 <= $_[0] <= 4000 sub romannumeral { my $n = $_[0]; my $result = ""; while ($n >= 1000) {$result .= 'M'; $n -= 1000;} if ($n >= 500) {$result .= 'D'; $n -= 500;} while ($n >= 100) {$result .= 'C'; $n -= 100;} if ($n >= 50) {$result .= 'L'; $n -= 50;} while ($n >= 10) {$result .= 'X'; $n -= 10;} if ($n >= 9) {$result .= 'IX'; $n -= 9;} if ($n >= 5) {$result .= 'V'; $n -= 5;} if ($n >= 4) {$result .= 'IV'; $n -= 4;} while ($n >= 1) {$result .= 'I'; $n -= 1;} return $result; } # Generate the next appropriate number using format $_[0] sub gen_num { # Loop over format my $result = ""; # Collects result string my $i = 1; # Next count to insert my $p = $_[0]; # Loop over pattern, looking for escaped and bare format specifiers while ($p ne '') { if ($p =~ /^[^AaIi10\\]+/) { $result .= $&; } elsif ($p =~ /^A/o) { $result .= chr(ord('A') + $counts[$i++] - 1); } elsif ($p =~ /^a/o) { $result .= chr(ord('a') + $counts[$i++] - 1); } elsif ($p =~ /^I/o) { $result .= romannumeral($counts[$i++]); } elsif ($p =~ /^i/o) { $result .= lc(romannumeral($counts[$i++])); } elsif ($p =~ /^1/o) { $result .= "$counts[$i++]"; } elsif ($p =~ /^0/o) { $i++; } elsif ($p =~ /^\\(.)/o) { $result .= $1; } else { die "Cannot happen\n"; } $p = $'; } return $result; } sub cleanup { my $text = $_[0]; $text =~ tr/\t\n/ /; # Newlines & tabs -> spaces $text =~ s/^ +//go; # Remove initial spaces $text =~ s/ +$//go; # Remove trailing spaces return $text; } # Generate a new header that includes the number and the target anchor sub addanchor { my ($chapno, $seqno, $stag, $lvl, $title, $content, $cmt, $etag) = @_; my ($anchor, $anchor1, $txt, $i); my $file = $chapter[$chapno]; # Generate the anchor if ($content =~ /]*?name\s*=\s*(?:([^\s\">]+)|\"([^\"]+)\"|\'([^\']+)\')/io) { # Reuse first anchor in header. $anchor = "$1$2$3"; # Only one of $1, $2, $3 will match. $anchor1 = "#$anchor"; } else { $anchor = "q$seqno"; # Use simple sequence number $anchor1 = "#$anchor"; } $anchor1 = "" if ($lvl == 1); # if H1, just go to the top of the page # Increment the count, reset higher level counts $counts[$lvl]++; for ($i = $lvl + 1; $i <= 6; $i++) {$counts[$i] = 0;} # Generate number according to format my $num = gen_num($format[$chapno][$lvl]); # Remove anchors from heading text $txt = cleanup($content); $txt =~ s/<\/?a[^>]*>//gio; # Determine title/comment $cmt = cleanup(defined $title ? $title : defined $cmt ? $cmt : ''); # Store in database my $dbentry = "$file\t$seqno\t$txt\t$lvl\t$num\t$anchor1\t$cmt"; my $dbkey = "$file\t$seqno"; $anchors{$dbkey} = $dbentry; # Construct the new heading if ($content =~ /]*?name\s*=/sio) { # Don't add anchors if there is already an anchor return "$stag$num $content$etag"; } elsif ($content =~ /$num $`$&$'$etag"; } else { # There is no A tag, enclose whole content return "$stag$num $content$etag"; } } ### main getopts('r:c:') || die $USAGE; if ($#ARGV >= 0) {$dbase = $ARGV[0]; shift;} else {die $USAGE;} if ($#ARGV >= 0) {$file = $ARGV[0]; shift;} else {$file = '-';} if ($#ARGV >= 0) {$output = $ARGV[0]; shift;} else {$output = '-';} if ($#ARGV >= 0) {die $USAGE;} $prefix = defined $opt_r ? $opt_r : $file; $config = defined $opt_c ? $opt_c : 'Project.cfg'; # Read config file read_config($config); defined $lookup{$prefix} or die "$PROG: file $prefix not found in config file\n"; my $chap = $lookup{$prefix}; # Determine chapter number for this file @counts = (-1, $resetnumber[$chap], 0, 0, 0, 0, 0); # Open heading database dbmopen(%anchors, $dbase, 0666) || die "$PROG: cannot open database $dbase\n"; my $i = 0; my $buf = readfile($file); $buf =~ s/(?!\s*<\/h[1-6])//sgio; # rm comments, except before # Do the real work: insert numbers and anchors $buf =~ s/()(.*?)(?:)?(<\/h[1-6][^>]*>)/addanchor($chap, $i++, $1, $2, $3, $4, $5, $6)/sgieo; # Remove next keys, if any (may happen if the chapter has become shorter) my $file = $chapter[$chap]; LOOP: while (1) { my $dbkey = "$file\t$i"; last LOOP if (! $anchors{$dbkey}); delete $anchors{$dbkey}; $i++; } writefile($output, $buf); dbmclose(%anchors);