#!/usr/local/bin/perl
# Add index anchors to source file _and_ generate index database
# Arnaud Le Hors - lehors@w3.org
# $Id: addianch,v 2.6 2006-10-09 18:55:52 ihickson Exp $
#
# Usage: addianch [-r url-prefix] dbase [file [output]]
#
# Scans an HTML file for elements carrying class "index-def" or
# "index-inst", makes sure each one has a named anchor, writes the
# (possibly modified) HTML to the output, and records one database
# entry per index term: "term \t url-prefix \t class \t anchor".
#
# NOTE(review): the copy of this script that was reviewed had every
# "<...>" span stripped out of its regexes and string literals.  The
# patterns marked "reconstructed" below were rebuilt from the fragments
# that survived; please confirm them against the upstream revision.

use strict;
use warnings;

use DB_File;    # loaded before dbmopen() so that dbmopen uses Berkeley DB
use Getopt::Std;
use lib 'bin';
use utils;      # project-local; expected to provide readfile()

my $PROG  = substr($0, rindex($0, "/") + 1);
my $USAGE = "Usage: $PROG [-r url-prefix] dbase [file [output]]\n";

# cleanup($text)
#
# Normalise an index term: remove <a ...> and </a> tags, collapse every
# run of whitespace (spaces, tabs, newlines) to a single space, and trim
# leading and trailing space.  Returns the cleaned-up copy.
sub cleanup {
    my ($text) = @_;

    # Remove A tags first, so that the gap a tag leaves between two
    # spaces is collapsed by the next step.
    $text =~ s/<\/?a\b[^>]*>//gi;

    # Collapse *all* whitespace.  The previous tr/\n\t/ /s only squeezed
    # newlines and tabs, so runs of plain spaces survived.
    $text =~ s/\s+/ /g;

    $text =~ s/^ //;
    $text =~ s/ $//;
    return $text;
}

# After matching: $1=starttag, $2=class value, $3=content
# NOTE(review): reconstructed -- the element name ("span") and the
# closing-tag part of $post were lost; confirm against upstream.
my $pre   = '(<span\b[^>]*?class\s*=\s*[\"\']?';
my $post  = '[\"\']?[^>]*>)(.*?)<\/span\s*>';
my $defp  = $pre . '(index-def|index-inst)' . $post;
my $defre = qr/$defp/si;

### main

my %opt;
getopts('r:', \%opt) or die $USAGE;

my $db     = @ARGV ? shift @ARGV : die $USAGE;
my $file   = @ARGV ? shift @ARGV : '-';
my $output = @ARGV ? shift @ARGV : '-';
die $USAGE if @ARGV;

my $prefix = defined $opt{r} ? $opt{r} : $file;    # URL prefix

my %anchors;
dbmopen(%anchors, $db, 0666)
    or die "$PROG: cannot open database $db: $!\n";

my $buf = readfile($file);    # Load file

# Remove comments.  /s lets a comment span several lines.
# NOTE(review): reconstructed -- the pattern had been stripped to s///.
$buf =~ s/<!--.*?-->//gs;

# Open the output only after the input has been read, as before.
# "-" keeps meaning standard output; any other name is opened with
# three-arg open so it is never interpreted as a mode or a pipe.
my $out;
if ($output eq '-') {
    $out = \*STDOUT;
}
else {
    open($out, '>', $output)
        or die "$PROG: cannot create file $output: $!\n";
}

my $seqno = 0;    # running number of index entries written for $prefix
my $done  = 0;    # offset in $buf up to which text has been emitted

while ($buf =~ /$defre/g) {
    # Save everything about this match before any other regex runs.
    my ($from, $to) = ($-[0], $+[0]);
    my ($stag, $class, $content) = ($1, $2, $3);
    my $elem = substr($buf, $from, $to - $from);

    # Copy through the text between the previous element and this one.
    print {$out} substr($buf, $done, $from - $done);
    $done = $to;

    # Check if there is a title attribute, otherwise use content
    my $entry;
    if ($stag =~ /\btitle\s*=\s*(?:\"([^\"]*)\"|\'([^\']*)\'|([^\s>]*))/si) {
        ($entry) = grep { defined } $1, $2, $3;    # exactly one alternative matched
    }
    else {
        $entry = $content;    # Assume content is index term
    }

    # Split entry in case there are several ("term1|term2")
    my @entries = split(/\|/, cleanup($entry));

    # Create an anchor
    my $anchor;
    if ($content =~ /<a\b[^>]*?name\s*=\s*(?:\"([^\"]+)\"|\'([^\']+)\'|([^\s\"\'>]+))/si) {
        # Already a name in the content, re-use it.
        # NOTE(review): the leading "<a\b[^>" is reconstructed.  The
        # unquoted alternative now also accepts name=foo without
        # whitespace after the "=".
        ($anchor) = grep { defined } $1, $2, $3;
        print {$out} $elem;
    }
    elsif ($elem =~ /<a\b/si) {
        # NOTE(review): this whole branch is reconstructed; its original
        # text was lost.  Presumably: the element already contains an A
        # without a name, and anchors must not nest, so the name goes on
        # that A rather than on a new wrapper.
        $anchor = "x$seqno";
        $elem =~ s/<a\b/<a name=\"$anchor\"/i;
        print {$out} $elem;
    }
    else {
        # No A inside: wrap the whole element in a new named anchor.
        # NOTE(review): the "x<seqno>" naming scheme is an assumption;
        # confirm against upstream.
        $anchor = "x$seqno";
        print {$out} "<a name=\"$anchor\">$elem</a>";
    }

    # Put URL in database: url-prefix class anchor entry
    for my $e (@entries) {
        my $key = "$prefix\t$seqno";
        $anchors{$key} = "$e\t$prefix\t$class\t$anchor";
        $seqno++;
    }
}

print {$out} substr($buf, $done);    # Print final part of file

# Buffered write errors only surface at close time.
close($out) or die "$PROG: error writing $output: $!\n";
dbmclose(%anchors);