#!/usr/local/bin/perl
# Add index anchors to source file _and_ generate index database
# Arnaud Le Hors - lehors@w3.org
# $Id: addianch,v 2.6 2006-10-09 18:55:52 ihickson Exp $
use DB_File;
use Getopt::Std;
use lib 'bin';
use utils;
$PROG = substr($0, rindex($0, "/") + 1);
$USAGE = "Usage: $PROG [-r url-prefix] dbase [file [output]]\n";
# Normalize the text of an index term: remove A tags, strip initial and
# trailing ws, and collapse every run of ws to a single space.
#   $_[0]    raw text, may contain A tags, newlines and tabs
#   returns  the cleaned-up copy (the caller's value is left untouched)
sub cleanup {
    my ($text) = @_;
    # Remove A tags first, so that the space left on either side of a
    # removed tag is collapsed below. [^>]* also spans newlines.
    $text =~ s/<\/?a\b[^>]*>//gi;
    # Blank, tab, NL, CR and FF all become a space. The blank itself has to
    # be in the search list: tr///s only squeezes characters it translated,
    # so runs containing literal blanks would otherwise stay as they are.
    $text =~ tr/ \t\n\r\f/ /s;
    $text =~ s/^ +//; # remove initial space
    $text =~ s/ +$//; # remove trailing space
    return $text;
}
# Pattern used by the main loop to find elements whose class attribute is
# "index-def" or "index-inst"; it is assembled from a prefix, the class
# alternation and a suffix.
# After matching: $1=starttag, $2=class value, $3=content
# NOTE(review): as written, group 1 opens directly on "]*?class" and the
# pattern ends with "(.*?)" followed by nothing, so $1 cannot begin at the
# "<" of a start tag and $3 can only match the empty string. The text looks
# truncated (probably the "<...>" parts of both strings were lost) --
# TODO confirm against the upstream script before relying on it.
$pre = '(]*?class\s*=\s*[\"\']?';
$post = '[\"\']?[^>]*>)(.*?)';
$defp = $pre.'(index-def|index-inst)'.$post;
### main
# Command line: [-r url-prefix] dbase [file [output]]
#   -r url-prefix  prefix stored with every entry (default: the file argument)
#   dbase          index database to open (required)
#   file           input to load, "-" when omitted
#   output         file to write, "-" when omitted
getopts('r:') || die $USAGE;
# One to three positional arguments are accepted.
die $USAGE if @ARGV < 1 || @ARGV > 3;
$db = shift @ARGV;
$file = @ARGV ? shift @ARGV : '-';
$output = @ARGV ? shift @ARGV : '-';
$prefix = defined $opt_r ? $opt_r : $file; # URL prefix
dbmopen(%anchors, $db, 0666) || die "$PROG: cannot open database $db\n";
# readfile comes from the utils module; presumably "-" makes it read
# standard input -- TODO confirm.
$buf = readfile($file); # Load file
# Remove comments. The pattern has to be spelled out: with an empty pattern
# Perl reuses the last successful pattern or, failing that, matches the empty
# string, so nothing would ever be removed. "." does not match a newline
# here, so only comments that fit on one line are dropped.
$buf =~ s/<!--.*?-->//g;
# Two-argument open is kept on purpose: it is what makes the default
# output name "-" mean standard output.
open(OUTPUT, ">$output") || die "$PROG: cannot create file $output\n";
$seqno = 0; # numbers the database records written for this file
# Walk through the buffer one index element at a time. Each round copies the
# text in front of the match to OUTPUT, keeps the text behind the match in
# $buf for the next round, and stores the element's entries in the database.
# NOTE(review): the "Create an anchor" section below looks truncated -- the
# first regex starts with "]*?name", the elsif regex is never terminated and
# the branch that would build a new anchor is missing. TODO restore it from
# the upstream script; the code is left exactly as found.
while ($buf =~ /$defp/sio) {
# Text preceding the match is passed through unchanged
print OUTPUT $`;
# Continue scanning after the match
$buf = $';
$stag = $1;
$class = $2;
$content = $3;
# The complete matched text
$elem = $&;
# Check if there is a title attribute, otherwise use content
if ($stag =~ /title\s*=\s*(?:\"([^\"]*)\"|\'([^\']*)\'|([^\s>]*))/sio) {
$entry = $1.$2.$3; # Only one of the three is non-empty
} else {
$entry = $content; # Assume content is index term
}
# Split entry in case there are several; "|" separates the terms
@entries = split(/\|/o, cleanup($entry));
# Create an anchor
if ($content =~ /]*?name\s*=\s*(?:[\"]([^\"]+)[\"]|[\']([^\']+)[\']|[\s]([^\s>]+)[\s>])/sio) {
# Already a name in the content, re-use it
$anchor = "$1$2$3";
print OUTPUT $elem;
} elsif ($elem =~ /$elem";
}
# Put one record per entry in the database.
#   key:   url-prefix TAB sequence number
#   value: entry TAB url-prefix TAB class TAB anchor
foreach $e (@entries) {
$key = "$prefix\t$seqno";
$h = "$e\t$prefix\t$class\t$anchor";
$anchors{$key} = $h;
$seqno++;
}
}
print OUTPUT $buf; # Print final part of file
close(OUTPUT);
dbmclose(%anchors);