-
Notifications
You must be signed in to change notification settings - Fork 791
Expand file tree
/
Copy pathaddhanch
More file actions
174 lines (147 loc) · 5.25 KB
/
addhanch
File metadata and controls
174 lines (147 loc) · 5.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/local/bin/perl
# Add heading anchors.
# The files are rwritten.
# Arnaud Le Hors - lehors@w3.org
# Modifications by Bert Bos <bert@w3.org>
# $Id: addhanch,v 2.5 2006-10-09 18:55:52 ihickson Exp $
use DB_File;
use Getopt::Std;
use lib 'bin';
use utils;
$PROG = substr($0, rindex($0, "/") + 1);
$USAGE = "Usage: $PROG [-r realname] [-c config] headingDB [file [outfile]]\n";
my %anchors = ();
my @counts; # item [0] is not used
# $1 = starttag
# $2 = level (1-6)
# $3 = value of title
# $4 = content
# $5 = content of comment
# $6 = endtag
#$headingp = '(<h([1-6]).*?(?:title\s*=\s*\"(.*?)\".*?)?>)(.*?)(?:<!--(.*?)-->)?(</h[1-6][^>]*>)';
#$cmthdr = '<!--.*?-->(?!\s*<\/h[1-6])';
#$anamep = '<a\s+[^>]*?name\s*=\s*(?:([^\s\">]+)|\"([^\"]+)\"|\'([^\']+)\')';
# Generate roman numeral for 1 <= $_[0] <= 4000
sub romannumeral {
my $n = $_[0];
my $result = "";
while ($n >= 1000) {$result .= 'M'; $n -= 1000;}
if ($n >= 500) {$result .= 'D'; $n -= 500;}
while ($n >= 100) {$result .= 'C'; $n -= 100;}
if ($n >= 50) {$result .= 'L'; $n -= 50;}
while ($n >= 10) {$result .= 'X'; $n -= 10;}
if ($n >= 9) {$result .= 'IX'; $n -= 9;}
if ($n >= 5) {$result .= 'V'; $n -= 5;}
if ($n >= 4) {$result .= 'IV'; $n -= 4;}
while ($n >= 1) {$result .= 'I'; $n -= 1;}
return $result;
}
# Generate the next appropriate number using format $_[0]
sub gen_num {
# Loop over format
my $result = ""; # Collects result string
my $i = 1; # Next count to insert
my $p = $_[0];
# Loop over pattern, looking for escaped and bare format specifiers
while ($p ne '') {
if ($p =~ /^[^AaIi10\\]+/) {
$result .= $&;
} elsif ($p =~ /^A/o) {
$result .= chr(ord('A') + $counts[$i++] - 1);
} elsif ($p =~ /^a/o) {
$result .= chr(ord('a') + $counts[$i++] - 1);
} elsif ($p =~ /^I/o) {
$result .= romannumeral($counts[$i++]);
} elsif ($p =~ /^i/o) {
$result .= lc(romannumeral($counts[$i++]));
} elsif ($p =~ /^1/o) {
$result .= "$counts[$i++]";
} elsif ($p =~ /^0/o) {
$i++;
} elsif ($p =~ /^\\(.)/o) {
$result .= $1;
} else {
die "Cannot happen\n";
}
$p = $';
}
return $result;
}
sub cleanup {
my $text = $_[0];
$text =~ tr/\t\n/ /; # Newlines & tabs -> spaces
$text =~ s/^ +//go; # Remove initial spaces
$text =~ s/ +$//go; # Remove trailing spaces
return $text;
}
# Generate a new header that includes the number and the target anchor
sub addanchor {
my ($chapno, $seqno, $stag, $lvl, $title, $content, $cmt, $etag) = @_;
my ($anchor, $anchor1, $txt, $i);
my $file = $chapter[$chapno];
# Generate the anchor
if ($content =~ /<a\s+[^>]*?name\s*=\s*(?:([^\s\">]+)|\"([^\"]+)\"|\'([^\']+)\')/io) { # Reuse first anchor in header.
$anchor = "$1$2$3"; # Only one of $1, $2, $3 will match.
$anchor1 = "#$anchor";
} else {
$anchor = "q$seqno"; # Use simple sequence number
$anchor1 = "#$anchor";
}
$anchor1 = "" if ($lvl == 1); # if H1, just go to the top of the page
# Increment the count, reset higher level counts
$counts[$lvl]++;
for ($i = $lvl + 1; $i <= 6; $i++) {$counts[$i] = 0;}
# Generate number according to format
my $num = gen_num($format[$chapno][$lvl]);
# Remove anchors from heading text
$txt = cleanup($content);
$txt =~ s/<\/?a[^>]*>//gio;
# Determine title/comment
$cmt = cleanup(defined $title ? $title : defined $cmt ? $cmt : '');
# Store in database
my $dbentry = "$file\t$seqno\t$txt\t$lvl\t$num\t$anchor1\t$cmt";
my $dbkey = "$file\t$seqno";
$anchors{$dbkey} = $dbentry;
# Construct the new heading
if ($content =~ /<a\s+[^>]*?name\s*=/sio) {
# Don't add anchors if there is already an anchor
return "$stag$num $content$etag";
} elsif ($content =~ /<a\b/sio) {
# There is an A tag, insert anchor before it
return "$stag<a name=\"$anchor\">$num $`</a>$&$'$etag";
} else {
# There is no A tag, enclose whole content
return "$stag<a name=\"$anchor\">$num $content</a>$etag";
}
}
### main
getopts('r:c:') || die $USAGE;
if ($#ARGV >= 0) {$dbase = $ARGV[0]; shift;} else {die $USAGE;}
if ($#ARGV >= 0) {$file = $ARGV[0]; shift;} else {$file = '-';}
if ($#ARGV >= 0) {$output = $ARGV[0]; shift;} else {$output = '-';}
if ($#ARGV >= 0) {die $USAGE;}
$prefix = defined $opt_r ? $opt_r : $file;
$config = defined $opt_c ? $opt_c : 'Project.cfg';
# Read config file
read_config($config);
defined $lookup{$prefix} or die "$PROG: file $prefix not found in config file\n";
my $chap = $lookup{$prefix};
# Determine chapter number for this file
@counts = (-1, $resetnumber[$chap], 0, 0, 0, 0, 0);
# Open heading database
dbmopen(%anchors, $dbase, 0666) || die "$PROG: cannot open database $dbase\n";
my $i = 0;
my $buf = readfile($file);
$buf =~ s/<!--.*?-->(?!\s*<\/h[1-6])//sgio; # rm comments, except before </h?>
# Do the real work: insert numbers and anchors
$buf =~ s/(<h([1-6]).*?(?:title\s*=\s*\"(.*?)\".*?)?>)(.*?)(?:<!--(.*?)-->)?(<\/h[1-6][^>]*>)/addanchor($chap, $i++, $1, $2, $3, $4, $5, $6)/sgieo;
# Remove next keys, if any (may happen if the chapter has become shorter)
my $file = $chapter[$chap];
LOOP: while (1) {
my $dbkey = "$file\t$i";
last LOOP if (! $anchors{$dbkey});
delete $anchors{$dbkey};
$i++;
}
writefile($output, $buf);
dbmclose(%anchors);