Skip to content

Commit 0bb605a

Browse files
committed
[css2] Fixed bug: if a URL contained regexp metacharacters (?, *, etc.), they
were interpreted by grep, rather than taken literally. --HG-- extra : convert_revision : svn%3A73dc7c4b-06e6-40f3-b4f7-9ed1dbc14bfc/trunk%401888
1 parent ee7419c commit 0bb605a

File tree

1 file changed

+43
-7
lines changed

1 file changed

+43
-7
lines changed

css2/bin/html2ps

+43-7
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,17 @@
22
eval 'exec perl -S $0 "$@"'
33
if $running_under_some_shell;
44

5+
6+
# Patched again by Bert Bos, 8 Jun 2000
7+
# - perl5 apparently didn't accept 99999 on line 4333, changed to 32766
8+
#
9+
# This version has been patched by Bert Bos, 6 Aug 1999 - bert@w3.org
10+
# Changes:
11+
# - added check for ETag: different URL but same ETag means the document
12+
# is not printed again.
13+
# - Now remembers username/password after the first time.
514
#
15+
616
# This version has been patched by Arnaud Le Hors 1997 - lehors@w3.org
717
# Changes are:
818
# - fixed bug in cross references (some weren't generated)
@@ -2011,6 +2021,7 @@ $toc.="($toc{'heading'})WB\n" if(!$tocdoc);
20112021
$fl1="";
20122022
$fl2="";
20132023
$np="NP RC ZF";
2024+
my %etags; # Initially empty set of etags seen (BB)
20142025
open(P3,"+>$scr.p3");
20152026
foreach $html (@docs) { # MAIN LOOP
20162027
$ndoc++;
@@ -2030,7 +2041,7 @@ foreach $html (@docs) { # MAIN LOOP
20302041
} else {
20312042
@docs=(@links,@docs);
20322043
}
2033-
$rem=$#docs+1;
2044+
$rem=$#docs-$nrem;
20342045
if($rem && $opt_W) {
20352046
&dbg("At least $rem document".($rem>1?"s":"")." remaining\n");
20362047
}
@@ -2193,7 +2204,6 @@ if(($first || $opt_R) && $xref{'passes'}) {
21932204
for $i (1..$xref{'passes'}) {&dbg(" pass #$i...") if($opt_d); &ref};
21942205
&dbg("\n") if($opt_d);
21952206
}
2196-
21972207
&fin;
21982208
### End of Main proc
21992209

@@ -2218,9 +2228,15 @@ sub h2p { # Convert HTML to PS
22182228
return 0 unless($ans=~/y/i);
22192229
}
22202230
&geturl($html,$_) || return;
2231+
#&dbg("ETag = $etag\n"); # BB 19990806
22212232
$nrem++;
22222233
if($contyp!~m|text/html|i) {$_=" <plaintext>\n$_"};
22232234
unless(($ba2)=$html=~m|(.*://.*/)|) {$ba2=$html."/"};
2235+
if ($etag && $etags{$etag}) { # Already seen this file
2236+
$_=""; # Remove content
2237+
&dbg("Already seen this document, skipping.\n");
2238+
}
2239+
$etags{$etag}=1; # Add etag to list of seen etags
22242240
} else {
22252241
if(open(FILE,$html)) {
22262242
&dbg("Reading $html\n") if($opt_W || $opt_d);
@@ -2288,8 +2304,8 @@ sub h2p { # Convert HTML to PS
22882304
if(!defined $B2) {$B2=$b2};
22892305
$levl{$b2.$html}=$levl{$html};
22902306

2291-
while($link && $head=~/<link\s+[^>]*?rel\s*=\s*["']?next$R/goi) {
2292-
if(($lnk)=$&=~/href\s*=\s*["']?\s*([^"' >]*)/goi) {
2307+
while($link && $head=~/<link\s+[^>]*?rel\s*=\s*["']?next\b/gois) {
2308+
if(($lnk)=$&=~/href\s*=\s*["']?\s*([^"' >]*)/gois) {
22932309
if($lnk=~m|.+//[^/]+$|) {$lnk=$&."/"}
22942310
if($lnk=~m|://|) {
22952311
$rlnk=0;
@@ -2684,15 +2700,15 @@ sub Subst{
26842700
$rlnk=0;
26852701
} else {
26862702
$rlnk=1;
2687-
if($lnk=~m|^/|) {$lnk=$b1.$lnk} elsif($lnk!~m|^\w+:|) {$lnk=$b2.$lnk};
2703+
if($lnk=~m|^/|) {$lnk=$b1.$lnk} elsif($lnk!~m|^\w+:|) {$lnk=$b2.$lnk}
26882704
}
26892705
while($lnk!~m|^\.\./| && $lnk=~m|[^/]*/\.\./|) {$lnk=$`.$'};
26902706
$lnk=~s"(^|/)\./"$1"g;
26912707
($doc)=$lnk=~/([^#]*)/;
26922708
($doctit{$doc})=$tag=~/title\s*=['"]([^'"]*)['"]/i;
26932709
$T=0;
26942710
$anch=2;
2695-
if($loc || grep(/^$doc$/,(@docs,@links))
2711+
if($loc || grep('^$doc$',(@docs,@links))
26962712
|| $opt_W && !$link && $level<=$maxlev && &follow){
26972713
$refs{$lnk}=$nref++ unless(defined $refs{$lnk});
26982714
$anch="$refs{$lnk} 1";
@@ -3489,15 +3505,27 @@ sub run {
34893505
&dbg("@_\n") if($opt_d);
34903506
$pm=`@_`;
34913507
}
3508+
3509+
my %users;
3510+
my %passwords;
3511+
34923512
sub geturl {
34933513
local($url)=@_;
34943514
&dbg("Retrieving $url");
34953515
if($package{'libwww-perl'} || $package{'jfriedl'}) {
34963516
warn "\n";
34973517
&gu();
3518+
if($code==401) { # See if we have a stored password already
3519+
$domain=$url=~m|://([^/]+)|;
3520+
$user=$users{"$domain/$realm"};
3521+
$pass=$passwords{"$domain/$realm"};
3522+
if (defined $user) {&gu($user,$pass);}
3523+
}
34983524
if($code==401) {
34993525
&prompt("\nDocument requires username and password\n\nUsername: ",$user);
3526+
$users{"$domain/$realm"}=$user;
35003527
&prompt("Password: ",$pass);
3528+
$passwords{"$domain/$realm"}=$pass;
35013529
&gu($user,$pass);
35023530
}
35033531
$_[1]=$cont;
@@ -3532,6 +3560,9 @@ sub gu {
35323560
$code=$res->code;
35333561
$contyp=$res->header('content-type');
35343562
$cont=$res->content;
3563+
$etag=$res->header('ETag'); # BB 19990806
3564+
#&dbg("WWW-Authenticate: ".$res->header('WWW-Authenticate')."\n");
3565+
($realm)=($res->header('WWW-Authenticate'))=~m/\s*basic\s*realm=(.*)/io;
35353566
} else {
35363567
require "www.pl";
35373568
@opts=@_?("authorization=$_[0]:$_[1]"):();
@@ -3541,6 +3572,8 @@ sub gu {
35413572
$code=$info{'CODE'};
35423573
($contyp)=$info{'HEADER'}=~/Content-type:\s+(.*)/i;
35433574
$cont=<FILE>;
3575+
$etag=0; # ?? BB 19990806
3576+
($realm)=$info{'HEADER'}=~m/WWW-Authenticate:\s*basic\s*realm=(.*)/io;
35443577
}
35453578
}
35463579
sub pictops {
@@ -4301,7 +4334,10 @@ sub img {
43014334
push(@IT,1);
43024335
$nli=99999;
43034336
$n=1;
4304-
$npr=$ps=~s|(.*\n){$nli}|sprintf("$&} D\n/P$nimg\_%d {",$n++)|eg;
4337+
#$npr=$ps=~s|(.*\n){$nli}|sprintf("$&} D\n/P$nimg\_%d {",$n++)|eg;
4338+
# Replaced prev line by next one. BB, 9 Jun 2000
4339+
$npr=0;
4340+
43054341
if($npr) {
43064342
$proc=" (";
43074343
for $i (0..$npr) {

0 commit comments

Comments
 (0)