2
2
eval 'exec perl -S $0 "$@"'
3
3
if $running_under_some_shell;
4
4
5
+
6
+ # Patched again by Bert Bos, 8 Jun 2000
7
+ # - perl5 apparently didn't accept 99999 on line 4333, changed to 32766
8
+ #
9
+ # This version has been patched by Bert Bos, 6 Aug 1999 - bert@w3.org
10
+ # Changes:
11
+ # - added check for ETag: different URL but same ETag means the document
12
+ # is not printed again.
13
+ # - Now remembers username/password after the first time.
5
14
#
15
+
6
16
# This version has been patched by Arnaud Le Hors 1997 - lehors@w3.org
7
17
# Changes are:
8
18
# - fixed bug in cross references (some weren't generated)
@@ -2011,6 +2021,7 @@ $toc.="($toc{'heading'})WB\n" if(!$tocdoc);
2011
2021
$fl1="";
2012
2022
$fl2="";
2013
2023
$np="NP RC ZF";
2024
+ my %etags; # Initially empty set of etags seen (BB)
2014
2025
open(P3,"+>$scr.p3");
2015
2026
foreach $html (@docs) { # MAIN LOOP
2016
2027
$ndoc++;
@@ -2030,7 +2041,7 @@ foreach $html (@docs) { # MAIN LOOP
2030
2041
} else {
2031
2042
@docs=(@links,@docs);
2032
2043
}
2033
- $rem=$#docs+1 ;
2044
+ $rem=$#docs-$nrem ;
2034
2045
if($rem && $opt_W) {
2035
2046
&dbg("At least $rem document".($rem>1?"s":"")." remaining\n");
2036
2047
}
@@ -2193,7 +2204,6 @@ if(($first || $opt_R) && $xref{'passes'}) {
2193
2204
for $i (1..$xref{'passes'}) {&dbg(" pass #$i...") if($opt_d); &ref};
2194
2205
&dbg("\n") if($opt_d);
2195
2206
}
2196
-
2197
2207
&fin;
2198
2208
### End of Main proc
2199
2209
@@ -2218,9 +2228,15 @@ sub h2p { # Convert HTML to PS
2218
2228
return 0 unless($ans=~/y/i);
2219
2229
}
2220
2230
&geturl($html,$_) || return;
2231
+ #&dbg("ETag = $etag\n"); # BB 19990806
2221
2232
$nrem++;
2222
2233
if($contyp!~m|text/html|i) {$_=" <plaintext>\n$_"};
2223
2234
unless(($ba2)=$html=~m|(.*://.*/)|) {$ba2=$html."/"};
2235
+ if ($etag && $etags{$etag}) { # Already seen this file
2236
+ $_=""; # Remove content
2237
+ &dbg("Already seen this document, skipping.\n");
2238
+ }
2239
+ $etags{$etag}=1; # Add etag to list of seen etags
2224
2240
} else {
2225
2241
if(open(FILE,$html)) {
2226
2242
&dbg("Reading $html\n") if($opt_W || $opt_d);
@@ -2288,8 +2304,8 @@ sub h2p { # Convert HTML to PS
2288
2304
if(!defined $B2) {$B2=$b2};
2289
2305
$levl{$b2.$html}=$levl{$html};
2290
2306
2291
- while($link && $head=~/<link\s+[^>]*?rel\s*=\s*["']?next$R/goi ) {
2292
- if(($lnk)=$&=~/href\s*=\s*["']?\s*([^"' >]*)/goi ) {
2307
+ while($link && $head=~/<link\s+[^>]*?rel\s*=\s*["']?next\b/gois ) {
2308
+ if(($lnk)=$&=~/href\s*=\s*["']?\s*([^"' >]*)/gois ) {
2293
2309
if($lnk=~m|.+//[^/]+$|) {$lnk=$&."/"}
2294
2310
if($lnk=~m|://|) {
2295
2311
$rlnk=0;
@@ -2684,15 +2700,15 @@ sub Subst{
2684
2700
$rlnk=0;
2685
2701
} else {
2686
2702
$rlnk=1;
2687
- if($lnk=~m|^/|) {$lnk=$b1.$lnk} elsif($lnk!~m|^\w+:|) {$lnk=$b2.$lnk};
2703
+ if($lnk=~m|^/|) {$lnk=$b1.$lnk} elsif($lnk!~m|^\w+:|) {$lnk=$b2.$lnk}
2688
2704
}
2689
2705
while($lnk!~m|^\.\./| && $lnk=~m|[^/]*/\.\./|) {$lnk=$`.$'};
2690
2706
$lnk=~s"(^|/)\./"$1"g;
2691
2707
($doc)=$lnk=~/([^#]*)/;
2692
2708
($doctit{$doc})=$tag=~/title\s*=['"]([^'"]*)['"]/i;
2693
2709
$T=0;
2694
2710
$anch=2;
2695
- if($loc || grep(/ ^$doc$/ ,(@docs,@links))
2711
+ if($loc || grep(' ^$doc$' ,(@docs,@links))
2696
2712
|| $opt_W && !$link && $level<=$maxlev && &follow){
2697
2713
$refs{$lnk}=$nref++ unless(defined $refs{$lnk});
2698
2714
$anch="$refs{$lnk} 1";
@@ -3489,15 +3505,27 @@ sub run {
3489
3505
&dbg("@_\n") if($opt_d);
3490
3506
$pm=`@_`;
3491
3507
}
3508
+
3509
+ my %users;
3510
+ my %passwords;
3511
+
3492
3512
sub geturl {
3493
3513
local($url)=@_;
3494
3514
&dbg("Retrieving $url");
3495
3515
if($package{'libwww-perl'} || $package{'jfriedl'}) {
3496
3516
warn "\n";
3497
3517
&gu();
3518
+ if($code==401) { # See if we have a stored password already
3519
+ $domain=$url=~m|://([^/]+)|;
3520
+ $user=$users{"$domain/$realm"};
3521
+ $pass=$passwords{"$domain/$realm"};
3522
+ if (defined $user) {&gu($user,$pass);}
3523
+ }
3498
3524
if($code==401) {
3499
3525
&prompt("\nDocument requires username and password\n\nUsername: ",$user);
3526
+ $users{"$domain/$realm"}=$user;
3500
3527
&prompt("Password: ",$pass);
3528
+ $passwords{"$domain/$realm"}=$pass;
3501
3529
&gu($user,$pass);
3502
3530
}
3503
3531
$_[1]=$cont;
@@ -3532,6 +3560,9 @@ sub gu {
3532
3560
$code=$res->code;
3533
3561
$contyp=$res->header('content-type');
3534
3562
$cont=$res->content;
3563
+ $etag=$res->header('ETag'); # BB 19990806
3564
+ #&dbg("WWW-Authenticate: ".$res->header('WWW-Authenticate')."\n");
3565
+ ($realm)=($res->header('WWW-Authenticate'))=~m/\s*basic\s*realm=(.*)/io;
3535
3566
} else {
3536
3567
require "www.pl";
3537
3568
@opts=@_?("authorization=$_[0]:$_[1]"):();
@@ -3541,6 +3572,8 @@ sub gu {
3541
3572
$code=$info{'CODE'};
3542
3573
($contyp)=$info{'HEADER'}=~/Content-type:\s+(.*)/i;
3543
3574
$cont=<FILE>;
3575
+ $etag=0; # ?? BB 19990806
3576
+ ($realm)=$info{'HEADER'}=~m/WWW-Authenticate:\s*basic\s*realm=(.*)/io;
3544
3577
}
3545
3578
}
3546
3579
sub pictops {
@@ -4301,7 +4334,10 @@ sub img {
4301
4334
push(@IT,1);
4302
4335
$nli=99999;
4303
4336
$n=1;
4304
- $npr=$ps=~s|(.*\n){$nli}|sprintf("$&} D\n/P$nimg\_%d {",$n++)|eg;
4337
+ #$npr=$ps=~s|(.*\n){$nli}|sprintf("$&} D\n/P$nimg\_%d {",$n++)|eg;
4338
+ # The substitution above is disabled; $npr is set to 0 below instead. BB, 9 Jun 2000
4339
+ $npr=0;
4340
+
4305
4341
if($npr) {
4306
4342
$proc=" (";
4307
4343
for $i (0..$npr) {
0 commit comments