22eval 'exec perl -S $0 "$@"'
33 if $running_under_some_shell;
44
5+
6+ # Patched again by Bert Bos, 8 Jun 2000
7+ # - perl5 apparently didn't accept 99999 on line 4333, changed to 32766
8+ #
9+ # This version has been patched by Bert Bos, 6 Aug 1999 - bert@w3.org
10+ # Changes:
11+ # - added check for ETag: different URL but same ETag means the document
12+ # is not printed again.
13+ # - Now remembers username/password after the first time.
514#
15+
616# This version has been patched by Arnaud Le Hors 1997 - lehors@w3.org
717# Changes are:
818# - fixed bug in cross references (some weren't generated)
@@ -2011,6 +2021,7 @@ $toc.="($toc{'heading'})WB\n" if(!$tocdoc);
20112021$fl1="";
20122022$fl2="";
20132023$np="NP RC ZF";
2024+ my %etags; # Initially empty set of etags seen (BB)
20142025open(P3,"+>$scr.p3");
20152026foreach $html (@docs) { # MAIN LOOP
20162027 $ndoc++;
@@ -2030,7 +2041,7 @@ foreach $html (@docs) { # MAIN LOOP
20302041 } else {
20312042 @docs=(@links,@docs);
20322043 }
2033- $rem=$#docs+1 ;
2044+ $rem=$#docs-$nrem ;
20342045 if($rem && $opt_W) {
20352046 &dbg("At least $rem document".($rem>1?"s":"")." remaining\n");
20362047 }
@@ -2193,7 +2204,6 @@ if(($first || $opt_R) && $xref{'passes'}) {
21932204 for $i (1..$xref{'passes'}) {&dbg(" pass #$i...") if($opt_d); &ref};
21942205 &dbg("\n") if($opt_d);
21952206}
2196-
21972207&fin;
21982208### End of Main proc
21992209
@@ -2218,9 +2228,15 @@ sub h2p { # Convert HTML to PS
22182228 return 0 unless($ans=~/y/i);
22192229 }
22202230 &geturl($html,$_) || return;
2231+ #&dbg("ETag = $etag\n"); # BB 19990806
22212232 $nrem++;
22222233 if($contyp!~m|text/html|i) {$_=" <plaintext>\n$_"};
22232234 unless(($ba2)=$html=~m|(.*://.*/)|) {$ba2=$html."/"};
2235+ if ($etag && $etags{$etag}) { # Already seen this file
2236+ $_=""; # Remove content
2237+ &dbg("Already seen this document, skipping.\n");
2238+ }
2239+ $etags{$etag}=1; # Add etag to list of seen etags
22242240 } else {
22252241 if(open(FILE,$html)) {
22262242 &dbg("Reading $html\n") if($opt_W || $opt_d);
@@ -2288,8 +2304,8 @@ sub h2p { # Convert HTML to PS
22882304 if(!defined $B2) {$B2=$b2};
22892305 $levl{$b2.$html}=$levl{$html};
22902306
2291- while($link && $head=~/<link\s+[^>]*?rel\s*=\s*["']?next$R/goi ) {
2292- if(($lnk)=$&=~/href\s*=\s*["']?\s*([^"' >]*)/goi ) {
2307+ while($link && $head=~/<link\s+[^>]*?rel\s*=\s*["']?next\b/gois ) {
2308+ if(($lnk)=$&=~/href\s*=\s*["']?\s*([^"' >]*)/gois ) {
22932309 if($lnk=~m|.+//[^/]+$|) {$lnk=$&."/"}
22942310 if($lnk=~m|://|) {
22952311 $rlnk=0;
@@ -2684,15 +2700,15 @@ sub Subst{
26842700 $rlnk=0;
26852701 } else {
26862702 $rlnk=1;
2687- if($lnk=~m|^/|) {$lnk=$b1.$lnk} elsif($lnk!~m|^\w+:|) {$lnk=$b2.$lnk};
2703+ if($lnk=~m|^/|) {$lnk=$b1.$lnk} elsif($lnk!~m|^\w+:|) {$lnk=$b2.$lnk}
26882704 }
26892705 while($lnk!~m|^\.\./| && $lnk=~m|[^/]*/\.\./|) {$lnk=$`.$'};
26902706 $lnk=~s"(^|/)\./"$1"g;
26912707 ($doc)=$lnk=~/([^#]*)/;
26922708 ($doctit{$doc})=$tag=~/title\s*=['"]([^'"]*)['"]/i;
26932709 $T=0;
26942710 $anch=2;
2695- if($loc || grep(/ ^$doc$/ ,(@docs,@links))
2711+ if($loc || grep(' ^$doc$' ,(@docs,@links))
26962712 || $opt_W && !$link && $level<=$maxlev && &follow){
26972713 $refs{$lnk}=$nref++ unless(defined $refs{$lnk});
26982714 $anch="$refs{$lnk} 1";
@@ -3489,15 +3505,27 @@ sub run {
34893505 &dbg("@_\n") if($opt_d);
34903506 $pm=`@_`;
34913507}
3508+
3509+ my %users;
3510+ my %passwords;
3511+
34923512sub geturl {
34933513 local($url)=@_;
34943514 &dbg("Retrieving $url");
34953515 if($package{'libwww-perl'} || $package{'jfriedl'}) {
34963516 warn "\n";
34973517 &gu();
3518+ if($code==401) { # See if we have a stored password already
3519+ $domain=$url=~m|://([^/]+)|;
3520+ $user=$users{"$domain/$realm"};
3521+ $pass=$passwords{"$domain/$realm"};
3522+ if (defined $user) {&gu($user,$pass);}
3523+ }
34983524 if($code==401) {
34993525 &prompt("\nDocument requires username and password\n\nUsername: ",$user);
3526+ $users{"$domain/$realm"}=$user;
35003527 &prompt("Password: ",$pass);
3528+ $passwords{"$domain/$realm"}=$pass;
35013529 &gu($user,$pass);
35023530 }
35033531 $_[1]=$cont;
@@ -3532,6 +3560,9 @@ sub gu {
35323560 $code=$res->code;
35333561 $contyp=$res->header('content-type');
35343562 $cont=$res->content;
3563+ $etag=$res->header('ETag'); # BB 19990806
3564+ #&dbg("WWW-Authenticate: ".$res->header('WWW-Authenticate')."\n");
3565+ ($realm)=($res->header('WWW-Authenticate'))=~m/\s*basic\s*realm=(.*)/io;
35353566 } else {
35363567 require "www.pl";
35373568 @opts=@_?("authorization=$_[0]:$_[1]"):();
@@ -3541,6 +3572,8 @@ sub gu {
35413572 $code=$info{'CODE'};
35423573 ($contyp)=$info{'HEADER'}=~/Content-type:\s+(.*)/i;
35433574 $cont=<FILE>;
3575+ $etag=0; # ?? BB 19990806
3576+ ($realm)=$info{'HEADER'}=~m/WWW-Authenticate:\s*basic\s*realm=(.*)/io;
35443577 }
35453578}
35463579sub pictops {
@@ -4301,7 +4334,10 @@ sub img {
43014334 push(@IT,1);
43024335 $nli=99999;
43034336 $n=1;
4304- $npr=$ps=~s|(.*\n){$nli}|sprintf("$&} D\n/P$nimg\_%d {",$n++)|eg;
4337+ #$npr=$ps=~s|(.*\n){$nli}|sprintf("$&} D\n/P$nimg\_%d {",$n++)|eg;
4338+ # Replaced prev line by next one. BB, 9 Jun 2000
4339+ $npr=0;
4340+
43054341 if($npr) {
43064342 $proc=" (";
43074343 for $i (0..$npr) {
0 commit comments