Skip to content

Commit 21df46f

Browse files
committed
[css2] *** empty log message ***
--HG-- extra : convert_revision : svn%3A73dc7c4b-06e6-40f3-b4f7-9ed1dbc14bfc/trunk%401842
1 parent 1661323 commit 21df46f

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

css2/bin/extractmeta

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/usr/local/bin/perl -w
2+
#
3+
# Extract info from LINK and META and put them in an RDF file
4+
#
5+
# Reads from 'file' (default stdin) and writes to 'output'
6+
# (default stdout). The URL of the input is assumed to be the -r 'realname'
7+
# (default 'file').
8+
#
9+
# Bert Bos <bert@w3.org>
10+
# $Id: extractmeta,v 1.1 2001-06-26 10:53:48 bbos Exp $
11+
12+
use Getopt::Std;
13+
#use lib 'bin';
14+
#use utils;
15+
16+
# Program name (last path component of $0) and usage message for errors.
$PROG = substr($0, rindex($0, "/") + 1);
$USAGE = "Usage: $PROG [-r realname] [file [output]]\n";

# $attvalre matches "= value" after an attribute name. Exactly one of the
# three groups is defined after a successful match and holds the value:
#   $1  unquoted value (any run of characters other than whitespace,
#       quotes, '<', '>' and '=', so that e.g. href=style.css is captured
#       whole and not cut off at the first non-word character)
#   $2  double-quoted value
#   $3  single-quoted value
$attvalre = '\s*=\s*(?:([^\s"\'<>=]+)|"([^"]+)"|\'([^\']+)\')';
21+
22+
23+
# clean normalizes a keyword: leading and trailing whitespace is removed,
# each remaining run of whitespace becomes a single '_', every other
# character that is not an ASCII letter, digit, '_', '.' or '-' is
# replaced by '_', and the result is returned in lower case.
sub clean {
  my $r = $_[0];
  $r =~ s/^\s+//;
  $r =~ s/\s+/ /g;
  $r =~ s/ $//;
  # Spell out both letter ranges: "A-z" would also admit [ \ ] ^ and `
  $r =~ s/[^A-Za-z0-9_.-]/_/g;
  return lc($r);
}
33+
34+
35+
# _remove_dot_segments resolves "." and ".." segments in a path.
# In a path that starts with '/', a ".." that would climb above the root
# is dropped; in a relative path it is kept, because there is nothing to
# resolve it against.
sub _remove_dot_segments {
  my ($path) = @_;
  my $rooted = $path =~ m{^/} ? 1 : 0;
  my @in = split(m{/}, $path, -1);   # -1 keeps a trailing empty segment
  shift @in if $rooted;              # drop the empty field before the '/'
  my @out;
  my $dir = 0;                       # true if the last segment named a directory
  foreach my $seg (@in) {
    $dir = 0;
    if ($seg eq '.') {
      $dir = 1;
    } elsif ($seg eq '..') {
      if (@out && $out[-1] ne '..') {
        pop @out;
        $dir = 1;
      } elsif ($rooted) {
        $dir = 1;                    # cannot go above the root
      } else {
        push @out, '..';
      }
    } else {
      push @out, $seg;
    }
  }
  my $result = ($rooted ? '/' : '') . join('/', @out);
  if ($dir && $result !~ m{/$}) {
    $result .= ($result eq '' ? './' : '/');
  }
  return $result;
}

# urlexpand expands a relative URL to an absolute one.
#
#   $url   the reference to expand
#   $base  the URL (or file name) that $url is relative to
#
# Returns $url unchanged if it already has a scheme. Otherwise the result
# takes the scheme, host and directory from $base as far as $url does not
# supply them, with "." and ".." segments resolved. If $base is itself
# relative (e.g. a plain file name) the result is relative as well.
sub urlexpand {
  my ($url, $base) = @_;
  $base = '' unless defined $base;

  # Already absolute: keep as is
  return $url if $url =~ m{^[A-Za-z][A-Za-z0-9+.-]*:};

  # Split the base into "scheme:", "//host", path and "?query"; every part
  # is optional, so this match cannot fail.
  my ($scheme, $authority, $basepath, $basequery) =
    $base =~ m{^([A-Za-z][A-Za-z0-9+.-]*:)?(//[^/?#]*)?([^?#]*)(\?[^#]*)?};
  $scheme = '' unless defined $scheme;
  $authority = '' unless defined $authority;
  $basepath = '' unless defined $basepath;
  $basequery = '' unless defined $basequery;

  # "//host/path": only the scheme comes from the base
  return $scheme . $url if $url =~ m{^//};

  # Separate the path from any "?query" and "#fragment"
  my ($path, $tail) = $url =~ m{^([^?#]*)(.*)\z}s;

  # No path at all ("", "?q" or "#frag"): the reference is to the base itself
  if ($path eq '') {
    my $query = $tail =~ /^\?/ ? '' : $basequery;
    return $scheme . $authority . $basepath . $query . $tail;
  }

  # Starts with a path segment: prefix everything except the base's last segment
  if ($path !~ m{^/}) {
    (my $dir = $basepath) =~ s{[^/]*$}{};
    $dir = '/' if $dir eq '' && $authority ne '';
    $path = $dir . $path;
  }

  return $scheme . $authority . _remove_dot_segments($path) . $tail;
}
51+
52+
53+
# Parse the command line: an optional -r option giving the URL to record
# for the input, then an optional input file and an optional output file.
# Options go into a hash so that no $opt_r global is needed (which would
# draw a "used only once" warning under -w).
my %opts;
getopts('r:', \%opts) || die $USAGE;
my $file = @ARGV ? shift @ARGV : '-';
my $output = @ARGV ? shift @ARGV : '-';
die $USAGE if @ARGV;

# The URL recorded for the input defaults to the input file name
my $url = defined $opts{r} ? $opts{r} : $file;

# '-' means standard input/output. Real file names are opened with the
# three-argument form, so that characters such as '>' or '|' in a name
# are never interpreted as an open mode or a command.
if ($file eq '-') {
  open(IN, '<&STDIN') or die "$PROG: cannot read standard input: $!\n";
} else {
  open(IN, '<', $file) or die "$PROG: cannot read file $file: $!\n";
}
if ($output eq '-') {
  open(OUT, '>&STDOUT') or die "$PROG: cannot write to standard output: $!\n";
} else {
  open(OUT, '>', $output) or die "$PROG: cannot write to file $output: $!\n";
}
62+
63+
# Start the output: a comment describing the schema of the "meta" table,
# followed by the opening tag of the RDF document. The here-document is
# single-quoted, so nothing in it is interpolated.
print OUT <<'END_OF_HEADER';
<!--
The database schema is defined as follows:
- there is one table, called "meta"
- it has three columns called "href", "property", and "value"
- "href" is a URL
- "property" is a keyword
- "value" is a string
The meaning of a record, paraphrased in English, is:
"the _property_ property of the resource at _href_ has value _value_"
There are further constraints on the value that depend on the property,
but which are not spelled out here.
-->
<RDF schema="http://www.w3.org/TR/REC-ACL">
END_OF_HEADER
77+
78+
# attvalue returns the value of an attribute in the text of a tag.
#
#   $name  regular expression (without capturing groups) for the
#          attribute name
#   $tag   text to search
#
# Returns the attribute's value, or nothing if the attribute is absent.
sub attvalue {
  my ($name, $tag) = @_;
  return unless $tag =~ /\b(?:$name)$attvalre/si;
  return defined($1) ? $1 : defined($2) ? $2 : $3;
}

# attquote makes a string safe for use inside a double-quoted attribute
# of the output. Only '"' is replaced: the values come from HTML source,
# where '&' and '<' are expected to be escaped already.
sub attquote {
  my $s = $_[0];
  $s =~ s/"/&quot;/g;
  return $s;
}

# putmeta writes one record to OUT, meaning: "the $property property of
# the resource at $href has value $value".
sub putmeta {
  my ($href, $property, $value) = @_;
  print OUT " <meta\nhref=\"", attquote($href),
    "\"\nproperty=\"", attquote($property),
    "\"\nvalue=\"", attquote($value), "\"/>\n";
}

# Read the input in chunks that end at each '<', so that every chunk
# after the first starts with the name of a tag.
$/ = '<';
while (<IN>) {
  if (/^link\b/si) {

    # A LINK is only recorded if it has both REL and HREF
    my $rel = attvalue('rel', $_);
    my $target = attvalue('href', $_);
    next unless defined $rel && defined $target;
    $rel = clean($rel);
    $target = urlexpand($target, $url);
    putmeta($url, $rel, $target);

    # The remaining attributes describe the link's target
    my $type = attvalue('type', $_);
    putmeta($target, 'content-type', lc($type)) if defined $type;

    my $title = attvalue('title', $_);
    putmeta($target, 'title', $title) if defined $title;

    # MEDIA is a comma-separated list; write one record per medium
    my $media = attvalue('media', $_);
    if (defined $media) {
      foreach my $medium (split(/,/, lc($media))) {
        $medium =~ s/^\s+//;
        $medium =~ s/\s+$//;
        putmeta($target, 'medium', $medium) if $medium ne '';
      }
    }

  } elsif (/^meta\b/si) {

    # A META is only recorded if it has CONTENT and NAME or HTTP-EQUIV
    my $value = attvalue('content', $_);
    my $property = attvalue('name|http-equiv', $_);
    next unless defined $value && defined $property;
    putmeta($url, clean($property), $value);
  }
}
128+
129+
# Finish the RDF document and close both files. Output is buffered, so a
# failed write (e.g. a full disk) may only show up when OUT is closed;
# report it instead of silently leaving a truncated file.
print OUT "</RDF>\n";
close(IN);
close(OUT) or die "$PROG: error writing to file $output: $!\n";

0 commit comments

Comments
 (0)