Skip to content

Commit f4e920b

Browse files
committed
[css2] Put in new grammar and tokenizer. Remaining problem: how to forbid a
space between ":" and "active" in selectors (but not on the rhs)? --HG-- extra : convert_revision : svn%3A73dc7c4b-06e6-40f3-b4f7-9ed1dbc14bfc/trunk%40698
1 parent e34c936 commit f4e920b

1 file changed

Lines changed: 170 additions & 181 deletions

File tree

css2/grammar.src

Lines changed: 170 additions & 181 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
22
<html lang="en">
3-
<!-- $Id: grammar.src,v 1.20 1997-12-12 18:03:00 ijacobs Exp $ -->
3+
<!-- $Id: grammar.src,v 1.21 1998-01-26 21:46:55 bbos Exp $ -->
44
<HEAD>
55
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
66
<TITLE>Appendix D: The grammar of CSS2</TITLE>
@@ -38,218 +38,207 @@ human consumption and some shorthand notation beyond
3838
<li><strong>[]</strong>: grouping
3939
</ul>
4040

41-
<P>The productions are:
41+
<p>The productions are:
4242

43-
<PRE>
43+
<pre>
4444
stylesheet
45-
: [CDO|CDC]* [ import [CDO|CDC]* ]* [ [ ruleset | media ] [CDO|CDC]* ]*
46-
;
45+
: [CDO|CDC]* [ import [CDO|CDC]* ]*
46+
[ [ruleset|media|page|font_face] [CDO|CDC]* ]*
47+
;
4748
import
48-
: IMPORT_SYM [STRING|URI] medium* ';' /* E.g., @import url(fun.css); */
49-
;
49+
: IMPORT_SYM [STRING|URL] [ medium [ ',' medium]* ]? ';'
50+
;
5051
media
51-
: MEDIA_SYM medium+ '{' ruleset* '}'
52-
;
53-
medium /* e.g., SPEECH */
54-
: IDENT
55-
;
56-
unary_operator
57-
: '-' | '+'
58-
;
52+
: MEDIA_SYM medium [ ',' medium ]* '{' ruleset* '}'
53+
;
54+
medium
55+
: IDENT
56+
;
57+
page
58+
: PAGE_SYM pseudo_page?
59+
'{' declaration [ ';' declaration ]* '}'
60+
;
61+
pseudo_page
62+
: colon IDENT
63+
;
64+
font_face
65+
: FONT_FACE_SYM '{' declaration [ ';' declaration ]* '}'
66+
;
5967
operator
60-
: '/' | ',' | /* empty */
61-
;
68+
: '/' | ',' | /* empty */
69+
;
70+
combinator
71+
: '+' | '>' | /* empty */
72+
;
73+
unary_operator
74+
: '-' | '+'
75+
;
6276
property
63-
: IDENT
64-
;
77+
: IDENT
78+
;
6579
ruleset
66-
: selector [ ',' selector ]*
67-
'{' declaration [ ';' declaration ]* '}'
68-
;
80+
: selector [ ',' selector ]*
81+
'{' declaration [ ';' declaration ]* '}'
82+
;
6983
selector
70-
: sequential_selector [ '~'? sequential_selector ]*
71-
[ pseudo_element | solitary_pseudo_element ]?
72-
| solitary_pseudo_element
73-
;
74-
sequential_selector
75-
: simple_selector
76-
| '/' simple_selector '~'? simple_selector '/'
77-
| '//' simple_selector '/'
78-
;
79-
/* An "id" is an ID that is attached to an element type
80-
** on its left, as in: P#p007
81-
** A "solitary_id" is an ID that is not so attached,
82-
** as in: #p007
83-
** Analogously for classes and pseudo-classes.
84-
*/
84+
: simple_selector [ combinator simple_selector ]*
85+
;
8586
simple_selector
86-
: element_name [ id | class | attrib | pseudo_class ]*
87-
| solitary_id [ class | attrib | pseudo_class ]* /* eg: #xyz33 */
88-
| solitary_class [ id | class | attrib | pseudo_class ]* /* eg: .author */
89-
| solitary_pseudo_class [ id | class | attrib | pseudo_class ]* /* eg: :link */
90-
| solitary_attrib [ id | class | attrib | pseudo_class ]* /* eg: [ALIGN] */
91-
;
87+
: element_name [ id | class | attrib | pseudo ]*
88+
| solitary_id [ class | attrib | pseudo ]*
89+
| solitary_class [ id | class | attrib | pseudo ]*
90+
| solitary_pseudo [ id | class | attrib | pseudo ]*
91+
;
9292
element_name
93-
: IDENT
94-
;
95-
attrib /* as in: [lang=fr] */
96-
: LBRACK_AFTER_IDENT IDENT [ [ EQ | INCLUDES ] [ IDENT | STRING ] ]? ']'
97-
;
98-
solitary_attrib
99-
: '[' IDENT [ [ EQ | INCLUDES ] [ IDENT | STRING ] ]? ']'
100-
;
101-
pseudo_class /* as in: A:link */
102-
: LINK_PSCLASS_AFTER_IDENT
103-
| VISITED_PSCLASS_AFTER_IDENT
104-
| ACTIVE_PSCLASS_AFTER_IDENT
105-
;
106-
solitary_pseudo_class /* as in: :link */
107-
: LINK_PSCLASS
108-
| VISITED_PSCLASS
109-
| ACTIVE_PSCLASS
110-
;
111-
class /* as in: P.note */
112-
: CLASS_AFTER_IDENT
113-
;
114-
solitary_class /* as in: .note */
115-
: CLASS
116-
;
117-
pseudo_element /* as in: P:first-line */
118-
: FIRST_LETTER_AFTER_IDENT
119-
| FIRST_LINE_AFTER_IDENT
120-
;
121-
solitary_pseudo_element /* as in: :first-line */
122-
: FIRST_LETTER
123-
| FIRST_LINE
124-
;
125-
/* There is a constraint on the id and solitary_id that the
126-
** part after the "#" must be a valid HTML ID value;
127-
** e.g., "#x77" is OK, but "#77" is not.
128-
*/
93+
: IDENT | '*'
94+
;
95+
attrib
96+
: '[' IDENT [ [ '=' | INCLUDES ] [ IDENT | STRING ] ]? ']'
97+
;
98+
pseudo
99+
: COLON_AFTER_IDENT IDENT
100+
;
101+
solitary_pseudo
102+
: COLON IDENT
103+
;
104+
class
105+
: CLASS_AFTER_IDENT
106+
;
107+
solitary_class
108+
: CLASS
109+
;
129110
id
130-
: HASH_AFTER_IDENT
131-
;
111+
: HASH_AFTER_IDENT
112+
;
132113
solitary_id
133-
: HASH
134-
;
114+
: HASH
115+
;
135116
declaration
136-
: property ':' expr prio?
137-
| /* empty */ /* Prevents syntax errors... */
138-
;
117+
: property colon expr prio?
118+
| /* empty */
119+
;
120+
colon
121+
: COLON
122+
| COLON_AFTER_IDENT
123+
;
139124
prio
140-
: IMPORTANT_SYM /* !important */
141-
;
125+
: IMPORTANT_SYM
126+
;
142127
expr
143-
: term [ operator term ]*
144-
;
128+
: term [ operator term ]*
129+
;
145130
term
146-
: unary_operator?
147-
[ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS
148-
| IDENT | hexcolor | URI | RGB | UNICODERANGE
149-
| ANGLE | TIME | FREQ ]
150-
;
151-
/* There is a constraint on the color that it must
152-
** have either 3 or 6 hex-digits (i.e., [0-9a-fA-F])
153-
** after the "#"; e.g., "#000" is OK, but "#abcd" is not.
154-
*/
131+
: unary_operator?
132+
[ NUMBER | PERCENTAGE | LENGTH | EMS | EXS | ANGLE |
133+
TIME | FREQ | function ]
134+
| STRING | IDENT | hexcolor | URL | RGB | UNICODERANGE
135+
;
136+
function
137+
: FUNCTION expr ')'
138+
;
155139
hexcolor
156-
: HASH | HASH_AFTER_IDENT
157-
;
158-
</PRE>
159-
160-
<!-- Allow any number of simple selectors between / / ? -->
140+
: HASH | HASH_AFTER_IDENT
141+
;
142+
</pre>
161143

162-
<P> The following is the tokenizer, written in flex <a
144+
<p> The following is the tokenizer, written in flex <a
163145
rel="biblioentry" href="./refs.html#ref-FLEX" class="normref">[FLEX]</a>
164-
notation. Note that this assumes an 8-bit implementation of flex. The
165-
tokenizer is case-insensitive (flex command line option -i).
166-
167-
<PRE>
168-
unicode \\[0-9a-f]{1,4}
169-
latin1 [&#161;-&yuml;]
170-
escape {unicode}|\\[ -~&#161;-&yuml;]
171-
stringchar {escape}|{latin1}|[ !#$%&amp;(-~]
172-
nmstrt [a-z]|{latin1}|{escape}
173-
nmchar [-a-z0-9]|{latin1}|{escape}
174-
ident {nmstrt}{nmchar}*
146+
notation. The
147+
tokenizer is case-insensitive (flex command line
148+
option -i).
149+
150+
<p>The two occurrences of "\377" represent the highest character
151+
number that current versions of Flex can deal with (decimal 255). They
152+
should be read as "\4177777" (decimal 1114111), which is the highest
153+
possible code point in <span class="index-inst"
154+
title="unicode">Unicode</span>/<span class="index-inst"
155+
title="iso-10646">ISO-10646</span>.
156+
157+
<pre>
158+
h [0-9a-f]
159+
nonascii [\200-\377]
160+
unicode \\{h}{1,6}
161+
escape {unicode}|\\[ -~\200-\377]
162+
nmstart [a-z]|{nonascii}|{escape}
163+
nmchar [a-z0-9-]|{nonascii}|{escape}
164+
string1 \"([\t\n !#$%&amp;(-~]|\'|{nonascii}|{escape})*\"
165+
string2 \'([\t\n !#$%&amp;(-~]|\"|{nonascii}|{escape})*\'
166+
167+
ident {nmstart}{nmchar}*
175168
name {nmchar}+
176-
d [0-9]
177-
notnm [^-a-z0-9\\]|{latin1}
178-
w [ \t\n]*
179-
num {d}+|{d}*\.{d}+
180-
string \"({stringchar}|\')*\"|\'({stringchar}|\")*\'
169+
num [0-9]+|[0-9]*"."[0-9]+
170+
string {string1}|{string2}
171+
url ([!#$%&amp;*-~]|{nonascii}|{escape})*
172+
w [ \t\r\n\f]*
173+
174+
urange {h}(\?{0,5}|{h}(\?{0,4}|{h}(\?{0,3}|{h}(\?{0,2}|{h}(\??|{h})))))
181175

182-
%x COMMENT
183176
%s AFTER_IDENT
184177

185178
%%
186-
"/*" {BEGIN(COMMENT);}
187-
&lt;COMMENT&gt;"*/" {BEGIN(0);}
188-
&lt;COMMENT&gt;\n {/* ignore */}
189-
&lt;COMMENT&gt;. {/* ignore */}
190-
@import {BEGIN(0); return IMPORT_SYM;}
191-
@media {BEGIN(0); return MEDIA_SYM;}
192-
"!"{w}important {BEGIN(0); return IMPORTANT_SYM;}
193-
{ident} {BEGIN(AFTER_IDENT); return IDENT;}
194-
{string} {BEGIN(0); return STRING;}
195179

196-
{num} {BEGIN(0); return NUMBER;}
197-
{num}"%" {BEGIN(0); return PERCENTAGE;}
198-
{num}pt/{notnm} {BEGIN(0); return LENGTH;}
199-
{num}mm/{notnm} {BEGIN(0); return LENGTH;}
200-
{num}cm/{notnm} {BEGIN(0); return LENGTH;}
201-
{num}pc/{notnm} {BEGIN(0); return LENGTH;}
202-
{num}in/{notnm} {BEGIN(0); return LENGTH;}
203-
{num}px/{notnm} {BEGIN(0); return LENGTH;}
204-
{num}em/{notnm} {BEGIN(0); return EMS;}
205-
{num}ex/{notnm} {BEGIN(0); return EXS;}
206-
{num}deg/{notnm} {BEGIN(0); return ANGLE;}
207-
{num}grad/{notnm} {BEGIN(0); return ANGLE;}
208-
{num}rad/{notnm} {BEGIN(0); return ANGLE;}
209-
{num}ms/{notnm} {BEGIN(0); return TIME;}
210-
{num}s/{notnm} {BEGIN(0); return TIME;}
211-
{num}Hz/{notnm} {BEGIN(0); return FREQ;}
212-
{num}kHz/{notnm} {BEGIN(0); return FREQ;}
180+
[ \t\r\n\f]+ {BEGIN(INITIAL);}
213181

214-
&lt;AFTER_IDENT&gt;":"link {return LINK_PSCLASS_AFTER_IDENT;}
215-
&lt;AFTER_IDENT&gt;":"visited {return VISITED_PSCLASS_AFTER_IDENT;}
216-
&lt;AFTER_IDENT&gt;":"active {return ACTIVE_PSCLASS_AFTER_IDENT;}
217-
&lt;AFTER_IDENT&gt;":"first-line {return FIRST_LINE_AFTER_IDENT;}
218-
&lt;AFTER_IDENT&gt;":"first-letter {return FIRST_LETTER_AFTER_IDENT;}
219-
&lt;AFTER_IDENT&gt;"#"{name} {return HASH_AFTER_IDENT;}
220-
&lt;AFTER_IDENT&gt;"."{name} {return CLASS_AFTER_IDENT;}
182+
\/\*[^*]*\*+([^/][^*]*\*+)*\/ /* ignore comments */
221183

222-
":"link {BEGIN(AFTER_IDENT); return LINK_PSCLASS;}
223-
":"visited {BEGIN(AFTER_IDENT); return VISITED_PSCLASS;}
224-
":"active {BEGIN(AFTER_IDENT); return ACTIVE_PSCLASS;}
225-
":"first-line {BEGIN(AFTER_IDENT); return FIRST_LINE;}
226-
":"first-letter {BEGIN(AFTER_IDENT); return FIRST_LETTER;}
227-
"#"{name} {BEGIN(AFTER_IDENT); return HASH;}
228-
"."{name} {BEGIN(AFTER_IDENT); return CLASS;}
184+
"&lt;!--" {return CDO;}
185+
"-->" {return CDC;}
186+
"~=" {BEGIN(INITIAL); return INCLUDES;}
229187

230-
&lt;AFTER_IDENT&gt;'[' {BEGIN(0); return LBRACK_AFTER_IDENT;}
231-
'[' {return LBRACK_AFTER_IDENT;}
232-
']' {BEGIN(AFTER_IDENT); return ']';}
233-
'=' {return EQ;}
234-
'~=' {return INCLUDES;}
188+
{string} {return STRING;}
235189

236-
url\({w}{string}{w}\) |
237-
url\({w}([^ \n\'\")]|\\\ |\\\'|\\\"|\\\))+{w}\) {BEGIN(0); return URI;}
238-
rgb\({w}{num}%?{w}\,{w}{num}%?{w}\,{w}{num}%?{w}\) {BEGIN(0); return RGB;}
190+
{ident} {BEGIN(AFTER_IDENT); return IDENT;}
239191

240-
U\+[0-9a-f?]{1,6}(-{h}{1,6})? {BEGIN(0); return UNICODERANGE;}
192+
&lt;AFTER_IDENT&gt;"."{ident} {return CLASS_AFTER_IDENT;}
193+
"."{ident} {BEGIN(AFTER_IDENT); return CLASS;}
241194

242-
[-/+{};,#:] {BEGIN(0); return *yytext;}
243-
[ \t]+ {BEGIN(0); /* ignore whitespace */}
244-
\n {BEGIN(0); /* ignore whitespace */}
245-
\&lt;\!\-\- {BEGIN(0); return CDO;}
246-
\-\-\&gt; {BEGIN(0); return CDC;}
247-
. {fprintf(stderr, "%d: Illegal character (%d)\n",
248-
lineno, *yytext);}
249-
</PRE>
195+
&lt;AFTER_IDENT&gt;":" {return COLON_AFTER_IDENT;}
196+
":" {return COLON;}
197+
198+
"#"{name} {BEGIN(AFTER_IDENT); return HASH;}
250199

251-
</BODY>
252-
</HTML>
200+
"@import" {BEGIN(INITIAL); return IMPORT_SYM;}
201+
"@page" {BEGIN(INITIAL); return PAGE_SYM;}
202+
"@media" {BEGIN(INITIAL); return MEDIA_SYM;}
203+
"@font-face" {BEGIN(INITIAL); return FONT_FACE_SYM;}
204+
"@"{ident} {BEGIN(INITIAL); return ATKEYWORD;}
205+
206+
"!important" {return IMPORTANT_SYM;}
207+
208+
{num}em {return EMS;}
209+
{num}ex {return EXS;}
210+
{num}px {return LENGTH;}
211+
{num}cm {return LENGTH;}
212+
{num}mm {return LENGTH;}
213+
{num}in {return LENGTH;}
214+
{num}pt {return LENGTH;}
215+
{num}pc {return LENGTH;}
216+
{num}deg {return ANGLE;}
217+
{num}rad {return ANGLE;}
218+
{num}grad {return ANGLE;}
219+
{num}ms {return TIME;}
220+
{num}s {return TIME;}
221+
{num}Hz {return FREQ;}
222+
{num}kHz {return FREQ;}
223+
{num}{ident} {return DIMEN;}
224+
{num}% {return PERCENTAGE;}
225+
{num} {return NUMBER;}
226+
227+
"url("{w}{string}{w}")" {return URL;}
228+
"url("{w}{url}{w}")" {return URL;}
229+
{ident}"(" {return FUNCTION;}
230+
231+
U\+{urange} |
232+
U\+{h}{1,6}-{h}{1,6} {BEGIN(INITIAL); return UNICODERANGE;}
233+
234+
. {BEGIN(INITIAL); return *yytext;}
235+
</pre>
236+
237+
<p>[This grammar allows "A:active" to be written as "A: active", which
238+
is incorrect. How to fix that?]
239+
240+
</body>
241+
</html>
253242
<!-- Keep this comment at the end of the file
254243
Local variables:
255244
mode: sgml

0 commit comments

Comments
 (0)