Skip to content

Commit 56b3cb3

Browse files
committed
Merge pull request servo#14 from SimonSapin/unicode-range
Fix tokenization of <unicode-range> tokens, per spec change.
2 parents 75b428f + f841ee9 commit 56b3cb3

File tree

5 files changed

+92
-35
lines changed

5 files changed

+92
-35
lines changed

ast.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ pub enum ComponentValue {
3737
Number(NumericValue),
3838
Percentage(NumericValue),
3939
Dimension(NumericValue, ~str),
40-
UnicodeRange(char, char), // UnicodeRange {start: char, end: char},
41-
EmptyUnicodeRange,
40+
UnicodeRange { start: u32, end: u32 },
4241
WhiteSpace,
4342
Colon, // :
4443
Semicolon, // ;

css-parsing-tests/README.rst

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,16 @@ associated with the expected result.
9494

9595
``color3_hsl.json``
9696
Same as ``color3.json``.
97-
This file is generated the ``make_color3_hsl.py`` Python script.
97+
This file is generated by the ``make_color3_hsl.py`` Python script.
9898

9999
``color3_keywords.json``
100100
Same as ``color3.json``,
101101
except that the values for the Red, Green and Blue channel
102102
are between 0 and 255.
103-
This file is generated the ``make_color3_keywords.py`` Python script.
103+
This file is generated by the ``make_color3_keywords.py`` Python script.
104104

105-
``an+b.json``
106-
Tests the `an+b <http://dev.w3.org/csswg/css-syntax/#the-anb-type>`_
105+
``An+B.json``
106+
Tests the `An+B <http://dev.w3.org/csswg/css-syntax/#the-anb-type>`_
107107
syntax defined in CSS Syntax Level 3.
108108
This `differs <http://dev.w3.org/csswg/css-syntax/#changes>`_ from the
109109
`nth grammar rule <http://www.w3.org/TR/css3-selectors/#nth-child-pseudo>`_
@@ -121,7 +121,7 @@ AST nodes (the results of parsing) are represented in JSON as follow.
121121
This representation was chosen to be compact
122122
(and thus less annoying to write by hand)
123123
while staying unambiguous.
124-
For example, the difference between @import and \@import is not lost:
124+
For example, the difference between ``@import`` and ``\@import`` is not lost:
125125
they are represented as ``["at-keyword", "import"]`` and ``["ident", "@import"]``,
126126
respectively.
127127

@@ -200,8 +200,8 @@ Component values
200200
and the unit as a string.
201201

202202
<unicode-range>
203-
Array of length 2: the string ``"unicode-range"``, and the range as either
204-
null for the empty range, or an array of two numbers.
203+
Array of length 3: the string ``"unicode-range"``,
204+
followed by the *start* and *end* integers as two numbers.
205205

206206
<include-match>
207207
The string ``"~="``.

css-parsing-tests/component_value_list.json

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,79 @@
317317
["dimension", "12", 12, "integer", "rêd"]
318318
],
319319

320+
"u+1 U+10 U+100 U+1000 U+10000 U+100000 U+1000000", [
321+
["unicode-range", 1, 1], " ",
322+
["unicode-range", 16, 16], " ",
323+
["unicode-range", 256, 256], " ",
324+
["unicode-range", 4096, 4096], " ",
325+
["unicode-range", 65536, 65536], " ",
326+
["unicode-range", 1048576, 1048576], " ",
327+
["unicode-range", 1048576, 1048576], ["number", "0", 0, "integer"]
328+
],
329+
330+
"u+? u+1? U+10? U+100? U+1000? U+10000? U+100000?", [
331+
["unicode-range", 0, 15], " ",
332+
["unicode-range", 16, 31], " ",
333+
["unicode-range", 256, 271], " ",
334+
["unicode-range", 4096, 4111], " ",
335+
["unicode-range", 65536, 65551], " ",
336+
["unicode-range", 1048576, 1048591], " ",
337+
["unicode-range", 1048576, 1048576], "?"
338+
],
339+
340+
"u+?? U+1?? U+10?? U+100?? U+1000?? U+10000??", [
341+
["unicode-range", 0, 255], " ",
342+
["unicode-range", 256, 511], " ",
343+
["unicode-range", 4096, 4351], " ",
344+
["unicode-range", 65536, 65791], " ",
345+
["unicode-range", 1048576, 1048831], " ",
346+
["unicode-range", 1048576, 1048591], "?"
347+
],
348+
349+
"u+??? U+1??? U+10??? U+100??? U+1000???", [
350+
["unicode-range", 0, 4095], " ",
351+
["unicode-range", 4096, 8191], " ",
352+
["unicode-range", 65536, 69631], " ",
353+
["unicode-range", 1048576, 1052671], " ",
354+
["unicode-range", 1048576, 1048831], "?"
355+
],
356+
357+
"u+???? U+1???? U+10???? U+100????", [
358+
["unicode-range", 0, 65535], " ",
359+
["unicode-range", 65536, 131071], " ",
360+
["unicode-range", 1048576, 1114111], " ",
361+
["unicode-range", 1048576, 1052671], "?"
362+
],
363+
364+
"u+????? U+1????? U+10?????", [
365+
["unicode-range", 0, 1048575], " ",
366+
["unicode-range", 1048576, 2097151], " ",
367+
["unicode-range", 1048576, 1114111], "?"
368+
],
369+
370+
"u+?????? U+1??????", [
371+
["unicode-range", 0, 16777215], " ",
372+
["unicode-range", 1048576, 2097151], "?"
373+
],
374+
375+
"u+1-2 U+100000-2 U+1000000-2 U+10-200000", [
376+
["unicode-range", 1, 2], " ",
377+
["unicode-range", 1048576, 2], " ",
378+
["unicode-range", 1048576, 1048576], ["number", "0", 0, "integer"],
379+
["number", "-2", -2, "integer"], " ",
380+
["unicode-range", 16, 2097152]
381+
],
382+
383+
"ù+12 Ü+12 u +12 U+ 12 U+12 - 20 U+1?2 U+1?-50", [
384+
["ident", "ù"], ["number", "+12", 12, "integer"], " ",
385+
["ident", "Ü"], ["number", "+12", 12, "integer"], " ",
386+
["ident", "u"], " ", ["number", "+12", 12, "integer"], " ",
387+
["ident", "U"], "+", " ", ["number", "12", 12, "integer"], " ",
388+
["unicode-range", 18, 18], " ", "-", " ", ["number", "20", 20, "integer"], " ",
389+
["unicode-range", 16, 31], ["number", "2", 2, "integer"], " ",
390+
["unicode-range", 16, 31], ["number", "-50", -50, "integer"]
391+
],
392+
320393
"~=|=^=$=*=||<!------> |/**/| ~/**/=", [
321394
"~=", "|=", "^=", "$=", "*=", "||", "<!--", "-", "-", "-->",
322395
" ", "|", "|", " ", "~", "="

tests.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,9 +325,8 @@ impl ToJson for ComponentValue {
325325
Dimension(ref value, ref unit)
326326
=> JList(~[JString(~"dimension")] + numeric(value) + ~[unit.to_json()]),
327327
328-
// TODO:
329-
UnicodeRange(_start, _end) => fail!(),
330-
EmptyUnicodeRange => fail!(),
328+
UnicodeRange { start: s, end: e }
329+
=> JList(~[JString(~"unicode-range"), s.to_json(), e.to_json()]),
331330
332331
WhiteSpace => JString(~" "),
333332
Colon => JString(~":"),

tokenizer.rs

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -544,13 +544,13 @@ fn consume_unicode_range(tokenizer: &mut Tokenizer) -> ComponentValue {
544544
question_marks += 1;
545545
tokenizer.position += 1
546546
}
547-
let start: char;
548-
let end: char;
547+
let start;
548+
let end;
549549
if question_marks > 0 {
550-
start = char_from_hex(hex + "0".repeat(question_marks));
551-
end = char_from_hex(hex + "F".repeat(question_marks));
550+
start = u32::from_str_radix(hex + "0".repeat(question_marks), 16).unwrap();
551+
end = u32::from_str_radix(hex + "F".repeat(question_marks), 16).unwrap();
552552
} else {
553-
start = char_from_hex(hex);
553+
start = u32::from_str_radix(hex, 16).unwrap();
554554
hex = ~"";
555555
if !tokenizer.is_eof() && tokenizer.current_char() == '-' {
556556
tokenizer.position += 1;
@@ -563,21 +563,12 @@ fn consume_unicode_range(tokenizer: &mut Tokenizer) -> ComponentValue {
563563
}
564564
}
565565
}
566-
end = if hex.len() > 0 { char_from_hex(hex) } else { start }
567-
}
568-
if start > MAX_UNICODE || end < start {
569-
EmptyUnicodeRange
570-
} else {
571-
let end = if end <= MAX_UNICODE { end } else { MAX_UNICODE };
572-
// UnicodeRange {start: start, end: end}
573-
UnicodeRange(start, end)
566+
end = if hex.len() > 0 { u32::from_str_radix(hex, 16).unwrap() } else { start }
574567
}
568+
UnicodeRange {start: start, end: end}
575569
}
576570

577571

578-
static MAX_UNICODE: char = '\U0010FFFF';
579-
580-
581572
// Assumes that the U+005C REVERSE SOLIDUS (\) has already been consumed
582573
// and that the next input character has already been verified
583574
// to not be a newline.
@@ -602,16 +593,11 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char {
602593
_ => ()
603594
}
604595
}
605-
let c = char_from_hex(hex);
596+
let c = u32::from_str_radix(hex, 16).unwrap() as char;
597+
static MAX_UNICODE: char = '\U0010FFFF';
606598
if '\x00' < c && c <= MAX_UNICODE { c }
607599
else { '\uFFFD' } // Replacement character
608600
},
609601
c => c
610602
}
611603
}
612-
613-
614-
#[inline]
615-
fn char_from_hex(hex: &str) -> char {
616-
u32::from_str_radix(hex, 16).unwrap() as char
617-
}

0 commit comments

Comments
 (0)