Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ pub enum ComponentValue {
Number(NumericValue),
Percentage(NumericValue),
Dimension(NumericValue, ~str),
UnicodeRange(char, char), // UnicodeRange {start: char, end: char},
EmptyUnicodeRange,
UnicodeRange { start: u32, end: u32 },
WhiteSpace,
Colon, // :
Semicolon, // ;
Expand Down
14 changes: 7 additions & 7 deletions css-parsing-tests/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,16 @@ associated with the expected result.

``color3_hsl.json``
Same as ``color3.json``.
This file is generated the ``make_color3_hsl.py`` Python script.
This file is generated by the ``make_color3_hsl.py`` Python script.

``color3_keywords.json``
Same as ``color3.json``,
except that the values for the Red, Green and Blue channel
are between 0 and 255.
This file is generated the ``make_color3_keywords.py`` Python script.
This file is generated by the ``make_color3_keywords.py`` Python script.

``an+b.json``
Tests the `an+b <http://dev.w3.org/csswg/css-syntax/#the-anb-type>`_
``An+B.json``
Tests the `An+B <http://dev.w3.org/csswg/css-syntax/#the-anb-type>`_
syntax defined in CSS Syntax Level 3.
This `differs <http://dev.w3.org/csswg/css-syntax/#changes>`_ from the
`nth grammar rule <http://www.w3.org/TR/css3-selectors/#nth-child-pseudo>`_
Expand All @@ -121,7 +121,7 @@ AST nodes (the results of parsing) are represented in JSON as follows.
This representation was chosen to be compact
(and thus less annoying to write by hand)
while staying unambiguous.
For example, the difference between @import and \@import is not lost:
For example, the difference between ``@import`` and ``\@import`` is not lost:
they are represented as ``["at-keyword", "import"]`` and ``["ident", "@import"]``,
respectively.

Expand Down Expand Up @@ -200,8 +200,8 @@ Component values
and the unit as a string.

<unicode-range>
Array of length 2: the string ``"unicode-range"``, and the range as either
null for the empty range, or an array of two numbers.
Array of length 3: the string ``"unicode-range"``,
followed by the *start* and *end* integers as two numbers.

<include-match>
The string ``"~="``.
Expand Down
73 changes: 73 additions & 0 deletions css-parsing-tests/component_value_list.json
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,79 @@
["dimension", "12", 12, "integer", "rêd"]
],

"u+1 U+10 U+100 U+1000 U+10000 U+100000 U+1000000", [
["unicode-range", 1, 1], " ",
["unicode-range", 16, 16], " ",
["unicode-range", 256, 256], " ",
["unicode-range", 4096, 4096], " ",
["unicode-range", 65536, 65536], " ",
["unicode-range", 1048576, 1048576], " ",
["unicode-range", 1048576, 1048576], ["number", "0", 0, "integer"]
],

"u+? u+1? U+10? U+100? U+1000? U+10000? U+100000?", [
["unicode-range", 0, 15], " ",
["unicode-range", 16, 31], " ",
["unicode-range", 256, 271], " ",
["unicode-range", 4096, 4111], " ",
["unicode-range", 65536, 65551], " ",
["unicode-range", 1048576, 1048591], " ",
["unicode-range", 1048576, 1048576], "?"
],

"u+?? U+1?? U+10?? U+100?? U+1000?? U+10000??", [
["unicode-range", 0, 255], " ",
["unicode-range", 256, 511], " ",
["unicode-range", 4096, 4351], " ",
["unicode-range", 65536, 65791], " ",
["unicode-range", 1048576, 1048831], " ",
["unicode-range", 1048576, 1048591], "?"
],

"u+??? U+1??? U+10??? U+100??? U+1000???", [
["unicode-range", 0, 4095], " ",
["unicode-range", 4096, 8191], " ",
["unicode-range", 65536, 69631], " ",
["unicode-range", 1048576, 1052671], " ",
["unicode-range", 1048576, 1048831], "?"
],

"u+???? U+1???? U+10???? U+100????", [
["unicode-range", 0, 65535], " ",
["unicode-range", 65536, 131071], " ",
["unicode-range", 1048576, 1114111], " ",
["unicode-range", 1048576, 1052671], "?"
],

"u+????? U+1????? U+10?????", [
["unicode-range", 0, 1048575], " ",
["unicode-range", 1048576, 2097151], " ",
["unicode-range", 1048576, 1114111], "?"
],

"u+?????? U+1??????", [
["unicode-range", 0, 16777215], " ",
["unicode-range", 1048576, 2097151], "?"
],

"u+1-2 U+100000-2 U+1000000-2 U+10-200000", [
["unicode-range", 1, 2], " ",
["unicode-range", 1048576, 2], " ",
["unicode-range", 1048576, 1048576], ["number", "0", 0, "integer"],
["number", "-2", -2, "integer"], " ",
["unicode-range", 16, 2097152]
],

"ù+12 Ü+12 u +12 U+ 12 U+12 - 20 U+1?2 U+1?-50", [
["ident", "ù"], ["number", "+12", 12, "integer"], " ",
["ident", "Ü"], ["number", "+12", 12, "integer"], " ",
["ident", "u"], " ", ["number", "+12", 12, "integer"], " ",
["ident", "U"], "+", " ", ["number", "12", 12, "integer"], " ",
["unicode-range", 18, 18], " ", "-", " ", ["number", "20", 20, "integer"], " ",
["unicode-range", 16, 31], ["number", "2", 2, "integer"], " ",
["unicode-range", 16, 31], ["number", "-50", -50, "integer"]
],

"~=|=^=$=*=||<!------> |/**/| ~/**/=", [
"~=", "|=", "^=", "$=", "*=", "||", "<!--", "-", "-", "-->",
" ", "|", "|", " ", "~", "="
Expand Down
5 changes: 2 additions & 3 deletions tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,9 +325,8 @@ impl ToJson for ComponentValue {
Dimension(ref value, ref unit)
=> JList(~[JString(~"dimension")] + numeric(value) + ~[unit.to_json()]),

// TODO:
UnicodeRange(_start, _end) => fail!(),
EmptyUnicodeRange => fail!(),
UnicodeRange { start: s, end: e }
=> JList(~[JString(~"unicode-range"), s.to_json(), e.to_json()]),

WhiteSpace => JString(~" "),
Colon => JString(~":"),
Expand Down
32 changes: 9 additions & 23 deletions tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,13 +544,13 @@ fn consume_unicode_range(tokenizer: &mut Tokenizer) -> ComponentValue {
question_marks += 1;
tokenizer.position += 1
}
let start: char;
let end: char;
let start;
let end;
if question_marks > 0 {
start = char_from_hex(hex + "0".repeat(question_marks));
end = char_from_hex(hex + "F".repeat(question_marks));
start = u32::from_str_radix(hex + "0".repeat(question_marks), 16).unwrap();
end = u32::from_str_radix(hex + "F".repeat(question_marks), 16).unwrap();
} else {
start = char_from_hex(hex);
start = u32::from_str_radix(hex, 16).unwrap();
hex = ~"";
if !tokenizer.is_eof() && tokenizer.current_char() == '-' {
tokenizer.position += 1;
Expand All @@ -563,21 +563,12 @@ fn consume_unicode_range(tokenizer: &mut Tokenizer) -> ComponentValue {
}
}
}
end = if hex.len() > 0 { char_from_hex(hex) } else { start }
}
if start > MAX_UNICODE || end < start {
EmptyUnicodeRange
} else {
let end = if end <= MAX_UNICODE { end } else { MAX_UNICODE };
// UnicodeRange {start: start, end: end}
UnicodeRange(start, end)
end = if hex.len() > 0 { u32::from_str_radix(hex, 16).unwrap() } else { start }
}
UnicodeRange {start: start, end: end}
}


// Upper bound used when range-checking code points: U+10FFFF,
// the last code point in the Unicode code space.
static MAX_UNICODE: char = '\U0010FFFF';


// Assumes that the U+005C REVERSE SOLIDUS (\) has already been consumed
// and that the next input character has already been verified
// to not be a newline.
Expand All @@ -602,16 +593,11 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char {
_ => ()
}
}
let c = char_from_hex(hex);
let c = u32::from_str_radix(hex, 16).unwrap() as char as char;
static MAX_UNICODE: char = '\U0010FFFF';
if '\x00' < c && c <= MAX_UNICODE { c }
else { '\uFFFD' } // Replacement character
},
c => c
}
}


// Converts a string of hexadecimal digits into the `char` with that code point.
//
// `hex` is parsed as a base-16 `u32`; the result of the parse is unwrapped,
// so a string that cannot be parsed as a hexadecimal `u32` makes this fail
// rather than return an error value.
//
// The numeric value is cast to `char` without any range check here.
// NOTE(review): callers appear to be responsible for validating the result
// (e.g. comparing against MAX_UNICODE) -- confirm at each call site.
#[inline]
fn char_from_hex(hex: &str) -> char {
u32::from_str_radix(hex, 16).unwrap() as char
}