From 4e7f295918b24b7935da6cdc47bea1d6ff7a3c15 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 10 Aug 2013 09:39:11 +0100 Subject: [PATCH 1/3] Formatting fixes in the README --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 7531ec53..a29340df 100644 --- a/README.rst +++ b/README.rst @@ -94,16 +94,16 @@ associated with the expected result. ``color3_hsl.json`` Same as ``color3.json``. - This file is generated the ``make_color3_hsl.py`` Python script. + This file is generated by the ``make_color3_hsl.py`` Python script. ``color3_keywords.json`` Same as ``color3.json``, except that the values for the Red, Green and Blue channel are between 0 and 255. - This file is generated the ``make_color3_keywords.py`` Python script. + This file is generated by the ``make_color3_keywords.py`` Python script. -``an+b.json`` - Tests the `an+b `_ +``An+B.json`` + Tests the `An+B `_ syntax defined in CSS Syntax Level 3. This `differs `_ from the `nth grammar rule `_ @@ -121,7 +121,7 @@ AST nodes (the results of parsing) are represented in JSON as follow. This representation was chosen to be compact (and thus less annoying to write by hand) while staying unambiguous. -For example, the difference between @import and \@import is not lost: +For example, the difference between ``@import`` and ``\@import`` is not lost: they are represented as ``["at-keyword", "import"]`` and ``["ident", "@import"]``, respectively. From f38c4c09383407be08977001b1e9870e1bac8197 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Sep 2013 18:49:47 +0100 Subject: [PATCH 2/3] Add tests for <unicode-range> tokens. --- README.rst | 4 +-- component_value_list.json | 73 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index a29340df..7342d91b 100644 --- a/README.rst +++ b/README.rst @@ -200,8 +200,8 @@ Component values and the unit as a string. 
- Array of length 2: the string ``"unicode-range"``, and the range as either - null for the empty range, or an array of two numbers. + Array of length 3: the string ``"unicode-range"``, + followed by the *start* and *end* integers as two numbers. The string ``"~="``. diff --git a/component_value_list.json b/component_value_list.json index a5ef1bf3..2ed07808 100644 --- a/component_value_list.json +++ b/component_value_list.json @@ -317,6 +317,79 @@ ["dimension", "12", 12, "integer", "rêd"] ], +"u+1 U+10 U+100 U+1000 U+10000 U+100000 U+1000000", [ + ["unicode-range", 1, 1], " ", + ["unicode-range", 16, 16], " ", + ["unicode-range", 256, 256], " ", + ["unicode-range", 4096, 4096], " ", + ["unicode-range", 65536, 65536], " ", + ["unicode-range", 1048576, 1048576], " ", + ["unicode-range", 1048576, 1048576], ["number", "0", 0, "integer"] +], + +"u+? u+1? U+10? U+100? U+1000? U+10000? U+100000?", [ + ["unicode-range", 0, 15], " ", + ["unicode-range", 16, 31], " ", + ["unicode-range", 256, 271], " ", + ["unicode-range", 4096, 4111], " ", + ["unicode-range", 65536, 65551], " ", + ["unicode-range", 1048576, 1048591], " ", + ["unicode-range", 1048576, 1048576], "?" +], + +"u+?? U+1?? U+10?? U+100?? U+1000?? U+10000??", [ + ["unicode-range", 0, 255], " ", + ["unicode-range", 256, 511], " ", + ["unicode-range", 4096, 4351], " ", + ["unicode-range", 65536, 65791], " ", + ["unicode-range", 1048576, 1048831], " ", + ["unicode-range", 1048576, 1048591], "?" +], + +"u+??? U+1??? U+10??? U+100??? U+1000???", [ + ["unicode-range", 0, 4095], " ", + ["unicode-range", 4096, 8191], " ", + ["unicode-range", 65536, 69631], " ", + ["unicode-range", 1048576, 1052671], " ", + ["unicode-range", 1048576, 1048831], "?" +], + +"u+???? U+1???? U+10???? U+100????", [ + ["unicode-range", 0, 65535], " ", + ["unicode-range", 65536, 131071], " ", + ["unicode-range", 1048576, 1114111], " ", + ["unicode-range", 1048576, 1052671], "?" +], + +"u+????? U+1????? 
U+10?????", [ + ["unicode-range", 0, 1048575], " ", + ["unicode-range", 1048576, 2097151], " ", + ["unicode-range", 1048576, 1114111], "?" +], + +"u+?????? U+1??????", [ + ["unicode-range", 0, 16777215], " ", + ["unicode-range", 1048576, 2097151], "?" +], + +"u+1-2 U+100000-2 U+1000000-2 U+10-200000", [ + ["unicode-range", 1, 2], " ", + ["unicode-range", 1048576, 2], " ", + ["unicode-range", 1048576, 1048576], ["number", "0", 0, "integer"], + ["number", "-2", -2, "integer"], " ", + ["unicode-range", 16, 2097152] +], + +"ù+12 Ü+12 u +12 U+ 12 U+12 - 20 U+1?2 U+1?-50", [ + ["ident", "ù"], ["number", "+12", 12, "integer"], " ", + ["ident", "Ü"], ["number", "+12", 12, "integer"], " ", + ["ident", "u"], " ", ["number", "+12", 12, "integer"], " ", + ["ident", "U"], "+", " ", ["number", "12", 12, "integer"], " ", + ["unicode-range", 18, 18], " ", "-", " ", ["number", "20", 20, "integer"], " ", + ["unicode-range", 16, 31], ["number", "2", 2, "integer"], " ", + ["unicode-range", 16, 31], ["number", "-50", -50, "integer"] +], + "~=|=^=$=*=|| |/**/| ~/**/=", [ "~=", "|=", "^=", "$=", "*=", "||", "", " ", "|", "|", " ", "~", "=" From 53e49c32967302688362625ce5b3eb7b939a0559 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Sep 2013 19:13:15 +0100 Subject: [PATCH 3/3] Fix tokenization of <unicode-range> tokens, per spec change. 
--- ast.rs | 3 +-- tests.rs | 5 ++--- tokenizer.rs | 32 +++++++++----------------------- 3 files changed, 12 insertions(+), 28 deletions(-) diff --git a/ast.rs b/ast.rs index 632f526e..1bf59697 100644 --- a/ast.rs +++ b/ast.rs @@ -37,8 +37,7 @@ pub enum ComponentValue { Number(NumericValue), Percentage(NumericValue), Dimension(NumericValue, ~str), - UnicodeRange(char, char), // UnicodeRange {start: char, end: char}, - EmptyUnicodeRange, + UnicodeRange { start: u32, end: u32 }, WhiteSpace, Colon, // : Semicolon, // ; diff --git a/tests.rs b/tests.rs index 38588f51..8ab56e8f 100644 --- a/tests.rs +++ b/tests.rs @@ -325,9 +325,8 @@ impl ToJson for ComponentValue { Dimension(ref value, ref unit) => JList(~[JString(~"dimension")] + numeric(value) + ~[unit.to_json()]), - // TODO: - UnicodeRange(_start, _end) => fail!(), - EmptyUnicodeRange => fail!(), + UnicodeRange { start: s, end: e } + => JList(~[JString(~"unicode-range"), s.to_json(), e.to_json()]), WhiteSpace => JString(~" "), Colon => JString(~":"), diff --git a/tokenizer.rs b/tokenizer.rs index dca90643..fbeac8b7 100644 --- a/tokenizer.rs +++ b/tokenizer.rs @@ -544,13 +544,13 @@ fn consume_unicode_range(tokenizer: &mut Tokenizer) -> ComponentValue { question_marks += 1; tokenizer.position += 1 } - let start: char; - let end: char; + let start; + let end; if question_marks > 0 { - start = char_from_hex(hex + "0".repeat(question_marks)); - end = char_from_hex(hex + "F".repeat(question_marks)); + start = u32::from_str_radix(hex + "0".repeat(question_marks), 16).unwrap(); + end = u32::from_str_radix(hex + "F".repeat(question_marks), 16).unwrap(); } else { - start = char_from_hex(hex); + start = u32::from_str_radix(hex, 16).unwrap(); hex = ~""; if !tokenizer.is_eof() && tokenizer.current_char() == '-' { tokenizer.position += 1; @@ -563,21 +563,12 @@ fn consume_unicode_range(tokenizer: &mut Tokenizer) -> ComponentValue { } } } - end = if hex.len() > 0 { char_from_hex(hex) } else { start } - } - if start > MAX_UNICODE || 
end < start { - EmptyUnicodeRange - } else { - let end = if end <= MAX_UNICODE { end } else { MAX_UNICODE }; -// UnicodeRange {start: start, end: end} - UnicodeRange(start, end) + end = if hex.len() > 0 { u32::from_str_radix(hex, 16).unwrap() } else { start } } + UnicodeRange {start: start, end: end} } -static MAX_UNICODE: char = '\U0010FFFF'; - - // Assumes that the U+005C REVERSE SOLIDUS (\) has already been consumed // and that the next input character has already been verified // to not be a newline. @@ -602,16 +593,11 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char { _ => () } } - let c = char_from_hex(hex); + let c = u32::from_str_radix(hex, 16).unwrap() as char as char; + static MAX_UNICODE: char = '\U0010FFFF'; if '\x00' < c && c <= MAX_UNICODE { c } else { '\uFFFD' } // Replacement character }, c => c } } - - -#[inline] -fn char_from_hex(hex: &str) -> char { - u32::from_str_radix(hex, 16).unwrap() as char -}