From c33638269fadb22b4c2339ff3269070a683032f9 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Thu, 9 Feb 2017 18:40:02 +0100
Subject: [PATCH 1/6] Fix an unused #[macro_use] warning in build.rs.

---
 build.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/build.rs b/build.rs
index 84e36be5..30e3d8b1 100644
--- a/build.rs
+++ b/build.rs
@@ -2,7 +2,7 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[macro_use] extern crate quote;
+extern crate quote;
 extern crate syn;
 
 use std::env;

From 87a1eee8f84417a802ce9a80f2a3a330080a241c Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Thu, 9 Feb 2017 19:37:13 +0100
Subject: [PATCH 2/6] Make encoding support generic.

Use encoding-rs in tests.
---
 Cargo.toml        |  7 +++--
 src/from_bytes.rs | 69 +++++++++++++++++++++++------------------------
 src/lib.rs        | 10 ++++---
 src/tests.rs      | 37 +++++++++++++++++++------
 4 files changed, 73 insertions(+), 50 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 9efd886e..a459df26 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 
 name = "cssparser"
-version = "0.8.0"
+version = "0.9.0"
 authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]
 
 description = "Rust implementation of CSS Syntax Level 3"
@@ -14,12 +14,15 @@ build = "build.rs"
 
 exclude = ["src/css-parsing-tests"]
 
+[lib]
+doctest = false
+
 [dev-dependencies]
 rustc-serialize = "0.3"
 tempdir = "0.3"
+encoding_rs = "0.3.2"
 
 [dependencies]
-encoding = "0.2"
 heapsize = {version = ">=0.1.1, <0.4.0", optional = true}
 matches = "0.1"
 serde = {version = ">=0.6.6, <0.9", optional = true}
diff --git a/src/from_bytes.rs b/src/from_bytes.rs
index 705b65e3..d71d9efb 100644
--- a/src/from_bytes.rs
+++ b/src/from_bytes.rs
@@ -2,14 +2,23 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use std::cmp;
+/// Abstraction for avoiding a dependency from cssparser to an encoding library
+pub trait EncodingSupport {
+    /// One character encoding
+    type Encoding;
 
-use encoding::label::encoding_from_whatwg_label;
-use encoding::all::UTF_8;
-use encoding::{EncodingRef, DecoderTrap, decode};
+    /// https://encoding.spec.whatwg.org/#concept-encoding-get
+    fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding>;
 
+    /// Return the UTF-8 encoding
+    fn utf8() -> Self::Encoding;
 
-/// Determine the character encoding of a CSS stylesheet and decode it.
+    /// Whether the given encoding is UTF-16BE or UTF-16LE
+    fn is_utf16_be_or_le(encoding: &Self::Encoding) -> bool;
+}
+
+
+/// Determine the character encoding of a CSS stylesheet.
 ///
 /// This is based on the presence of a BOM (Byte Order Mark), an `@charset` rule, and
 /// encoding meta-information.
@@ -20,48 +29,36 @@ use encoding::{EncodingRef, DecoderTrap, decode};
 /// * `environment_encoding`: An optional `Encoding` object for the [environment encoding]
 ///     (https://drafts.csswg.org/css-syntax/#environment-encoding), if any.
 ///
-/// Returns a 2-tuple of a decoded Unicode string and the `Encoding` object that was used.
-pub fn decode_stylesheet_bytes(css: &[u8], protocol_encoding_label: Option<&str>,
-                               environment_encoding: Option<EncodingRef>)
-                            -> (String, EncodingRef) {
+/// Returns the encoding to use.
+pub fn stylesheet_encoding<E>(css: &[u8], protocol_encoding_label: Option<&[u8]>,
+                              environment_encoding: Option<E::Encoding>)
+                              -> E::Encoding
+                              where E: EncodingSupport {
     // https://drafts.csswg.org/css-syntax/#the-input-byte-stream
     match protocol_encoding_label {
         None => (),
-        Some(label) => match encoding_from_whatwg_label(label) {
+        Some(label) => match E::from_label(label) {
             None => (),
-            Some(fallback) => return decode_replace(css, fallback)
+            Some(protocol_encoding) => return protocol_encoding
         }
     }
-    if css.starts_with("@charset \"".as_bytes()) {
-        // 10 is "@charset \"".len()
-        // 100 is arbitrary so that no encoding label is more than 100-10 bytes.
-        match css[10..cmp::min(css.len(), 100)].iter().position(|&b| b == b'"') {
+    let prefix = b"@charset \"";
+    if css.starts_with(prefix) {
+        let rest = &css[prefix.len()..];
+        match rest.iter().position(|&b| b == b'"') {
             None => (),
-            Some(label_length)
-            => if css[10 + label_length..].starts_with("\";".as_bytes()) {
-                let label = &css[10..10 + label_length];
-                let label = label.iter().map(|&b| b as char).collect::<String>();
-                match encoding_from_whatwg_label(&*label) {
+            Some(label_length) => if rest[label_length..].starts_with(b"\";") {
+                let label = &rest[..label_length];
+                match E::from_label(label) {
                     None => (),
-                    Some(fallback) => match fallback.name() {
-                        "utf-16be" | "utf-16le"
-                        => return decode_replace(css, UTF_8 as EncodingRef),
-                        _ => return decode_replace(css, fallback),
+                    Some(charset_encoding) => if E::is_utf16_be_or_le(&charset_encoding) {
+                        return E::utf8()
+                    } else {
+                        return charset_encoding
                     }
                 }
             }
         }
     }
-    match environment_encoding {
-        None => (),
-        Some(fallback) => return decode_replace(css, fallback)
-    }
-    return decode_replace(css, UTF_8 as EncodingRef)
-}
-
-
-#[inline]
-fn decode_replace(input: &[u8], fallback_encoding: EncodingRef)-> (String, EncodingRef) {
-    let (result, used_encoding) = decode(input, DecoderTrap::Replace, fallback_encoding);
-    (result.unwrap(), used_encoding)
+    environment_encoding.unwrap_or_else(E::utf8)
 }
diff --git a/src/lib.rs b/src/lib.rs
index 58859e88..fbc6c5a1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,8 +15,10 @@ Implementation of [CSS Syntax Module Level 3](https://drafts.csswg.org/css-synta
 # Input
 
 Everything is based on `Parser` objects, which borrow a `&str` input.
-If you have bytes (from a file, the network, or something),
-see the `decode_stylesheet_bytes` function.
+If you have bytes (from a file, the network, or something)
+and want to support character encodings other than UTF-8,
+see the `stylesheet_encoding` function,
+which can be used together with rust-encoding or encoding-rs.
 
 # Conventions for parsing functions
 
@@ -66,8 +68,8 @@ fn parse_border_spacing(_context: &ParserContext, input: &mut Parser)
 
 #![recursion_limit="200"]  // For color::parse_color_keyword
 
-extern crate encoding;
 #[macro_use] extern crate matches;
+#[cfg(test)] extern crate encoding_rs;
 #[cfg(test)] extern crate tempdir;
 #[cfg(test)] extern crate rustc_serialize;
 #[cfg(feature = "serde")] extern crate serde;
@@ -78,7 +80,7 @@ pub use rules_and_declarations::{parse_important};
 pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration};
 pub use rules_and_declarations::{RuleListParser, parse_one_rule};
 pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser};
-pub use from_bytes::decode_stylesheet_bytes;
+pub use from_bytes::{stylesheet_encoding, EncodingSupport};
 pub use color::{RGBA, Color, parse_color_keyword};
 pub use nth::parse_nth;
 pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
diff --git a/src/tests.rs b/src/tests.rs
index 9800c416..326f6f07 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -5,6 +5,7 @@
 #[cfg(feature = "bench")]
 extern crate test;
 
+use encoding_rs;
 use std::borrow::Cow::{self, Borrowed};
 use std::fs::File;
 use std::io::{self, Write};
@@ -16,17 +17,14 @@ use tempdir::TempDir;
 #[cfg(feature = "bench")]
 use self::test::Bencher;
 
-use encoding::label::encoding_from_whatwg_label;
-
 use super::{Parser, Delimiter, Token, NumericValue, PercentageValue, SourceLocation,
             DeclarationListParser, DeclarationParser, RuleListParser,
             AtRuleType, AtRuleParser, QualifiedRuleParser,
             parse_one_declaration, parse_one_rule, parse_important,
-            decode_stylesheet_bytes,
+            stylesheet_encoding, EncodingSupport,
             TokenSerializationType,
             Color, RGBA, parse_nth, ToCss};
 
-
 macro_rules! JArray {
     ($($e: expr,)*) => { JArray![ $( $e ),* ] };
     ($($e: expr),*) => { Json::Array(vec!( $( $e.to_json() ),* )) }
@@ -198,6 +196,26 @@ fn one_rule() {
 
 #[test]
 fn stylesheet_from_bytes() {
+    pub struct EncodingRs;
+
+    impl EncodingSupport for EncodingRs {
+        type Encoding = &'static encoding_rs::Encoding;
+
+        fn utf8() -> Self::Encoding {
+            encoding_rs::UTF_8
+        }
+
+        fn is_utf16_be_or_le(encoding: &Self::Encoding) -> bool {
+            *encoding == encoding_rs::UTF_16LE ||
+            *encoding == encoding_rs::UTF_16BE
+        }
+
+        fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding> {
+            encoding_rs::Encoding::for_label(ascii_label)
+        }
+    }
+
+
     run_raw_json_tests(include_str!("css-parsing-tests/stylesheet_bytes.json"),
                        |input, expected| {
         let map = match input {
@@ -210,17 +228,20 @@ fn stylesheet_from_bytes() {
                 assert!(c as u32 <= 0xFF);
                 c as u8
             }).collect::<Vec<u8>>();
-            let protocol_encoding_label = get_string(&map, "protocol_encoding");
+            let protocol_encoding_label = get_string(&map, "protocol_encoding")
+                .map(|s| s.as_bytes());
             let environment_encoding = get_string(&map, "environment_encoding")
-                .and_then(encoding_from_whatwg_label);
+                .map(|s| s.as_bytes())
+                .and_then(EncodingRs::from_label);
 
-            let (css_unicode, encoding) = decode_stylesheet_bytes(
+            let encoding = stylesheet_encoding::<EncodingRs>(
                 &css, protocol_encoding_label, environment_encoding);
+            let (css_unicode, used_encoding, _) = encoding.decode(&css);
             let input = &mut Parser::new(&css_unicode);
             let rules = RuleListParser::new_for_stylesheet(input, JsonParser)
                         .map(|result| result.unwrap_or(JArray!["error", "invalid"]))
                         .collect::<Vec<_>>();
-            JArray![rules, encoding.name()]
+            JArray![rules, used_encoding.name().to_lowercase()]
         };
         assert_json_eq(result, expected, Json::Object(map).to_string());
     });

From 46e0e80f8f7e8f6c64573c1af23fa25dff48a133 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Thu, 9 Feb 2017 20:03:23 +0100
Subject: [PATCH 3/6] Remove unicode-range tokens, per spec change.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

https://github.com/w3c/csswg-drafts/commit/01c55ee4c9a0bf565b88b6d581c24a0462d8257a

They’re being replaced by a micro-syntax / parsing algorithm
based on other tokens:

https://drafts.csswg.org/css-syntax/#urange
---
 src/css-parsing-tests/README.rst              |  4 -
 .../component_value_list.json                 | 74 -------------------
 src/serializer.rs                             | 36 +--------
 src/tests.rs                                  |  2 -
 src/tokenizer.rs                              | 48 ------------
 5 files changed, 4 insertions(+), 160 deletions(-)

diff --git a/src/css-parsing-tests/README.rst b/src/css-parsing-tests/README.rst
index d54b0d8b..9779cf24 100644
--- a/src/css-parsing-tests/README.rst
+++ b/src/css-parsing-tests/README.rst
@@ -228,10 +228,6 @@ Component values
     the value as a number, the type as the string ``"integer"`` or ``"number"``,
     and the unit as a string.
 
-<unicode-range>
-    Array of length 3: the string ``"unicode-range"``,
-    followed by the *start* and *end* integers as two numbers.
-
 <include-match>
     The string ``"~="``.
 
diff --git a/src/css-parsing-tests/component_value_list.json b/src/css-parsing-tests/component_value_list.json
index a3a2a8b3..42cac5fd 100644
--- a/src/css-parsing-tests/component_value_list.json
+++ b/src/css-parsing-tests/component_value_list.json
@@ -325,80 +325,6 @@
 	["dimension", "12", 12, "integer", "rêd"]
 ],
 
-"u+1 U+10 U+100 U+1000 U+10000 U+100000 U+1000000", [
-    ["unicode-range", 1, 1], " ",
-    ["unicode-range", 16, 16], " ",
-    ["unicode-range", 256, 256], " ",
-    ["unicode-range", 4096, 4096], " ",
-    ["unicode-range", 65536, 65536], " ",
-    ["unicode-range", 1048576, 1048576], " ",
-    ["unicode-range", 1048576, 1048576], ["number", "0", 0, "integer"]
-],
-
-"u+? u+1? U+10? U+100? U+1000? U+10000? U+100000?", [
-    ["unicode-range", 0, 15], " ",
-    ["unicode-range", 16, 31], " ",
-    ["unicode-range", 256, 271], " ",
-    ["unicode-range", 4096, 4111], " ",
-    ["unicode-range", 65536, 65551], " ",
-    ["unicode-range", 1048576, 1048591], " ",
-    ["unicode-range", 1048576, 1048576], "?"
-],
-
-"u+?? U+1?? U+10?? U+100?? U+1000?? U+10000??", [
-    ["unicode-range", 0, 255], " ",
-    ["unicode-range", 256, 511], " ",
-    ["unicode-range", 4096, 4351], " ",
-    ["unicode-range", 65536, 65791], " ",
-    ["unicode-range", 1048576, 1048831], " ",
-    ["unicode-range", 1048576, 1048591], "?"
-],
-
-"u+??? U+1??? U+10??? U+100??? U+1000???", [
-    ["unicode-range", 0, 4095], " ",
-    ["unicode-range", 4096, 8191], " ",
-    ["unicode-range", 65536, 69631], " ",
-    ["unicode-range", 1048576, 1052671], " ",
-    ["unicode-range", 1048576, 1048831], "?"
-],
-
-"u+???? U+1???? U+10???? U+100????", [
-    ["unicode-range", 0, 65535], " ",
-    ["unicode-range", 65536, 131071], " ",
-    ["unicode-range", 1048576, 1114111], " ",
-    ["unicode-range", 1048576, 1052671], "?"
-],
-
-"u+????? U+1????? U+10?????", [
-    ["unicode-range", 0, 1048575], " ",
-    ["unicode-range", 1048576, 2097151], " ",
-    ["unicode-range", 1048576, 1114111], "?"
-],
-
-"u+?????? U+1??????", [
-    ["unicode-range", 0, 16777215], " ",
-    ["unicode-range", 1048576, 2097151], "?"
-],
-
-"u+20-3F U+100000-2 U+1000000-2 U+10-200000", [
-    ["unicode-range", 32, 63], " ",
-    ["unicode-range", 1048576, 2], " ",
-    ["unicode-range", 1048576, 1048576], ["number", "0", 0, "integer"],
-        ["number", "-2", -2, "integer"], " ",
-    ["unicode-range", 16, 2097152]
-],
-
-"ù+12 Ü+12 u +12 U+ 12 U+12 - 20 U+1?2 U+1?-50 U+1- 2", [
-    ["ident", "ù"], ["number", "+12", 12, "integer"], " ",
-    ["ident", "Ü"], ["number", "+12", 12, "integer"], " ",
-    ["ident", "u"], " ", ["number", "+12", 12, "integer"], " ",
-    ["ident", "U"], "+", " ", ["number", "12", 12, "integer"], " ",
-    ["unicode-range", 18, 18], " ", "-", " ", ["number", "20", 20, "integer"], " ",
-    ["unicode-range", 16, 31], ["number", "2", 2, "integer"], " ",
-    ["unicode-range", 16, 31], ["number", "-50", -50, "integer"], " ",
-    ["unicode-range", 1, 1], "-", " ", ["number", "2", 2, "integer"]
-],
-
 "~=|=^=$=*=||<!------> |/**/| ~/**/=", [
 	"~=", "|=", "^=", "$=", "*=", "||", "<!--", ["ident", "----"], ">",
 	" ", "|", "|", " ", "~", "="
diff --git a/src/serializer.rs b/src/serializer.rs
index ded6bb21..ec8482e1 100644
--- a/src/serializer.rs
+++ b/src/serializer.rs
@@ -3,7 +3,6 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use std::ascii::AsciiExt;
-use std::cmp;
 use std::fmt::{self, Write};
 
 use super::{Token, NumericValue, PercentageValue};
@@ -110,28 +109,6 @@ impl<'a> ToCss for Token<'a> {
                 }
             },
 
-            Token::UnicodeRange(start, end) => {
-                try!(dest.write_str("U+"));
-                let bits = cmp::min(start.trailing_zeros(), (!end).trailing_zeros());
-                let question_marks = bits / 4;
-                let bits = question_marks * 4;
-                let truncated_start = start >> bits;
-                let truncated_end = end >> bits;
-                if truncated_start == truncated_end {
-                    if truncated_start != 0 {
-                        try!(write!(dest, "{:X}", truncated_start));
-                    }
-                    for _ in 0..question_marks {
-                        try!(dest.write_str("?"));
-                    }
-                } else {
-                    try!(write!(dest, "{:X}", start));
-                    if end != start {
-                        try!(write!(dest, "-{:X}", end));
-                    }
-                }
-            }
-
             Token::WhiteSpace(content) => try!(dest.write_str(content)),
             Token::Comment(content) => try!(write!(dest, "/*{}*/", content)),
             Token::Colon => try!(dest.write_str(":")),
@@ -343,17 +320,14 @@ impl TokenSerializationType {
         match self.0 {
             Ident => matches!(other.0,
                 Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
-                UnicodeRange | CDC | OpenParen),
+                CDC | OpenParen),
             AtKeywordOrHash | Dimension => matches!(other.0,
                 Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
-                UnicodeRange | CDC),
+                CDC),
             DelimHash | DelimMinus | Number => matches!(other.0,
-                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
-                UnicodeRange),
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension),
             DelimAt => matches!(other.0,
-                Ident | Function | UrlOrBadUrl | DelimMinus | UnicodeRange),
-            UnicodeRange => matches!(other.0,
-                Ident | Function | Number | Percentage | Dimension | DelimQuestion),
+                Ident | Function | UrlOrBadUrl | DelimMinus),
             DelimDotOrPlus => matches!(other.0, Number | Percentage | Dimension),
             DelimAssorted | DelimAsterisk => matches!(other.0, DelimEquals),
             DelimBar => matches!(other.0, DelimEquals | DelimBar | DashMatch),
@@ -372,7 +346,6 @@ enum TokenSerializationTypeVariants {
     Number,
     Dimension,
     Percentage,
-    UnicodeRange,
     UrlOrBadUrl,
     Function,
     Ident,
@@ -417,7 +390,6 @@ impl<'a> Token<'a> {
             Token::Number(_) => Number,
             Token::Percentage(_) => Percentage,
             Token::Dimension(..) => Dimension,
-            Token::UnicodeRange(..) => UnicodeRange,
             Token::WhiteSpace(_) => WhiteSpace,
             Token::Comment(_) => DelimSlash,
             Token::DashMatch => DashMatch,
diff --git a/src/tests.rs b/src/tests.rs
index 326f6f07..93f34d16 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -761,8 +761,6 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json {
             v
         }),
 
-        Token::UnicodeRange(start, end) => JArray!["unicode-range", start, end],
-
         Token::WhiteSpace(_) => " ".to_json(),
         Token::Comment(_) => "/**/".to_json(),
         Token::Colon => ":".to_json(),
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 64d1a046..09bf63b2 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -62,14 +62,6 @@ pub enum Token<'a> {
     /// A [`<dimension-token>`](https://drafts.csswg.org/css-syntax/#dimension-token-diagram)
     Dimension(NumericValue, Cow<'a, str>),
 
-    /// A [`<unicode-range-token>`](https://drafts.csswg.org/css-syntax/#unicode-range-token-diagram)
-    ///
-    /// Components are the start and end code points, respectively.
-    ///
-    /// The tokenizer only reads up to 6 hex digit (up to 0xFF_FFFF),
-    /// but does not check that code points are within the range of Unicode (up to U+10_FFFF).
-    UnicodeRange(u32, u32),
-
     /// A [`<whitespace-token>`](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram)
     WhiteSpace(&'a str),
 
@@ -522,13 +514,6 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
             if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
             else { Delim('@') }
         },
-        b'u' | b'U' => {
-            if tokenizer.has_at_least(2)
-               && tokenizer.byte_at(1) == b'+'
-               && matches!(tokenizer.byte_at(2), b'0'...b'9' | b'a'...b'f' | b'A'...b'F' | b'?')
-            { consume_unicode_range(tokenizer) }
-            else { consume_ident_like(tokenizer) }
-        },
         b'a'...b'z' | b'A'...b'Z' | b'_' | b'\0' => { consume_ident_like(tokenizer) },
         b'[' => { tokenizer.advance(1); SquareBracketBlock },
         b'\\' => {
@@ -1017,39 +1002,6 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
     }
 }
 
-
-
-fn consume_unicode_range<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
-    tokenizer.advance(2);  // Skip U+
-    let (hex_value, hex_digits) = consume_hex_digits(tokenizer);
-    let max_question_marks = 6 - hex_digits;
-    let mut question_marks = 0;
-    while question_marks < max_question_marks && !tokenizer.is_eof()
-            && tokenizer.next_byte_unchecked() == b'?' {
-        question_marks += 1;
-        tokenizer.advance(1)
-    }
-    let start;
-    let end;
-    if question_marks > 0 {
-        start = hex_value << (question_marks * 4);
-        end = ((hex_value + 1) << (question_marks * 4)) - 1;
-    } else {
-        start = hex_value;
-        if tokenizer.has_at_least(1) &&
-           tokenizer.next_byte_unchecked() == b'-' &&
-           matches!(tokenizer.byte_at(1), b'0'...b'9' | b'A'...b'F' | b'a'...b'f') {
-            tokenizer.advance(1);
-            let (hex_value, _) = consume_hex_digits(tokenizer);
-            end = hex_value;
-        } else {
-            end = start;
-        }
-    }
-    UnicodeRange(start, end)
-}
-
-
 // (value, number of digits up to 6)
 fn consume_hex_digits<'a>(tokenizer: &mut Tokenizer<'a>) -> (u32, u32) {
     let mut value = 0;

From 2ec91db08619a0dbdb9c7982a8d7140933f7cb09 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Thu, 9 Feb 2017 21:43:27 +0100
Subject: [PATCH 4/6] Add parsing and serialization for <urange>

https://drafts.csswg.org/css-syntax/#urange-syntax
---
 src/lib.rs           |   2 +
 src/unicode_range.rs | 260 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 262 insertions(+)
 create mode 100644 src/unicode_range.rs

diff --git a/src/lib.rs b/src/lib.rs
index fbc6c5a1..905f5066 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -85,6 +85,7 @@ pub use color::{RGBA, Color, parse_color_keyword};
 pub use nth::parse_nth;
 pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
 pub use parser::{Parser, Delimiter, Delimiters, SourcePosition};
+pub use unicode_range::UnicodeRange;
 
 
 /**
@@ -163,6 +164,7 @@ mod from_bytes;
 mod color;
 mod nth;
 mod serializer;
+mod unicode_range;
 
 #[cfg(test)]
 mod tests;
diff --git a/src/unicode_range.rs b/src/unicode_range.rs
new file mode 100644
index 00000000..0de8f1ee
--- /dev/null
+++ b/src/unicode_range.rs
@@ -0,0 +1,260 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! https://drafts.csswg.org/css-syntax/#urange
+
+use {Parser, ToCss};
+use std::char;
+use std::cmp;
+use std::fmt;
+use std::io::{self, Write};
+use tokenizer::{Token, NumericValue};
+
+/// One contiguous range of code points, with both bounds inclusive.
+///
+/// Cannot be empty. Represents a single code point when `start == end`.
+#[derive(PartialEq, Eq, Clone, Hash)]
+pub struct UnicodeRange {
+    /// Inclusive start of the range. `parse` only returns ranges with `start <= end`.
+    pub start: u32,
+
+    /// Inclusive end of the range. `parse` only returns ranges with `end <= 0x10FFFF`.
+    pub end: u32,
+}
+
+impl UnicodeRange {
+    /// https://drafts.csswg.org/css-syntax/#urange-syntax
+    pub fn parse(input: &mut Parser) -> Result<Self, ()> {
+        // <urange> =
+        //   u '+' <ident-token> '?'* |
+        //   u <dimension-token> '?'* |
+        //   u <number-token> '?'* |
+        //   u <number-token> <dimension-token> |
+        //   u <number-token> <number-token> |
+        //   u '+' '?'+
+
+        input.expect_ident_matching("u")?;
+
+        // Since start or end can’t be above 0x10FFFF, they can’t have more than 6 hex digits
+        // Conversely, input with more digits would end up returning Err anyway.
+        const MAX_LENGTH_AFTER_U_PLUS: usize = 6 + 1 + 6; // 6 digits, '-', 6 digits
+        let mut buffer = [0; MAX_LENGTH_AFTER_U_PLUS];
+
+        let remaining_len;
+        {
+            let mut remaining = &mut buffer[..];
+            concatenate_tokens(input, &mut remaining)?;
+            remaining_len = remaining.len();
+        }
+
+        let text_len = buffer.len() - remaining_len;
+        let text = &buffer[..text_len];
+        let range = parse_concatenated(text)?;
+        if range.end > char::MAX as u32 || range.start > range.end {
+            Err(())
+        } else {
+            Ok(range)
+        }
+    }
+}
+
+/// Write the tokens that follow `u` into `remaining`, as text for `parse_concatenated`.
+/// Errors on an unexpected token, or when a write in this function overflows `remaining`.
+fn concatenate_tokens(input: &mut Parser, remaining: &mut &mut [u8]) -> Result<(), Error> {
+    match input.next_including_whitespace()? {
+        Token::Delim('+') => {
+            match input.next_including_whitespace()? {
+                Token::Ident(ident) => remaining.write_all(ident.as_bytes())?,
+                Token::Delim('?') => remaining.write_all(b"?")?,
+                _ => return Err(Error)
+            }
+            parse_question_marks(input, remaining)
+        }
+        Token::Dimension(ref value, ref unit) => {
+            // Require a '+' sign as part of the number
+            let int_value = positive_integer_with_plus_sign(value)?;
+            write!(remaining, "{}{}", int_value, unit)?;
+            parse_question_marks(input, remaining)
+        }
+        Token::Number(ref value) => {
+            // Require a '+' sign as part of the number
+            let int_value = positive_integer_with_plus_sign(value)?;
+            write!(remaining, "{}", int_value)?;
+
+            match input.next_including_whitespace() {
+                // EOF here is fine
+                Err(()) => {},
+
+                Ok(Token::Delim('?')) => {
+                    // If `remaining` is already full, `int_value` has too many digits
+                    // so we can use `result?` Rust syntax.
+                    remaining.write_all(b"?")?;
+                    parse_question_marks(input, remaining)
+                }
+
+                Ok(Token::Dimension(ref value, ref unit)) => {
+                    // `negative_integer` accepts zero, which formats without a sign: write
+                    // the '-' here so that "+1" "-0F" gives "1-0F" rather than "10F".
+                    let int_value = negative_integer(value)?;
+                    write!(remaining, "-{}{}", -i64::from(int_value), unit)?
+                }
+
+                Ok(Token::Number(ref value)) => {
+                    // Same as above: keep the '-' separator even when the value is zero.
+                    let int_value = negative_integer(value)?;
+                    write!(remaining, "-{}", -i64::from(int_value))?
+                }
+
+                _ => return Err(Error)
+            }
+        }
+        _ => return Err(Error)
+    }
+    Ok(())
+}
+
+/// Consume as many '?' as possible and write them to `remaining` until it’s full
+fn parse_question_marks(input: &mut Parser, remaining: &mut &mut [u8]) {
+    loop {
+        let result = input.try(|input| {
+            match input.next_including_whitespace() {
+                Ok(Token::Delim('?')) => remaining.write_all(b"?").map_err(|_| ()),
+                _ => Err(())
+            }
+        });
+        if result.is_err() {
+            return
+        }
+    }
+}
+
+fn positive_integer_with_plus_sign(value: &NumericValue) -> Result<i32, ()> {
+    let int_value = value.int_value.ok_or(())?;
+    if value.has_sign && int_value >= 0 {
+        Ok(int_value)
+    } else {
+        Err(())
+    }
+}
+
+fn negative_integer(value: &NumericValue) -> Result<i32, ()> {  // NOTE(review): zero passes too; can "+0" reach here?
+    let int_value = value.int_value.ok_or(())?;
+    if int_value <= 0 {
+        Ok(int_value)
+    } else {
+        Err(())
+    }
+}
+
+fn parse_concatenated(mut text: &[u8]) -> Result<UnicodeRange, ()> {
+    let (first_hex_value, hex_digit_count) = consume_hex(&mut text);
+    let question_marks = consume_question_marks(&mut text);
+    let consumed = hex_digit_count + question_marks;
+    if consumed == 0 || consumed > 6 {
+        return Err(())
+    }
+
+    if question_marks > 0 {
+        if text.is_empty() {
+            return Ok(UnicodeRange {
+                start: first_hex_value << (question_marks * 4),
+                end: ((first_hex_value + 1) << (question_marks * 4)) - 1,
+            })
+        }
+    } else if text.is_empty() {
+        return Ok(UnicodeRange {
+            start: first_hex_value,
+            end: first_hex_value,
+        })
+    } else {
+        if let Some((&b'-', mut text)) = text.split_first() {
+            let (second_hex_value, hex_digit_count) = consume_hex(&mut text);
+            if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() {
+                return Ok(UnicodeRange {
+                    start: first_hex_value,
+                    end: second_hex_value,
+                })
+            }
+        }
+    }
+    Err(())
+}
+
+/// Consume leading hex digits, returning (value, digit count). The value saturates
+/// instead of overflowing `u32` (a panic in debug builds) on more than 8 digits.
+fn consume_hex(text: &mut &[u8]) -> (u32, usize) {
+    let (mut value, mut digits) = (0u32, 0);
+    while let Some((&byte, rest)) = text.split_first() {
+        match (byte as char).to_digit(16) {
+            Some(digit) => value = value.saturating_mul(0x10).saturating_add(digit),
+            None => break,
+        }
+        digits += 1;
+        *text = rest
+    }
+    (value, digits)
+}
+
+fn consume_question_marks(text: &mut &[u8]) -> usize {
+    let mut question_marks = 0;
+    while let Some((&b'?', rest)) = text.split_first() {
+        question_marks += 1;
+        *text = rest
+    }
+    question_marks
+}
+
+impl fmt::Debug for UnicodeRange {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        self.to_css(formatter)
+    }
+}
+
+impl ToCss for UnicodeRange {
+    fn to_css<W>(&self, dest: &mut W) -> fmt::Result where W: fmt::Write {
+        dest.write_str("U+")?;
+
+        // How many bits are 0 at the end of start and also 1 at the end of end.
+        let bits = cmp::min(self.start.trailing_zeros(), (!self.end).trailing_zeros());
+
+        let question_marks = bits / 4;
+
+        // How many lower bits can be represented as question marks
+        let bits = question_marks * 4;
+
+        let truncated_start = self.start >> bits;
+        let truncated_end = self.end >> bits;
+        if truncated_start == truncated_end {
+            // Bits not covered by question marks are the same in start and end,
+            // we can use the question mark syntax.
+            if truncated_start != 0 {
+                write!(dest, "{:X}", truncated_start)?;
+            }
+            for _ in 0..question_marks {
+                dest.write_str("?")?;
+            }
+        } else {
+            write!(dest, "{:X}", self.start)?;
+            if self.end != self.start {
+                write!(dest, "-{:X}", self.end)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Make conversions from io::Error implicit in `?` syntax.
+struct Error;
+
+impl From<Error> for () {
+    fn from(_: Error) -> Self { () }
+}
+
+impl From<()> for Error {
+    fn from(_: ()) -> Self { Error }
+}
+
+impl From<io::Error> for Error {
+    fn from(_: io::Error) -> Self { Error }
+}

From 3a9ffa07206a22e280567d9ce9227cb4bd8e9d41 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Fri, 10 Feb 2017 00:24:25 +0100
Subject: [PATCH 5/6] Add unicode-range tests.

---
 src/css-parsing-tests/README.rst  |  7 +++
 src/css-parsing-tests/urange.json | 81 +++++++++++++++++++++++++++++++
 src/tests.rs                      | 17 ++++++-
 3 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 src/css-parsing-tests/urange.json

diff --git a/src/css-parsing-tests/README.rst b/src/css-parsing-tests/README.rst
index 9779cf24..6f025255 100644
--- a/src/css-parsing-tests/README.rst
+++ b/src/css-parsing-tests/README.rst
@@ -142,6 +142,13 @@ associated with the expected result.
     the output as null for invalid syntax,
     or an array of two integers ``[A, B]``.
 
+``urange.json``
+    Tests the `urange <https://drafts.csswg.org/css-syntax-3/#urange>`_
+    syntax defined in CSS Syntax Level 3.
+    The Unicode input is represented by a JSON string,
+    the output as null for invalid syntax,
+    or an array of two integers ``[start, end]``.
+
 
 Result representation
 =====================
diff --git a/src/css-parsing-tests/urange.json b/src/css-parsing-tests/urange.json
new file mode 100644
index 00000000..d3b6bb54
--- /dev/null
+++ b/src/css-parsing-tests/urange.json
@@ -0,0 +1,81 @@
+[
+
+"u+1, U+10, U+100, U+1000, U+10000, U+100000, U+1000000", [
+    [1, 1],
+    [16, 16],
+    [256, 256],
+    [4096, 4096],
+    [65536, 65536],
+    [1048576, 1048576],
+    null
+],
+
+"u+?, u+1?, U+10?, U+100?, U+1000?, U+10000?, U+100000?", [
+    [0, 15],
+    [16, 31],
+    [256, 271],
+    [4096, 4111],
+    [65536, 65551],
+    [1048576, 1048591],
+    null
+],
+
+"u+??, U+1??, U+10??, U+100??, U+1000??, U+10000??", [
+    [0, 255],
+    [256, 511],
+    [4096, 4351],
+    [65536, 65791],
+    [1048576, 1048831],
+    null
+],
+
+"u+???, U+1???, U+10???, U+100???, U+1000???", [
+    [0, 4095],
+    [4096, 8191],
+    [65536, 69631],
+    [1048576, 1052671],
+    null
+],
+
+"u+????, U+1????, U+10????, U+100????", [
+    [0, 65535],
+    [65536, 131071],
+    [1048576, 1114111],
+    null
+],
+
+"u+?????, U+1?????, U+10?????", [
+    [0, 1048575],
+    null,
+    null
+],
+
+"u+??????, U+1??????", [
+    null,
+    null
+],
+
+
+"u+20-3F, u+3F-3F, u+3F-3E, U+0-110000, U+0-10FFFF, U+100000-2, U+1000000-2, U+10-200000", [
+    [32, 63],
+    [63, 63],
+    null,
+    null,
+    [0, 1114111],
+    null,
+    null,
+    null
+],
+
+"ù+12, Ü+12, u +12, U+ 12, U+12 - 20, U+1?2, U+1?-50, U+1- 2", [
+    null,
+    null,
+    null,
+    null,
+    null,
+    null,
+    null,
+    null
+]
+
+]
diff --git a/src/tests.rs b/src/tests.rs
index 93f34d16..b5063138 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -23,7 +23,7 @@ use super::{Parser, Delimiter, Token, NumericValue, PercentageValue, SourceLocat
             parse_one_declaration, parse_one_rule, parse_important,
             stylesheet_encoding, EncodingSupport,
             TokenSerializationType,
-            Color, RGBA, parse_nth, ToCss};
+            Color, RGBA, parse_nth, UnicodeRange, ToCss};
 
 macro_rules! JArray {
     ($($e: expr,)*) => { JArray![ $( $e ),* ] };
@@ -350,6 +350,21 @@ fn nth() {
     });
 }
 
+#[test]
+fn unicode_range() {
+    run_json_tests(include_str!("css-parsing-tests/urange.json"), |input| {
+        input.parse_comma_separated(|input| {
+            let result = UnicodeRange::parse(input).ok().map(|r| (r.start, r.end));
+            if input.is_exhausted() {
+                Ok(result)
+            } else {
+                while let Ok(_) = input.next() {}
+                Ok(None)
+            }
+        }).unwrap().to_json()
+    });
+}
+
 
 #[test]
 fn serializer_not_preserving_comments() {

From f2927df5cac1375f6348a7523bf6894d3a5fcfc3 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Fri, 10 Feb 2017 00:24:39 +0100
Subject: [PATCH 6/6] Update CSSWG drafts URL.

---
 src/css-parsing-tests/README.rst | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/css-parsing-tests/README.rst b/src/css-parsing-tests/README.rst
index 6f025255..b3baa740 100644
--- a/src/css-parsing-tests/README.rst
+++ b/src/css-parsing-tests/README.rst
@@ -4,7 +4,7 @@ CSS parsing tests
 This repository contains implementation-independent test for CSS parsers,
 based on the 2013 draft of the `CSS Syntax Level 3`_ specification.
 
-.. _CSS Syntax Level 3: http://dev.w3.org/csswg/css-syntax-3/
+.. _CSS Syntax Level 3: https://drafts.csswg.org/css-syntax-3/
 
 The upstream repository for these tests is at
 https://github.com/SimonSapin/css-parsing-tests
@@ -51,51 +51,51 @@ associated with the expected result.
 
 ``component_value_list.json``
     Tests `Parse a list of component values
-    <http://dev.w3.org/csswg/css-syntax-3/#parse-a-list-of-component-values>`_.
+    <https://drafts.csswg.org/css-syntax-3/#parse-a-list-of-component-values>`_.
     The Unicode input is represented by a JSON string,
     the output as an array of `component values`_ as described below.
 
 ``component_value_list.json``
     Tests `Parse a component value
-    <http://dev.w3.org/csswg/css-syntax-3/#parse-a-component-value>`_.
+    <https://drafts.csswg.org/css-syntax-3/#parse-a-component-value>`_.
     The Unicode input is represented by a JSON string,
     the output as a `component value`_.
 
 ``declaration_list.json``
     Tests `Parse a list of declarations
-    <http://dev.w3.org/csswg/css-syntax-3/#parse-a-list-of-declarations>`_.
+    <https://drafts.csswg.org/css-syntax-3/#parse-a-list-of-declarations>`_.
     The Unicode input is represented by a JSON string,
     the output as an array of declarations_ and at-rules_.
 
 ``one_declaration.json``
     Tests `Parse a declaration
-    <http://dev.w3.org/csswg/css-syntax-3/#parse-a-declaration>`_.
+    <https://drafts.csswg.org/css-syntax-3/#parse-a-declaration>`_.
     The Unicode input is represented by a JSON string,
     the output as a declaration_.
 
 ``one_rule.json``
     Tests `Parse a rule
-    <http://dev.w3.org/csswg/css-syntax-3/#parse-a-rule>`_.
+    <https://drafts.csswg.org/css-syntax-3/#parse-a-rule>`_.
     The Unicode input is represented by a JSON string,
     the output as a `qualified rule`_ or at-rule_.
 
 ``rule_list.json``
     Tests `Parse a list of rules
-    <http://dev.w3.org/csswg/css-syntax-3/#parse-a-list-of-rules>`_.
+    <https://drafts.csswg.org/css-syntax-3/#parse-a-list-of-rules>`_.
     The Unicode input is represented by a JSON string,
     the output as a list of `qualified rules`_ or at-rules_.
 
 ``stylesheet.json``
     Tests `Parse a stylesheet
-    <http://dev.w3.org/csswg/css-syntax-3/#parse-a-stylesheet>`_.
+    <https://drafts.csswg.org/css-syntax-3/#parse-a-stylesheet>`_.
     The Unicode input is represented by a JSON string,
     the output as a list of `qualified rules`_ or at-rules_.
 
 ``stylesheet_bytes.json``
     Tests `Parse a stylesheet
-    <http://dev.w3.org/csswg/css-syntax-3/#parse-a-stylesheet>`_
+    <https://drafts.csswg.org/css-syntax-3/#parse-a-stylesheet>`_
     together with `The input byte stream
-    <http://dev.w3.org/csswg/css-syntax/#input-byte-stream>`_.
+    <https://drafts.csswg.org/css-syntax-3/#input-byte-stream>`_.
     The input is represented as a JSON object containing:
 
     * A required ``css_bytes``, the input byte string,
@@ -132,12 +132,12 @@ associated with the expected result.
     This file is generated by the ``make_color3_keywords.py`` Python script.
 
 ``An+B.json``
-    Tests the `An+B <http://dev.w3.org/csswg/css-syntax/#the-anb-type>`_
+    Tests the `An+B <https://drafts.csswg.org/css-syntax-3/#the-anb-type>`_
     syntax defined in CSS Syntax Level 3.
-    This `differs <http://dev.w3.org/csswg/css-syntax/#changes>`_ from the
+    This `differs <https://drafts.csswg.org/css-syntax-3/#changes>`_ from the
     `nth grammar rule <http://www.w3.org/TR/css3-selectors/#nth-child-pseudo>`_
     in Selectors Level 3 only in that
-    ``-`` charecters and digits can be escaped in some cases.
+    ``-`` characters and digits can be escaped in some cases.
     The Unicode input is represented by a JSON string,
     the output as null for invalid syntax,
     or an array of two integers ``[A, B]``.