Commit d9435a2

Fix another panic in bad-url token parsing
https://bugzilla.mozilla.org/show_bug.cgi?id=1383975
Parent: fdd7852
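
The Bugzilla report is a panic from slicing a string at a byte index that is not a UTF-8 character boundary: `&str` range indexing panics at runtime when either endpoint of the range falls inside a multi-byte character. A minimal standalone sketch of that failure mode (an illustration, not code from this commit; the string mirrors the new regression test's input):

    fn main() {
        // '۰' (U+06F0) encodes as two bytes, so "9\n۰" is four bytes: 39 0A DB B0.
        let input = "9\n۰";
        assert_eq!(input.len(), 4);

        // Byte 3 falls inside the two-byte character, so it is not a boundary.
        assert!(!input.is_char_boundary(3));

        // `&input[..3]` would panic: "byte index 3 is not a char boundary".
        // The checked variant returns None instead of panicking:
        assert_eq!(input.get(..3), None);
        assert_eq!(input.get(..2), Some("9\n"));
    }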

File tree: 3 files changed (+25, -15 lines)


Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 [package]
 
 name = "cssparser"
-version = "0.18.1"
+version = "0.18.2"
 authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]
 
 description = "Rust implementation of CSS Syntax Level 3"

src/tests.rs

Lines changed: 11 additions & 1 deletion
@@ -276,7 +276,17 @@ fn outer_block_end_consumed() {
 fn bad_url_slice_out_of_bounds() {
     let mut input = ParserInput::new("url(\u{1}\\");
     let mut parser = Parser::new(&mut input);
-    let _ = parser.next_including_whitespace_and_comments(); // This used to panic
+    let result = parser.next_including_whitespace_and_comments(); // This used to panic
+    assert_eq!(result, Ok(&Token::BadUrl("\u{1}\\".into())));
+}
+
+/// https://bugzilla.mozilla.org/show_bug.cgi?id=1383975
+#[test]
+fn bad_url_slice_not_at_char_boundary() {
+    let mut input = ParserInput::new("url(9\n۰");
+    let mut parser = Parser::new(&mut input);
+    let result = parser.next_including_whitespace_and_comments(); // This used to panic
+    assert_eq!(result, Ok(&Token::BadUrl("9\n۰".into())));
 }
 
 #[test]
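
The new test is the regression case from the Bugzilla report: per CSS Syntax Level 3, an unescaped newline inside an unquoted url() turns the whole construct into a <bad-url-token>, and here the bytes after the newline start with the two-byte character '۰'. A sketch of driving the same path through the public API (assuming the 0.18-era API the tests above use, where `Token::BadUrl` carries the raw token contents):

    extern crate cssparser;

    use cssparser::{Parser, ParserInput, Token};

    fn main() {
        // Same input as the regression test above.
        let mut input = ParserInput::new("url(9\n۰");
        let mut parser = Parser::new(&mut input);

        // Before this fix, tokenizing this input panicked on a mid-character slice.
        match parser.next_including_whitespace_and_comments() {
            Ok(&Token::BadUrl(ref contents)) => assert_eq!(&**contents, "9\n۰"),
            other => panic!("expected a bad-url token, got {:?}", other),
        }
    }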

src/tokenizer.rs

Lines changed: 13 additions & 13 deletions
@@ -964,7 +964,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
             b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                 let value = tokenizer.slice_from(start_pos);
                 tokenizer.advance(1);
-                return consume_url_end(tokenizer, value.into())
+                return consume_url_end(tokenizer, start_pos, value.into())
             }
             b')' => {
                 let value = tokenizer.slice_from(start_pos);
@@ -974,7 +974,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
             b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable
             | b'"' | b'\'' | b'(' => {
                 tokenizer.advance(1);
-                return consume_bad_url(tokenizer)
+                return consume_bad_url(tokenizer, start_pos)
             },
             b'\\' | b'\0' => {
                 // * The tokenizer’s input is UTF-8 since it’s `&str`.
@@ -993,22 +993,20 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
     while !tokenizer.is_eof() {
         match_byte! { tokenizer.consume_byte(),
             b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
-                return consume_url_end(
-                    tokenizer,
-                    // string_bytes is well-formed UTF-8, see other comments.
-                    unsafe { from_utf8_release_unchecked(string_bytes) }.into()
-                )
+                // string_bytes is well-formed UTF-8, see other comments.
+                let string = unsafe { from_utf8_release_unchecked(string_bytes) }.into();
+                return consume_url_end(tokenizer, start_pos, string)
             }
             b')' => {
                 break;
             }
             b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable
             | b'"' | b'\'' | b'(' => {
-                return consume_bad_url(tokenizer);
+                return consume_bad_url(tokenizer, start_pos);
             }
             b'\\' => {
                 if tokenizer.has_newline_at(0) {
-                    return consume_bad_url(tokenizer)
+                    return consume_bad_url(tokenizer, start_pos)
                 }
 
                 // This pushes one well-formed code point to string_bytes
@@ -1028,21 +1026,23 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
     )
 }
 
-fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: CowRcStr<'a>) -> Token<'a> {
+fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>,
+                       start_pos: SourcePosition,
+                       string: CowRcStr<'a>)
+                       -> Token<'a> {
     while !tokenizer.is_eof() {
         match_byte! { tokenizer.consume_byte(),
             b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {},
             b')' => { break },
             _ => {
-                return consume_bad_url(tokenizer);
+                return consume_bad_url(tokenizer, start_pos);
             }
         }
     }
     UnquotedUrl(string)
 }
 
-fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
-    let start_pos = tokenizer.position();
+fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
     // Consume up to the closing )
     while !tokenizer.is_eof() {
         match_byte! { tokenizer.consume_byte(),
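
The shape of the fix: `consume_bad_url` used to recompute its slice start from `tokenizer.position()`, but its callers reach it through `consume_byte()`, which advances one byte at a time and can therefore stop in the middle of a multi-byte character; slicing from that position panicked. The commit instead threads through `start_pos`, recorded at a known character boundary before any byte-wise consumption began. A self-contained toy sketch of the pattern (illustrative names, not cssparser's internals):

    // Minimal byte-walking tokenizer, just enough to show the boundary issue.
    struct Toy<'a> {
        input: &'a str,
        position: usize, // byte offset; may sit inside a multi-byte character
    }

    impl<'a> Toy<'a> {
        fn is_eof(&self) -> bool {
            self.position >= self.input.len()
        }
        fn consume_byte(&mut self) -> u8 {
            let b = self.input.as_bytes()[self.position];
            self.position += 1; // advances one *byte*, not one character
            b
        }
        fn slice_from(&self, start: usize) -> &'a str {
            // Panics if `start` or `self.position` is not a char boundary.
            &self.input[start..self.position]
        }
    }

    // Fixed pattern: the caller passes a start recorded at a char boundary,
    // instead of this function reading `self.position` after the caller may
    // already have consumed the first byte of a multi-byte character.
    fn consume_bad_url<'a>(toy: &mut Toy<'a>, start_pos: usize) -> &'a str {
        while !toy.is_eof() {
            if toy.consume_byte() == b')' {
                break;
            }
        }
        // `position` now sits at EOF or just past b')', both char boundaries,
        // and `start_pos` was a boundary by construction: the slice is safe.
        toy.slice_from(start_pos)
    }

    fn main() {
        let input = "9\n۰"; // four bytes: 39 0A DB B0
        let mut toy = Toy { input, position: 0 };
        let start_pos = toy.position; // recorded at a char boundary

        // Walk byte-wise like the tokenizer: after '9', '\n' and the first
        // byte of '۰', position 3 sits mid-character. The old code took
        // *that* as the slice start and panicked later.
        toy.consume_byte();
        toy.consume_byte();
        toy.consume_byte();
        assert!(!input.is_char_boundary(toy.position));

        // The fixed version slices from the recorded boundary instead.
        assert_eq!(consume_bad_url(&mut toy, start_pos), "9\n۰");
    }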
