Commit d9435a2

Fix another panic in bad-url token parsing
https://bugzilla.mozilla.org/show_bug.cgi?id=1383975
Parent: fdd7852
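
The Bugzilla report is a panic from slicing a string at a byte index that is not a UTF-8 character boundary: `&str` range indexing panics at runtime when either endpoint of the range falls inside a multi-byte character. A minimal standalone sketch of that failure mode (an illustration, not code from this commit; the string mirrors the new regression test's input):

    fn main() {
        // '۰' (U+06F0) encodes as two bytes, so "9\n۰" is four bytes: 39 0A DB B0.
        let input = "9\n۰";
        assert_eq!(input.len(), 4);

        // Byte 3 falls inside the two-byte character, so it is not a boundary.
        assert!(!input.is_char_boundary(3));

        // `&input[..3]` would panic: "byte index 3 is not a char boundary".
        // The checked variant returns None instead of panicking:
        assert_eq!(input.get(..3), None);
        assert_eq!(input.get(..2), Some("9\n"));
    }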

File tree: 3 files changed (+25, -15 lines)


Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 [package]
 
 name = "cssparser"
-version = "0.18.1"
+version = "0.18.2"
 authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]
 
 description = "Rust implementation of CSS Syntax Level 3"

src/tests.rs

Lines changed: 11 additions & 1 deletion
@@ -276,7 +276,17 @@ fn outer_block_end_consumed() {
 fn bad_url_slice_out_of_bounds() {
     let mut input = ParserInput::new("url(\u{1}\\");
     let mut parser = Parser::new(&mut input);
-    let _ = parser.next_including_whitespace_and_comments(); // This used to panic
+    let result = parser.next_including_whitespace_and_comments(); // This used to panic
+    assert_eq!(result, Ok(&Token::BadUrl("\u{1}\\".into())));
+}
+
+/// https://bugzilla.mozilla.org/show_bug.cgi?id=1383975
+#[test]
+fn bad_url_slice_not_at_char_boundary() {
+    let mut input = ParserInput::new("url(9\n۰");
+    let mut parser = Parser::new(&mut input);
+    let result = parser.next_including_whitespace_and_comments(); // This used to panic
+    assert_eq!(result, Ok(&Token::BadUrl("9\n۰".into())));
 }
 
 #[test]
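
The new test is the regression case from the Bugzilla report: per CSS Syntax Level 3, an unescaped newline inside an unquoted url() turns the whole construct into a <bad-url-token>, and here the bytes after the newline start with the two-byte character '۰'. A sketch of driving the same path through the public API (assuming the 0.18-era API the tests above use, where `Token::BadUrl` carries the raw token contents):

    extern crate cssparser;

    use cssparser::{Parser, ParserInput, Token};

    fn main() {
        // Same input as the regression test above.
        let mut input = ParserInput::new("url(9\n۰");
        let mut parser = Parser::new(&mut input);

        // Before this fix, tokenizing this input panicked on a mid-character slice.
        match parser.next_including_whitespace_and_comments() {
            Ok(&Token::BadUrl(ref contents)) => assert_eq!(&**contents, "9\n۰"),
            other => panic!("expected a bad-url token, got {:?}", other),
        }
    }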

src/tokenizer.rs

Lines changed: 13 additions & 13 deletions
@@ -964,7 +964,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
             b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                 let value = tokenizer.slice_from(start_pos);
                 tokenizer.advance(1);
-                return consume_url_end(tokenizer, value.into())
+                return consume_url_end(tokenizer, start_pos, value.into())
             }
             b')' => {
                 let value = tokenizer.slice_from(start_pos);
@@ -974,7 +974,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
             b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable
             | b'"' | b'\'' | b'(' => {
                 tokenizer.advance(1);
-                return consume_bad_url(tokenizer)
+                return consume_bad_url(tokenizer, start_pos)
             },
             b'\\' | b'\0' => {
                 // * The tokenizer’s input is UTF-8 since it’s `&str`.
@@ -993,22 +993,20 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
     while !tokenizer.is_eof() {
         match_byte! { tokenizer.consume_byte(),
             b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
-                return consume_url_end(
-                    tokenizer,
-                    // string_bytes is well-formed UTF-8, see other comments.
-                    unsafe { from_utf8_release_unchecked(string_bytes) }.into()
-                )
+                // string_bytes is well-formed UTF-8, see other comments.
+                let string = unsafe { from_utf8_release_unchecked(string_bytes) }.into();
+                return consume_url_end(tokenizer, start_pos, string)
             }
             b')' => {
                 break;
             }
             b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable
             | b'"' | b'\'' | b'(' => {
-                return consume_bad_url(tokenizer);
+                return consume_bad_url(tokenizer, start_pos);
             }
             b'\\' => {
                 if tokenizer.has_newline_at(0) {
-                    return consume_bad_url(tokenizer)
+                    return consume_bad_url(tokenizer, start_pos)
                 }
 
                 // This pushes one well-formed code point to string_bytes
@@ -1028,21 +1026,23 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
     )
 }
 
-fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: CowRcStr<'a>) -> Token<'a> {
+fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>,
+                       start_pos: SourcePosition,
+                       string: CowRcStr<'a>)
+                       -> Token<'a> {
     while !tokenizer.is_eof() {
         match_byte! { tokenizer.consume_byte(),
             b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {},
             b')' => { break },
             _ => {
-                return consume_bad_url(tokenizer);
+                return consume_bad_url(tokenizer, start_pos);
             }
         }
     }
     UnquotedUrl(string)
 }
 
-fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
-    let start_pos = tokenizer.position();
+fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
     // Consume up to the closing )
     while !tokenizer.is_eof() {
         match_byte! { tokenizer.consume_byte(),
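
The shape of the fix: `consume_bad_url` used to recompute its slice start from `tokenizer.position()`, but its callers reach it through `consume_byte()`, which advances one byte at a time and can therefore stop in the middle of a multi-byte character; slicing from that position panicked. The commit instead threads through `start_pos`, recorded at a known character boundary before any byte-wise consumption began. A self-contained toy sketch of the pattern (illustrative names, not cssparser's internals):

    // Minimal byte-walking tokenizer, just enough to show the boundary issue.
    struct Toy<'a> {
        input: &'a str,
        position: usize, // byte offset; may sit inside a multi-byte character
    }

    impl<'a> Toy<'a> {
        fn is_eof(&self) -> bool {
            self.position >= self.input.len()
        }
        fn consume_byte(&mut self) -> u8 {
            let b = self.input.as_bytes()[self.position];
            self.position += 1; // advances one *byte*, not one character
            b
        }
        fn slice_from(&self, start: usize) -> &'a str {
            // Panics if `start` or `self.position` is not a char boundary.
            &self.input[start..self.position]
        }
    }

    // Fixed pattern: the caller passes a start recorded at a char boundary,
    // instead of this function reading `self.position` after the caller may
    // already have consumed the first byte of a multi-byte character.
    fn consume_bad_url<'a>(toy: &mut Toy<'a>, start_pos: usize) -> &'a str {
        while !toy.is_eof() {
            if toy.consume_byte() == b')' {
                break;
            }
        }
        // `position` now sits at EOF or just past b')', both char boundaries,
        // and `start_pos` was a boundary by construction: the slice is safe.
        toy.slice_from(start_pos)
    }

    fn main() {
        let input = "9\n۰"; // four bytes: 39 0A DB B0
        let mut toy = Toy { input, position: 0 };
        let start_pos = toy.position; // recorded at a char boundary

        // Walk byte-wise like the tokenizer: after '9', '\n' and the first
        // byte of '۰', position 3 sits mid-character. The old code took
        // *that* as the slice start and panicked later.
        toy.consume_byte();
        toy.consume_byte();
        toy.consume_byte();
        assert!(!input.is_char_boundary(toy.position));

        // The fixed version slices from the recorded boundary instead.
        assert_eq!(consume_bad_url(&mut toy, start_pos), "9\n۰");
    }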
