Skip to content

Commit 62c7f0e

Browse files
committed
Parse <unicode-range> based on the tokens’ source representation
Fix #135, where e.g. the `+4E-9` in `U+4E-9F` is tokenized as a scientific-notation number.
1 parent d43cd36 commit 62c7f0e

File tree

3 files changed

+38
-98
lines changed

3 files changed

+38
-98
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "cssparser"
4-
version = "0.12.2"
4+
version = "0.12.3"
55
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]
66

77
description = "Rust implementation of CSS Syntax Level 3"

src/css-parsing-tests/urange.json

+4
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@
6060
[0, 4095]
6161
],
6262

63+
"U+4E-9F", [
64+
[78, 159]
65+
],
66+
6367
"u+20-3F, u+3F-3F, u+3F-3E, U+0-110000, U+0-10FFFF, U+100000-2, U+1000000-2, U+10-200000", [
6468
[32, 63],
6569
[63, 63],

src/unicode_range.rs

+33-97
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ use {Parser, ToCss};
88
use std::char;
99
use std::cmp;
1010
use std::fmt;
11-
use std::io::{self, Write};
12-
use tokenizer::{Token, NumericValue};
11+
use tokenizer::Token;
1312

1413
/// One contiguous range of code points.
1514
///
@@ -35,22 +34,15 @@ impl UnicodeRange {
3534
// u '+' '?'+
3635

3736
input.expect_ident_matching("u")?;
37+
let after_u = input.position();
38+
parse_tokens(input)?;
3839

39-
// Since start or end can’t be above 0x10FFFF, they can’t have more than 6 hex digits
40-
// Conversely, input with more digits would end up returning Err anyway.
41-
const MAX_LENGTH_AFTER_U_PLUS: usize = 6 + 1 + 6; // 6 digits, '-', 6 digits
42-
let mut buffer = [0; MAX_LENGTH_AFTER_U_PLUS];
40+
// This deviates from the spec in case there are CSS comments
41+
// between tokens in the middle of one <unicode-range>,
42+
// but oh well…
43+
let concatenated_tokens = input.slice_from(after_u);
4344

44-
let remaining_len;
45-
{
46-
let mut remaining = &mut buffer[..];
47-
concatenate_tokens(input, &mut remaining)?;
48-
remaining_len = remaining.len();
49-
}
50-
51-
let text_len = buffer.len() - remaining_len;
52-
let text = &buffer[..text_len];
53-
let range = parse_concatenated(text)?;
45+
let range = parse_concatenated(concatenated_tokens.as_bytes())?;
5446
if range.end > char::MAX as u32 || range.start > range.end {
5547
Err(())
5648
} else {
@@ -59,93 +51,52 @@ impl UnicodeRange {
5951
}
6052
}
6153

62-
fn concatenate_tokens(input: &mut Parser, remaining: &mut &mut [u8]) -> Result<(), Error> {
54+
fn parse_tokens(input: &mut Parser) -> Result<(), ()> {
6355
match input.next_including_whitespace()? {
6456
Token::Delim('+') => {
6557
match input.next_including_whitespace()? {
66-
Token::Ident(ident) => remaining.write_all(ident.as_bytes())?,
67-
Token::Delim('?') => remaining.write_all(b"?")?,
68-
_ => return Err(Error)
58+
Token::Ident(_) => {}
59+
Token::Delim('?') => {}
60+
_ => return Err(())
6961
}
70-
parse_question_marks(input, remaining)
62+
parse_question_marks(input)
7163
}
72-
73-
Token::Dimension(ref value, ref unit) => {
74-
// Require a '+' sign as part of the number
75-
let int_value = positive_integer_with_plus_sign(value)?;
76-
write!(remaining, "{}{}", int_value, unit)?;
77-
parse_question_marks(input, remaining)
64+
Token::Dimension(..) => {
65+
parse_question_marks(input)
7866
}
79-
80-
Token::Number(ref value) => {
81-
// Require a '+' sign as part of the number
82-
let int_value = positive_integer_with_plus_sign(value)?;
83-
write!(remaining, "{}", int_value)?;
84-
67+
Token::Number(_) => {
8568
let after_number = input.position();
8669
match input.next_including_whitespace() {
87-
Ok(Token::Delim('?')) => {
88-
// If `remaining` is already full, `int_value` has too many digits
89-
// so we can use `result?` Rust syntax.
90-
remaining.write_all(b"?")?;
91-
parse_question_marks(input, remaining)
92-
}
93-
94-
Ok(Token::Dimension(ref value, ref unit)) => {
95-
// Require a '-' sign as part of the number
96-
let int_value = negative_integer(value)?;
97-
write!(remaining, "{}{}", int_value, unit)?
98-
}
99-
100-
Ok(Token::Number(ref value)) => {
101-
// Require a '-' sign as part of the number
102-
let int_value = negative_integer(value)?;
103-
write!(remaining, "{}", int_value)?
104-
}
105-
70+
Ok(Token::Delim('?')) => parse_question_marks(input),
71+
Ok(Token::Dimension(..)) => {}
72+
Ok(Token::Number(_)) => {}
10673
_ => input.reset(after_number)
10774
}
10875
}
109-
110-
_ => return Err(Error)
76+
_ => return Err(())
11177
}
11278
Ok(())
11379
}
11480

115-
/// Consume as many '?' as possible and write them to `remaining` until it’s full
116-
fn parse_question_marks(input: &mut Parser, remaining: &mut &mut [u8]) {
81+
/// Consume as many '?' as possible
82+
fn parse_question_marks(input: &mut Parser) {
11783
loop {
118-
let result = input.try(|input| {
119-
match input.next_including_whitespace() {
120-
Ok(Token::Delim('?')) => remaining.write_all(b"?").map_err(|_| ()),
121-
_ => Err(())
84+
let position = input.position();
85+
match input.next_including_whitespace() {
86+
Ok(Token::Delim('?')) => {}
87+
_ => {
88+
input.reset(position);
89+
return
12290
}
123-
});
124-
if result.is_err() {
125-
return
12691
}
12792
}
12893
}
12994

130-
fn positive_integer_with_plus_sign(value: &NumericValue) -> Result<i32, ()> {
131-
let int_value = value.int_value.ok_or(())?;
132-
if value.has_sign && int_value >= 0 {
133-
Ok(int_value)
134-
} else {
135-
Err(())
136-
}
137-
}
138-
139-
fn negative_integer(value: &NumericValue) -> Result<i32, ()> { // Necessarily had a negative sign.
140-
let int_value = value.int_value.ok_or(())?;
141-
if int_value <= 0 {
142-
Ok(int_value)
143-
} else {
144-
Err(())
145-
}
146-
}
147-
148-
fn parse_concatenated(mut text: &[u8]) -> Result<UnicodeRange, ()> {
95+
fn parse_concatenated(text: &[u8]) -> Result<UnicodeRange, ()> {
96+
let mut text = match text.split_first() {
97+
Some((&b'+', text)) => text,
98+
_ => return Err(())
99+
};
149100
let (first_hex_value, hex_digit_count) = consume_hex(&mut text);
150101
let question_marks = consume_question_marks(&mut text);
151102
let consumed = hex_digit_count + question_marks;
@@ -241,18 +192,3 @@ impl ToCss for UnicodeRange {
241192
Ok(())
242193
}
243194
}
244-
245-
/// Make conversions from io::Error implicit in `?` syntax.
246-
struct Error;
247-
248-
impl From<Error> for () {
249-
fn from(_: Error) -> Self { () }
250-
}
251-
252-
impl From<()> for Error {
253-
fn from(_: ()) -> Self { Error }
254-
}
255-
256-
impl From<io::Error> for Error {
257-
fn from(_: io::Error) -> Self { Error }
258-
}

0 commit comments

Comments
 (0)