Skip to content

Commit 067eab1

Browse files
committed
Add parsing and serialization for <urange>
https://drafts.csswg.org/css-syntax/#urange-syntax
1 parent 0550d9d commit 067eab1

File tree

2 files changed

+255
-0
lines changed

2 files changed

+255
-0
lines changed

src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ pub use color::{RGBA, Color, parse_color_keyword};
8585
pub use nth::parse_nth;
8686
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
8787
pub use parser::{Parser, Delimiter, Delimiters, SourcePosition};
88+
pub use unicode_range::UnicodeRange;
8889

8990

9091
/**
@@ -163,6 +164,7 @@ mod from_bytes;
163164
mod color;
164165
mod nth;
165166
mod serializer;
167+
mod unicode_range;
166168

167169
#[cfg(test)]
168170
mod tests;

src/unicode_range.rs

+253
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
/* This Source Code Form is subject to the terms of the Mozilla Public
2+
* License, v. 2.0. If a copy of the MPL was not distributed with this
3+
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4+
5+
//! https://drafts.csswg.org/css-syntax/#urange
6+
7+
use {Parser, ToCss};
8+
use std::char;
9+
use std::cmp;
10+
use std::fmt;
11+
use std::io::{self, Write};
12+
use tokenizer::{Token, NumericValue};
13+
14+
/// One contiguous range of code points.
///
/// Can not be empty. Can represent a single code point when `start == end`.
///
/// The common comparison, hashing, cloning and debug-formatting traits are
/// derived so that ranges can be compared in tests, stored in hashed
/// collections and printed in diagnostics.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct UnicodeRange {
    /// Inclusive start of the range. In [0, end].
    pub start: u32,

    /// Inclusive end of the range. In [0, 0x10FFFF].
    pub end: u32,
}
24+
25+
impl UnicodeRange {
26+
/// https://drafts.csswg.org/css-syntax/#urange-syntax
27+
pub fn parse(input: &mut Parser) -> Result<Self, ()> {
28+
// <urange> =
29+
// u '+' <ident-token> '?'* |
30+
// u <dimension-token> '?'* |
31+
// u <number-token> '?'* |
32+
// u <number-token> <dimension-token> |
33+
// u <number-token> <number-token> |
34+
// u '+' '?'+
35+
36+
input.expect_ident_matching("u")?;
37+
38+
// Since start or end can’t be above 0x10FFFF, they can’t have more than 6 hex digits
39+
// Conversely, input with more digits would end up returning Err anyway.
40+
const MAX_LENGTH_AFTER_U_PLUS: usize = 6 + 1 + 6; // 6 digits, '-', 6 digits
41+
let mut buffer = [0; MAX_LENGTH_AFTER_U_PLUS];
42+
43+
let remaining_len;
44+
{
45+
let mut remaining = &mut buffer[..];
46+
concatenate_tokens(input, &mut remaining)?;
47+
remaining_len = remaining.len();
48+
}
49+
50+
let text_len = buffer.len() - remaining_len;
51+
let text = &buffer[..text_len];
52+
let range = parse_concatenated(text)?;
53+
if range.end > char::MAX as u32 || range.start > range.end {
54+
Err(())
55+
} else {
56+
Ok(range)
57+
}
58+
}
59+
}
60+
61+
fn concatenate_tokens(input: &mut Parser, remaining: &mut &mut [u8]) -> Result<(), Error> {
62+
match input.next_including_whitespace()? {
63+
Token::Delim('+') => {
64+
match input.next_including_whitespace()? {
65+
Token::Ident(ident) => remaining.write_all(ident.as_bytes())?,
66+
Token::Delim('?') => remaining.write_all(b"?")?,
67+
_ => return Err(Error)
68+
}
69+
parse_question_marks(input, remaining)
70+
}
71+
72+
Token::Dimension(ref value, ref unit) => {
73+
// Require a '+' sign as part of the number
74+
let int_value = positive_integer_with_plus_sign(value)?;
75+
write!(remaining, "{}{}", int_value, unit)?;
76+
parse_question_marks(input, remaining)
77+
}
78+
79+
Token::Number(ref value) => {
80+
// Require a '+' sign as part of the number
81+
let int_value = positive_integer_with_plus_sign(value)?;
82+
write!(remaining, "{}", int_value)?;
83+
84+
match input.next_including_whitespace() {
85+
// EOF here is fine
86+
Err(()) => {},
87+
88+
Ok(Token::Delim('?')) => {
89+
// If `remaining` is already full, `int_value` has too many digits
90+
// so we can use `result?` Rust syntax.
91+
remaining.write_all(b"?")?;
92+
parse_question_marks(input, remaining)
93+
}
94+
95+
Ok(Token::Dimension(ref value, ref unit)) => {
96+
// Require a '-' sign as part of the number
97+
let int_value = negative_integer(value)?;
98+
write!(remaining, "{}{}", int_value, unit)?
99+
}
100+
101+
Ok(Token::Number(ref value)) => {
102+
// Require a '-' sign as part of the number
103+
let int_value = negative_integer(value)?;
104+
write!(remaining, "{}", int_value)?
105+
}
106+
107+
_ => return Err(Error)
108+
}
109+
}
110+
111+
_ => return Err(Error)
112+
}
113+
Ok(())
114+
}
115+
116+
/// Consume as many '?' as possible and write them to `remaining` until it’s full
117+
fn parse_question_marks(input: &mut Parser, remaining: &mut &mut [u8]) {
118+
loop {
119+
let result = input.try(|input| {
120+
match input.next_including_whitespace() {
121+
Ok(Token::Delim('?')) => remaining.write_all(b"?").map_err(|_| ()),
122+
_ => Err(())
123+
}
124+
});
125+
if result.is_err() {
126+
return
127+
}
128+
}
129+
}
130+
131+
fn positive_integer_with_plus_sign(value: &NumericValue) -> Result<i32, ()> {
132+
let int_value = value.int_value.ok_or(())?;
133+
if value.has_sign && int_value >= 0 {
134+
Ok(int_value)
135+
} else {
136+
Err(())
137+
}
138+
}
139+
140+
fn negative_integer(value: &NumericValue) -> Result<i32, ()> { // Necessarily had a negative sign.
141+
let int_value = value.int_value.ok_or(())?;
142+
if int_value <= 0 {
143+
Ok(int_value)
144+
} else {
145+
Err(())
146+
}
147+
}
148+
149+
fn parse_concatenated(mut text: &[u8]) -> Result<UnicodeRange, ()> {
150+
let (first_hex_value, hex_digit_count) = consume_hex(&mut text);
151+
let question_marks = consume_question_marks(&mut text);
152+
let consumed = hex_digit_count + question_marks;
153+
if consumed == 0 || consumed > 6 {
154+
return Err(())
155+
}
156+
157+
if question_marks > 0 {
158+
if text.is_empty() {
159+
return Ok(UnicodeRange {
160+
start: first_hex_value << (question_marks * 4),
161+
end: ((first_hex_value + 1) << (question_marks * 4)) - 1,
162+
})
163+
}
164+
} else if text.is_empty() {
165+
return Ok(UnicodeRange {
166+
start: first_hex_value,
167+
end: first_hex_value,
168+
})
169+
} else {
170+
if let Some((&b'-', mut text)) = text.split_first() {
171+
let (second_hex_value, hex_digit_count) = consume_hex(&mut text);
172+
if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() {
173+
return Ok(UnicodeRange {
174+
start: first_hex_value,
175+
end: second_hex_value,
176+
})
177+
}
178+
}
179+
}
180+
Err(())
181+
}
182+
183+
/// Consume the leading run of ASCII hex digits (either case) from `text`.
///
/// Returns `(value, digit_count)` and advances `text` past the digits that
/// were consumed. With no leading hex digit the result is `(0, 0)` and
/// `text` is left untouched.
///
/// The value saturates at `u32::MAX` instead of overflowing: a `u32` holds
/// only eight hex digits, and plain arithmetic would panic in
/// overflow-checked builds on a longer run. Runs longer than eight digits
/// therefore do not yield a meaningful value; `digit_count` is always exact.
fn consume_hex(text: &mut &[u8]) -> (u32, usize) {
    let mut value: u32 = 0;
    let mut digits = 0;
    while let Some((&byte, rest)) = text.split_first() {
        match (byte as char).to_digit(16) {
            Some(digit_value) => {
                value = value.saturating_mul(0x10).saturating_add(digit_value);
                digits += 1;
                *text = rest;
            }
            None => break,
        }
    }
    (value, digits)
}
197+
198+
/// Strip the leading run of `?` bytes from `text` and return how many there
/// were. `text` is left untouched when it does not start with `?`.
fn consume_question_marks(text: &mut &[u8]) -> usize {
    // Copy the inner slice out so the new sub-slice keeps the full lifetime.
    let bytes: &[u8] = *text;
    let count = bytes.iter().take_while(|&&byte| byte == b'?').count();
    *text = &bytes[count..];
    count
}
206+
207+
impl ToCss for UnicodeRange {
208+
fn to_css<W>(&self, dest: &mut W) -> fmt::Result where W: fmt::Write {
209+
dest.write_str("U+")?;
210+
211+
// How many bits are 0 at the end of start and also 1 at the end of end.
212+
let bits = cmp::min(self.start.trailing_zeros(), (!self.end).trailing_zeros());
213+
214+
let question_marks = bits / 4;
215+
216+
// How many lower bits can be represented as question marks
217+
let bits = question_marks * 4;
218+
219+
let truncated_start = self.start >> bits;
220+
let truncated_end = self.end >> bits;
221+
if truncated_start == truncated_end {
222+
// Bits not covered by question marks are the same in start and end,
223+
// we can use the question mark syntax.
224+
if truncated_start != 0 {
225+
write!(dest, "{:X}", truncated_start)?;
226+
}
227+
for _ in 0..question_marks {
228+
dest.write_str("?")?;
229+
}
230+
} else {
231+
write!(dest, "{:X}", self.start)?;
232+
if self.end != self.start {
233+
write!(dest, "-{:X}", self.end)?;
234+
}
235+
}
236+
Ok(())
237+
}
238+
}
239+
240+
/// Private unit error whose only job is to let `?` convert between `()`,
/// `io::Error` and itself implicitly.
struct Error;

/// Lets a function returning `Result<_, ()>` use `?` on a `Result<_, Error>`.
impl From<Error> for () {
    fn from(_error: Error) -> () {}
}

/// Lets a function returning `Result<_, Error>` use `?` on a `Result<_, ()>`.
impl From<()> for Error {
    fn from(_unit: ()) -> Error {
        Error
    }
}

/// Lets a function returning `Result<_, Error>` use `?` on an `io::Result`;
/// the details of the I/O error are discarded.
impl From<io::Error> for Error {
    fn from(_io_error: io::Error) -> Error {
        Error
    }
}

0 commit comments

Comments
 (0)