Skip to content

Commit 2ec91db

Browse files
committed
Add parsing and serialization for <urange>
https://drafts.csswg.org/css-syntax/#urange-syntax
1 parent 46e0e80 commit 2ec91db

File tree

2 files changed

+262
-0
lines changed

2 files changed

+262
-0
lines changed

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ pub use color::{RGBA, Color, parse_color_keyword};
8585
pub use nth::parse_nth;
8686
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
8787
pub use parser::{Parser, Delimiter, Delimiters, SourcePosition};
88+
pub use unicode_range::UnicodeRange;
8889

8990

9091
/**
@@ -163,6 +164,7 @@ mod from_bytes;
163164
mod color;
164165
mod nth;
165166
mod serializer;
167+
mod unicode_range;
166168

167169
#[cfg(test)]
168170
mod tests;

src/unicode_range.rs

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
/* This Source Code Form is subject to the terms of the Mozilla Public
2+
* License, v. 2.0. If a copy of the MPL was not distributed with this
3+
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4+
5+
//! https://drafts.csswg.org/css-syntax/#urange
6+
7+
use {Parser, ToCss};
8+
use std::char;
9+
use std::cmp;
10+
use std::fmt;
11+
use std::io::{self, Write};
12+
use tokenizer::{Token, NumericValue};
13+
14+
/// A single contiguous, non-empty range of code points.
///
/// Both bounds are inclusive, so a lone code point is stored with
/// `start == end`.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct UnicodeRange {
    /// First code point of the range (inclusive). In [0, end].
    pub start: u32,

    /// Last code point of the range (inclusive). In [0, 0x10FFFF].
    pub end: u32,
}
25+
26+
impl UnicodeRange {
27+
/// https://drafts.csswg.org/css-syntax/#urange-syntax
28+
pub fn parse(input: &mut Parser) -> Result<Self, ()> {
29+
// <urange> =
30+
// u '+' <ident-token> '?'* |
31+
// u <dimension-token> '?'* |
32+
// u <number-token> '?'* |
33+
// u <number-token> <dimension-token> |
34+
// u <number-token> <number-token> |
35+
// u '+' '?'+
36+
37+
input.expect_ident_matching("u")?;
38+
39+
// Since start or end can’t be above 0x10FFFF, they can’t have more than 6 hex digits
40+
// Conversely, input with more digits would end up returning Err anyway.
41+
const MAX_LENGTH_AFTER_U_PLUS: usize = 6 + 1 + 6; // 6 digits, '-', 6 digits
42+
let mut buffer = [0; MAX_LENGTH_AFTER_U_PLUS];
43+
44+
let remaining_len;
45+
{
46+
let mut remaining = &mut buffer[..];
47+
concatenate_tokens(input, &mut remaining)?;
48+
remaining_len = remaining.len();
49+
}
50+
51+
let text_len = buffer.len() - remaining_len;
52+
let text = &buffer[..text_len];
53+
let range = parse_concatenated(text)?;
54+
if range.end > char::MAX as u32 || range.start > range.end {
55+
Err(())
56+
} else {
57+
Ok(range)
58+
}
59+
}
60+
}
61+
62+
fn concatenate_tokens(input: &mut Parser, remaining: &mut &mut [u8]) -> Result<(), Error> {
63+
match input.next_including_whitespace()? {
64+
Token::Delim('+') => {
65+
match input.next_including_whitespace()? {
66+
Token::Ident(ident) => remaining.write_all(ident.as_bytes())?,
67+
Token::Delim('?') => remaining.write_all(b"?")?,
68+
_ => return Err(Error)
69+
}
70+
parse_question_marks(input, remaining)
71+
}
72+
73+
Token::Dimension(ref value, ref unit) => {
74+
// Require a '+' sign as part of the number
75+
let int_value = positive_integer_with_plus_sign(value)?;
76+
write!(remaining, "{}{}", int_value, unit)?;
77+
parse_question_marks(input, remaining)
78+
}
79+
80+
Token::Number(ref value) => {
81+
// Require a '+' sign as part of the number
82+
let int_value = positive_integer_with_plus_sign(value)?;
83+
write!(remaining, "{}", int_value)?;
84+
85+
match input.next_including_whitespace() {
86+
// EOF here is fine
87+
Err(()) => {},
88+
89+
Ok(Token::Delim('?')) => {
90+
// If `remaining` is already full, `int_value` has too many digits
91+
// so we can use `result?` Rust syntax.
92+
remaining.write_all(b"?")?;
93+
parse_question_marks(input, remaining)
94+
}
95+
96+
Ok(Token::Dimension(ref value, ref unit)) => {
97+
// Require a '-' sign as part of the number
98+
let int_value = negative_integer(value)?;
99+
write!(remaining, "{}{}", int_value, unit)?
100+
}
101+
102+
Ok(Token::Number(ref value)) => {
103+
// Require a '-' sign as part of the number
104+
let int_value = negative_integer(value)?;
105+
write!(remaining, "{}", int_value)?
106+
}
107+
108+
_ => return Err(Error)
109+
}
110+
}
111+
112+
_ => return Err(Error)
113+
}
114+
Ok(())
115+
}
116+
117+
/// Consume as many '?' as possible and write them to `remaining` until it’s full
118+
fn parse_question_marks(input: &mut Parser, remaining: &mut &mut [u8]) {
119+
loop {
120+
let result = input.try(|input| {
121+
match input.next_including_whitespace() {
122+
Ok(Token::Delim('?')) => remaining.write_all(b"?").map_err(|_| ()),
123+
_ => Err(())
124+
}
125+
});
126+
if result.is_err() {
127+
return
128+
}
129+
}
130+
}
131+
132+
fn positive_integer_with_plus_sign(value: &NumericValue) -> Result<i32, ()> {
133+
let int_value = value.int_value.ok_or(())?;
134+
if value.has_sign && int_value >= 0 {
135+
Ok(int_value)
136+
} else {
137+
Err(())
138+
}
139+
}
140+
141+
fn negative_integer(value: &NumericValue) -> Result<i32, ()> { // Necessarily had a negative sign.
142+
let int_value = value.int_value.ok_or(())?;
143+
if int_value <= 0 {
144+
Ok(int_value)
145+
} else {
146+
Err(())
147+
}
148+
}
149+
150+
fn parse_concatenated(mut text: &[u8]) -> Result<UnicodeRange, ()> {
151+
let (first_hex_value, hex_digit_count) = consume_hex(&mut text);
152+
let question_marks = consume_question_marks(&mut text);
153+
let consumed = hex_digit_count + question_marks;
154+
if consumed == 0 || consumed > 6 {
155+
return Err(())
156+
}
157+
158+
if question_marks > 0 {
159+
if text.is_empty() {
160+
return Ok(UnicodeRange {
161+
start: first_hex_value << (question_marks * 4),
162+
end: ((first_hex_value + 1) << (question_marks * 4)) - 1,
163+
})
164+
}
165+
} else if text.is_empty() {
166+
return Ok(UnicodeRange {
167+
start: first_hex_value,
168+
end: first_hex_value,
169+
})
170+
} else {
171+
if let Some((&b'-', mut text)) = text.split_first() {
172+
let (second_hex_value, hex_digit_count) = consume_hex(&mut text);
173+
if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() {
174+
return Ok(UnicodeRange {
175+
start: first_hex_value,
176+
end: second_hex_value,
177+
})
178+
}
179+
}
180+
}
181+
Err(())
182+
}
183+
184+
/// Consume the leading run of ASCII hex digits from `text`.
///
/// Advances `text` past the digits and returns `(value, digit_count)`.
/// When there is no leading hex digit, `text` is left untouched and the
/// result is `(0, 0)`.
///
/// The value saturates at `u32::MAX` instead of overflowing when the run is
/// longer than 8 digits. Callers are expected to reject such input based on
/// `digit_count`.
fn consume_hex(text: &mut &[u8]) -> (u32, usize) {
    let mut value: u32 = 0;
    let mut digits = 0;
    while let Some((&byte, rest)) = text.split_first() {
        match (byte as char).to_digit(16) {
            Some(digit_value) => {
                // Saturating arithmetic: the buffer may hold more than the
                // 8 hex digits that fit in a u32, and plain `*`/`+` would
                // panic in overflow-checked builds.
                value = value.saturating_mul(0x10).saturating_add(digit_value);
                digits += 1;
                *text = rest
            }
            None => break,
        }
    }
    (value, digits)
}
198+
199+
/// Consume the leading run of `?` bytes from `text`.
///
/// Advances `text` past them and returns how many were consumed.
fn consume_question_marks(text: &mut &[u8]) -> usize {
    let slice: &[u8] = *text;
    let count = slice.iter().take_while(|&&byte| byte == b'?').count();
    *text = &slice[count..];
    count
}
207+
208+
impl fmt::Debug for UnicodeRange {
209+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
210+
self.to_css(formatter)
211+
}
212+
}
213+
214+
impl ToCss for UnicodeRange {
215+
fn to_css<W>(&self, dest: &mut W) -> fmt::Result where W: fmt::Write {
216+
dest.write_str("U+")?;
217+
218+
// How many bits are 0 at the end of start and also 1 at the end of end.
219+
let bits = cmp::min(self.start.trailing_zeros(), (!self.end).trailing_zeros());
220+
221+
let question_marks = bits / 4;
222+
223+
// How many lower bits can be represented as question marks
224+
let bits = question_marks * 4;
225+
226+
let truncated_start = self.start >> bits;
227+
let truncated_end = self.end >> bits;
228+
if truncated_start == truncated_end {
229+
// Bits not covered by question marks are the same in start and end,
230+
// we can use the question mark syntax.
231+
if truncated_start != 0 {
232+
write!(dest, "{:X}", truncated_start)?;
233+
}
234+
for _ in 0..question_marks {
235+
dest.write_str("?")?;
236+
}
237+
} else {
238+
write!(dest, "{:X}", self.start)?;
239+
if self.end != self.start {
240+
write!(dest, "-{:X}", self.end)?;
241+
}
242+
}
243+
Ok(())
244+
}
245+
}
246+
247+
/// Private unit error used so that `?` can implicitly convert both `()` and
/// `io::Error` failures, and be converted back to `()` at the public boundary.
struct Error;

/// `Error` -> `()`: lets `?` on a `Result<_, Error>` work in functions
/// returning `Result<_, ()>`.
impl From<Error> for () {
    fn from(_: Error) -> () {}
}

/// `()` -> `Error`: lets `?` on a `Result<_, ()>` work in functions
/// returning `Result<_, Error>`.
impl From<()> for Error {
    fn from(_: ()) -> Error {
        Error
    }
}

/// `io::Error` -> `Error`: the details of the I/O error are discarded.
impl From<io::Error> for Error {
    fn from(_: io::Error) -> Error {
        Error
    }
}

0 commit comments

Comments
 (0)