Skip to content

Commit 1080ea7

Browse files
committed
Use the css parser to consume an identifier.
Needs servo/rust-cssparser#249
1 parent 3060cf2 commit 1080ea7

File tree

3 files changed: 55 additions and 65 deletions.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ edition = "2018"
66
description = "Parser https://drafts.css-houdini.org/css-properties-values-api-1/#parsing-syntax"
77

88
[dependencies]
9+
cssparser = "0.25.6" # Need https://github.com/servo/rust-cssparser/pull/249

src/ascii.rs

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,18 @@
22

33
/// Trims ascii whitespace characters from a slice, and returns the trimmed
44
/// input.
5-
pub fn trim_ascii_whitespace(input: &[u8]) -> &[u8] {
5+
pub fn trim_ascii_whitespace(input: &str) -> &str {
66
if input.is_empty() {
77
return input;
88
}
99

1010
let mut start = 0;
1111
{
12-
let mut iter = input.iter();
12+
let mut iter = input.as_bytes().iter();
1313
loop {
1414
let byte = match iter.next() {
1515
Some(b) => b,
16-
None => return &[],
16+
None => return "",
1717
};
1818

1919
if !byte.is_ascii_whitespace() {
@@ -26,13 +26,13 @@ pub fn trim_ascii_whitespace(input: &[u8]) -> &[u8] {
2626
let mut end = input.len();
2727
assert!(start < end);
2828
{
29-
let mut iter = input[start..].iter().rev();
29+
let mut iter = input.as_bytes()[start..].iter().rev();
3030
loop {
3131
let byte = match iter.next() {
3232
Some(b) => b,
3333
None => {
3434
debug_assert!(false, "We should have caught this in the loop above!");
35-
return &[];
35+
return "";
3636
},
3737
};
3838

@@ -49,10 +49,7 @@ pub fn trim_ascii_whitespace(input: &[u8]) -> &[u8] {
4949
#[test]
5050
fn trim_ascii_whitespace_test() {
5151
fn test(i: &str, o: &str) {
52-
assert_eq!(
53-
trim_ascii_whitespace(i.as_bytes()),
54-
o.as_bytes(),
55-
)
52+
assert_eq!(trim_ascii_whitespace(i), o)
5653
}
5754

5855
test("", "");

src/lib.rs

Lines changed: 48 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pub enum ParseError {
1818
UnexpectedPipe,
1919
InvalidCustomIdent,
2020
InvalidNameStart,
21-
EmptyName,
21+
InvalidName,
2222
UnclosedDataTypeName,
2323
UnknownDataTypeName,
2424
}
@@ -63,22 +63,21 @@ impl Component {
6363
}
6464

6565
#[derive(Clone, Debug, PartialEq)]
66-
pub struct CustomIdent(Box<[u8]>);
66+
pub struct CustomIdent(Box<str>);
6767

6868
impl CustomIdent {
69-
fn from_bytes(ident: &[u8]) -> Result<Self, ParseError> {
70-
if ident.eq_ignore_ascii_case(b"inherit") ||
71-
ident.eq_ignore_ascii_case(b"reset") ||
72-
ident.eq_ignore_ascii_case(b"revert") ||
73-
ident.eq_ignore_ascii_case(b"unset") ||
74-
ident.eq_ignore_ascii_case(b"default") {
75-
return Err(ParseError::InvalidCustomIdent);
69+
fn from_ident(ident: &str) -> Result<Self, ()> {
70+
if ident.eq_ignore_ascii_case("inherit") ||
71+
ident.eq_ignore_ascii_case("reset") ||
72+
ident.eq_ignore_ascii_case("revert") ||
73+
ident.eq_ignore_ascii_case("unset") ||
74+
ident.eq_ignore_ascii_case("default") {
75+
return Err(());
7676
}
77-
Ok(CustomIdent(ident.to_vec().into_boxed_slice()))
77+
Ok(CustomIdent(ident.to_owned().into_boxed_str()))
7878
}
7979
}
8080

81-
8281
#[derive(Clone, Debug, PartialEq)]
8382
pub enum ComponentName {
8483
DataType(DataType),
@@ -153,7 +152,6 @@ impl DataType {
153152

154153
/// Parse a syntax descriptor or universal syntax descriptor.
155154
pub fn parse_descriptor(input: &str) -> Result<Descriptor, ParseError> {
156-
let input = input.as_bytes();
157155
// 1. Strip leading and trailing ASCII whitespace from string.
158156
let input = ascii::trim_ascii_whitespace(input);
159157

@@ -164,7 +162,7 @@ pub fn parse_descriptor(input: &str) -> Result<Descriptor, ParseError> {
164162

165163
// 3. If string's length is 1, and the only code point in string is U+002A
166164
// ASTERISK (*), return the universal syntax descriptor.
167-
if input.len() == 1 && input[0] == b'*' {
165+
if input.len() == 1 && input.as_bytes()[0] == b'*' {
168166
return Ok(Descriptor::universal());
169167
}
170168

@@ -184,7 +182,7 @@ pub fn parse_descriptor(input: &str) -> Result<Descriptor, ParseError> {
184182
}
185183

186184
struct Parser<'a, 'b> {
187-
input: &'a [u8],
185+
input: &'a str,
188186
position: usize,
189187
output: &'b mut Vec<Component>,
190188
}
@@ -211,26 +209,13 @@ fn is_non_ascii(byte: u8) -> bool {
211209
byte >= 0x80
212210
}
213211

214-
/// https://drafts.csswg.org/css-syntax-3/#digit
215-
fn is_digit(byte: u8) -> bool {
216-
match byte {
217-
b'0'...b'9' => true,
218-
_ => false,
219-
}
220-
}
221-
222212
/// https://drafts.csswg.org/css-syntax-3/#name-start-code-point
223213
fn is_name_start(byte: u8) -> bool {
224214
is_letter(byte) || is_non_ascii(byte) || byte == b'_'
225215
}
226216

227-
/// https://drafts.csswg.org/css-syntax-3/#name-code-point
228-
fn is_name(byte: u8) -> bool {
229-
is_name_start(byte) || is_digit(byte) || byte == b'-'
230-
}
231-
232217
impl<'a, 'b> Parser<'a, 'b> {
233-
fn new(input: &'a [u8], output: &'b mut Vec<Component>) -> Self {
218+
fn new(input: &'a str, output: &'b mut Vec<Component>) -> Self {
234219
Self {
235220
input,
236221
position: 0,
@@ -239,7 +224,7 @@ impl<'a, 'b> Parser<'a, 'b> {
239224
}
240225

241226
fn peek(&self) -> Option<u8> {
242-
self.input.get(self.position).cloned()
227+
self.input.as_bytes().get(self.position).cloned()
243228
}
244229

245230
fn parse(&mut self) -> Result<(), ParseError> {
@@ -298,9 +283,10 @@ impl<'a, 'b> Parser<'a, 'b> {
298283
None => return Err(ParseError::UnclosedDataTypeName),
299284
};
300285
if byte != b'>' {
286+
self.position += 1;
301287
continue;
302288
}
303-
let ty = match DataType::from_bytes(&self.input[start..self.position]) {
289+
let ty = match DataType::from_bytes(&self.input.as_bytes()[start..self.position]) {
304290
Some(ty) => ty,
305291
None => return Err(ParseError::UnknownDataTypeName),
306292
};
@@ -309,27 +295,6 @@ impl<'a, 'b> Parser<'a, 'b> {
309295
}
310296
}
311297

312-
/// https://drafts.csswg.org/css-syntax-3/#consume-a-name
313-
/// FIXME(emilio): This should actually use cssparser's consume_name
314-
/// to handle correctly escaping and nulls.
315-
fn consume_name(&mut self) -> &'a [u8] {
316-
let start = self.position;
317-
318-
loop {
319-
let byte = match self.peek() {
320-
None => return &self.input[start..],
321-
Some(b) => b,
322-
};
323-
324-
if !is_name(byte) {
325-
break;
326-
}
327-
self.position += 1;
328-
}
329-
330-
&self.input[start..self.position]
331-
}
332-
333298
fn parse_name(&mut self) -> Result<ComponentName, ParseError> {
334299
let b = match self.peek() {
335300
Some(b) => b,
@@ -345,11 +310,19 @@ impl<'a, 'b> Parser<'a, 'b> {
345310
return Err(ParseError::InvalidNameStart);
346311
}
347312

348-
let name = self.consume_name();
349-
if name.is_empty() {
350-
return Err(ParseError::EmptyName);
351-
}
352-
return Ok(ComponentName::Ident(CustomIdent::from_bytes(name)?))
313+
let input = &self.input[self.position..];
314+
let mut input = cssparser::ParserInput::new(input);
315+
let mut input = cssparser::Parser::new(&mut input);
316+
let name = input
317+
.expect_ident()
318+
.map_err(|_| ())
319+
.and_then(|name| CustomIdent::from_ident(name.as_ref()));
320+
let name = match name {
321+
Ok(name) => name,
322+
Err(..) => return Err(ParseError::InvalidName),
323+
};
324+
self.position += input.position().byte_index();
325+
return Ok(ComponentName::Ident(name))
353326
}
354327

355328
fn parse_multiplier(&mut self) -> Option<Multiplier> {
@@ -382,3 +355,22 @@ fn universal() {
382355
assert_eq!(parse_descriptor(syntax), Ok(Descriptor::universal()));
383356
}
384357
}
358+
359+
#[test]
360+
fn simple_length() {
361+
macro_rules! ident {
362+
($str:expr) => {
363+
ComponentName::Ident(CustomIdent::from_ident($str).unwrap())
364+
}
365+
}
366+
assert_eq!(parse_descriptor("foo <length>#"), Ok(Descriptor(Box::new([
367+
Component {
368+
name: ident!("foo"),
369+
multiplier: None,
370+
},
371+
Component {
372+
name: ComponentName::DataType(DataType::Length),
373+
multiplier: Some(Multiplier::Comma),
374+
},
375+
]))))
376+
}

0 commit comments

Comments
 (0)