
Commit 5e9c5cb

Author: bors-servo
Auto merge of servo#168 - servo:foo, r=SimonSapin
Parser changes for Gecko integration. This is a grab bag of changes that were important for getting Stylo tests passing.
2 parents: 1373dac + c6156c0

6 files changed: +97 lines, -47 lines

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 [package]

 name = "cssparser"
-version = "0.16.1"
+version = "0.17.0"
 authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

 description = "Rust implementation of CSS Syntax Level 3"

src/parser.rs

Lines changed: 12 additions & 8 deletions
@@ -25,12 +25,12 @@ pub struct SourcePosition {
 pub enum BasicParseError<'a> {
     /// An unexpected token was encountered.
     UnexpectedToken(Token<'a>),
-    /// A particular token was expected but not found.
-    ExpectedToken(Token<'a>),
     /// The end of the input was encountered unexpectedly.
     EndOfInput,
     /// An `@` rule was encountered that was invalid.
-    AtRuleInvalid,
+    AtRuleInvalid(CompactCowStr<'a>),
+    /// The body of an '@' rule was invalid.
+    AtRuleBodyInvalid,
     /// A qualified rule was encountered that was invalid.
     QualifiedRuleInvalid,
 }
@@ -188,6 +188,11 @@ impl<'i: 't, 't> Parser<'i, 't> {
         }
     }

+    /// Return the current line that is being parsed.
+    pub fn current_line(&self) -> &'i str {
+        self.tokenizer.0.current_source_line()
+    }
+
     /// Check whether the input is exhausted. That is, if `.next()` would return a token.
     ///
     /// This ignores whitespace and comments.
@@ -357,9 +362,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
     #[inline]
     pub fn parse_entirely<F, T, E>(&mut self, parse: F) -> Result<T, ParseError<'i, E>>
         where F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ParseError<'i, E>> {
-        let result = parse(self);
+        let result = parse(self)?;
         self.expect_exhausted()?;
-        result
+        Ok(result)
     }

     /// Parse a list of comma-separated values, all with the same syntax.
@@ -482,8 +487,7 @@ impl<'i: 't, 't> Parser<'i, 't> {
         match self.next()? {
             Token::UnquotedUrl(value) => Ok(value),
             Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
-                self.parse_nested_block(|input| input.expect_string()
-                    .map_err(|e| ParseError::Basic(e)))
+                self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic))
                     .map_err(ParseError::<()>::basic)
             },
             t => Err(BasicParseError::UnexpectedToken(t))
@@ -497,7 +501,7 @@ impl<'i: 't, 't> Parser<'i, 't> {
             Token::UnquotedUrl(value) => Ok(value),
             Token::QuotedString(value) => Ok(value),
             Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
-                self.parse_nested_block(|input| input.expect_string().map_err(|e| ParseError::Basic(e)))
+                self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic))
                     .map_err(ParseError::<()>::basic)
             },
             t => Err(BasicParseError::UnexpectedToken(t))
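
Two of these changes are visible to callers: `parse_entirely` now propagates the closure's own error (via `parse(self)?`) instead of discarding it, and `current_line()` exposes the raw text of the line being parsed. A minimal sketch of how a caller might use both, assuming the post-merge API as it appears in this commit (`ParserInput`, `Parser::new`, tokens returned by value from `next`):

    use cssparser::{ParseError, Parser, ParserInput, Token};

    fn main() {
        let mut input = ParserInput::new("10px junk");
        let mut parser = Parser::new(&mut input);

        // parse_entirely runs the closure, then checks that all input was
        // consumed; the stray `junk` ident makes that check fail here.
        let result: Result<Token, ParseError<()>> =
            parser.parse_entirely(|p| p.next().map_err(ParseError::Basic));
        assert!(result.is_err());

        // current_line() returns the whole source line, handy for diagnostics.
        assert_eq!(parser.current_line(), "10px junk");
    }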

src/rules_and_declarations.rs

Lines changed: 8 additions & 10 deletions
@@ -116,7 +116,7 @@ pub trait AtRuleParser<'i> {
         -> Result<AtRuleType<Self::Prelude, Self::AtRule>, ParseError<'i, Self::Error>> {
         let _ = name;
         let _ = input;
-        Err(ParseError::Basic(BasicParseError::AtRuleInvalid))
+        Err(ParseError::Basic(BasicParseError::AtRuleInvalid(name)))
     }

     /// Parse the content of a `{ /* ... */ }` block for the body of the at-rule.
@@ -131,7 +131,7 @@ pub trait AtRuleParser<'i> {
         -> Result<Self::AtRule, ParseError<'i, Self::Error>> {
         let _ = prelude;
         let _ = input;
-        Err(ParseError::Basic(BasicParseError::AtRuleInvalid))
+        Err(ParseError::Basic(BasicParseError::AtRuleBodyInvalid))
     }

     /// An `OptionalBlock` prelude was followed by `;`.
@@ -257,9 +257,9 @@ where P: DeclarationParser<'i, Declaration = I, Error = E> +
             Ok(Token::AtKeyword(name)) => {
                 return Some(parse_at_rule(start_position, name, self.input, &mut self.parser))
             }
-            Ok(_) => {
+            Ok(t) => {
                 return Some(self.input.parse_until_after(Delimiter::Semicolon,
-                    |_| Err(ParseError::Basic(BasicParseError::ExpectedToken(Token::Semicolon))))
+                    |_| Err(ParseError::Basic(BasicParseError::UnexpectedToken(t))))
                     .map_err(|e| PreciseParseError {
                         error: e,
                         span: start_position..self.input.position()
@@ -462,16 +462,14 @@ fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: Compact
                 _ => unreachable!()
             }
         }
-        Err(_) => {
+        Err(error) => {
             let end_position = input.position();
-            let error = match input.next() {
-                Ok(Token::CurlyBracketBlock) => BasicParseError::UnexpectedToken(Token::CurlyBracketBlock),
-                Ok(Token::Semicolon) => BasicParseError::UnexpectedToken(Token::Semicolon),
-                Err(e) => e,
+            match input.next() {
+                Ok(Token::CurlyBracketBlock) | Ok(Token::Semicolon) | Err(_) => {},
                 _ => unreachable!()
             };
             Err(PreciseParseError {
-                error: ParseError::Basic(error),
+                error: error,
                 span: start_position..end_position,
             })
         }
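
Since the default prelude rejection now carries the at-rule's name, a consumer can report which at-rule was rejected. A hedged sketch of a consumer-side message formatter (`describe` is a hypothetical helper, not part of the crate), assuming `CompactCowStr` derefs to `str`:

    use cssparser::{BasicParseError, ParseError};

    // Hypothetical helper: render a parse error as a human-readable message.
    fn describe(error: &ParseError<()>) -> String {
        match *error {
            // The rejected at-rule's name now travels with the error.
            ParseError::Basic(BasicParseError::AtRuleInvalid(ref name)) =>
                format!("unsupported at-rule: @{}", &**name),
            ParseError::Basic(BasicParseError::AtRuleBodyInvalid) =>
                "invalid at-rule body".to_owned(),
            _ => "parse error".to_owned(),
        }
    }

    fn main() {
        let err: ParseError<()> =
            ParseError::Basic(BasicParseError::AtRuleInvalid("media".into()));
        assert_eq!(describe(&err), "unsupported at-rule: @media");
    }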

src/serializer.rs

Lines changed: 4 additions & 4 deletions
@@ -129,8 +129,8 @@ impl<'a> ToCss for Token<'a> {
             Token::SquareBracketBlock => dest.write_str("[")?,
             Token::CurlyBracketBlock => dest.write_str("{")?,

-            Token::BadUrl => dest.write_str("url(<bad url>)")?,
-            Token::BadString => dest.write_str("\"<bad string>\n")?,
+            Token::BadUrl(_) => dest.write_str("url(<bad url>)")?,
+            Token::BadString(_) => dest.write_str("\"<bad string>\n")?,
             Token::CloseParenthesis => dest.write_str(")")?,
             Token::CloseSquareBracket => dest.write_str("]")?,
             Token::CloseCurlyBracket => dest.write_str("}")?,
@@ -376,7 +376,7 @@ impl<'a> Token<'a> {
         TokenSerializationType(match *self {
             Token::Ident(_) => Ident,
             Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
-            Token::UnquotedUrl(_) | Token::BadUrl => UrlOrBadUrl,
+            Token::UnquotedUrl(_) | Token::BadUrl(_) => UrlOrBadUrl,
             Token::Delim('#') => DelimHash,
             Token::Delim('@') => DelimAt,
             Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
@@ -400,7 +400,7 @@ impl<'a> Token<'a> {
             Token::ParenthesisBlock => OpenParen,
             Token::SquareBracketBlock | Token::CurlyBracketBlock |
             Token::CloseParenthesis | Token::CloseSquareBracket | Token::CloseCurlyBracket |
-            Token::QuotedString(_) | Token::BadString |
+            Token::QuotedString(_) | Token::BadString(_) |
             Token::Delim(_) | Token::Colon | Token::Semicolon | Token::Comma | Token::CDO |
             Token::IncludeMatch | Token::PrefixMatch | Token::SuffixMatch
                 => Other,
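
The new payloads do not change serialization: `ToCss` still writes the same placeholder text and ignores the carried slice. A quick sketch, assuming the `to_css_string` helper from the `ToCss` trait:

    use cssparser::{ToCss, Token};

    fn main() {
        // The carried slice ("oops") is ignored when serializing;
        // the output is unchanged from before this commit.
        assert_eq!(Token::BadUrl("oops".into()).to_css_string(), "url(<bad url>)");
        assert_eq!(Token::BadString("oops".into()).to_css_string(), "\"<bad string>\n");
    }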

src/tests.rs

Lines changed: 40 additions & 11 deletions
@@ -451,26 +451,26 @@ fn serialize_rgba_two_digit_float_if_roundtrips() {
 fn line_numbers() {
     let mut input = ParserInput::new("foo bar\nbaz\r\n\n\"a\\\r\nb\"");
     let mut input = Parser::new(&mut input);
-    assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 1 });
+    assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 0 });
     assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("foo".into())));
-    assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 4 });
+    assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 3 });
     assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace(" ")));
-    assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 5 });
+    assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 4 });
     assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("bar".into())));
-    assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 8 });
+    assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 7 });
     assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\n")));
-    assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 1 });
+    assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 0 });
     assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("baz".into())));
-    assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 4 });
+    assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 3 });
     let position = input.position();

     assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\r\n\n")));
-    assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 1 });
+    assert_eq!(input.current_source_location(), SourceLocation { line: 3, column: 0 });

-    assert_eq!(input.source_location(position), SourceLocation { line: 2, column: 4 });
+    assert_eq!(input.source_location(position), SourceLocation { line: 1, column: 3 });

     assert_eq!(input.next_including_whitespace(), Ok(Token::QuotedString("ab".into())));
-    assert_eq!(input.current_source_location(), SourceLocation { line: 5, column: 3 });
+    assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 2 });
     assert!(input.next_including_whitespace().is_err());
 }

@@ -848,8 +848,8 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json {
                 v.extend(nested(input));
                 v
             }),
-            Token::BadUrl => JArray!["error", "bad-url"],
-            Token::BadString => JArray!["error", "bad-string"],
+            Token::BadUrl(_) => JArray!["error", "bad-url"],
+            Token::BadString(_) => JArray!["error", "bad-string"],
             Token::CloseParenthesis => JArray!["error", ")"],
             Token::CloseSquareBracket => JArray!["error", "]"],
             Token::CloseCurlyBracket => JArray!["error", "}"],
@@ -920,3 +920,32 @@ fn parse_until_before_stops_at_delimiter_or_end_of_input() {
         }
     }
 }
+
+#[test]
+fn parser_maintains_current_line() {
+    let mut input = ParserInput::new("ident ident;\nident ident ident;\nident");
+    let mut parser = Parser::new(&mut input);
+    assert_eq!(parser.current_line(), "ident ident;");
+    assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+    assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+    assert_eq!(parser.next(), Ok(Token::Semicolon));
+
+    assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+    assert_eq!(parser.current_line(), "ident ident ident;");
+    assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+    assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+    assert_eq!(parser.next(), Ok(Token::Semicolon));
+
+    assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+    assert_eq!(parser.current_line(), "ident");
+}
+
+#[test]
+fn parse_entirely_reports_first_error() {
+    #[derive(PartialEq, Debug)]
+    enum E { Foo }
+    let mut input = ParserInput::new("ident");
+    let mut parser = Parser::new(&mut input);
+    let result: Result<(), _> = parser.parse_entirely(|_| Err(ParseError::Custom(E::Foo)));
+    assert_eq!(result, Err(ParseError::Custom(E::Foo)));
+}
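
Every expected value in `line_numbers` shifts down by one because `SourceLocation` is now zero-based; the two new tests exercise the `current_line()` accessor and the first-error behavior of `parse_entirely`.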

src/tokenizer.rs

Lines changed: 32 additions & 13 deletions
@@ -157,12 +157,12 @@ pub enum Token<'a> {
     /// A `<bad-url-token>`
     ///
     /// This token always indicates a parse error.
-    BadUrl,
+    BadUrl(CompactCowStr<'a>),

     /// A `<bad-string-token>`
     ///
     /// This token always indicates a parse error.
-    BadString,
+    BadString(CompactCowStr<'a>),

     /// A `<)-token>`
     ///
@@ -194,7 +194,7 @@ impl<'a> Token<'a> {
     pub fn is_parse_error(&self) -> bool {
         matches!(
             *self,
-            BadUrl | BadString | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
+            BadUrl(_) | BadString(_) | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
         )
     }
 }
@@ -226,7 +226,7 @@ impl<'a> Tokenizer<'a> {
             input: input,
             position: 0,
             last_known_source_location: Cell::new((SourcePosition(0),
-                                                   SourceLocation { line: 1, column: 1 })),
+                                                   SourceLocation { line: 0, column: 0 })),
             var_functions: SeenStatus::DontCare,
             viewport_percentages: SeenStatus::DontCare,
         }
@@ -287,6 +287,17 @@ impl<'a> Tokenizer<'a> {
         self.source_location(position)
     }

+    pub fn current_source_line(&self) -> &'a str {
+        let current = self.position;
+        let start = self.input[0..current]
+            .rfind(|c| matches!(c, '\r' | '\n' | '\x0C'))
+            .map_or(0, |start| start + 1);
+        let end = self.input[current..]
+            .find(|c| matches!(c, '\r' | '\n' | '\x0C'))
+            .map_or(self.input.len(), |end| current + end);
+        &self.input[start..end]
+    }
+
     pub fn source_location(&self, position: SourcePosition) -> SourceLocation {
         let target = position.0;
         let mut location;
@@ -301,7 +312,7 @@ impl<'a> Tokenizer<'a> {
             // So if the requested position is before the last known one,
             // start over from the beginning.
             position = 0;
-            location = SourceLocation { line: 1, column: 1 };
+            location = SourceLocation { line: 0, column: 0 };
         }
         let mut source = &self.input[position..target];
         while let Some(newline_position) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
@@ -310,7 +321,7 @@ impl<'a> Tokenizer<'a> {
             source = &source[offset..];
             position += offset;
             location.line += 1;
-            location.column = 1;
+            location.column = 0;
         }
         debug_assert!(position <= target);
         location.column += (target - position) as u32;
@@ -386,10 +397,10 @@ pub struct SourcePosition(usize);
 /// The line and column number for a given position within the input.
 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 pub struct SourceLocation {
-    /// The line number, starting at 1 for the first line.
+    /// The line number, starting at 0 for the first line.
     pub line: u32,

-    /// The column number within a line, starting at 1 for the first character of the line.
+    /// The column number within a line, starting at 0 for the first character of the line.
     pub column: u32,
 }

@@ -556,14 +567,14 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
 fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
     match consume_quoted_string(tokenizer, single_quote) {
         Ok(value) => QuotedString(value),
-        Err(()) => BadString
+        Err(value) => BadString(value)
     }
 }


 /// Return `Err(())` on syntax error (ie. unescaped newline)
 fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
-                             -> Result<CompactCowStr<'a>, ()> {
+                             -> Result<CompactCowStr<'a>, CompactCowStr<'a>> {
     tokenizer.advance(1); // Skip the initial quote
     // start_pos is at code point boundary, after " or '
     let start_pos = tokenizer.position();
@@ -596,15 +607,22 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
                 string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                 break
             }
-            b'\n' | b'\r' | b'\x0C' => { return Err(()) },
+            b'\n' | b'\r' | b'\x0C' => {
+                return Err(tokenizer.slice_from(start_pos).into())
+            },
             _ => {}
         }
         tokenizer.consume_byte();
     }

     while !tokenizer.is_eof() {
         if matches!(tokenizer.next_byte_unchecked(), b'\n' | b'\r' | b'\x0C') {
-            return Err(());
+            return Err(
+                // string_bytes is well-formed UTF-8, see other comments.
+                unsafe {
+                    from_utf8_release_unchecked(string_bytes)
+                }.into()
+            );
         }
         let b = tokenizer.consume_byte();
         match_byte! { b,
@@ -1013,6 +1031,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
     }

     fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
+        let start_pos = tokenizer.position();
         // Consume up to the closing )
         while !tokenizer.is_eof() {
             match_byte! { tokenizer.consume_byte(),
@@ -1023,7 +1042,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
                 _ => {},
             }
         }
-        BadUrl
+        BadUrl(tokenizer.slice_from(start_pos).into())
     }
 }
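
With `BadUrl` and `BadString` carrying the consumed text, a caller can see what the tokenizer actually swallowed. A rough sketch of the new behavior, assuming bad tokens are still returned as ordinary `Ok` tokens at this revision (with `is_parse_error` flagging them) and that `CompactCowStr` derefs to `str`:

    use cssparser::{Parser, ParserInput, Token};

    fn main() {
        // An unterminated string: the newline ends it before a closing quote.
        let mut input = ParserInput::new("\"oops\nident");
        let mut parser = Parser::new(&mut input);

        match parser.next_including_whitespace() {
            // BadString now carries everything consumed after the opening quote.
            Ok(Token::BadString(s)) => assert_eq!(&*s, "oops"),
            other => panic!("expected a bad-string token, got {:?}", other),
        }
    }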
