Add Parser::skip_whitespace #181

Merged
merged 2 commits on Aug 21, 2017
Cargo.toml: 2 changes (1 addition & 1 deletion)
@@ -1,6 +1,6 @@
[package]
name = "cssparser"
version = "0.19.2"
version = "0.19.3"
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

description = "Rust implementation of CSS Syntax Level 3"
src/parser.rs: 37 changes (29 additions & 8 deletions)
@@ -288,6 +288,32 @@ impl<'i: 't, 't> Parser<'i, 't> {
}
}

/// Advance the input until the next token that’s not whitespace or a comment.
#[inline]
pub fn skip_whitespace(&mut self) {
// If we just consumed `{`, `[`, `(`, or `function(`, leave whitespace
// or comments inside the block or function up to the nested parser.
if self.at_start_of.is_some() {
return
}

self.input.tokenizer.skip_whitespace()
}

#[inline]
pub(crate) fn skip_cdc_and_cdo(&mut self) {
self.input.tokenizer.skip_cdc_and_cdo()
}

#[inline]
pub(crate) fn next_byte(&self) -> Option<u8> {
let byte = self.input.tokenizer.next_byte();
if self.stop_before.contains(Delimiters::from_byte(byte)) {
return None
}
byte
}

/// Restore the internal state of the parser (including position within the input)
/// to what was previously saved by the `Parser::state` method.
///
@@ -364,14 +390,8 @@ impl<'i: 't, 't> Parser<'i, 't> {
///
/// This only returns a closing token when it is unmatched (and therefore an error).
pub fn next(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> {
-        loop {
-            match self.next_including_whitespace_and_comments() {
-                Err(e) => return Err(e),
-                Ok(&Token::WhiteSpace(_)) | Ok(&Token::Comment(_)) => {},
-                _ => break
-            }
-        }
-        Ok(self.input.cached_token_ref())
+        self.skip_whitespace();
+        self.next_including_whitespace_and_comments()
}

/// Same as `Parser::next`, but does not skip whitespace tokens.
@@ -459,6 +479,7 @@ impl<'i: 't, 't> Parser<'i, 't> {
where F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>> {
let mut values = vec![];
loop {
self.skip_whitespace(); // Unnecessary for correctness, but may help try() in parse_one rewind less.
values.push(self.parse_until_before(Delimiter::Comma, &mut parse_one)?);
match self.next() {
Err(_) => return Ok(values),
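
(Illustration, not part of the diff.) The comment added in parse_comma_separated hints at the main consumer of the new public method: calling skip_whitespace() once before a speculative try() branch means a failed branch rewinds to a position that is already past the whitespace, so less input is re-tokenized. A hedged sketch of such a caller, assuming the cssparser API at the version in this diff (expect_number, expect_ident_matching, try); parse_value and its mapping of `auto` to 0.0 are invented for the example:

extern crate cssparser;

use cssparser::{BasicParseError, Parser, ParserInput};

fn parse_value<'i, 't>(input: &mut Parser<'i, 't>) -> Result<f32, BasicParseError<'i>> {
    // Consume leading whitespace once, up front...
    input.skip_whitespace();
    // ...so that when this speculative branch fails, try() rewinds to the
    // post-whitespace position instead of re-scanning the whitespace.
    if let Ok(number) = input.try(|i| i.expect_number()) {
        return Ok(number);
    }
    input.expect_ident_matching("auto")?;
    Ok(0.0)
}

fn main() {
    let mut raw = ParserInput::new("   auto");
    let mut parser = Parser::new(&mut raw);
    assert_eq!(parse_value(&mut parser), Ok(0.0));
}
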
src/rules_and_declarations.rs: 64 changes (42 additions & 22 deletions)
@@ -342,15 +342,29 @@ where P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> +

fn next(&mut self) -> Option<Result<R, PreciseParseError<'i, E>>> {
loop {
+            if self.is_stylesheet {
+                self.input.skip_cdc_and_cdo()
+            } else {
+                self.input.skip_whitespace()
+            }
            let start = self.input.state();
-            // FIXME: remove intermediate variable when lifetimes are non-lexical
-            let at_keyword = match self.input.next_including_whitespace_and_comments() {
-                Ok(&Token::WhiteSpace(_)) | Ok(&Token::Comment(_)) => continue,
-                Ok(&Token::CDO) | Ok(&Token::CDC) if self.is_stylesheet => continue,
-                Ok(&Token::AtKeyword(ref name)) => Some(name.clone()),
-                Ok(_) => None,
-                Err(_) => return None,
-            };
+
+            let at_keyword;
+            match self.input.next_byte() {
+                Some(b'@') => {
+                    match self.input.next_including_whitespace_and_comments() {
+                        Ok(&Token::AtKeyword(ref name)) => at_keyword = Some(name.clone()),
+                        _ => at_keyword = None,
+                    }
+                    // FIXME: move this back inside `match` when lifetimes are non-lexical
+                    if at_keyword.is_none() {
+                        self.input.reset(&start)
+                    }
+                }
+                Some(_) => at_keyword = None,
+                None => return None
+            }
+
if let Some(name) = at_keyword {
let first_stylesheet_rule = self.is_stylesheet && !self.any_rule_so_far;
self.any_rule_so_far = true;
@@ -362,7 +376,6 @@ where P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> +
}
} else {
self.any_rule_so_far = true;
-                self.input.reset(&start);
return Some(parse_qualified_rule(self.input, &mut self.parser)
.map_err(|e| PreciseParseError {
error: e,
@@ -400,20 +413,27 @@ pub fn parse_one_rule<'i, 't, R, P, E>(input: &mut Parser<'i, 't>, parser: &mut
where P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> +
AtRuleParser<'i, AtRule = R, Error = E> {
input.parse_entirely(|input| {
-        loop {
-            let start = input.state();
-            // FIXME: remove intermediate variable when lifetimes are non-lexical
-            let at_keyword = match *input.next_including_whitespace_and_comments()? {
-                Token::WhiteSpace(_) | Token::Comment(_) => continue,
-                Token::AtKeyword(ref name) => Some(name.clone()),
-                _ => None
-            };
-            if let Some(name) = at_keyword {
-                return parse_at_rule(&start, name, input, parser).map_err(|e| e.error)
-            } else {
-                input.reset(&start);
-                return parse_qualified_rule(input, parser)
-            }
-        }
+        input.skip_whitespace();
+        let start = input.state();
+
+        let at_keyword;
+        if input.next_byte() == Some(b'@') {
+            match *input.next_including_whitespace_and_comments()? {
+                Token::AtKeyword(ref name) => at_keyword = Some(name.clone()),
+                _ => at_keyword = None,
+            }
+            // FIXME: move this back inside `match` when lifetimes are non-lexical
+            if at_keyword.is_none() {
+                input.reset(&start)
+            }
+        } else {
+            at_keyword = None
+        }
+
+        if let Some(name) = at_keyword {
+            parse_at_rule(&start, name, input, parser).map_err(|e| e.error)
+        } else {
+            parse_qualified_rule(input, parser)
}
})
}
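
(Aside, not part of the diff.) Both rewritten functions above share one idea: an at-rule can only begin with the byte b'@', so a one-byte peek through next_byte() decides the dispatch without tokenizing anything, and reset(&start) is only needed in the rare case where '@' did not actually start an AtKeyword token. A standalone sketch of that fast path; looks_like_at_rule is hypothetical and works on a plain string rather than the crate's tokenizer:

// Hypothetical helper mirroring the next_byte() fast path above.
fn looks_like_at_rule(remaining_css: &str) -> bool {
    // One-byte peek: anything other than '@' must start a qualified rule.
    remaining_css.as_bytes().first() == Some(&b'@')
}

fn main() {
    assert!(looks_like_at_rule("@media screen {}"));
    assert!(!looks_like_at_rule(".foo { color: red }"));
    // "@ media" passes the byte test but tokenizes to Delim('@') rather than
    // an AtKeyword; this is exactly the case where the real code must reset().
    assert!(looks_like_at_rule("@ media {}"));
}
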
src/tokenizer.rs: 82 changes (77 additions & 5 deletions)
@@ -411,6 +411,76 @@ impl<'a> Tokenizer<'a> {
fn starts_with(&self, needle: &[u8]) -> bool {
self.input.as_bytes()[self.position..].starts_with(needle)
}

pub fn skip_whitespace(&mut self) {
while !self.is_eof() {
match_byte! { self.next_byte_unchecked(),
b' ' | b'\t' => {
self.advance(1)
},
b'\n' | b'\x0C' => {
self.advance(1);
self.seen_newline(false);
},
b'\r' => {
self.advance(1);
self.seen_newline(true);
},
b'/' => {
if self.starts_with(b"/*") {
consume_comment(self);
} else {
return
}
}
_ => {
return
}
}
}
}

pub fn skip_cdc_and_cdo(&mut self) {
while !self.is_eof() {
match_byte! { self.next_byte_unchecked(),
b' ' | b'\t' => {
self.advance(1)
},
b'\n' | b'\x0C' => {
self.advance(1);
self.seen_newline(false);
},
b'\r' => {
self.advance(1);
self.seen_newline(true);
},
b'/' => {
if self.starts_with(b"/*") {
consume_comment(self);
} else {
return
}
}
b'<' => {
if self.starts_with(b"<!--") {
self.advance(4)
} else {
return
}
}
b'-' => {
if self.starts_with(b"-->") {
                        self.advance(3) // CDC ("-->") is only three bytes
} else {
return
}
}
_ => {
return
}
}
}
}
}

/// A position from the start of the input, counted in UTF-8 bytes.
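
(Aside, not part of the diff.) skip_cdc_and_cdo exists because, at the top level of a stylesheet, the HTML comment delimiters <!-- (CDO) and --> (CDC) are permitted as noise between rules and may be freely interleaved with whitespace and /* */ comments. A self-contained sketch of the same skipping discipline over a byte slice; none of this is crate code, and the real method additionally tracks line numbers via seen_newline:

// Returns the index of the first byte that is not whitespace, a /* */
// comment, "<!--", or "-->", mirroring Tokenizer::skip_cdc_and_cdo above.
fn skip_ws_cdo_cdc(input: &[u8]) -> usize {
    let mut i = 0;
    loop {
        match input.get(i).copied() {
            Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\x0C') | Some(b'\r') => i += 1,
            Some(b'/') if input[i..].starts_with(b"/*") => {
                // An unclosed comment runs to the end of input, as in the tokenizer.
                match input[i + 2..].windows(2).position(|w| w == b"*/") {
                    Some(n) => i += 2 + n + 2,
                    None => return input.len(),
                }
            }
            Some(b'<') if input[i..].starts_with(b"<!--") => i += 4,
            Some(b'-') if input[i..].starts_with(b"-->") => i += 3, // CDC is three bytes
            _ => return i,
        }
    }
}

fn main() {
    let css = b"<!-- /* header */ --> body { margin: 0 }";
    assert_eq!(&css[skip_ws_cdo_cdc(css)..], &b"body { margin: 0 }"[..]);
}
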
@@ -514,9 +584,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
}
b'/' => {
if tokenizer.starts_with(b"/*") {
-                let contents = consume_comment(tokenizer);
-                check_for_source_map(tokenizer, contents);
-                Comment(contents)
+                Comment(consume_comment(tokenizer))
} else {
tokenizer.advance(1);
Delim('/')
@@ -627,7 +695,9 @@ fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
tokenizer.advance(1);
if tokenizer.next_byte() == Some(b'/') {
tokenizer.advance(1);
-                    return tokenizer.slice(start_position..end_position)
+                    let contents = tokenizer.slice(start_position..end_position);
+                    check_for_source_map(tokenizer, contents);
+                    return contents
}
}
b'\n' | b'\x0C' => {
}
}
}
-    tokenizer.slice_from(start_position)
+    let contents = tokenizer.slice_from(start_position);
+    check_for_source_map(tokenizer, contents);
+    contents
}

fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
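
(Aside, not part of the diff.) Moving check_for_source_map from next_token into consume_comment is what keeps source-map detection working now that comments can be consumed without ever becoming tokens: a /*# sourceMappingURL=... */ comment swallowed by skip_whitespace is still recorded. A sketch of observing that, assuming the Parser::current_source_map_url accessor is available in this version of cssparser:

extern crate cssparser;

use cssparser::{Parser, ParserInput};

fn main() {
    let mut raw = ParserInput::new("/*# sourceMappingURL=style.css.map */ body {}");
    let mut parser = Parser::new(&mut raw);
    // The comment is consumed here without a Comment token being returned,
    // yet consume_comment still records the sourceMappingURL directive.
    parser.skip_whitespace();
    assert_eq!(parser.current_source_map_url(), Some("style.css.map"));
}
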