From 4cdcd58a6325c355c7a24b7349ed04c6ad891497 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 7 Aug 2013 19:54:39 +0100 Subject: [PATCH 01/14] Only accept variable (no complex expression) in the fake for loop. --- parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser.rs b/parser.rs index d83ffc1f..6f70172e 100644 --- a/parser.rs +++ b/parser.rs @@ -64,7 +64,7 @@ impl Iterator<(ComponentValue, SourceLocation)> for ComponentValueIterator { // Work around "error: cannot borrow `*iter` as mutable more than once at a time" // when using a normal for loop. macro_rules! for_iter( - ($iter: expr, $pattern: pat, $loop_body: expr) => ( + ($iter: ident, $pattern: pat, $loop_body: expr) => ( loop { match $iter.next() { None => break, Some($pattern) => $loop_body } } From 1424ac59b8b8719c136fa8363b39eac9269ffc13 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 7 Aug 2013 19:55:12 +0100 Subject: [PATCH 02/14] Remove unused code: ComponentValueIterator.from_parser() --- parser.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/parser.rs b/parser.rs index 6f70172e..dc0727e1 100644 --- a/parser.rs +++ b/parser.rs @@ -20,7 +20,7 @@ use tokenizer::*; // TODO: Use a trait? enum ComponentValueIterator { - ParserIter(~Parser), + ParserIter(Parser), VectorIter(vec::ConsumeIterator<(ComponentValue, SourceLocation)>), } @@ -28,12 +28,7 @@ enum ComponentValueIterator { impl ComponentValueIterator { #[inline] pub fn from_str(input: ~str) -> ComponentValueIterator { - ComponentValueIterator::from_parser(~Parser::from_str(input)) - } - - #[inline] - pub fn from_parser(parser: ~Parser) -> ComponentValueIterator { - ParserIter(parser) + ParserIter(Parser::from_str(input)) } #[inline] @@ -54,7 +49,7 @@ impl ComponentValueIterator { impl Iterator<(ComponentValue, SourceLocation)> for ComponentValueIterator { fn next(&mut self) -> Option<(ComponentValue, SourceLocation)> { match self { - &ParserIter(ref mut parser) => next_component_value(*parser), + &ParserIter(ref mut parser) => next_component_value(parser), &VectorIter(ref mut iter) => iter.next() } } From cb67994f840b9b8657168d7f3610446038e2111e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 7 Aug 2013 19:55:38 +0100 Subject: [PATCH 03/14] Add a "skip whitespace" iterator that also ignores source locations. --- ast.rs | 28 ++++++++++++++++++++++++++++ color.rs | 4 +--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/ast.rs b/ast.rs index 4d996a25..7bb15757 100644 --- a/ast.rs +++ b/ast.rs @@ -3,6 +3,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use std::str::ToStr; +use std::vec; #[deriving(Eq)] @@ -114,3 +115,30 @@ pub enum ErrorReason { impl ToStr for ErrorReason { fn to_str(&self) -> ~str { fmt!("%?", self) } } + + +pub trait SkipWhitespaceIterable<'self> { + pub fn skip_whitespace(self) -> SkipWhitespaceIterator<'self>; +} + +impl<'self> SkipWhitespaceIterable<'self> for &'self [(ComponentValue, SourceLocation)] { + pub fn skip_whitespace(self) -> SkipWhitespaceIterator<'self> { + SkipWhitespaceIterator{ iter: self.iter() } + } +} + +struct SkipWhitespaceIterator<'self> { + iter: vec::VecIterator<'self, (ComponentValue, SourceLocation)>, +} + +impl<'self> Iterator<&'self ComponentValue> for SkipWhitespaceIterator<'self> { + fn next(&mut self) -> Option<&'self ComponentValue> { + loop { + match self.iter.next() { + Some(&(WhiteSpace, _)) => (), + Some(&(ref component_value, _)) => return Some(component_value), + None => return None + } + } + } +} diff --git a/color.rs b/color.rs index 10bf8f13..d0acb0f7 100644 --- a/color.rs +++ b/color.rs @@ -87,9 +87,7 @@ fn parse_color_function(name: &str, arguments: &[(ComponentValue, SourceLocation else if "hsla" == lower_name { (false, true) } else { return None }; - let mut iter = do arguments.iter().filter_map |&(ref c, _)| { - if c != &WhiteSpace { Some(c) } else { None } - }; + let mut iter = arguments.skip_whitespace(); macro_rules! expect_comma( () => ( if iter.next() != Some(&Comma) { return None } ); ) From bff9dfc2b7292105c9c768676c41a5e1b16cfbad Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 7 Aug 2013 20:58:25 +0100 Subject: [PATCH 04/14] Make the tokenizer an iterator. --- ast.rs | 38 +++++++-------- parser.rs | 92 ++++++++++++----------------------- tests.rs | 24 ++++------ tokenizer.rs | 133 ++++++++++++++++++++++++++------------------------- 4 files changed, 125 insertions(+), 162 deletions(-) diff --git a/ast.rs b/ast.rs index 7bb15757..c1925f14 100644 --- a/ast.rs +++ b/ast.rs @@ -21,13 +21,16 @@ pub struct SourceLocation { } +pub type Node = (ComponentValue, SourceLocation); // TODO this is not a good name + + #[deriving(Eq)] pub enum ComponentValue { - // Preserved tokens. Same as in the tokenizer. + // Preserved tokens. Ident(~str), AtKeyword(~str), Hash(~str), - IDHash(~str), // Hash token that is a valid ID selector. + IDHash(~str), // Hash that is a valid ID selector. String(~str), URL(~str), Delim(char), @@ -50,12 +53,12 @@ pub enum ComponentValue { CDC, // --> // Function - Function(~str, ~[(ComponentValue, SourceLocation)]), // name, arguments + Function(~str, ~[Node]), // name, arguments // Simple block - ParenthesisBlock(~[(ComponentValue, SourceLocation)]), // (…) - SquareBracketBlock(~[(ComponentValue, SourceLocation)]), // […] - CurlyBracketBlock(~[(ComponentValue, SourceLocation)]), // {…} + ParenthesisBlock(~[Node]), // (…) + SquareBracketBlock(~[Node]), // […] + CurlyBracketBlock(~[Node]), // {…} // These are always invalid BadURL, @@ -70,23 +73,23 @@ pub enum ComponentValue { pub struct Declaration { location: SourceLocation, name: ~str, - value: ~[(ComponentValue, SourceLocation)], + value: ~[Node], important: bool, } #[deriving(Eq)] pub struct QualifiedRule { location: SourceLocation, - prelude: ~[(ComponentValue, SourceLocation)], - block: ~[(ComponentValue, SourceLocation)], + prelude: ~[Node], + block: ~[Node], } #[deriving(Eq)] pub struct AtRule { location: SourceLocation, name: ~str, - prelude: ~[(ComponentValue, SourceLocation)], - block: Option<~[(ComponentValue, SourceLocation)]>, + prelude: ~[Node], + block: Option<~[Node]>, } #[deriving(Eq)] @@ -121,24 +124,21 @@ pub trait SkipWhitespaceIterable<'self> { pub fn skip_whitespace(self) -> SkipWhitespaceIterator<'self>; } -impl<'self> SkipWhitespaceIterable<'self> for &'self [(ComponentValue, SourceLocation)] { +impl<'self> SkipWhitespaceIterable<'self> for &'self [Node] { pub fn skip_whitespace(self) -> SkipWhitespaceIterator<'self> { SkipWhitespaceIterator{ iter: self.iter() } } } struct SkipWhitespaceIterator<'self> { - iter: vec::VecIterator<'self, (ComponentValue, SourceLocation)>, + iter: vec::VecIterator<'self, Node>, } impl<'self> Iterator<&'self ComponentValue> for SkipWhitespaceIterator<'self> { fn next(&mut self) -> Option<&'self ComponentValue> { - loop { - match self.iter.next() { - Some(&(WhiteSpace, _)) => (), - Some(&(ref component_value, _)) => return Some(component_value), - None => return None - } + for &(ref component_value, _) in self.iter { + if component_value != &WhiteSpace { return Some(component_value) } } + None } } diff --git a/parser.rs b/parser.rs index dc0727e1..fd5dcea3 100644 --- a/parser.rs +++ b/parser.rs @@ -11,49 +11,9 @@ use std::iterator::Iterator; -use std::vec; use std::ascii::eq_ignore_ascii_case; use ast::*; -use tokenizer::*; - - -// TODO: Use a trait? -enum ComponentValueIterator { - ParserIter(Parser), - VectorIter(vec::ConsumeIterator<(ComponentValue, SourceLocation)>), -} - - -impl ComponentValueIterator { - #[inline] - pub fn from_str(input: ~str) -> ComponentValueIterator { - ParserIter(Parser::from_str(input)) - } - - #[inline] - pub fn from_vector(values: ~[(ComponentValue, SourceLocation)]) -> ComponentValueIterator { - VectorIter(values.consume_iter()) - } - - #[inline] - pub fn next_non_whitespace(&mut self) -> Option<(ComponentValue, SourceLocation)> { - for (component_value, location) in *self { - if component_value != WhiteSpace { return Some((component_value, location)) } - } - None - } -} - - -impl Iterator<(ComponentValue, SourceLocation)> for ComponentValueIterator { - fn next(&mut self) -> Option<(ComponentValue, SourceLocation)> { - match self { - &ParserIter(ref mut parser) => next_component_value(parser), - &VectorIter(ref mut iter) => iter.next() - } - } -} // Work around "error: cannot borrow `*iter` as mutable more than once at a time" @@ -68,7 +28,7 @@ macro_rules! for_iter( /// Call repeatedly for the top-level of a CSS stylesheet -pub fn parse_stylesheet_rule(iter: &mut ComponentValueIterator) -> Option> { +pub fn parse_stylesheet_rule>(iter: &mut T) -> Option> { for_iter!(iter, (component_value, location), { match component_value { WhiteSpace | CDO | CDC => (), @@ -85,7 +45,7 @@ pub fn parse_stylesheet_rule(iter: &mut ComponentValueIterator) -> Option Option> { +pub fn parse_rule>(iter: &mut T) -> Option> { for_iter!(iter, (component_value, location), { match component_value { WhiteSpace => (), @@ -101,10 +61,10 @@ pub fn parse_rule(iter: &mut ComponentValueIterator) -> Option Result { +pub fn parse_one_rule>(iter: &mut T) -> Result { match parse_rule(iter) { None => Err(ErrEmptyInput), - Some(result) => if result.is_err() || iter.next_non_whitespace().is_none() { result } + Some(result) => if result.is_err() || next_non_whitespace(iter).is_none() { result } else { Err(ErrExtraInput) } } } @@ -112,8 +72,8 @@ pub fn parse_one_rule(iter: &mut ComponentValueIterator) -> Result Option> { +pub fn parse_declaration_or_at_rule>(iter: &mut T) + -> Option> { for_iter!(iter, (component_value, location), { match component_value { WhiteSpace | Semicolon => (), @@ -133,12 +93,12 @@ pub fn parse_declaration_or_at_rule(iter: &mut ComponentValueIterator) /// Used eg. in @supports -pub fn parse_one_declaration(iter: &mut ComponentValueIterator) -> Result { - match iter.next_non_whitespace() { +pub fn parse_one_declaration>(iter: &mut T) -> Result { + match next_non_whitespace(iter) { None => Err(ErrEmptyInput), Some(item) => { let result = parse_declaration(iter, item); - if result.is_err() || iter.next_non_whitespace().is_none() { result } + if result.is_err() || next_non_whitespace(iter).is_none() { result } else { Err(ErrExtraInput) } } } @@ -146,12 +106,11 @@ pub fn parse_one_declaration(iter: &mut ComponentValueIterator) -> Result Result<(ComponentValue, SourceLocation), ErrorReason> { - match iter.next_non_whitespace() { +pub fn parse_one_component_value>(iter: &mut T) -> Result { + match next_non_whitespace(iter) { None => Err(ErrEmptyInput), Some(item) => { - if iter.next_non_whitespace().is_none() { Ok(item) } + if next_non_whitespace(iter).is_none() { Ok(item) } else { Err(ErrExtraInput) } } } @@ -161,7 +120,7 @@ pub fn parse_one_component_value(iter: &mut ComponentValueIterator) // *********** End of public API *********** -fn parse_at_rule(iter: &mut ComponentValueIterator, name: ~str, location: SourceLocation) +fn parse_at_rule>(iter: &mut T, name: ~str, location: SourceLocation) -> AtRule { let mut prelude = ~[]; let mut block = None; @@ -176,8 +135,8 @@ fn parse_at_rule(iter: &mut ComponentValueIterator, name: ~str, location: Source } -fn parse_qualified_rule(iter: &mut ComponentValueIterator, first: (ComponentValue, SourceLocation)) - -> Result { +fn parse_qualified_rule>(iter: &mut T, first: Node) + -> Result { match first { (CurlyBracketBlock(content), location) => return Ok(QualifiedRule { location: location, prelude: ~[], block: content }), @@ -195,13 +154,13 @@ fn parse_qualified_rule(iter: &mut ComponentValueIterator, first: (ComponentValu } -fn parse_declaration(iter: &mut ComponentValueIterator, first: (ComponentValue, SourceLocation)) - -> Result { +fn parse_declaration>(iter: &mut T, first: Node) + -> Result { let (name, location) = match first { (Ident(name), location) => (name, location), _ => return Err(ErrInvalidDeclarationSyntax) }; - match iter.next_non_whitespace() { + match next_non_whitespace(iter) { Some((Colon, _)) => (), _ => return Err(ErrInvalidDeclarationSyntax), } @@ -224,15 +183,24 @@ fn parse_declaration(iter: &mut ComponentValueIterator, first: (ComponentValue, #[inline] -fn parse_declaration_important(iter: &mut ComponentValueIterator) -> bool { - let ident_value = match iter.next_non_whitespace() { +fn parse_declaration_important>(iter: &mut T) -> bool { + let ident_value = match next_non_whitespace(iter) { Some((Ident(value), _)) => value, _ => return false, }; if !eq_ignore_ascii_case(ident_value, "important") { return false } - match iter.next_non_whitespace() { + match next_non_whitespace(iter) { Some((Semicolon, _)) => true, None => true, _ => false } } + + +#[inline] +fn next_non_whitespace>(iter: &mut T) -> Option { + for (component_value, location) in *iter { + if component_value != WhiteSpace { return Some((component_value, location)) } + } + None +} diff --git a/tests.rs b/tests.rs index 2ce0243c..5cd86ad5 100644 --- a/tests.rs +++ b/tests.rs @@ -7,7 +7,7 @@ use extra::{tempfile, json}; use extra::json::ToJson; use ast::*; -use tokenizer::*; +use tokenizer::tokenize; use parser::*; use color::*; @@ -79,14 +79,8 @@ fn run_json_tests(json_data: &str, parse: &fn (input: ~str) -> json::Json) { #[test] fn component_value_list() { do run_json_tests(include_str!("css-parsing-tests/component_value_list.json")) |input| { - let parser = &mut Parser::from_str(input); let mut results = ~[]; - loop { - match next_component_value(parser) { - Some((c, _)) => results.push(c), - None => break, - } - } + for (c, _) in &mut tokenize(input) { results.push(c) } results.to_json() } } @@ -95,8 +89,8 @@ fn component_value_list() { #[test] fn one_component_value() { do run_json_tests(include_str!("css-parsing-tests/one_component_value.json")) |input| { - let iter = &mut ComponentValueIterator::from_str(input); - result_to_json(parse_one_component_value(iter).chain(|(c, _)| Ok(c))) + let result = parse_one_component_value(&mut tokenize(input)); + result_to_json(result.chain(|(c, _)| Ok(c))) } } @@ -104,7 +98,7 @@ fn one_component_value() { #[test] fn declaration_list() { do run_json_tests(include_str!("css-parsing-tests/declaration_list.json")) |input| { - let iter = &mut ComponentValueIterator::from_str(input); + let iter = &mut tokenize(input); let mut declarations = ~[]; loop { match parse_declaration_or_at_rule(iter) { @@ -120,7 +114,7 @@ fn declaration_list() { #[test] fn one_declaration() { do run_json_tests(include_str!("css-parsing-tests/one_declaration.json")) |input| { - result_to_json(parse_one_declaration(&mut ComponentValueIterator::from_str(input))) + result_to_json(parse_one_declaration(&mut tokenize(input))) } } @@ -128,7 +122,7 @@ fn one_declaration() { #[test] fn rule_list() { do run_json_tests(include_str!("css-parsing-tests/rule_list.json")) |input| { - let iter = &mut ComponentValueIterator::from_str(input); + let iter = &mut tokenize(input); let mut rules = ~[]; loop { match parse_rule(iter) { @@ -144,14 +138,14 @@ fn rule_list() { #[test] fn one_rule() { do run_json_tests(include_str!("css-parsing-tests/one_rule.json")) |input| { - result_to_json(parse_one_rule(&mut ComponentValueIterator::from_str(input))) + result_to_json(parse_one_rule(&mut tokenize(input))) } } fn run_color_tests(json_data: &str, to_json: &fn(result: Option) -> json::Json) { do run_json_tests(json_data) |input| { - match parse_one_component_value(&mut ComponentValueIterator::from_str(input)) { + match parse_one_component_value(&mut tokenize(input)) { Ok((component_value, _location)) => to_json(parse_color(&component_value)), Err(_reason) => json::Null, } diff --git a/tokenizer.rs b/tokenizer.rs index c0925854..f9af9da9 100644 --- a/tokenizer.rs +++ b/tokenizer.rs @@ -10,6 +10,41 @@ use std::ascii::eq_ignore_ascii_case; use ast::*; +pub fn tokenize(input: &str) -> Parser { + let input = preprocess(input); + Parser { + length: input.len(), + input: input, + position: 0, + line: 1, + last_line_start: 0, + } +} + +impl Iterator for Parser { + #[inline] + pub fn next(&mut self) -> Option { next_component_value(self) } +} + + +// *********** End of public API *********** + + +#[inline] +fn preprocess(input: &str) -> ~str { + // TODO: Is this faster if done in one pass? + input.replace("\r\n", "\n").replace("\r", "\n").replace("\x0C", "\n").replace("\x00", "\uFFFD") +} + + +#[test] +fn test_preprocess() { + assert!(preprocess("") == ~""); + assert!(preprocess("Lorem\r\n\t\x00ipusm\ndoror\uFFFD\r") + == ~"Lorem\n\t\uFFFDipusm\ndoror\uFFFD\n"); +} + + struct Parser { input: ~str, length: uint, // All counted in bytes, not characters @@ -20,19 +55,40 @@ struct Parser { impl Parser { - pub fn from_str(input: &str) -> Parser { - let input = preprocess(input); - Parser { - length: input.len(), - input: input, - position: 0, - line: 1, - last_line_start: 0, + #[inline] + fn is_eof(&self) -> bool { self.position >= self.length } + + // Assumes non-EOF + #[inline] + fn current_char(&self) -> char { self.char_at(0) } + + #[inline] + fn char_at(&self, offset: uint) -> char { + self.input.char_at(self.position + offset) + } + + #[inline] + fn consume_char(&mut self) -> char { + let range = self.input.char_range_at(self.position); + self.position = range.next; + range.ch + } + + #[inline] + fn starts_with(&self, needle: &str) -> bool { + self.input.slice_from(self.position).starts_with(needle) + } + + #[inline] + fn new_line(&mut self) { + if cfg!(test) { + assert!(self.input.char_at(self.position - 1) == '\n') } + self.line += 1; + self.last_line_start = self.position; } } - macro_rules! is_match( ($value:expr, $($pattern:pat)|+) => ( match $value { $($pattern)|+ => true, _ => false } @@ -40,7 +96,7 @@ macro_rules! is_match( ) -pub fn next_component_value(parser: &mut Parser) -> Option<(ComponentValue, SourceLocation)> { +fn next_component_value(parser: &mut Parser) -> Option { consume_comments(parser); if parser.is_eof() { if cfg!(test) { @@ -194,60 +250,6 @@ pub fn next_component_value(parser: &mut Parser) -> Option<(ComponentValue, Sour } -// *********** End of public API *********** - - -#[inline] -fn preprocess(input: &str) -> ~str { - // TODO: Is this faster if done in one pass? - input.replace("\r\n", "\n").replace("\r", "\n").replace("\x0C", "\n").replace("\x00", "\uFFFD") -} - - -#[test] -fn test_preprocess() { - assert!(preprocess("") == ~""); - assert!(preprocess("Lorem\r\n\t\x00ipusm\ndoror\uFFFD\r") - == ~"Lorem\n\t\uFFFDipusm\ndoror\uFFFD\n"); -} - - -impl Parser { - #[inline] - fn is_eof(&self) -> bool { self.position >= self.length } - - // Assumes non-EOF - #[inline] - fn current_char(&self) -> char { self.char_at(0) } - - #[inline] - fn char_at(&self, offset: uint) -> char { - self.input.char_at(self.position + offset) - } - - #[inline] - fn consume_char(&mut self) -> char { - let range = self.input.char_range_at(self.position); - self.position = range.next; - range.ch - } - - #[inline] - fn starts_with(&self, needle: &str) -> bool { - self.input.slice_from(self.position).starts_with(needle) - } - - #[inline] - fn new_line(&mut self) { - if cfg!(test) { - assert!(self.input.char_at(self.position - 1) == '\n') - } - self.line += 1; - self.last_line_start = self.position; - } -} - - #[inline] fn consume_comments(parser: &mut Parser) { while parser.starts_with("/*") { @@ -268,8 +270,7 @@ fn consume_comments(parser: &mut Parser) { } -fn consume_block(parser: &mut Parser, ending_token: ComponentValue) - -> ~[(ComponentValue, SourceLocation)] { +fn consume_block(parser: &mut Parser, ending_token: ComponentValue) -> ~[Node] { parser.position += 1; // Skip the initial {[( let mut content = ~[]; loop { From 0bce2f610c3b229499e6604beb69a153a5b58e1e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 7 Aug 2013 21:42:27 +0100 Subject: [PATCH 05/14] Make the parser return iterators. --- parser.rs | 196 +++++++++++++++++++++++++++++++-------------------- tests.rs | 95 +++++++++++++++---------- tokenizer.rs | 1 + 3 files changed, 180 insertions(+), 112 deletions(-) diff --git a/parser.rs b/parser.rs index fd5dcea3..5f8e988e 100644 --- a/parser.rs +++ b/parser.rs @@ -3,11 +3,15 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ // http://dev.w3.org/csswg/css-syntax/#parsing -// -// The input to the tree construction stage is a sequence of tokens -// from the tokenization stage. -// The output is a tree of items with a stylesheet at the root -// and all other nodes being at-rules, style rules, or declarations. + +/// The input to these functions needs to implement Iterator<(ComponentValue, SourceLocation)>. +/// The input is consumed to avoid doing a lot of copying. +/// A conforming input can be obtained: +/// +/// * From a string in CSS syntax, with tokenize() +/// * From a ~[(ComponentValue, SourceLocation)] vector +/// (as found in "nested" component values such as CurlyBracketBlock), +/// with v.consume_iter() use std::iterator::Iterator; @@ -16,101 +20,67 @@ use std::ascii::eq_ignore_ascii_case; use ast::*; -// Work around "error: cannot borrow `*iter` as mutable more than once at a time" -// when using a normal for loop. -macro_rules! for_iter( - ($iter: ident, $pattern: pat, $loop_body: expr) => ( - loop { - match $iter.next() { None => break, Some($pattern) => $loop_body } - } - ); -) +/// Parse top-level of a CSS stylesheet. +/// Return a Iterator> +#[inline] +pub fn parse_stylesheet>(iter: T) -> StylesheetParser { + StylesheetParser(iter) +} -/// Call repeatedly for the top-level of a CSS stylesheet -pub fn parse_stylesheet_rule>(iter: &mut T) -> Option> { - for_iter!(iter, (component_value, location), { - match component_value { - WhiteSpace | CDO | CDC => (), - AtKeyword(name) => return Some(Ok(AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_qualified_rule(iter, (component_value, location)) { - Ok(rule) => Ok(QualifiedRule(rule)), - Err(reason) => Err(reason), - }), - } - }) - None +/// Parse a non-top level list of rules eg. the content of an @media rule. +/// Return a Iterator> +#[inline] +pub fn parse_rule_list>(iter: T) -> RuleListParser { + RuleListParser(iter) } -/// Call repeatedly for a non-top level list of rules eg. the content of an @media rule. -/// Same as parse_stylesheet() except for the handling of top-level CDO and CDC -pub fn parse_rule>(iter: &mut T) -> Option> { - for_iter!(iter, (component_value, location), { - match component_value { - WhiteSpace => (), - AtKeyword(name) => return Some(Ok(AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_qualified_rule(iter, (component_value, location)) { - Ok(rule) => Ok(QualifiedRule(rule)), - Err(reason) => Err(reason), - }), - } - }) - None +/// Parse a list of declarations and at-rules, +/// like @page in CSS 2.1, all declaration lists in level 3 +/// Return a Iterator> +#[inline] +pub fn parse_declaration_list>(iter: T) -> DeclarationListParser { + DeclarationListParser(iter) } +/// Parse a single rule. /// Used eg. for CSSRuleList.insertRule() -pub fn parse_one_rule>(iter: &mut T) -> Result { - match parse_rule(iter) { +pub fn parse_one_rule>(iter: T) -> Result { + let mut parser = RuleListParser(iter); + match parser.next() { None => Err(ErrEmptyInput), - Some(result) => if result.is_err() || next_non_whitespace(iter).is_none() { result } - else { Err(ErrExtraInput) } - } -} - - -/// Call repeatedly of a list of declarations. -/// @page in CSS 2.1, all declaration lists in level 3 -pub fn parse_declaration_or_at_rule>(iter: &mut T) - -> Option> { - for_iter!(iter, (component_value, location), { - match component_value { - WhiteSpace | Semicolon => (), - AtKeyword(name) => return Some(Ok(Decl_AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_declaration(iter, (component_value, location)) { - Ok(declaration) => Ok(Declaration(declaration)), - Err(reason) => { - // Find the end of the declaration - for (v, _) in *iter { if v == Semicolon { break } } - Err(reason) - } - }), + Some(result) => { + if result.is_err() || next_non_whitespace(&mut *parser).is_none() { result } + else { Err(ErrExtraInput) } } - }) - None + } } +/// Parse a single declaration (not an at-rule) /// Used eg. in @supports -pub fn parse_one_declaration>(iter: &mut T) -> Result { - match next_non_whitespace(iter) { +pub fn parse_one_declaration>(mut iter: T) -> Result { + match next_non_whitespace(&mut iter) { None => Err(ErrEmptyInput), Some(item) => { - let result = parse_declaration(iter, item); - if result.is_err() || next_non_whitespace(iter).is_none() { result } + let result = parse_declaration(&mut iter, item); + if result.is_err() || next_non_whitespace(&mut iter).is_none() { result } else { Err(ErrExtraInput) } } } } +/// Parse a single component value. /// Used eg. in attr(foo, color) -pub fn parse_one_component_value>(iter: &mut T) -> Result { - match next_non_whitespace(iter) { +pub fn parse_one_component_value>(mut iter: T) + -> Result { + match next_non_whitespace(&mut iter) { None => Err(ErrEmptyInput), - Some(item) => { - if next_non_whitespace(iter).is_none() { Ok(item) } + Some((component_value, _location)) => { + if next_non_whitespace(&mut iter).is_none() { Ok(component_value) } else { Err(ErrExtraInput) } } } @@ -120,6 +90,82 @@ pub fn parse_one_component_value>(iter: &mut T) -> Result(T); +struct RuleListParser(T); +struct DeclarationListParser(T); + + +// Work around "error: cannot borrow `*iter` as mutable more than once at a time" +// when using a normal for loop. +macro_rules! for_iter( + ($iter: ident, $pattern: pat, $loop_body: expr) => ( + loop { + match $iter.next() { None => break, Some($pattern) => $loop_body } + } + ); +) + + +impl> Iterator> for StylesheetParser { + fn next(&mut self) -> Option> { + let iter = &mut **self; + for_iter!(iter, (component_value, location), { + match component_value { + WhiteSpace | CDO | CDC => (), + AtKeyword(name) => return Some(Ok(AtRule(parse_at_rule(iter, name, location)))), + _ => return Some(match parse_qualified_rule(iter, (component_value, location)) { + Ok(rule) => Ok(QualifiedRule(rule)), + Err(reason) => Err(reason), + }), + } + }) + None + } +} + + +impl> Iterator> for RuleListParser { + fn next(&mut self) -> Option> { + let iter = &mut **self; + for_iter!(iter, (component_value, location), { + match component_value { + WhiteSpace => (), + AtKeyword(name) => return Some(Ok(AtRule(parse_at_rule(iter, name, location)))), + _ => return Some(match parse_qualified_rule(iter, (component_value, location)) { + Ok(rule) => Ok(QualifiedRule(rule)), + Err(reason) => Err(reason), + }), + } + }) + None + } +} + + +impl> Iterator> +for DeclarationListParser { + fn next(&mut self) -> Option> { + let iter = &mut **self; + for_iter!(iter, (component_value, location), { + match component_value { + WhiteSpace | Semicolon => (), + AtKeyword(name) + => return Some(Ok(Decl_AtRule(parse_at_rule(iter, name, location)))), + _ => return Some(match parse_declaration(iter, (component_value, location)) { + Ok(declaration) => Ok(Declaration(declaration)), + Err(reason) => { + // Find the end of the declaration + for (v, _) in *iter { if v == Semicolon { break } } + Err(reason) + } + }), + } + }) + None + } +} + + fn parse_at_rule>(iter: &mut T, name: ~str, location: SourceLocation) -> AtRule { let mut prelude = ~[]; diff --git a/tests.rs b/tests.rs index 5cd86ad5..cefd6fee 100644 --- a/tests.rs +++ b/tests.rs @@ -55,7 +55,7 @@ fn assert_json_eq(results: json::Json, expected: json::Json, message: ~str) { } -fn run_json_tests(json_data: &str, parse: &fn (input: ~str) -> json::Json) { +fn run_json_tests(json_data: &str, parse: &fn (input: ~str) -> T) { let items = match json::from_str(json_data) { Ok(json::List(items)) => items, _ => fail!("Invalid JSON") @@ -67,7 +67,7 @@ fn run_json_tests(json_data: &str, parse: &fn (input: ~str) -> json::Json) { (&None, json::String(string)) => input = Some(string), (&Some(_), expected) => { let input = input.take_unwrap(); - let result = parse(input.to_owned()); + let result = parse(input.to_owned()).to_json(); assert_json_eq(result, expected, input); }, _ => fail!("Unexpected JSON") @@ -79,9 +79,7 @@ fn run_json_tests(json_data: &str, parse: &fn (input: ~str) -> json::Json) { #[test] fn component_value_list() { do run_json_tests(include_str!("css-parsing-tests/component_value_list.json")) |input| { - let mut results = ~[]; - for (c, _) in &mut tokenize(input) { results.push(c) } - results.to_json() + tokenize(input).transform(|(c, _)| c).to_owned_vec() } } @@ -89,8 +87,7 @@ fn component_value_list() { #[test] fn one_component_value() { do run_json_tests(include_str!("css-parsing-tests/one_component_value.json")) |input| { - let result = parse_one_component_value(&mut tokenize(input)); - result_to_json(result.chain(|(c, _)| Ok(c))) + parse_one_component_value(tokenize(input)) } } @@ -98,15 +95,7 @@ fn one_component_value() { #[test] fn declaration_list() { do run_json_tests(include_str!("css-parsing-tests/declaration_list.json")) |input| { - let iter = &mut tokenize(input); - let mut declarations = ~[]; - loop { - match parse_declaration_or_at_rule(iter) { - None => break, - Some(result) => declarations.push(result_to_json(result)), - } - } - json::List(declarations) + parse_declaration_list(tokenize(input)).to_owned_vec() } } @@ -114,7 +103,7 @@ fn declaration_list() { #[test] fn one_declaration() { do run_json_tests(include_str!("css-parsing-tests/one_declaration.json")) |input| { - result_to_json(parse_one_declaration(&mut tokenize(input))) + parse_one_declaration(tokenize(input)) } } @@ -122,15 +111,15 @@ fn one_declaration() { #[test] fn rule_list() { do run_json_tests(include_str!("css-parsing-tests/rule_list.json")) |input| { - let iter = &mut tokenize(input); - let mut rules = ~[]; - loop { - match parse_rule(iter) { - None => break, - Some(result) => rules.push(result_to_json(result)), - } - } - json::List(rules) + parse_rule_list(tokenize(input)).to_owned_vec() + } +} + + +#[test] +fn stylesheet() { + do run_json_tests(include_str!("css-parsing-tests/stylesheet.json")) |input| { + parse_stylesheet(tokenize(input)).to_owned_vec() } } @@ -138,15 +127,15 @@ fn rule_list() { #[test] fn one_rule() { do run_json_tests(include_str!("css-parsing-tests/one_rule.json")) |input| { - result_to_json(parse_one_rule(&mut tokenize(input))) + parse_one_rule(tokenize(input)) } } fn run_color_tests(json_data: &str, to_json: &fn(result: Option) -> json::Json) { do run_json_tests(json_data) |input| { - match parse_one_component_value(&mut tokenize(input)) { - Ok((component_value, _location)) => to_json(parse_color(&component_value)), + match parse_one_component_value(tokenize(input)) { + Ok(component_value) => to_json(parse_color(&component_value)), Err(_reason) => json::Null, } } @@ -159,10 +148,10 @@ fn color3() { } -//#[test] -//fn color3_hsl() { -// run_color_tests(include_str!("css-parsing-tests/color3_hsl.json"), |c| c.to_json()) -//} +#[test] +fn color3_hsl() { + run_color_tests(include_str!("css-parsing-tests/color3_hsl.json"), |c| c.to_json()) +} /// color3_keywords.json is different: R, G and B are in 0..255 rather than 0..1 @@ -178,10 +167,42 @@ fn color3_keywords() { } -fn result_to_json(result: Result) -> json::Json { - match result { - Ok(ref a) => a.to_json(), - Err(ref b) => b.to_json(), +impl ToJson for Result { + fn to_json(&self) -> json::Json { + match *self { + Ok(ref a) => a.to_json(), + Err(ref b) => b.to_json(), + } + } +} + + +impl ToJson for Result { + fn to_json(&self) -> json::Json { + match *self { + Ok(ref a) => a.to_json(), + Err(ref b) => b.to_json(), + } + } +} + + +impl ToJson for Result { + fn to_json(&self) -> json::Json { + match *self { + Ok(ref a) => a.to_json(), + Err(ref b) => b.to_json(), + } + } +} + + +impl ToJson for Result { + fn to_json(&self) -> json::Json { + match *self { + Ok(ref a) => a.to_json(), + Err(ref b) => b.to_json(), + } } } diff --git a/tokenizer.rs b/tokenizer.rs index f9af9da9..87df3934 100644 --- a/tokenizer.rs +++ b/tokenizer.rs @@ -10,6 +10,7 @@ use std::ascii::eq_ignore_ascii_case; use ast::*; +/// Returns a Iterator<(ComponentValue, SourceLocation)> pub fn tokenize(input: &str) -> Parser { let input = preprocess(input); Parser { From 574898de0257215585c63fa63eac2335139ae507 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Aug 2013 13:48:51 +0100 Subject: [PATCH 06/14] Flatten the exported API --- cssparser.rc | 13 +++++++++---- parser.rs | 2 +- tests.rs | 7 ++----- tokenizer.rs | 10 +++++----- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/cssparser.rc b/cssparser.rc index 303b4d8b..d070a02b 100644 --- a/cssparser.rc +++ b/cssparser.rc @@ -7,10 +7,15 @@ extern mod extra; -pub mod tokenizer; -pub mod parser; -pub mod ast; -pub mod color; +pub use ast::*; +pub use tokenizer::*; +pub use parser::*; +pub use color::*; + +mod ast; +mod tokenizer; +mod parser; +mod color; #[cfg(test)] mod tests; diff --git a/parser.rs b/parser.rs index 5f8e988e..6c75c7ea 100644 --- a/parser.rs +++ b/parser.rs @@ -23,7 +23,7 @@ use ast::*; /// Parse top-level of a CSS stylesheet. /// Return a Iterator> #[inline] -pub fn parse_stylesheet>(iter: T) -> StylesheetParser { +pub fn parse_stylesheet_rules>(iter: T) -> StylesheetParser { StylesheetParser(iter) } diff --git a/tests.rs b/tests.rs index cefd6fee..64d6e31c 100644 --- a/tests.rs +++ b/tests.rs @@ -6,10 +6,7 @@ use std::{io, os, str, run, task}; use extra::{tempfile, json}; use extra::json::ToJson; -use ast::*; -use tokenizer::tokenize; -use parser::*; -use color::*; +use super::*; fn write_whole_file(path: &Path, data: &str) { @@ -119,7 +116,7 @@ fn rule_list() { #[test] fn stylesheet() { do run_json_tests(include_str!("css-parsing-tests/stylesheet.json")) |input| { - parse_stylesheet(tokenize(input)).to_owned_vec() + parse_stylesheet_rules(tokenize(input)).to_owned_vec() } } diff --git a/tokenizer.rs b/tokenizer.rs index 87df3934..33731c66 100644 --- a/tokenizer.rs +++ b/tokenizer.rs @@ -47,11 +47,11 @@ fn test_preprocess() { struct Parser { - input: ~str, - length: uint, // All counted in bytes, not characters - position: uint, // All counted in bytes, not characters - line: uint, - last_line_start: uint, // All counted in bytes, not characters + priv input: ~str, + priv length: uint, // All counted in bytes, not characters + priv position: uint, // All counted in bytes, not characters + priv line: uint, + priv last_line_start: uint, // All counted in bytes, not characters } From 8a6143606dd953be3e073e5cc205311aaec786e4 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Aug 2013 13:51:26 +0100 Subject: [PATCH 07/14] Rename tokenizer::Parser to tokenizer::Tokenizer --- tokenizer.rs | 417 ++++++++++++++++++++++++++------------------------- 1 file changed, 210 insertions(+), 207 deletions(-) diff --git a/tokenizer.rs b/tokenizer.rs index 33731c66..6a510fcf 100644 --- a/tokenizer.rs +++ b/tokenizer.rs @@ -11,9 +11,9 @@ use ast::*; /// Returns a Iterator<(ComponentValue, SourceLocation)> -pub fn tokenize(input: &str) -> Parser { +pub fn tokenize(input: &str) -> Tokenizer { let input = preprocess(input); - Parser { + Tokenizer { length: input.len(), input: input, position: 0, @@ -22,7 +22,7 @@ pub fn tokenize(input: &str) -> Parser { } } -impl Iterator for Parser { +impl Iterator for Tokenizer { #[inline] pub fn next(&mut self) -> Option { next_component_value(self) } } @@ -46,7 +46,7 @@ fn test_preprocess() { } -struct Parser { +struct Tokenizer { priv input: ~str, priv length: uint, // All counted in bytes, not characters priv position: uint, // All counted in bytes, not characters @@ -55,7 +55,7 @@ struct Parser { } -impl Parser { +impl Tokenizer { #[inline] fn is_eof(&self) -> bool { self.position >= self.length } @@ -97,152 +97,155 @@ macro_rules! is_match( ) -fn next_component_value(parser: &mut Parser) -> Option { - consume_comments(parser); - if parser.is_eof() { +fn next_component_value(tokenizer: &mut Tokenizer) -> Option { + consume_comments(tokenizer); + if tokenizer.is_eof() { if cfg!(test) { - assert!(parser.line == parser.input.split_iter('\n').len_(), - "The tokenizer is missing a parser.new_line() call somewhere.") + assert!(tokenizer.line == tokenizer.input.split_iter('\n').len_(), + "The tokenizer is missing a tokenizer.new_line() call somewhere.") } return None } let start_location = SourceLocation{ - line: parser.line, + line: tokenizer.line, // The start of the line is column 1: - column: parser.position - parser.last_line_start + 1, + column: tokenizer.position - tokenizer.last_line_start + 1, }; - let c = parser.current_char(); + let c = tokenizer.current_char(); let component_value = match c { '\t' | '\n' | ' ' => { - while !parser.is_eof() { - match parser.current_char() { - ' ' | '\t' => parser.position += 1, + while !tokenizer.is_eof() { + match tokenizer.current_char() { + ' ' | '\t' => tokenizer.position += 1, '\n' => { - parser.position += 1; - parser.new_line(); + tokenizer.position += 1; + tokenizer.new_line(); }, _ => break, } } WhiteSpace }, - '"' => consume_string(parser, false), + '"' => consume_string(tokenizer, false), '#' => { - parser.position += 1; - if is_ident_start(parser) { IDHash(consume_name(parser)) } - else if !parser.is_eof() && match parser.current_char() { + tokenizer.position += 1; + if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) } + else if !tokenizer.is_eof() && match tokenizer.current_char() { 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' => true, - '\\' => !parser.starts_with("\\\n"), + '\\' => !tokenizer.starts_with("\\\n"), _ => c > '\x7F', // Non-ASCII - } { Hash(consume_name(parser)) } + } { Hash(consume_name(tokenizer)) } else { Delim(c) } }, '$' => { - if parser.starts_with("$=") { parser.position += 2; SuffixMatch } - else { parser.position += 1; Delim(c) } + if tokenizer.starts_with("$=") { tokenizer.position += 2; SuffixMatch } + else { tokenizer.position += 1; Delim(c) } }, - '\'' => consume_string(parser, true), - '(' => ParenthesisBlock(consume_block(parser, CloseParenthesis)), - ')' => { parser.position += 1; CloseParenthesis }, + '\'' => consume_string(tokenizer, true), + '(' => ParenthesisBlock(consume_block(tokenizer, CloseParenthesis)), + ')' => { tokenizer.position += 1; CloseParenthesis }, '*' => { - if parser.starts_with("*=") { parser.position += 2; SubstringMatch } - else { parser.position += 1; Delim(c) } + if tokenizer.starts_with("*=") { tokenizer.position += 2; SubstringMatch } + else { tokenizer.position += 1; Delim(c) } }, '+' => { if ( - parser.position + 1 < parser.length - && is_match!(parser.char_at(1), '0'..'9') + tokenizer.position + 1 < tokenizer.length + && is_match!(tokenizer.char_at(1), '0'..'9') ) || ( - parser.position + 2 < parser.length - && parser.char_at(1) == '.' - && is_match!(parser.char_at(2), '0'..'9') + tokenizer.position + 2 < tokenizer.length + && tokenizer.char_at(1) == '.' + && is_match!(tokenizer.char_at(2), '0'..'9') ) { - consume_numeric(parser) + consume_numeric(tokenizer) } else { - parser.position += 1; + tokenizer.position += 1; Delim(c) } }, - ',' => { parser.position += 1; Comma }, + ',' => { tokenizer.position += 1; Comma }, '-' => { if ( - parser.position + 1 < parser.length - && is_match!(parser.char_at(1), '0'..'9') + tokenizer.position + 1 < tokenizer.length + && is_match!(tokenizer.char_at(1), '0'..'9') ) || ( - parser.position + 2 < parser.length - && parser.char_at(1) == '.' - && is_match!(parser.char_at(2), '0'..'9') + tokenizer.position + 2 < tokenizer.length + && tokenizer.char_at(1) == '.' + && is_match!(tokenizer.char_at(2), '0'..'9') ) { - consume_numeric(parser) - } else if is_ident_start(parser) { - consume_ident_like(parser) - } else if parser.starts_with("-->") { - parser.position += 3; + consume_numeric(tokenizer) + } else if is_ident_start(tokenizer) { + consume_ident_like(tokenizer) + } else if tokenizer.starts_with("-->") { + tokenizer.position += 3; CDC } else { - parser.position += 1; + tokenizer.position += 1; Delim(c) } }, '.' => { - if parser.position + 1 < parser.length && is_match!(parser.char_at(1), '0'..'9') { - consume_numeric(parser) + if ( + tokenizer.position + 1 < tokenizer.length + && is_match!(tokenizer.char_at(1), '0'..'9') + ) { + consume_numeric(tokenizer) } else { - parser.position += 1; + tokenizer.position += 1; Delim(c) } } - '0'..'9' => consume_numeric(parser), - ':' => { parser.position += 1; Colon }, - ';' => { parser.position += 1; Semicolon }, + '0'..'9' => consume_numeric(tokenizer), + ':' => { tokenizer.position += 1; Colon }, + ';' => { tokenizer.position += 1; Semicolon }, '<' => { - if parser.starts_with(" // Function - Function(~str, ~[Node]), // name, arguments + Function(~str, ~[ComponentValue]), // name, arguments // Simple block - ParenthesisBlock(~[Node]), // (…) - SquareBracketBlock(~[Node]), // […] + ParenthesisBlock(~[ComponentValue]), // (…) + SquareBracketBlock(~[ComponentValue]), // […] CurlyBracketBlock(~[Node]), // {…} // These are always invalid @@ -73,14 +73,14 @@ pub enum ComponentValue { pub struct Declaration { location: SourceLocation, name: ~str, - value: ~[Node], + value: ~[ComponentValue], important: bool, } #[deriving(Eq)] pub struct QualifiedRule { location: SourceLocation, - prelude: ~[Node], + prelude: ~[ComponentValue], block: ~[Node], } @@ -88,7 +88,7 @@ pub struct QualifiedRule { pub struct AtRule { location: SourceLocation, name: ~str, - prelude: ~[Node], + prelude: ~[ComponentValue], block: Option<~[Node]>, } @@ -124,19 +124,19 @@ pub trait SkipWhitespaceIterable<'self> { pub fn skip_whitespace(self) -> SkipWhitespaceIterator<'self>; } -impl<'self> SkipWhitespaceIterable<'self> for &'self [Node] { +impl<'self> SkipWhitespaceIterable<'self> for &'self [ComponentValue] { pub fn skip_whitespace(self) -> SkipWhitespaceIterator<'self> { SkipWhitespaceIterator{ iter: self.iter() } } } struct SkipWhitespaceIterator<'self> { - iter: vec::VecIterator<'self, Node>, + iter: vec::VecIterator<'self, ComponentValue>, } impl<'self> Iterator<&'self ComponentValue> for SkipWhitespaceIterator<'self> { fn next(&mut self) -> Option<&'self ComponentValue> { - for &(ref component_value, _) in self.iter { + for component_value in self.iter { if component_value != &WhiteSpace { return Some(component_value) } } None diff --git a/color.rs b/color.rs index d0acb0f7..153905a0 100644 --- a/color.rs +++ b/color.rs @@ -76,7 +76,7 @@ fn parse_color_hash(value: &str) -> Option { #[inline] -fn parse_color_function(name: &str, arguments: &[(ComponentValue, SourceLocation)]) +fn parse_color_function(name: &str, arguments: &[ComponentValue]) -> Option { let lower_name = to_ascii_lower(name); diff --git a/parser.rs b/parser.rs index 6c75c7ea..6ad50bbe 100644 --- a/parser.rs +++ b/parser.rs @@ -64,8 +64,8 @@ pub fn parse_one_rule>(iter: T) -> Result { pub fn parse_one_declaration>(mut iter: T) -> Result { match next_non_whitespace(&mut iter) { None => Err(ErrEmptyInput), - Some(item) => { - let result = parse_declaration(&mut iter, item); + Some((component_value, location)) => { + let result = parse_declaration(&mut iter, component_value, location); if result.is_err() || next_non_whitespace(&mut iter).is_none() { result } else { Err(ErrExtraInput) } } @@ -113,7 +113,7 @@ impl> Iterator> for StylesheetParser match component_value { WhiteSpace | CDO | CDC => (), AtKeyword(name) => return Some(Ok(AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_qualified_rule(iter, (component_value, location)) { + _ => return Some(match parse_qualified_rule(iter, component_value, location) { Ok(rule) => Ok(QualifiedRule(rule)), Err(reason) => Err(reason), }), @@ -131,7 +131,7 @@ impl> Iterator> for RuleListParser (), AtKeyword(name) => return Some(Ok(AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_qualified_rule(iter, (component_value, location)) { + _ => return Some(match parse_qualified_rule(iter, component_value, location) { Ok(rule) => Ok(QualifiedRule(rule)), Err(reason) => Err(reason), }), @@ -151,7 +151,7 @@ for DeclarationListParser { WhiteSpace | Semicolon => (), AtKeyword(name) => return Some(Ok(Decl_AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_declaration(iter, (component_value, location)) { + _ => return Some(match parse_declaration(iter, component_value, location) { Ok(declaration) => Ok(Declaration(declaration)), Err(reason) => { // Find the end of the declaration @@ -170,40 +170,42 @@ fn parse_at_rule>(iter: &mut T, name: ~str, location: SourceLo -> AtRule { let mut prelude = ~[]; let mut block = None; - for_iter!(iter, (component_value, location), { + for_iter!(iter, (component_value, _location), { match component_value { CurlyBracketBlock(content) => { block = Some(content); break }, Semicolon => break, - component_value => prelude.push((component_value, location)), + component_value => prelude.push(component_value), } }) AtRule {location: location, name: name, prelude: prelude, block: block} } -fn parse_qualified_rule>(iter: &mut T, first: Node) - -> Result { +fn parse_qualified_rule>(iter: &mut T, first: ComponentValue, + location: SourceLocation) + -> Result { match first { - (CurlyBracketBlock(content), location) + CurlyBracketBlock(content) => return Ok(QualifiedRule { location: location, prelude: ~[], block: content }), _ => (), } let mut prelude = ~[first]; - for_iter!(iter, (component_value, location), { + for_iter!(iter, (component_value, _location), { match component_value { CurlyBracketBlock(content) => return Ok(QualifiedRule {location: location, prelude: prelude, block: content}), - component_value => prelude.push((component_value, location)), + component_value => prelude.push(component_value), } }) Err(ErrMissingQualifiedRuleBlock) } -fn parse_declaration>(iter: &mut T, first: Node) - -> Result { - let (name, location) = match first { - (Ident(name), location) => (name, location), +fn parse_declaration>(iter: &mut T, first: ComponentValue, + location: SourceLocation) + -> Result { + let name = match first { + Ident(name) => name, _ => return Err(ErrInvalidDeclarationSyntax) }; match next_non_whitespace(iter) { @@ -212,7 +214,7 @@ fn parse_declaration>(iter: &mut T, first: Node) } let mut value = ~[]; let mut important = false; - for_iter!(iter, (component_value, location), { + for_iter!(iter, (component_value, _location), { match component_value { Semicolon => break, Delim('!') => if parse_declaration_important(iter) { @@ -221,7 +223,7 @@ fn parse_declaration>(iter: &mut T, first: Node) } else { return Err(ErrInvalidBangImportantSyntax) }, - component_value => value.push((component_value, location)), + component_value => value.push(component_value), } }) Ok(Declaration{location: location, name: name, value: value, important: important}) diff --git a/tests.rs b/tests.rs index 64d6e31c..8d37bb26 100644 --- a/tests.rs +++ b/tests.rs @@ -259,8 +259,7 @@ impl ToJson for AtRule { match *self { AtRule{name: ref name, prelude: ref prelude, block: ref block, _} => json::List(~[json::String(~"at-rule"), name.to_json(), - json::List(list_to_json(prelude)), - block.map(list_to_json).to_json()]) + prelude.to_json(), block.map(list_to_json).to_json()]) } } } @@ -271,7 +270,7 @@ impl ToJson for QualifiedRule { match *self { QualifiedRule{prelude: ref prelude, block: ref block, _} => json::List(~[json::String(~"qualified rule"), - json::List(list_to_json(prelude)), json::List(list_to_json(block))]) + prelude.to_json(), json::List(list_to_json(block))]) } } } @@ -282,7 +281,7 @@ impl ToJson for Declaration { match *self { Declaration{name: ref name, value: ref value, important: ref important, _} => json::List(~[json::String(~"declaration"), name.to_json(), - json::List(list_to_json(value)), important.to_json()]) + value.to_json(), important.to_json()]) } } } @@ -335,11 +334,11 @@ impl ToJson for ComponentValue { CDC => JString(~"-->"), Function(ref name, ref arguments) - => JList(~[JString(~"function"), name.to_json()] + list_to_json(arguments)), + => JList(~[JString(~"function"), name.to_json()] + arguments.map(|a| a.to_json())), ParenthesisBlock(ref content) - => JList(~[JString(~"()")] + list_to_json(content)), + => JList(~[JString(~"()")] + content.map(|c| c.to_json())), SquareBracketBlock(ref content) - => JList(~[JString(~"[]")] + list_to_json(content)), + => JList(~[JString(~"[]")] + content.map(|c| c.to_json())), CurlyBracketBlock(ref content) => JList(~[JString(~"{}")] + list_to_json(content)), diff --git a/tokenizer.rs b/tokenizer.rs index 6a510fcf..4ce2ea4a 100644 --- a/tokenizer.rs +++ b/tokenizer.rs @@ -230,7 +230,7 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option { if tokenizer.starts_with("^=") { tokenizer.position += 2; PrefixMatch } else { tokenizer.position += 1; Delim(c) } }, - '{' => CurlyBracketBlock(consume_block(tokenizer, CloseCurlyBracket)), + '{' => CurlyBracketBlock(consume_block_with_location(tokenizer, CloseCurlyBracket)), '|' => { if tokenizer.starts_with("|=") { tokenizer.position += 2; DashMatch } else if tokenizer.starts_with("||") { tokenizer.position += 2; Column } @@ -274,7 +274,23 @@ fn consume_comments(tokenizer: &mut Tokenizer) { } -fn consume_block(tokenizer: &mut Tokenizer, ending_token: ComponentValue) -> ~[Node] { +fn consume_block(tokenizer: &mut Tokenizer, ending_token: ComponentValue) -> ~[ComponentValue] { + tokenizer.position += 1; // Skip the initial {[( + let mut content = ~[]; + loop { + match next_component_value(tokenizer) { + Some((component_value, _location)) => { + if component_value == ending_token { break } + else { content.push(component_value) } + }, + None => break, + } + } + content +} + + +fn consume_block_with_location(tokenizer: &mut Tokenizer, ending_token: ComponentValue) -> ~[Node] { tokenizer.position += 1; // Skip the initial {[( let mut content = ~[]; loop { From a4b758745ae895e0d9600f98e3f14f2d5d5220f1 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Aug 2013 17:38:57 +0100 Subject: [PATCH 09/14] Add SourceLocation to syntax errors. --- ast.rs | 12 +++++++-- parser.rs | 80 ++++++++++++++++++++++++++++++++++--------------------- tests.rs | 30 ++++++++++----------- 3 files changed, 74 insertions(+), 48 deletions(-) diff --git a/ast.rs b/ast.rs index 4d8bbb4d..8afa5f40 100644 --- a/ast.rs +++ b/ast.rs @@ -105,6 +105,12 @@ pub enum Rule { AtRule(AtRule), } +#[deriving(Eq)] +pub struct SyntaxError { + location: SourceLocation, + reason: ErrorReason, +} + #[deriving(Eq)] pub enum ErrorReason { ErrEmptyInput, // Parsing a single "thing", found only whitespace. @@ -115,8 +121,10 @@ pub enum ErrorReason { // This is meant to be extended } -impl ToStr for ErrorReason { - fn to_str(&self) -> ~str { fmt!("%?", self) } +impl ToStr for SyntaxError { + fn to_str(&self) -> ~str { + fmt!("%u:%u %?", self.location.line, self.location.column, self.reason) + } } diff --git a/parser.rs b/parser.rs index 6ad50bbe..7b21e259 100644 --- a/parser.rs +++ b/parser.rs @@ -21,7 +21,7 @@ use ast::*; /// Parse top-level of a CSS stylesheet. -/// Return a Iterator> +/// Return a Iterator> #[inline] pub fn parse_stylesheet_rules>(iter: T) -> StylesheetParser { StylesheetParser(iter) @@ -29,7 +29,7 @@ pub fn parse_stylesheet_rules>(iter: T) -> StylesheetParser /// Parse a non-top level list of rules eg. the content of an @media rule. -/// Return a Iterator> +/// Return a Iterator> #[inline] pub fn parse_rule_list>(iter: T) -> RuleListParser { RuleListParser(iter) @@ -38,7 +38,7 @@ pub fn parse_rule_list>(iter: T) -> RuleListParser { /// Parse a list of declarations and at-rules, /// like @page in CSS 2.1, all declaration lists in level 3 -/// Return a Iterator> +/// Return a Iterator> #[inline] pub fn parse_declaration_list>(iter: T) -> DeclarationListParser { DeclarationListParser(iter) @@ -47,13 +47,16 @@ pub fn parse_declaration_list>(iter: T) -> DeclarationListPars /// Parse a single rule. /// Used eg. for CSSRuleList.insertRule() -pub fn parse_one_rule>(iter: T) -> Result { +pub fn parse_one_rule>(iter: T) -> Result { let mut parser = RuleListParser(iter); match parser.next() { - None => Err(ErrEmptyInput), + None => error(START_LOCATION, ErrEmptyInput), Some(result) => { - if result.is_err() || next_non_whitespace(&mut *parser).is_none() { result } - else { Err(ErrExtraInput) } + if result.is_err() { result } + else { match next_non_whitespace(&mut *parser) { + None => result, + Some((_component_value, location)) => error(location, ErrExtraInput), + }} } } } @@ -61,13 +64,16 @@ pub fn parse_one_rule>(iter: T) -> Result { /// Parse a single declaration (not an at-rule) /// Used eg. in @supports -pub fn parse_one_declaration>(mut iter: T) -> Result { +pub fn parse_one_declaration>(mut iter: T) -> Result { match next_non_whitespace(&mut iter) { - None => Err(ErrEmptyInput), + None => error(START_LOCATION, ErrEmptyInput), Some((component_value, location)) => { let result = parse_declaration(&mut iter, component_value, location); - if result.is_err() || next_non_whitespace(&mut iter).is_none() { result } - else { Err(ErrExtraInput) } + if result.is_err() { result } + else { match next_non_whitespace(&mut iter) { + None => result, + Some((_component_value, location)) => error(location, ErrExtraInput), + }} } } } @@ -76,12 +82,14 @@ pub fn parse_one_declaration>(mut iter: T) -> Result>(mut iter: T) - -> Result { + -> Result { match next_non_whitespace(&mut iter) { - None => Err(ErrEmptyInput), + None => error(START_LOCATION, ErrEmptyInput), Some((component_value, _location)) => { - if next_non_whitespace(&mut iter).is_none() { Ok(component_value) } - else { Err(ErrExtraInput) } + match next_non_whitespace(&mut iter) { + None => Ok(component_value), + Some((_component_value, location)) => error(location, ErrExtraInput), + } } } } @@ -106,8 +114,8 @@ macro_rules! for_iter( ) -impl> Iterator> for StylesheetParser { - fn next(&mut self) -> Option> { +impl> Iterator> for StylesheetParser { + fn next(&mut self) -> Option> { let iter = &mut **self; for_iter!(iter, (component_value, location), { match component_value { @@ -115,7 +123,7 @@ impl> Iterator> for StylesheetParser AtKeyword(name) => return Some(Ok(AtRule(parse_at_rule(iter, name, location)))), _ => return Some(match parse_qualified_rule(iter, component_value, location) { Ok(rule) => Ok(QualifiedRule(rule)), - Err(reason) => Err(reason), + Err(e) => Err(e), }), } }) @@ -124,8 +132,8 @@ impl> Iterator> for StylesheetParser } -impl> Iterator> for RuleListParser { - fn next(&mut self) -> Option> { +impl> Iterator> for RuleListParser { + fn next(&mut self) -> Option> { let iter = &mut **self; for_iter!(iter, (component_value, location), { match component_value { @@ -133,7 +141,7 @@ impl> Iterator> for RuleListParser return Some(Ok(AtRule(parse_at_rule(iter, name, location)))), _ => return Some(match parse_qualified_rule(iter, component_value, location) { Ok(rule) => Ok(QualifiedRule(rule)), - Err(reason) => Err(reason), + Err(e) => Err(e), }), } }) @@ -142,9 +150,9 @@ impl> Iterator> for RuleListParser> Iterator> +impl> Iterator> for DeclarationListParser { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { let iter = &mut **self; for_iter!(iter, (component_value, location), { match component_value { @@ -153,10 +161,10 @@ for DeclarationListParser { => return Some(Ok(Decl_AtRule(parse_at_rule(iter, name, location)))), _ => return Some(match parse_declaration(iter, component_value, location) { Ok(declaration) => Ok(Declaration(declaration)), - Err(reason) => { + Err(e) => { // Find the end of the declaration for (v, _) in *iter { if v == Semicolon { break } } - Err(reason) + Err(e) } }), } @@ -183,7 +191,7 @@ fn parse_at_rule>(iter: &mut T, name: ~str, location: SourceLo fn parse_qualified_rule>(iter: &mut T, first: ComponentValue, location: SourceLocation) - -> Result { + -> Result { match first { CurlyBracketBlock(content) => return Ok(QualifiedRule { location: location, prelude: ~[], block: content }), @@ -197,20 +205,20 @@ fn parse_qualified_rule>(iter: &mut T, first: ComponentValue, component_value => prelude.push(component_value), } }) - Err(ErrMissingQualifiedRuleBlock) + error(location, ErrMissingQualifiedRuleBlock) } fn parse_declaration>(iter: &mut T, first: ComponentValue, location: SourceLocation) - -> Result { + -> Result { let name = match first { Ident(name) => name, - _ => return Err(ErrInvalidDeclarationSyntax) + _ => return error(location, ErrInvalidDeclarationSyntax) }; match next_non_whitespace(iter) { Some((Colon, _)) => (), - _ => return Err(ErrInvalidDeclarationSyntax), + _ => return error(location, ErrInvalidDeclarationSyntax), } let mut value = ~[]; let mut important = false; @@ -221,7 +229,7 @@ fn parse_declaration>(iter: &mut T, first: ComponentValue, important = true; break } else { - return Err(ErrInvalidBangImportantSyntax) + return error(location, ErrInvalidBangImportantSyntax) }, component_value => value.push(component_value), } @@ -252,3 +260,13 @@ fn next_non_whitespace>(iter: &mut T) -> Option { } None } + + +#[inline] +fn error(location: SourceLocation, reason: ErrorReason) -> Result { + Err(SyntaxError{location: location, reason: reason}) +} + + +// When parsing one thing on an empty input +static START_LOCATION: SourceLocation = SourceLocation{ line: 1, column: 1 }; diff --git a/tests.rs b/tests.rs index 8d37bb26..2b02c34a 100644 --- a/tests.rs +++ b/tests.rs @@ -164,7 +164,7 @@ fn color3_keywords() { } -impl ToJson for Result { +impl ToJson for Result { fn to_json(&self) -> json::Json { match *self { Ok(ref a) => a.to_json(), @@ -174,7 +174,7 @@ impl ToJson for Result { } -impl ToJson for Result { +impl ToJson for Result { fn to_json(&self) -> json::Json { match *self { Ok(ref a) => a.to_json(), @@ -184,7 +184,7 @@ impl ToJson for Result { } -impl ToJson for Result { +impl ToJson for Result { fn to_json(&self) -> json::Json { match *self { Ok(ref a) => a.to_json(), @@ -194,7 +194,7 @@ impl ToJson for Result { } -impl ToJson for Result { +impl ToJson for Result { fn to_json(&self) -> json::Json { match *self { Ok(ref a) => a.to_json(), @@ -204,23 +204,23 @@ impl ToJson for Result { } -impl ToJson for Color { +impl ToJson for SyntaxError { fn to_json(&self) -> json::Json { - match *self { - RGBA(r, g, b, a) => (~[r, g, b, a]).to_json(), - CurrentColor => json::String(~"currentColor"), - } + json::List(~[json::String(~"error"), json::String(match self.reason { + ErrEmptyInput => ~"empty", + ErrExtraInput => ~"extra-input", + _ => ~"invalid", + })]) } } -impl ToJson for ErrorReason { +impl ToJson for Color { fn to_json(&self) -> json::Json { - json::List(~[json::String(~"error"), json::String(match *self { - ErrEmptyInput => ~"empty", - ErrExtraInput => ~"extra-input", - _ => ~"invalid", - })]) + match *self { + RGBA(r, g, b, a) => (~[r, g, b, a]).to_json(), + CurrentColor => json::String(~"currentColor"), + } } } From 9d8273abf8c99f1841e944b802bd873f6cf317f5 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Aug 2013 18:38:40 +0100 Subject: [PATCH 10/14] Add ~[ComponentValue].consume_skip_whitespace() --- ast.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ast.rs b/ast.rs index 8afa5f40..22bc6e79 100644 --- a/ast.rs +++ b/ast.rs @@ -150,3 +150,27 @@ impl<'self> Iterator<&'self ComponentValue> for SkipWhitespaceIterator<'self> { None } } + + +pub trait ConsumeSkipWhitespaceIterable { + pub fn consume_skip_whitespace(self) -> ConsumeSkipWhitespaceIterator; +} + +impl ConsumeSkipWhitespaceIterable for ~[ComponentValue] { + pub fn consume_skip_whitespace(self) -> ConsumeSkipWhitespaceIterator { + ConsumeSkipWhitespaceIterator{ iter: self.consume_iter() } + } +} + +struct ConsumeSkipWhitespaceIterator { + iter: vec::ConsumeIterator, +} + +impl Iterator for ConsumeSkipWhitespaceIterator { + fn next(&mut self) -> Option { + for component_value in self.iter { + if component_value != WhiteSpace { return Some(component_value) } + } + None + } +} From 33ff92850bddd710a2303e12fc5e7a004c85d4c1 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Aug 2013 18:39:48 +0100 Subject: [PATCH 11/14] Update the README --- README.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5608c027..eb1c6ccf 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,14 @@ rust-cssparser ============== -WIP rust implementation of the 2013 version of +Rust implementation of the 2013 version of [css3-syntax](http://dev.w3.org/csswg/css3-syntax/) TODO ---- -* [x] Tokenization -* [x] Declaration and rule parsing -* [ ] Detect character encoding & decode from bytes -* [ ] Track line/column number for tokens. -* [ ] Figure out float and integer overflow -* [ ] Make it fast! +* Detect character encoding & decode from bytes +* Figure out float and integer overflow +* Serialize tokens back to CSS +* Make it fast! From 7ef8a5bc6f459929fe2f6b3142707bd6c7f851b0 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 9 Aug 2013 11:15:08 +0100 Subject: [PATCH 12/14] Typo fix. --- ast.rs | 2 +- tokenizer.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ast.rs b/ast.rs index 22bc6e79..eb7ea4c5 100644 --- a/ast.rs +++ b/ast.rs @@ -43,7 +43,7 @@ pub enum ComponentValue { Colon, // : Semicolon, // ; Comma, // , - IncludeMath, // ~= + IncludeMatch, // ~= DashMatch, // |= PrefixMatch, // ^= SuffixMatch, // $= diff --git a/tokenizer.rs b/tokenizer.rs index 4ce2ea4a..f3988637 100644 --- a/tokenizer.rs +++ b/tokenizer.rs @@ -238,7 +238,7 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option { }, '}' => { tokenizer.position += 1; CloseCurlyBracket }, '~' => { - if tokenizer.starts_with("~=") { tokenizer.position += 2; IncludeMath } + if tokenizer.starts_with("~=") { tokenizer.position += 2; IncludeMatch } else { tokenizer.position += 1; Delim(c) } }, _ => { From bb35329bd6afc5e592645d8e163b5d6d0e0d6f7f Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 9 Aug 2013 11:16:11 +0100 Subject: [PATCH 13/14] Avoid tuple-like structs. See mozilla/rust#7899 --- parser.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/parser.rs b/parser.rs index 7b21e259..38538da4 100644 --- a/parser.rs +++ b/parser.rs @@ -24,7 +24,7 @@ use ast::*; /// Return a Iterator> #[inline] pub fn parse_stylesheet_rules>(iter: T) -> StylesheetParser { - StylesheetParser(iter) + StylesheetParser{ iter: iter } } @@ -32,7 +32,7 @@ pub fn parse_stylesheet_rules>(iter: T) -> StylesheetParser /// Return a Iterator> #[inline] pub fn parse_rule_list>(iter: T) -> RuleListParser { - RuleListParser(iter) + RuleListParser{ iter: iter } } @@ -41,19 +41,19 @@ pub fn parse_rule_list>(iter: T) -> RuleListParser { /// Return a Iterator> #[inline] pub fn parse_declaration_list>(iter: T) -> DeclarationListParser { - DeclarationListParser(iter) + DeclarationListParser{ iter: iter } } /// Parse a single rule. /// Used eg. for CSSRuleList.insertRule() pub fn parse_one_rule>(iter: T) -> Result { - let mut parser = RuleListParser(iter); + let mut parser = RuleListParser{ iter: iter }; match parser.next() { None => error(START_LOCATION, ErrEmptyInput), Some(result) => { if result.is_err() { result } - else { match next_non_whitespace(&mut *parser) { + else { match next_non_whitespace(&mut parser.iter) { None => result, Some((_component_value, location)) => error(location, ErrExtraInput), }} @@ -98,9 +98,9 @@ pub fn parse_one_component_value>(mut iter: T) // *********** End of public API *********** -struct StylesheetParser(T); -struct RuleListParser(T); -struct DeclarationListParser(T); +struct StylesheetParser{ iter: T } +struct RuleListParser{ iter: T } +struct DeclarationListParser{ iter: T } // Work around "error: cannot borrow `*iter` as mutable more than once at a time" @@ -116,7 +116,7 @@ macro_rules! for_iter( impl> Iterator> for StylesheetParser { fn next(&mut self) -> Option> { - let iter = &mut **self; + let iter = &mut self.iter; for_iter!(iter, (component_value, location), { match component_value { WhiteSpace | CDO | CDC => (), @@ -134,7 +134,7 @@ impl> Iterator> for StylesheetParser impl> Iterator> for RuleListParser { fn next(&mut self) -> Option> { - let iter = &mut **self; + let iter = &mut self.iter; for_iter!(iter, (component_value, location), { match component_value { WhiteSpace => (), @@ -153,7 +153,7 @@ impl> Iterator> for RuleListParser> Iterator> for DeclarationListParser { fn next(&mut self) -> Option> { - let iter = &mut **self; + let iter = &mut self.iter; for_iter!(iter, (component_value, location), { match component_value { WhiteSpace | Semicolon => (), From a4ef7792d394295f77418f46a7c2ceac8f5e93c0 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 9 Aug 2013 14:32:26 +0100 Subject: [PATCH 14/14] Typo fix. --- tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.rs b/tests.rs index 2b02c34a..cc483166 100644 --- a/tests.rs +++ b/tests.rs @@ -324,7 +324,7 @@ impl ToJson for ComponentValue { Colon => JString(~":"), Semicolon => JString(~";"), Comma => JString(~","), - IncludeMath => JString(~"~="), + IncludeMatch => JString(~"~="), DashMatch => JString(~"|="), PrefixMatch => JString(~"^="), SuffixMatch => JString(~"$="),