Skip to content

Add <An+B> parsing. #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Aug 13, 2013
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Make the tokenizer an iterator.
  • Loading branch information
SimonSapin committed Aug 7, 2013
commit bff9dfc2b7292105c9c768676c41a5e1b16cfbad
38 changes: 19 additions & 19 deletions ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,16 @@ pub struct SourceLocation {
}


/// A component value paired with a `SourceLocation`.
pub type Node = (ComponentValue, SourceLocation); // TODO this is not a good name


#[deriving(Eq)]
pub enum ComponentValue {
// Preserved tokens. Same as in the tokenizer.
// Preserved tokens.
Ident(~str),
AtKeyword(~str),
Hash(~str),
IDHash(~str), // Hash token that is a valid ID selector.
IDHash(~str), // Hash that is a valid ID selector.
String(~str),
URL(~str),
Delim(char),
Expand All @@ -50,12 +53,12 @@ pub enum ComponentValue {
CDC, // -->

// Function
Function(~str, ~[(ComponentValue, SourceLocation)]), // name, arguments
Function(~str, ~[Node]), // name, arguments

// Simple block
ParenthesisBlock(~[(ComponentValue, SourceLocation)]), // (…)
SquareBracketBlock(~[(ComponentValue, SourceLocation)]), // […]
CurlyBracketBlock(~[(ComponentValue, SourceLocation)]), // {…}
ParenthesisBlock(~[Node]), // (…)
SquareBracketBlock(~[Node]), // […]
CurlyBracketBlock(~[Node]), // {…}

// These are always invalid
BadURL,
Expand All @@ -70,23 +73,23 @@ pub enum ComponentValue {
/// A declaration: a name, a value held as a list of component values with their
/// source locations, and an `important` flag, together with a source location.
pub struct Declaration {
location: SourceLocation,
name: ~str,
value: ~[(ComponentValue, SourceLocation)],
value: ~[Node],
important: bool,
}

/// A qualified rule: a prelude and a block, each held as a list of component
/// values with their source locations, together with a source location.
#[deriving(Eq)]
pub struct QualifiedRule {
location: SourceLocation,
prelude: ~[(ComponentValue, SourceLocation)],
block: ~[(ComponentValue, SourceLocation)],
prelude: ~[Node],
block: ~[Node],
}

/// An at-rule: a name, a prelude, and an optional block, together with a
/// source location. The prelude and block are lists of component values with
/// their source locations.
#[deriving(Eq)]
pub struct AtRule {
location: SourceLocation,
name: ~str,
prelude: ~[(ComponentValue, SourceLocation)],
block: Option<~[(ComponentValue, SourceLocation)]>,
prelude: ~[Node],
block: Option<~[Node]>,
}

#[deriving(Eq)]
Expand Down Expand Up @@ -121,24 +124,21 @@ pub trait SkipWhitespaceIterable<'self> {
pub fn skip_whitespace(self) -> SkipWhitespaceIterator<'self>;
}

// Lets a borrowed slice of (component value, source location) pairs produce a
// `SkipWhitespaceIterator` built from the slice's own iterator.
impl<'self> SkipWhitespaceIterable<'self> for &'self [(ComponentValue, SourceLocation)] {
impl<'self> SkipWhitespaceIterable<'self> for &'self [Node] {
pub fn skip_whitespace(self) -> SkipWhitespaceIterator<'self> {
SkipWhitespaceIterator{ iter: self.iter() }
}
}

/// Wraps a vector iterator over (component value, source location) pairs.
/// Its `Iterator` impl yields only the component values that are not `WhiteSpace`.
struct SkipWhitespaceIterator<'self> {
iter: vec::VecIterator<'self, (ComponentValue, SourceLocation)>,
iter: vec::VecIterator<'self, Node>,
}

// Yields references to the component values of the wrapped iterator, skipping
// `WhiteSpace` entries and discarding source locations. Returns `None` once the
// wrapped iterator is exhausted.
impl<'self> Iterator<&'self ComponentValue> for SkipWhitespaceIterator<'self> {
fn next(&mut self) -> Option<&'self ComponentValue> {
loop {
match self.iter.next() {
Some(&(WhiteSpace, _)) => (),  // whitespace: keep looking
Some(&(ref component_value, _)) => return Some(component_value),
None => return None
}
for &(ref component_value, _) in self.iter {
if component_value != &WhiteSpace { return Some(component_value) }
}
None
}
}
92 changes: 30 additions & 62 deletions parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,49 +11,9 @@


use std::iterator::Iterator;
use std::vec;
use std::ascii::eq_ignore_ascii_case;

use ast::*;
use tokenizer::*;


// TODO: Use a trait?
/// A source of `(ComponentValue, SourceLocation)` items, backed either by a
/// `Parser` or by a consuming iterator over an already-built vector.
enum ComponentValueIterator {
ParserIter(Parser),  // items are produced by calling `next_component_value` on the parser
VectorIter(vec::ConsumeIterator<(ComponentValue, SourceLocation)>),  // items are taken from the vector
}


impl ComponentValueIterator {
    /// Builds an iterator backed by a `Parser` created from `input`.
    #[inline]
    pub fn from_str(input: ~str) -> ComponentValueIterator {
        let parser = Parser::from_str(input);
        ParserIter(parser)
    }

    /// Builds an iterator that consumes an existing vector of items.
    #[inline]
    pub fn from_vector(values: ~[(ComponentValue, SourceLocation)]) -> ComponentValueIterator {
        let consumed = values.consume_iter();
        VectorIter(consumed)
    }

    /// Consumes items until one that is not `WhiteSpace` is found and returns it,
    /// or returns `None` if the iterator is exhausted first.
    #[inline]
    pub fn next_non_whitespace(&mut self) -> Option<(ComponentValue, SourceLocation)> {
        loop {
            match self.next() {
                // Whitespace is dropped; keep pulling items.
                Some((WhiteSpace, _)) => (),
                // Either a non-whitespace item or the end of input.
                other => return other,
            }
        }
    }
}


// Produces the next item from whichever source backs this iterator.
impl Iterator<(ComponentValue, SourceLocation)> for ComponentValueIterator {
    fn next(&mut self) -> Option<(ComponentValue, SourceLocation)> {
        match *self {
            ParserIter(ref mut source) => next_component_value(source),
            VectorIter(ref mut remaining) => remaining.next(),
        }
    }
}


// Work around "error: cannot borrow `*iter` as mutable more than once at a time"
Expand All @@ -68,7 +28,7 @@ macro_rules! for_iter(


/// Call repeatedly for the top-level of a CSS stylesheet
pub fn parse_stylesheet_rule(iter: &mut ComponentValueIterator) -> Option<Result<Rule, ErrorReason>> {
pub fn parse_stylesheet_rule<T: Iterator<Node>>(iter: &mut T) -> Option<Result<Rule, ErrorReason>> {
for_iter!(iter, (component_value, location), {
match component_value {
WhiteSpace | CDO | CDC => (),
Expand All @@ -85,7 +45,7 @@ pub fn parse_stylesheet_rule(iter: &mut ComponentValueIterator) -> Option<Result

/// Call repeatedly for a non-top level list of rules eg. the content of an @media rule.
/// Same as parse_stylesheet_rule() except for the handling of top-level CDO and CDC
pub fn parse_rule(iter: &mut ComponentValueIterator) -> Option<Result<Rule, ErrorReason>> {
pub fn parse_rule<T: Iterator<Node>>(iter: &mut T) -> Option<Result<Rule, ErrorReason>> {
for_iter!(iter, (component_value, location), {
match component_value {
WhiteSpace => (),
Expand All @@ -101,19 +61,19 @@ pub fn parse_rule(iter: &mut ComponentValueIterator) -> Option<Result<Rule, Erro


/// Used eg. for CSSRuleList.insertRule()
///
/// Parses exactly one rule from `iter`. Returns `Err(ErrEmptyInput)` when
/// `parse_rule` yields nothing, passes a parse error through unchanged, and
/// returns `Err(ErrExtraInput)` when a successfully parsed rule is followed by
/// any non-whitespace item.
pub fn parse_one_rule(iter: &mut ComponentValueIterator) -> Result<Rule, ErrorReason> {
pub fn parse_one_rule<T: Iterator<Node>>(iter: &mut T) -> Result<Rule, ErrorReason> {
match parse_rule(iter) {
None => Err(ErrEmptyInput),
Some(result) => if result.is_err() || iter.next_non_whitespace().is_none() { result }
Some(result) => if result.is_err() || next_non_whitespace(iter).is_none() { result }
else { Err(ErrExtraInput) }
}
}


/// Call repeatedly for a list of declarations.
/// @page in CSS 2.1, all declaration lists in level 3
pub fn parse_declaration_or_at_rule(iter: &mut ComponentValueIterator)
-> Option<Result<DeclarationListItem, ErrorReason>> {
pub fn parse_declaration_or_at_rule<T: Iterator<Node>>(iter: &mut T)
-> Option<Result<DeclarationListItem, ErrorReason>> {
for_iter!(iter, (component_value, location), {
match component_value {
WhiteSpace | Semicolon => (),
Expand All @@ -133,25 +93,24 @@ pub fn parse_declaration_or_at_rule(iter: &mut ComponentValueIterator)


/// Used eg. in @supports
///
/// Parses exactly one declaration from `iter`, skipping leading whitespace.
/// Returns `Err(ErrEmptyInput)` when there is no non-whitespace item, passes
/// an error from `parse_declaration` through unchanged, and returns
/// `Err(ErrExtraInput)` when a successfully parsed declaration is followed by
/// any non-whitespace item.
pub fn parse_one_declaration(iter: &mut ComponentValueIterator) -> Result<Declaration, ErrorReason> {
match iter.next_non_whitespace() {
pub fn parse_one_declaration<T: Iterator<Node>>(iter: &mut T) -> Result<Declaration, ErrorReason> {
match next_non_whitespace(iter) {
None => Err(ErrEmptyInput),
Some(item) => {
let result = parse_declaration(iter, item);
if result.is_err() || iter.next_non_whitespace().is_none() { result }
if result.is_err() || next_non_whitespace(iter).is_none() { result }
else { Err(ErrExtraInput) }
}
}
}


/// Used eg. in attr(foo, color)
pub fn parse_one_component_value(iter: &mut ComponentValueIterator)
-> Result<(ComponentValue, SourceLocation), ErrorReason> {
match iter.next_non_whitespace() {
pub fn parse_one_component_value<T: Iterator<Node>>(iter: &mut T) -> Result<Node, ErrorReason> {
match next_non_whitespace(iter) {
None => Err(ErrEmptyInput),
Some(item) => {
if iter.next_non_whitespace().is_none() { Ok(item) }
if next_non_whitespace(iter).is_none() { Ok(item) }
else { Err(ErrExtraInput) }
}
}
Expand All @@ -161,7 +120,7 @@ pub fn parse_one_component_value(iter: &mut ComponentValueIterator)
// *********** End of public API ***********


fn parse_at_rule(iter: &mut ComponentValueIterator, name: ~str, location: SourceLocation)
fn parse_at_rule<T: Iterator<Node>>(iter: &mut T, name: ~str, location: SourceLocation)
-> AtRule {
let mut prelude = ~[];
let mut block = None;
Expand All @@ -176,8 +135,8 @@ fn parse_at_rule(iter: &mut ComponentValueIterator, name: ~str, location: Source
}


fn parse_qualified_rule(iter: &mut ComponentValueIterator, first: (ComponentValue, SourceLocation))
-> Result<QualifiedRule, ErrorReason> {
fn parse_qualified_rule<T: Iterator<Node>>(iter: &mut T, first: Node)
-> Result<QualifiedRule, ErrorReason> {
match first {
(CurlyBracketBlock(content), location)
=> return Ok(QualifiedRule { location: location, prelude: ~[], block: content }),
Expand All @@ -195,13 +154,13 @@ fn parse_qualified_rule(iter: &mut ComponentValueIterator, first: (ComponentValu
}


fn parse_declaration(iter: &mut ComponentValueIterator, first: (ComponentValue, SourceLocation))
-> Result<Declaration, ErrorReason> {
fn parse_declaration<T: Iterator<Node>>(iter: &mut T, first: Node)
-> Result<Declaration, ErrorReason> {
let (name, location) = match first {
(Ident(name), location) => (name, location),
_ => return Err(ErrInvalidDeclarationSyntax)
};
match iter.next_non_whitespace() {
match next_non_whitespace(iter) {
Some((Colon, _)) => (),
_ => return Err(ErrInvalidDeclarationSyntax),
}
Expand All @@ -224,15 +183,24 @@ fn parse_declaration(iter: &mut ComponentValueIterator, first: (ComponentValue,


/// Checks whether the upcoming items in `iter` form the `important` keyword.
///
/// Returns `true` only when the next non-whitespace item is an identifier that
/// matches "important" under `eq_ignore_ascii_case`, and the non-whitespace
/// item after it is a semicolon or the end of input. Returns `false` otherwise.
/// The items inspected are consumed from `iter`.
#[inline]
fn parse_declaration_important(iter: &mut ComponentValueIterator) -> bool {
let ident_value = match iter.next_non_whitespace() {
fn parse_declaration_important<T: Iterator<Node>>(iter: &mut T) -> bool {
let ident_value = match next_non_whitespace(iter) {
Some((Ident(value), _)) => value,
_ => return false,
};
if !eq_ignore_ascii_case(ident_value, "important") { return false }
match iter.next_non_whitespace() {
match next_non_whitespace(iter) {
Some((Semicolon, _)) => true,
None => true,
_ => false
}
}


/// Consumes items from `iter` until one that is not `WhiteSpace` is found and
/// returns it, or returns `None` if the iterator is exhausted first.
#[inline]
fn next_non_whitespace<T: Iterator<Node>>(iter: &mut T) -> Option<Node> {
    loop {
        match iter.next() {
            // Whitespace is dropped; keep pulling items.
            Some((WhiteSpace, _)) => (),
            // Either a non-whitespace node or the end of input.
            other => return other,
        }
    }
}
24 changes: 9 additions & 15 deletions tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use extra::{tempfile, json};
use extra::json::ToJson;

use ast::*;
use tokenizer::*;
use tokenizer::tokenize;
use parser::*;
use color::*;

Expand Down Expand Up @@ -79,14 +79,8 @@ fn run_json_tests(json_data: &str, parse: &fn (input: ~str) -> json::Json) {
// Runs the component_value_list.json fixtures: for each input, collects every
// component value produced (dropping source locations) and converts the list
// to JSON for `run_json_tests`.
#[test]
fn component_value_list() {
do run_json_tests(include_str!("css-parsing-tests/component_value_list.json")) |input| {
let parser = &mut Parser::from_str(input);
let mut results = ~[];
loop {
match next_component_value(parser) {
Some((c, _)) => results.push(c),
None => break,
}
}
for (c, _) in &mut tokenize(input) { results.push(c) }
results.to_json()
}
}
Expand All @@ -95,16 +89,16 @@ fn component_value_list() {
// Runs the one_component_value.json fixtures through `parse_one_component_value`,
// keeping only the component value (not its source location) of a successful result.
#[test]
fn one_component_value() {
do run_json_tests(include_str!("css-parsing-tests/one_component_value.json")) |input| {
let iter = &mut ComponentValueIterator::from_str(input);
result_to_json(parse_one_component_value(iter).chain(|(c, _)| Ok(c)))
let result = parse_one_component_value(&mut tokenize(input));
result_to_json(result.chain(|(c, _)| Ok(c)))
}
}


#[test]
fn declaration_list() {
do run_json_tests(include_str!("css-parsing-tests/declaration_list.json")) |input| {
let iter = &mut ComponentValueIterator::from_str(input);
let iter = &mut tokenize(input);
let mut declarations = ~[];
loop {
match parse_declaration_or_at_rule(iter) {
Expand All @@ -120,15 +114,15 @@ fn declaration_list() {
// Runs the one_declaration.json fixtures through `parse_one_declaration`.
#[test]
fn one_declaration() {
do run_json_tests(include_str!("css-parsing-tests/one_declaration.json")) |input| {
result_to_json(parse_one_declaration(&mut ComponentValueIterator::from_str(input)))
result_to_json(parse_one_declaration(&mut tokenize(input)))
}
}


#[test]
fn rule_list() {
do run_json_tests(include_str!("css-parsing-tests/rule_list.json")) |input| {
let iter = &mut ComponentValueIterator::from_str(input);
let iter = &mut tokenize(input);
let mut rules = ~[];
loop {
match parse_rule(iter) {
Expand All @@ -144,14 +138,14 @@ fn rule_list() {
// Runs the one_rule.json fixtures through `parse_one_rule`.
#[test]
fn one_rule() {
do run_json_tests(include_str!("css-parsing-tests/one_rule.json")) |input| {
result_to_json(parse_one_rule(&mut ComponentValueIterator::from_str(input)))
result_to_json(parse_one_rule(&mut tokenize(input)))
}
}


fn run_color_tests(json_data: &str, to_json: &fn(result: Option<Color>) -> json::Json) {
do run_json_tests(json_data) |input| {
match parse_one_component_value(&mut ComponentValueIterator::from_str(input)) {
match parse_one_component_value(&mut tokenize(input)) {
Ok((component_value, _location)) => to_json(parse_color(&component_value)),
Err(_reason) => json::Null,
}
Expand Down
Loading