Change data structures and APIs to count line numbers eagerly · servo/rust-cssparser@e1ff8c1 · GitHub

Commit e1ff8c1

Change data structures and APIs to count line numbers eagerly
1 parent 17e9f0f · commit e1ff8c1

9 files changed: +149 −127 lines changed

Cargo.toml

Lines changed: 1 addition & 2 deletions

@@ -1,7 +1,6 @@
 [package]
-
 name = "cssparser"
-version = "0.18.2"
+version = "0.19.0"
 authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]
 
 description = "Rust implementation of CSS Syntax Level 3"

src/lib.rs

Lines changed: 2 additions & 2 deletions

@@ -80,7 +80,7 @@ fn parse_border_spacing(_context: &ParserContext, input: &mut Parser)
 
 pub use cssparser_macros::*;
 
-pub use tokenizer::{Token, SourceLocation};
+pub use tokenizer::{Token, SourcePosition, SourceLocation};
 pub use rules_and_declarations::{parse_important};
 pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration};
 pub use rules_and_declarations::{RuleListParser, parse_one_rule, PreciseParseError};
@@ -89,7 +89,7 @@ pub use from_bytes::{stylesheet_encoding, EncodingSupport};
 pub use color::{RGBA, Color, parse_color_keyword};
 pub use nth::parse_nth;
 pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
-pub use parser::{Parser, Delimiter, Delimiters, SourcePosition, ParseError, BasicParseError, ParserInput};
+pub use parser::{Parser, Delimiter, Delimiters, ParserState, ParseError, BasicParseError, ParserInput};
 pub use unicode_range::UnicodeRange;
 pub use cow_rc_str::CowRcStr;
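
With these re-exports, `SourcePosition` is now taken from the tokenizer module and the parser module exposes the new `ParserState` type, but downstream crates keep importing both from the crate root. A minimal sketch of consumer code under the new API (the helper function and its name are illustrative, not part of the crate):

// Hypothetical downstream helper; only the cssparser items are real API.
extern crate cssparser;
use cssparser::{Parser, ParserInput, SourceLocation};

fn location_of_first_token(css: &str) -> Option<SourceLocation> {
    let mut input = ParserInput::new(css);
    let mut parser = Parser::new(&mut input);
    // With eager counting, the location comes from counters the tokenizer
    // already maintains instead of a re-scan of the input.
    let location = parser.current_source_location();
    parser.next().ok().map(|_| location)
}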

src/nth.rs

Lines changed: 2 additions & 2 deletions

@@ -69,13 +69,13 @@ pub fn parse_nth<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(i32, i32), Basic
 
 
 fn parse_b<'i, 't>(input: &mut Parser<'i, 't>, a: i32) -> Result<(i32, i32), BasicParseError<'i>> {
-    let start_position = input.position();
+    let start = input.state();
     match input.next() {
         Ok(&Token::Delim('+')) => parse_signless_b(input, a, 1),
         Ok(&Token::Delim('-')) => parse_signless_b(input, a, -1),
         Ok(&Token::Number { has_sign: true, int_value: Some(b), .. }) => Ok((a, b)),
         _ => {
-            input.reset(start_position);
+            input.reset(&start);
             Ok((a, 0))
         }
     }
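
The `parse_b` change shows the backtracking pattern that replaces `position()`/`reset(position)` throughout the crate: capture a full `ParserState` with `Parser::state()`, then pass it back by reference to `Parser::reset` so the eagerly tracked line and column are rewound together with the byte offset. A hedged sketch of the same pattern in caller code (the function and the token it probes for are invented for illustration):

use cssparser::{Parser, Token};

// Consume one optional comma, backtracking if the next token is anything else.
// Mirrors the save/restore structure of parse_b above.
fn skip_optional_comma<'i, 't>(input: &mut Parser<'i, 't>) -> bool {
    let start = input.state();   // snapshot: byte offset + line counters + block flag
    match input.next() {
        Ok(&Token::Comma) => true,
        _ => {
            input.reset(&start); // rewind everything the snapshot captured
            false
        }
    }
}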

src/parser.rs

Lines changed: 58 additions & 35 deletions

@@ -6,20 +6,40 @@ use cow_rc_str::CowRcStr;
 use std::ops::Range;
 use std::ascii::AsciiExt;
 use std::ops::BitOr;
-use tokenizer::{self, Token, Tokenizer, SourceLocation};
+use tokenizer::{Token, Tokenizer, SourcePosition, SourceLocation};
 
 
 /// A capture of the internal state of a `Parser` (including the position within the input),
 /// obtained from the `Parser::position` method.
 ///
 /// Can be used with the `Parser::reset` method to restore that state.
 /// Should only be used with the `Parser` instance it came from.
-#[derive(PartialEq, Eq, Debug, Clone, Copy)]
-pub struct SourcePosition {
-    position: tokenizer::SourcePosition,
-    at_start_of: Option<BlockType>,
+#[derive(Debug, Clone)]
+pub struct ParserState {
+    pub(crate) position: usize,
+    pub(crate) current_line_start_position: usize,
+    pub(crate) current_line_number: u32,
+    pub(crate) at_start_of: Option<BlockType>,
 }
 
+impl ParserState {
+    /// The position from the start of the input, counted in UTF-8 bytes.
+    #[inline]
+    pub fn position(&self) -> SourcePosition {
+        SourcePosition(self.position)
+    }
+
+    /// The line number and column number
+    #[inline]
+    pub fn source_location(&self) -> SourceLocation {
+        SourceLocation {
+            line: self.current_line_number,
+            column: (self.position - self.current_line_start_position) as u32,
+        }
+    }
+}
+
+
 /// The funamental parsing errors that can be triggered by built-in parsing routines.
 #[derive(Clone, Debug, PartialEq)]
 pub enum BasicParseError<'a> {
@@ -68,8 +88,8 @@ pub struct ParserInput<'i> {
 
 struct CachedToken<'i> {
     token: Token<'i>,
-    start_position: tokenizer::SourcePosition,
-    end_position: tokenizer::SourcePosition,
+    start_position: SourcePosition,
+    end_state: ParserState,
 }
 
 impl<'i> ParserInput<'i> {
@@ -100,7 +120,7 @@ pub struct Parser<'i: 't, 't> {
 
 
 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
-enum BlockType {
+pub(crate) enum BlockType {
     Parenthesis,
     SquareBracket,
     CurlyBracket,
@@ -224,24 +244,38 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// This ignores whitespace and comments.
     #[inline]
     pub fn expect_exhausted(&mut self) -> Result<(), BasicParseError<'i>> {
-        let start_position = self.position();
+        let start = self.state();
         let result = match self.next() {
             Err(BasicParseError::EndOfInput) => Ok(()),
             Err(e) => unreachable!("Unexpected error encountered: {:?}", e),
             Ok(t) => Err(BasicParseError::UnexpectedToken(t.clone())),
         };
-        self.reset(start_position);
+        self.reset(&start);
         result
     }
 
+    /// Return the current position within the input.
+    ///
+    /// This can be used with the `Parser::slice` and `slice_from` methods.
+    #[inline]
+    pub fn position(&self) -> SourcePosition {
+        self.input.tokenizer.position()
+    }
+
+    /// The current line number and column number.
+    #[inline]
+    pub fn current_source_location(&self) -> SourceLocation {
+        self.input.tokenizer.current_source_location()
+    }
+
     /// Return the current internal state of the parser (including position within the input).
     ///
     /// This state can later be restored with the `Parser::reset` method.
     #[inline]
-    pub fn position(&self) -> SourcePosition {
-        SourcePosition {
-            position: self.input.tokenizer.position(),
+    pub fn state(&self) -> ParserState {
+        ParserState {
             at_start_of: self.at_start_of,
+            .. self.input.tokenizer.state()
         }
     }
 
@@ -250,9 +284,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
     ///
     /// Should only be used with `SourcePosition` values from the same `Parser` instance.
     #[inline]
-    pub fn reset(&mut self, new_position: SourcePosition) {
-        self.input.tokenizer.reset(new_position.position);
-        self.at_start_of = new_position.at_start_of;
+    pub fn reset(&mut self, state: &ParserState) {
+        self.input.tokenizer.reset(state);
+        self.at_start_of = state.at_start_of;
     }
 
     /// Start looking for `var()` functions. (See the `.seen_var_functions()` method.)
@@ -289,36 +323,24 @@ impl<'i: 't, 't> Parser<'i, 't> {
     #[inline]
     pub fn try<F, T, E>(&mut self, thing: F) -> Result<T, E>
     where F: FnOnce(&mut Parser<'i, 't>) -> Result<T, E> {
-        let start_position = self.position();
+        let start = self.state();
         let result = thing(self);
         if result.is_err() {
-            self.reset(start_position)
+            self.reset(&start)
         }
         result
     }
 
     /// Return a slice of the CSS input
     #[inline]
     pub fn slice(&self, range: Range<SourcePosition>) -> &'i str {
-        self.input.tokenizer.slice(range.start.position..range.end.position)
+        self.input.tokenizer.slice(range)
     }
 
     /// Return a slice of the CSS input, from the given position to the current one.
     #[inline]
     pub fn slice_from(&self, start_position: SourcePosition) -> &'i str {
-        self.input.tokenizer.slice_from(start_position.position)
-    }
-
-    /// Return the line and column number within the input for the current position.
-    #[inline]
-    pub fn current_source_location(&self) -> SourceLocation {
-        self.input.tokenizer.current_source_location()
-    }
-
-    /// Return the line and column number within the input for the given position.
-    #[inline]
-    pub fn source_location(&self, target: SourcePosition) -> SourceLocation {
-        self.input.tokenizer.source_location(target.position)
+        self.input.tokenizer.slice_from(start_position)
     }
 
     /// Return the next token in the input that is neither whitespace or a comment,
@@ -374,8 +396,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
         let token_start_position = self.input.tokenizer.position();
         let token;
         match self.input.cached_token {
-            Some(ref cached_token) if cached_token.start_position == token_start_position => {
-                self.input.tokenizer.reset(cached_token.end_position);
+            Some(ref cached_token)
+            if cached_token.start_position == token_start_position => {
+                self.input.tokenizer.reset(&cached_token.end_state);
                 match cached_token.token {
                     Token::Dimension { ref unit, .. } => self.input.tokenizer.see_dimension(unit),
                     Token::Function(ref name) => self.input.tokenizer.see_function(name),
@@ -388,7 +411,7 @@ impl<'i: 't, 't> Parser<'i, 't> {
                 self.input.cached_token = Some(CachedToken {
                     token: new_token,
                     start_position: token_start_position,
-                    end_position: self.input.tokenizer.position(),
+                    end_state: self.input.tokenizer.state(),
                 });
                 token = self.input.cached_token_ref()
             }
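
Taken together, the `parser.rs` changes make `ParserState` the single snapshot type: it records the byte offset, the byte offset of the current line start, the current line number, and the block-nesting flag, and both `Parser::reset` and the token cache (`CachedToken::end_state`) restore all of them at once. Because the tokenizer keeps those counters up to date as it advances, turning a state into a `SourceLocation` is just the subtraction in `ParserState::source_location`. A small self-contained sketch of that arithmetic, using stand-in types rather than the library's own:

// Stand-ins for ParserState / SourceLocation; only the arithmetic matters here.
#[derive(Debug, Clone)]
struct State {
    position: usize,                    // byte offset from the start of the input
    current_line_start_position: usize, // byte offset where the current line begins
    current_line_number: u32,           // newlines seen so far
}

#[derive(Debug)]
struct Location {
    line: u32,
    column: u32,
}

impl State {
    // Same computation as ParserState::source_location in the diff above:
    // constant time, no re-scan of the input.
    fn source_location(&self) -> Location {
        Location {
            line: self.current_line_number,
            column: (self.position - self.current_line_start_position) as u32,
        }
    }
}

fn main() {
    // For the input "a {\n  color: red }", byte 4 is the first byte after the
    // newline, so a state at byte 9 sits 5 bytes into the second line.
    let state = State { position: 9, current_line_start_position: 4, current_line_number: 1 };
    let loc = state.source_location();
    println!("line {}, column {}", loc.line, loc.column); // line 1, column 5
}

Caching a full end state in `CachedToken` (rather than just an end position) follows from the same design: when the cache rewinds the tokenizer to the end of a previously read token, the line counters have to be restored too, which is what the tokenizer's `reset(&state)` now does.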
