Skip to content

Commit b021271

Browse files
committed
Track source line numbers.
1 parent 62f3149 commit b021271

File tree

3 files changed

+73
-29
lines changed

3 files changed

+73
-29
lines changed

ast.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@ pub struct NumericValue {
1313

1414
#[deriving(Eq)]
1515
pub struct SourceLocation {
16-
// line: uint,
17-
// column: uint,
18-
position: uint,
16+
line: uint,
17+
column: uint,
1918
}
2019

2120

cssparser.rc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ pub mod ast;
1515
mod tests;
1616

1717

18+
#[cfg(not(test))]
19+
static CFG_TEST: bool = false;
20+
21+
#[cfg(test)]
22+
static CFG_TEST: bool = true;
23+
24+
1825
/// Return whether `string` is an ASCII case-insensitive match for `reference`,
1926
/// where `reference` is already in ASCII lower-case.
2027
pub fn eq_ascii_lower(string: &str, reference: &str) -> bool {
@@ -28,16 +35,9 @@ pub fn eq_ascii_lower(string: &str, reference: &str) -> bool {
2835
true
2936
}
3037

31-
#[cfg(not(test))]
32-
#[inline]
33-
fn check_reference(_reference: &str) {}
34-
#[cfg(test)]
35-
#[inline]
36-
fn check_reference(reference: &str) {
38+
if CFG_TEST {
3739
assert!(eq(reference, reference), ~"Reference must be ASCII lower case.");
3840
}
39-
check_reference(reference);
40-
4141
string.len() == reference.len() && eq(string, reference)
4242
}
4343

tokenizer.rs

Lines changed: 63 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,8 @@ struct Parser {
1414
input: ~str,
1515
length: uint, // All counted in bytes, not characters
1616
position: uint, // All counted in bytes, not characters
17-
// TODO: add these in tokens
18-
// priv line: uint,
19-
// priv column: uint, // All counted in bytes, not characters
17+
line: uint,
18+
last_line_start: uint, // All counted in bytes, not characters
2019
}
2120

2221

@@ -27,8 +26,8 @@ impl Parser {
2726
length: input.len(),
2827
input: input,
2928
position: 0,
30-
// line: 1,
31-
// column: 1,
29+
line: 1,
30+
last_line_start: 0,
3231
}
3332
}
3433
}
@@ -43,15 +42,28 @@ macro_rules! is_match(
4342

4443
pub fn next_component_value(parser: &mut Parser) -> Option<(ComponentValue, SourceLocation)> {
4544
consume_comments(parser);
46-
if parser.is_eof() { return None }
47-
let start_location = SourceLocation{position: parser.position};
45+
if parser.is_eof() {
46+
if CFG_TEST {
47+
assert!(parser.line == parser.input.split_iter('\n').len_(),
48+
"The tokenizer is missing a parser.new_line() call somewhere.")
49+
}
50+
return None
51+
}
52+
let start_location = SourceLocation{
53+
line: parser.line,
54+
// The start of the line is column 1:
55+
column: parser.position - parser.last_line_start + 1,
56+
};
4857
let c = parser.current_char();
4958
let component_value = match c {
5059
'\t' | '\n' | ' ' => {
51-
parser.position += 1;
5260
while !parser.is_eof() {
5361
match parser.current_char() {
54-
'\t' | '\n' | ' ' => parser.position += 1,
62+
' ' | '\t' => parser.position += 1,
63+
'\n' => {
64+
parser.position += 1;
65+
parser.new_line();
66+
},
5567
_ => break,
5668
}
5769
}
@@ -184,6 +196,13 @@ pub fn next_component_value(parser: &mut Parser) -> Option<(ComponentValue, Sour
184196
// *********** End of public API ***********
185197

186198

199+
#[cfg(not(test))]
200+
static CFG_TEST: bool = false;
201+
202+
#[cfg(test)]
203+
static CFG_TEST: bool = true;
204+
205+
187206
#[inline]
188207
fn preprocess(input: &str) -> ~str {
189208
// TODO: Is this faster if done in one pass?
@@ -223,17 +242,33 @@ impl Parser {
223242
fn starts_with(&self, needle: &str) -> bool {
224243
self.input.slice_from(self.position).starts_with(needle)
225244
}
245+
246+
#[inline]
247+
fn new_line(&mut self) {
248+
if CFG_TEST {
249+
assert!(self.input.char_at(self.position - 1) == '\n')
250+
}
251+
self.line += 1;
252+
self.last_line_start = self.position;
253+
}
226254
}
227255

228256

229257
#[inline]
230258
fn consume_comments(parser: &mut Parser) {
231259
while parser.starts_with("/*") {
232260
parser.position += 2; // +2 to consume "/*"
233-
match parser.input.slice_from(parser.position).find_str("*/") {
234-
// +2 to consume "*/"
235-
Some(offset) => parser.position += offset + 2,
236-
None => parser.position = parser.length // EOF
261+
while !parser.is_eof() {
262+
match parser.consume_char() {
263+
'*' => {
264+
if !parser.is_eof() && parser.current_char() == '/' {
265+
parser.position += 1;
266+
break
267+
}
268+
},
269+
'\n' => parser.new_line(),
270+
_ => ()
271+
}
237272
}
238273
}
239274
}
@@ -264,6 +299,7 @@ fn consume_string(parser: &mut Parser, single_quote: bool) -> ComponentValue {
264299
}
265300

266301

302+
// Return None on syntax error (i.e. an unescaped newline)
267303
fn consume_quoted_string(parser: &mut Parser, single_quote: bool) -> Option<~str> {
268304
parser.position += 1; // Skip the initial quote
269305
let mut string: ~str = ~"";
@@ -277,7 +313,10 @@ fn consume_quoted_string(parser: &mut Parser, single_quote: bool) -> Option<~str
277313
},
278314
'\\' => {
279315
if !parser.is_eof() {
280-
if parser.current_char() == '\n' { parser.position += 1 } // Escaped newline
316+
if parser.current_char() == '\n' { // Escaped newline
317+
parser.position += 1;
318+
parser.new_line();
319+
}
281320
else { string.push_char(consume_escape(parser)) }
282321
}
283322
// else: escaped EOF, do nothing.
@@ -427,8 +466,11 @@ fn consume_url(parser: &mut Parser) -> ComponentValue {
427466
let mut string = ~"";
428467
while !parser.is_eof() {
429468
let next_char = match parser.consume_char() {
430-
'\t' | '\n' | ' '
431-
=> return consume_url_end(parser, string),
469+
' ' | '\t' => return consume_url_end(parser, string),
470+
'\n' => {
471+
parser.new_line();
472+
return consume_url_end(parser, string)
473+
},
432474
')' => break,
433475
'\x00'..'\x08' | '\x0B' | '\x0E'..'\x1F' | '\x7F' // non-printable
434476
| '"' | '\'' | '(' => return consume_bad_url(parser),
@@ -448,7 +490,8 @@ fn consume_url(parser: &mut Parser) -> ComponentValue {
448490
fn consume_url_end(parser: &mut Parser, string: ~str) -> ComponentValue {
449491
while !parser.is_eof() {
450492
match parser.consume_char() {
451-
'\t' | '\n' | ' ' => (),
493+
' ' | '\t' => (),
494+
'\n' => parser.new_line(),
452495
')' => break,
453496
_ => return consume_bad_url(parser)
454497
}
@@ -462,6 +505,7 @@ fn consume_url(parser: &mut Parser) -> ComponentValue {
462505
match parser.consume_char() {
463506
')' => break,
464507
'\\' => parser.position += 1, // Skip an escaped ')' or '\'
508+
'\n' => parser.new_line(),
465509
_ => ()
466510
}
467511
}
@@ -538,7 +582,8 @@ fn consume_escape(parser: &mut Parser) -> char {
538582
}
539583
if !parser.is_eof() {
540584
match parser.current_char() {
541-
'\t' | '\n' | ' ' => parser.position += 1,
585+
' ' | '\t' => parser.position += 1,
586+
'\n' => { parser.position += 1; parser.new_line() },
542587
_ => ()
543588
}
544589
}

0 commit comments

Comments
 (0)