Skip to content

Commit 7560c3a

Browse files
author
bors-servo
authored
Auto merge of servo#192 - tromey:utf-16-columns, r=SimonSapin
Utf 16 columns This series changes columns numbers to be reported in units of UTF-16 characters. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/rust-cssparser/192) <!-- Reviewable:end -->
2 parents ada0c6a + f721368 commit 7560c3a

File tree

3 files changed

+238
-107
lines changed

3 files changed

+238
-107
lines changed

src/parser.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ pub fn parse_until_after<'i: 't, 't, F, T, E>(parser: &mut Parser<'i, 't>,
811811
let next_byte = (parser.input.tokenizer).next_byte();
812812
if next_byte.is_some() && !parser.stop_before.contains(Delimiters::from_byte(next_byte)) {
813813
debug_assert!(delimiters.contains(Delimiters::from_byte(next_byte)));
814+
// We know this byte is ASCII.
814815
(parser.input.tokenizer).advance(1);
815816
if next_byte == Some(b'{') {
816817
consume_until_end_of_block(BlockType::CurlyBracket, &mut parser.input.tokenizer);

src/tests.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,3 +1049,53 @@ fn roundtrip_percentage_token() {
10491049
}
10501050
}
10511051
}
#[test]
fn utf16_columns() {
    // This test pulls double duty. It verifies that column numbers
    // are computed correctly, and it exercises the tokenizer paths
    // that distinguish the different UTF-8 byte classes: leader
    // bytes of every length and continuation bytes are handled
    // differently, so the probe string "QΡ✈🆒" deliberately contains
    // characters encoded in 1, 2, 3, and 4 bytes. Columns are
    // counted in UTF-16 code units, which is why the 4-byte
    // character "🆒" advances the column by two.
    let tests = vec![
        ("", 0),
        ("ascii", 5),
        ("/*QΡ✈🆒*/", 9),
        ("'QΡ✈🆒*'", 8),
        ("\"\\\"'QΡ✈🆒*'", 11),
        ("\\Q\\Ρ\\\\🆒", 9),
        ("QΡ✈🆒", 5),
        ("QΡ✈🆒\\Q\\Ρ\\\\🆒", 14),
        ("newline\r\nQΡ✈🆒", 5),
        ("url(QΡ✈🆒\\Q\\Ρ\\\\🆒)", 19),
        ("url(QΡ✈🆒)", 10),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\\\🆒)", 15),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\\\🆒", 14),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\\\🆒 x", 16),
        ("QΡ✈🆒()", 7),
        // Exercise under/over-flow of current_line_start_position;
        // see the special case in consume_4byte_intro.
        ("🆒", 2),
    ];

    for (css, expected_column) in tests {
        let mut input = ParserInput::new(css);
        let mut parser = Parser::new(&mut input);

        // Drain every token until the input is exhausted; any error
        // other than EndOfInput is a test failure.
        loop {
            match parser.next() {
                Ok(_) => {}
                Err(BasicParseError::EndOfInput) => {
                    break;
                }
                Err(_) => {
                    assert!(false);
                }
            };
        }

        // The final source location's column must match the
        // expected UTF-16 column count.
        assert_eq!(parser.current_source_location().column, expected_column);
    }
}

0 commit comments

Comments
 (0)