Skip to content

Commit 2f08b15

Browse files
committed
Simplify match block removing code for multi-byte chars
1 parent 0656606 commit 2f08b15

File tree

1 file changed

+7
-28
lines changed

1 file changed

+7
-28
lines changed

src/tokenizer.rs

+7-28
Original file line number | Diff line number | Diff line change
@@ -43,36 +43,15 @@ fn preprocess(input: &str) -> String {
4343
let bytes = input.as_bytes();
4444
let mut result: Vec<u8> = Vec::with_capacity(bytes.len());
4545
let mut last: u8 = 0;
46-
let mut offset: uint = 0;
47-
while offset < bytes.len() {
48-
let byte = bytes[offset];
49-
match byte {
50-
b'\n' if last == b'\r' => (),
51-
b'\r' | b'\n' | b'\x0C' => result.push(b'\n'),
52-
b'\0' => result.push_all("\u{FFFD}".as_bytes()),
53-
_ if byte < 128 => result.push(byte),
54-
_ => {
55-
// Multi-byte character
56-
result.push(byte);
57-
let remaining = bytes.len() - offset;
58-
if remaining >= 3 && byte >= 0xF0 {
59-
result.push(bytes[offset + 1]);
60-
result.push(bytes[offset + 2]);
61-
result.push(bytes[offset + 3]);
62-
offset += 3;
63-
} else if remaining >= 2 && byte >= 0xE0 {
64-
result.push(bytes[offset + 1]);
65-
result.push(bytes[offset + 2]);
66-
offset += 2;
67-
} else if remaining >= 1 && byte >= 0xC0 {
68-
result.push(bytes[offset + 1]);
69-
offset += 1;
70-
}
71-
}
46+
for byte in bytes.iter() {
47+
match *byte {
48+
b'\n' if last == b'\r' => (),
49+
b'\r' | b'\x0C' => result.push(b'\n'),
50+
b'\0' => result.push_all("\u{FFFD}".as_bytes()),
51+
_ => result.push(*byte),
7252
}
7353

74-
last = byte;
75-
offset += 1;
54+
last = *byte;
7655
}
7756

7857
unsafe { String::from_utf8_unchecked(result) }

0 commit comments

Comments (0)