@@ -1049,3 +1049,53 @@ fn roundtrip_percentage_token() {
1049
1049
}
1050
1050
}
1051
1051
}
1052
+
1053
#[test]
fn utf16_columns() {
    // This test serves two purposes. First, it checks that the column
    // number computations are correct. Second, it checks that tokenizer
    // code paths correctly differentiate between the different UTF-8
    // encoding bytes. In particular, different leader bytes and
    // continuation bytes are treated differently, so every encoded length
    // is covered by using the string "QΡ✈🆒" (1-, 2-, 3- and 4-byte
    // sequences respectively).
    //
    // Columns are counted in UTF-16 code units, so the 4-byte sequence
    // "🆒" contributes two columns. Each entry pairs a CSS input with the
    // column expected once the whole input has been consumed; for inputs
    // containing "\r\n" only the text after the line break counts.
    //
    // The literals below use Rust escapes: "\\Q" is the CSS escape `\Q`.
    // No padding may be inserted after an escape, because every extra
    // character would shift the expected column.
    let tests = [
        ("", 0),
        ("ascii", 5),
        ("/*QΡ✈🆒*/", 9),
        ("'QΡ✈🆒*'", 8),
        ("\"\\\"'QΡ✈🆒*'", 11),
        ("\\Q\\Ρ\\✈\\🆒", 9),
        ("QΡ✈🆒", 5),
        ("QΡ✈🆒\\Q\\Ρ\\✈\\🆒", 14),
        ("newline\r\nQΡ✈🆒", 5),
        ("url(QΡ✈🆒\\Q\\Ρ\\✈\\🆒)", 19),
        ("url(QΡ✈🆒)", 10),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒)", 15),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒", 14),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒 x", 16),
        ("QΡ✈🆒()", 7),
        // Test that under/over-flow of current_line_start_position is
        // handled properly; see the special case in consume_4byte_intro.
        ("🆒", 2),
    ];

    for &(css, expected_column) in tests.iter() {
        let mut input = ParserInput::new(css);
        let mut parser = Parser::new(&mut input);

        // Drain the token stream; only the final position matters here.
        loop {
            match parser.next() {
                Err(BasicParseError::EndOfInput) => break,
                // Any other error means the input was not tokenized as
                // intended, so the column check below would be meaningless.
                Err(_) => panic!("unexpected parse error while tokenizing {:?}", css),
                Ok(_) => {}
            }
        }

        // Check the resulting column, naming the input on failure so the
        // offending table row is easy to find.
        assert_eq!(
            parser.current_source_location().column,
            expected_column,
            "wrong final column for input {:?}",
            css
        );
    }
}
0 commit comments