Skip to content

Commit dd699d4

Browse files
committed
Add tests for character encodings / parsing from bytes.
1 parent f5baaa6 commit dd699d4

File tree

2 files changed

+144
-0
lines changed

2 files changed

+144
-0
lines changed

README.rst

+18
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,24 @@ associated with the expected result.
9191
The Unicode input is represented by a JSON string,
9292
the output as a list of `qualified rules`_ or at-rules_.
9393

94+
``stylesheet_bytes.json``
95+
Tests `Parse a stylesheet
96+
<http://dev.w3.org/csswg/css-syntax-3/#parse-a-stylesheet>`_
97+
together with `The input byte stream
98+
<http://dev.w3.org/csswg/css-syntax/#input-byte-stream>`_.
99+
The input is represented as a JSON object containing:
100+
101+
* A required ``css_bytes``, the input byte string,
102+
represented as a JSON string where code points U+0000 to U+00FF
103+
represent bytes of the same value.
104+
* An optional ``protocol_encoding``,
105+
a protocol encoding label as a JSON string, or null.
106+
* An optional ``environment_encoding``,
107+
an environment encoding label as a JSON string, or null.
108+
* An optional ``comment`` that is ignored.
109+
110+
The output is represented as a list of `qualified rules`_ or at-rules_.
111+
94112
``color3.json``
95113
Tests the ``<color>`` syntax `defined in CSS Color Level 3
96114
<http://www.w3.org/TR/css3-color/#colorunits>`_.

stylesheet_bytes.json

+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
[
2+
3+
{"css_bytes": ""},
4+
[[], "utf-8"],
5+
6+
{"css_bytes": "@\u00C3\u00A9",
7+
"protocol_encoding": null, "environment_encoding": null},
8+
[[["at-rule", "é", [], null]], "utf-8"],
9+
10+
{"css_bytes": "@\u00C3\u00A9"},
11+
[[["at-rule", "é", [], null]], "utf-8"],
12+
13+
{"css_bytes": "@\u0000\u00E9\u0000",
14+
"comment": "Untagged UTF-16, parsed as UTF-8"},
15+
[[["at-rule", "���", [], null]], "utf-8"],
16+
17+
{"css_bytes": "\u00FF\u00FE@\u0000\u00E9\u0000",
18+
"comment": "UTF-16 with a BOM"},
19+
[[["at-rule", "é", [], null]], "utf-16le"],
20+
21+
{"css_bytes": "\u00FE\u00FF\u0000@\u0000\u00E9"},
22+
[[["at-rule", "é", [], null]], "utf-16be"],
23+
24+
{"css_bytes": "@\u00E9"},
25+
[[["at-rule", "�", [], null]], "utf-8"],
26+
27+
28+
{"css_bytes": "@\u00E9", "protocol_encoding": "ISO-8859-2"},
29+
[[["at-rule", "é", [], null]], "iso-8859-2"],
30+
31+
{"css_bytes": "@\u00E9", "protocol_encoding": "ISO-8859-5"},
32+
[[["at-rule", "щ", [], null]], "iso-8859-5"],
33+
34+
{"css_bytes": "@\u00C3\u00A9", "protocol_encoding": "ISO-8859-2"},
35+
[[["at-rule", "ĂŠ", [], null]], "iso-8859-2"],
36+
37+
{"css_bytes": "\u00EF\u00BB\u00BF @\u00C3\u00A9",
38+
"protocol_encoding": "ISO-8859-2",
39+
"comment": "BOM takes precedence over protocol"},
40+
[[["at-rule", "é", [], null]], "utf-8"],
41+
42+
43+
{"css_bytes": "@charset \"ISO-8859-5\"; @\u00E9"},
44+
[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null],
45+
["at-rule", "щ", [], null]],
46+
"iso-8859-5"],
47+
48+
{"css_bytes": "@Charset \"ISO-8859-5\"; @\u00E9",
49+
"comment": "@charset has to match an exact byte pattern"},
50+
[[["at-rule", "Charset", [" ", ["string", "ISO-8859-5"]], null],
51+
["at-rule", "�", [], null]],
52+
"utf-8"],
53+
54+
{"css_bytes": "@charset  \"ISO-8859-5\"; @\u00E9",
55+
"comment": "@charset has to match an exact byte pattern"},
56+
[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null],
57+
["at-rule", "�", [], null]],
58+
"utf-8"],
59+
60+
{"css_bytes": "@charset 'ISO-8859-5'; @\u00E9",
61+
"comment": "@charset has to match an exact byte pattern"},
62+
[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null],
63+
["at-rule", "�", [], null]],
64+
"utf-8"],
65+
66+
67+
{"css_bytes": "@\u0000c\u0000h\u0000a\u0000r\u0000s\u0000e\u0000t\u0000 \u0000\"\u0000U\u0000T\u0000F\u0000-\u00001\u00006\u0000L\u0000E\u0000\"\u0000;\u0000@\u0000\u00e9\u0000",
68+
"comment": "@charset has to be ASCII-compatible itself"},
69+
[[["at-rule", "�c�h�a�r�s�e�t�",
70+
[" ", ["ident", "�"], ["string", "�U�T�F�-�1�6�L�E�"], ["ident", "�"]], null],
71+
["error", "invalid"]],
72+
"utf-8"],
73+
74+
{"css_bytes": "@charset \"UTF-16LE\"; @\u00C3\u00A9",
75+
"comment": "@charset can only specify ASCII-compatible encodings"},
76+
[[["at-rule", "charset", [" ", ["string", "UTF-16LE"]], null],
77+
["at-rule", "é", [], null]],
78+
"utf-8"],
79+
80+
81+
{"css_bytes": "\u00EF\u00BB\u00BF @charset \"ISO-8859-5\"; @\u00E9",
82+
"comment": "BOM takes precedence over @charset"},
83+
[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null],
84+
["at-rule", "�", [], null]],
85+
"utf-8"],
86+
87+
{"css_bytes": "\u00EF\u00BB\u00BF @charset \"ISO-8859-5\"; @\u00C3\u00A9",
88+
"comment": "BOM takes precedence over @charset"},
89+
[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null],
90+
["at-rule", "é", [], null]],
91+
"utf-8"],
92+
93+
{"css_bytes": "@charset \"ISO-8859-5\"; @\u00E9",
94+
"protocol_encoding": " Iso-8859-2",
95+
"comment": "Protocol takes precedence over @charset"},
96+
[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null],
97+
["at-rule", "é", [], null]],
98+
"iso-8859-2"],
99+
100+
101+
{"css_bytes": "@\u00E9", "environment_encoding": "ISO-8859-2"},
102+
[[["at-rule", "é", [], null]], "iso-8859-2"],
103+
104+
{"css_bytes": "@\u00E9", "environment_encoding": "ISO-8859-5"},
105+
[[["at-rule", "щ", [], null]], "iso-8859-5"],
106+
107+
{"css_bytes": "@charset \"ISO-8859-5\"; @\u00E9",
107+
"environment_encoding": "ISO-8859-2",
108+
"comment": "@charset takes precedence over environment"},
110+
[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null],
111+
["at-rule", "щ", [], null]],
112+
"iso-8859-5"],
113+
114+
{"css_bytes": "@\u00E9",
115+
"protocol_encoding": "ISO-8859-2",
116+
"environment_encoding": "ISO-8859-5",
117+
"comment": "protocol takes precedence over environment"},
118+
[[["at-rule", "é", [], null]], "iso-8859-2"],
119+
120+
{"css_bytes": "\u00EF\u00BB\u00BF @\u00C3\u00A9",
121+
"environment_encoding": "ISO-8859-5",
122+
"comment": "BOM takes precedence over environment"},
123+
[[["at-rule", "é", [], null]], "utf-8"]
124+
125+
126+
]

0 commit comments

Comments
 (0)