2
2
* License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
4
5
- use std:: cmp;
5
/// Abstraction for avoiding a dependency from cssparser to an encoding library.
///
/// An implementor plugs in whatever representation of a character encoding it
/// already has; the functions below are all that is needed from it.
pub trait EncodingSupport {
    /// One character encoding.
    type Encoding;

    /// Look up an encoding from its label, given as raw (ASCII) bytes.
    ///
    /// See https://encoding.spec.whatwg.org/#concept-encoding-get
    ///
    /// `None` means that no encoding is associated with `ascii_label`.
    fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding>;

    /// Return the UTF-8 encoding.
    fn utf8() -> Self::Encoding;

    /// Whether the given encoding is UTF-16BE or UTF-16LE.
    fn is_utf16_be_or_le(encoding: &Self::Encoding) -> bool;
}
19
+
20
+
21
+ /// Determine the character encoding of a CSS stylesheet.
13
22
///
14
23
/// This is based on the presence of a BOM (Byte Order Mark), an `@charset` rule, and
15
24
/// encoding meta-information.
@@ -20,48 +29,36 @@ use encoding::{EncodingRef, DecoderTrap, decode};
20
29
/// * `environment_encoding`: An optional `Encoding` object for the [environment encoding]
21
30
/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any.
22
31
///
23
- /// Returns a 2-tuple of a decoded Unicode string and the `Encoding` object that was used.
24
- pub fn decode_stylesheet_bytes ( css : & [ u8 ] , protocol_encoding_label : Option < & str > ,
25
- environment_encoding : Option < EncodingRef > )
26
- -> ( String , EncodingRef ) {
32
+ /// Returns the encoding to use.
33
+ pub fn stylesheet_encoding < E > ( css : & [ u8 ] , protocol_encoding_label : Option < & [ u8 ] > ,
34
+ environment_encoding : Option < E :: Encoding > )
35
+ -> E :: Encoding
36
+ where E : EncodingSupport {
27
37
// https://drafts.csswg.org/css-syntax/#the-input-byte-stream
28
38
match protocol_encoding_label {
29
39
None => ( ) ,
30
- Some ( label) => match encoding_from_whatwg_label ( label) {
40
+ Some ( label) => match E :: from_label ( label) {
31
41
None => ( ) ,
32
- Some ( fallback ) => return decode_replace ( css , fallback )
42
+ Some ( protocol_encoding ) => return protocol_encoding
33
43
}
34
44
}
35
- if css . starts_with ( "@charset \" " . as_bytes ( ) ) {
36
- // 10 is "@charset \"".len()
37
- // 100 is arbitrary so that no encoding label is more than 100-10 bytes.
38
- match css [ 10 ..cmp :: min ( css . len ( ) , 100 ) ] . iter ( ) . position ( |& b| b == b'"' ) {
45
+ let prefix = b "@charset \" ";
46
+ if css . starts_with ( prefix ) {
47
+ let rest = & css [ prefix . len ( ) .. ] ;
48
+ match rest . iter ( ) . position ( |& b| b == b'"' ) {
39
49
None => ( ) ,
40
- Some ( label_length)
41
- => if css[ 10 + label_length..] . starts_with ( "\" ;" . as_bytes ( ) ) {
42
- let label = & css[ 10 ..10 + label_length] ;
43
- let label = label. iter ( ) . map ( |& b| b as char ) . collect :: < String > ( ) ;
44
- match encoding_from_whatwg_label ( & * label) {
50
+ Some ( label_length) => if rest[ label_length..] . starts_with ( b"\" ;" ) {
51
+ let label = & rest[ ..label_length] ;
52
+ match E :: from_label ( label) {
45
53
None => ( ) ,
46
- Some ( fallback ) => match fallback . name ( ) {
47
- "utf-16be" | "utf-16le"
48
- => return decode_replace ( css , UTF_8 as EncodingRef ) ,
49
- _ => return decode_replace ( css , fallback ) ,
54
+ Some ( charset_encoding ) => if E :: is_utf16_be_or_le ( & charset_encoding ) {
55
+ return E :: utf8 ( )
56
+ } else {
57
+ return charset_encoding
50
58
}
51
59
}
52
60
}
53
61
}
54
62
}
55
- match environment_encoding {
56
- None => ( ) ,
57
- Some ( fallback) => return decode_replace ( css, fallback)
58
- }
59
- return decode_replace ( css, UTF_8 as EncodingRef )
60
- }
61
-
62
-
63
- #[ inline]
64
- fn decode_replace ( input : & [ u8 ] , fallback_encoding : EncodingRef ) -> ( String , EncodingRef ) {
65
- let ( result, used_encoding) = decode ( input, DecoderTrap :: Replace , fallback_encoding) ;
66
- ( result. unwrap ( ) , used_encoding)
63
+ environment_encoding. unwrap_or_else ( E :: utf8)
67
64
}
0 commit comments