22 * License, v. 2.0. If a copy of the MPL was not distributed with this
33 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
44
5- use std:: cmp;
/// Hook through which cssparser talks to a character-encoding library
/// without depending on one directly.
///
/// An implementor picks the concrete encoding type and supplies the three
/// operations that encoding detection needs.
pub trait EncodingSupport {
    /// The type representing a single character encoding.
    type Encoding;

    /// Look up an encoding from its label, given as ASCII bytes.
    ///
    /// See <https://encoding.spec.whatwg.org/#concept-encoding-get>.
    /// Returns an `Option`; presumably `None` means the label is not
    /// recognized (confirm against the implementor).
    fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding>;

    /// Return the encoding object for UTF-8.
    fn utf8() -> Self::Encoding;

    /// Tell whether `encoding` is UTF-16BE or UTF-16LE.
    fn is_utf16_be_or_le(encoding: &Self::Encoding) -> bool;
}
19+
20+
21+ /// Determine the character encoding of a CSS stylesheet.
1322///
1423/// This is based on the presence of a BOM (Byte Order Mark), an `@charset` rule, and
1524/// encoding meta-information.
@@ -20,48 +29,36 @@ use encoding::{EncodingRef, DecoderTrap, decode};
2029/// * `environment_encoding`: An optional `Encoding` object for the [environment encoding]
2130/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any.
2231///
23- /// Returns a 2-tuple of a decoded Unicode string and the `Encoding` object that was used.
24- pub fn decode_stylesheet_bytes ( css : & [ u8 ] , protocol_encoding_label : Option < & str > ,
25- environment_encoding : Option < EncodingRef > )
26- -> ( String , EncodingRef ) {
32+ /// Returns the encoding to use.
33+ pub fn stylesheet_encoding < E > ( css : & [ u8 ] , protocol_encoding_label : Option < & [ u8 ] > ,
34+ environment_encoding : Option < E :: Encoding > )
35+ -> E :: Encoding
36+ where E : EncodingSupport {
2737 // https://drafts.csswg.org/css-syntax/#the-input-byte-stream
2838 match protocol_encoding_label {
2939 None => ( ) ,
30- Some ( label) => match encoding_from_whatwg_label ( label) {
40+ Some ( label) => match E :: from_label ( label) {
3141 None => ( ) ,
32- Some ( fallback ) => return decode_replace ( css , fallback )
42+ Some ( protocol_encoding ) => return protocol_encoding
3343 }
3444 }
35- if css . starts_with ( "@charset \" " . as_bytes ( ) ) {
36- // 10 is "@charset \"".len()
37- // 100 is arbitrary so that no encoding label is more than 100-10 bytes.
38- match css [ 10 ..cmp :: min ( css . len ( ) , 100 ) ] . iter ( ) . position ( |& b| b == b'"' ) {
45+ let prefix = b "@charset \" ";
46+ if css . starts_with ( prefix ) {
47+ let rest = & css [ prefix . len ( ) .. ] ;
48+ match rest . iter ( ) . position ( |& b| b == b'"' ) {
3949 None => ( ) ,
40- Some ( label_length)
41- => if css[ 10 + label_length..] . starts_with ( "\" ;" . as_bytes ( ) ) {
42- let label = & css[ 10 ..10 + label_length] ;
43- let label = label. iter ( ) . map ( |& b| b as char ) . collect :: < String > ( ) ;
44- match encoding_from_whatwg_label ( & * label) {
50+ Some ( label_length) => if rest[ label_length..] . starts_with ( b"\" ;" ) {
51+ let label = & rest[ ..label_length] ;
52+ match E :: from_label ( label) {
4553 None => ( ) ,
46- Some ( fallback ) => match fallback . name ( ) {
47- "utf-16be" | "utf-16le"
48- => return decode_replace ( css , UTF_8 as EncodingRef ) ,
49- _ => return decode_replace ( css , fallback ) ,
54+ Some ( charset_encoding ) => if E :: is_utf16_be_or_le ( & charset_encoding ) {
55+ return E :: utf8 ( )
56+ } else {
57+ return charset_encoding
5058 }
5159 }
5260 }
5361 }
5462 }
55- match environment_encoding {
56- None => ( ) ,
57- Some ( fallback) => return decode_replace ( css, fallback)
58- }
59- return decode_replace ( css, UTF_8 as EncodingRef )
60- }
61-
62-
63- #[ inline]
64- fn decode_replace ( input : & [ u8 ] , fallback_encoding : EncodingRef ) -> ( String , EncodingRef ) {
65- let ( result, used_encoding) = decode ( input, DecoderTrap :: Replace , fallback_encoding) ;
66- ( result. unwrap ( ) , used_encoding)
63+ environment_encoding. unwrap_or_else ( E :: utf8)
6764}
0 commit comments