|
| 1 | +/* This Source Code Form is subject to the terms of the Mozilla Public |
| 2 | + * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 3 | + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| 4 | + |
| 5 | +use std::str; |
| 6 | + |
| 7 | +use encoding::label::encoding_from_whatwg_label; |
| 8 | +use encoding::all::UTF_8; |
| 9 | +use encoding::Encoding; |
| 10 | +use encoding::DecodeReplace; |
| 11 | +use encoding::decode; |
| 12 | + |
| 13 | +use tokenizer::{tokenize, Tokenizer}; |
| 14 | +use parser::{parse_stylesheet_rules, StylesheetParser}; |
| 15 | + |
| 16 | + |
| 17 | +/// Determine the character encoding of a CSS stylesheet and decode it. |
| 18 | +/// |
| 19 | +/// This is based on the presence of a :abbr:`BOM (Byte Order Mark)`, |
| 20 | +/// an `@charset` rule, |
| 21 | +/// and encoding meta-information. |
| 22 | +/// |
| 23 | +/// :param css_bytes: A byte string. |
| 24 | +/// :param protocol_encoding: |
| 25 | +/// The encoding label, if any, defined by HTTP or equivalent protocol. |
| 26 | +/// (e.g. via the `charset` parameter of the `Content-Type` header.) |
| 27 | +/// :param environment_encoding: |
| 28 | +/// An optional `Encoding` object |
| 29 | +/// for the `environment encoding |
| 30 | +/// <http://www.w3.org/TR/css-syntax/#environment-encoding>`_, |
| 31 | +/// if any. |
| 32 | +/// :returns: |
| 33 | +/// A 2-tuple of a decoded Unicode string |
| 34 | +/// and the `Encoding` object that was used. |
| 35 | +pub fn decode_stylesheet_bytes(css: &[u8], protocol_encoding_label: Option<&str>, |
| 36 | + environment_encoding: Option<&'static Encoding>) |
| 37 | + -> (~str, &'static Encoding) { |
| 38 | + // http://dev.w3.org/csswg/css-syntax/#the-input-byte-stream |
| 39 | + match protocol_encoding_label { |
| 40 | + None => (), |
| 41 | + Some(label) => match encoding_from_whatwg_label(label) { |
| 42 | + None => (), |
| 43 | + Some(fallback) => return decode_replace(css, fallback) |
| 44 | + } |
| 45 | + } |
| 46 | + if css.starts_with("@charset \"".as_bytes()) { |
| 47 | + // 10 is "@charset \"".len() |
| 48 | + // 100 is arbitrary so that no encoding label is more than 100-10 bytes. |
| 49 | + match css.slice(10, css.len().min(&100)).position_elem(&('"' as u8)) { |
| 50 | + None => (), |
| 51 | + Some(label_length) |
| 52 | + => if css.slice_from(10 + label_length).starts_with("\";".as_bytes()) { |
| 53 | + let label = css.slice(10, 10 + label_length); |
| 54 | + let label = str::from_chars(label.iter().map(|&b| b as char).to_owned_vec()); |
| 55 | + match encoding_from_whatwg_label(label) { |
| 56 | + None => (), |
| 57 | + Some(fallback) => match fallback.name() { |
| 58 | + "utf-16be" | "utf-16le" |
| 59 | + => return decode_replace(css, UTF_8 as &'static Encoding), |
| 60 | + _ => return decode_replace(css, fallback), |
| 61 | + } |
| 62 | + } |
| 63 | + } |
| 64 | + } |
| 65 | + } |
| 66 | + match environment_encoding { |
| 67 | + None => (), |
| 68 | + Some(fallback) => return decode_replace(css, fallback) |
| 69 | + } |
| 70 | + return decode_replace(css, UTF_8 as &'static Encoding) |
| 71 | +} |
| 72 | + |
| 73 | + |
| 74 | +#[inline] |
| 75 | +fn decode_replace(input: &[u8], fallback_encoding: &'static Encoding)-> (~str, &'static Encoding) { |
| 76 | + let (result, used_encoding) = decode(input, DecodeReplace, fallback_encoding); |
| 77 | + (result.unwrap(), used_encoding) |
| 78 | +} |
| 79 | + |
| 80 | + |
| 81 | +/// Parse stylesheet from bytes. |
| 82 | +/// |
| 83 | +/// :param css_bytes: A byte string. |
| 84 | +/// :param protocol_encoding: |
| 85 | +/// The encoding label, if any, defined by HTTP or equivalent protocol. |
| 86 | +/// (e.g. via the `charset` parameter of the `Content-Type` header.) |
| 87 | +/// :param environment_encoding: |
| 88 | +/// An optional `Encoding` object |
| 89 | +/// for the `environment encoding |
| 90 | +/// <http://www.w3.org/TR/css-syntax/#environment-encoding>`_, |
| 91 | +/// if any. |
| 92 | +/// :returns: |
| 93 | +/// A 2-tuple of a Iterator<Result<Rule, SyntaxError>> |
| 94 | +/// and the `Encoding` object that was used. |
| 95 | +pub fn parse_stylesheet_rules_from_bytes( |
| 96 | + css_bytes: &[u8], protocol_encoding_label: Option<&str>, |
| 97 | + environment_encoding: Option<&'static Encoding>) |
| 98 | + -> (StylesheetParser<Tokenizer>, &'static Encoding) { |
| 99 | + let (css_unicode, encoding) = decode_stylesheet_bytes( |
| 100 | + css_bytes, protocol_encoding_label, environment_encoding); |
| 101 | + (parse_stylesheet_rules(tokenize(css_unicode)), encoding) |
| 102 | +} |
0 commit comments