Skip to content

Commit 5daadfd

Browse files
committed
Add CompactCowStr
1 parent b088ff6 commit 5daadfd

File tree

4 files changed

+236
-1
lines changed

4 files changed

+236
-1
lines changed

src/compact_cow_str.rs

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
/* This Source Code Form is subject to the terms of the Mozilla Public
2+
* License, v. 2.0. If a copy of the MPL was not distributed with this
3+
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4+
5+
use std::borrow::{Borrow, Cow};
use std::cmp;
use std::fmt;
use std::hash;
use std::marker::PhantomData;
use std::mem;
use std::ops::Deref;
use std::ptr::{self, NonNull};
use std::slice;
use std::str;
14+
15+
// All bits set except the highest
const MAX_LEN: usize = !0 >> 1;

// Only the highest bit
const OWNED_TAG: usize = MAX_LEN + 1;

/// Like `Cow<'a, str>`, but with a smaller `std::mem::size_of`: two words.
///
/// The pointer is stored as a `NonNull`, so `Option<CompactCowStr>` is two words as well.
pub struct CompactCowStr<'a> {
    // `tagged_len` is a tag in its highest bit, and the string length in the rest of the bits.
    //
    // * If the tag is 1, the memory pointed to by `ptr` is owned
    //   and the lifetime parameter is irrelevant.
    //   `ptr` and `len` are the components of a `Box<str>`,
    //   and `ptr` was obtained from `Box::into_raw` so it may be used to free the allocation.
    //
    // * If the tag is 0, the memory is borrowed.
    //   `ptr` and `len` are the components of a `&'a str`,
    //   and `ptr` must only ever be read through.
    ptr: NonNull<u8>,
    tagged_len: usize,
    phantom: PhantomData<&'a str>,
}

impl<'a> From<&'a str> for CompactCowStr<'a> {
    /// Wraps a borrowed string slice without copying it.
    ///
    /// # Panics
    ///
    /// Panics if the length does not fit in the bits left over next to the tag.
    #[inline]
    fn from(s: &'a str) -> Self {
        let len = s.len();
        assert!(len <= MAX_LEN);
        CompactCowStr {
            // A reference is never null, even for an empty string.
            ptr: NonNull::from(s).cast::<u8>(),
            tagged_len: len,
            phantom: PhantomData,
        }
    }
}

impl<'a> From<Box<str>> for CompactCowStr<'a> {
    /// Takes ownership of a boxed string; its memory is released when the
    /// `CompactCowStr` is dropped or handed back by converting into a `Cow`.
    ///
    /// # Panics
    ///
    /// Panics if the length does not fit in the bits left over next to the tag.
    #[inline]
    fn from(s: Box<str>) -> Self {
        let len = s.len();
        // Checked before giving up ownership so that a panic still frees `s`.
        assert!(len <= MAX_LEN);
        // `Box::into_raw` (rather than `as_ptr` + `mem::forget`) yields a pointer
        // that is allowed to be turned back into a `Box` later.
        let raw: *mut str = Box::into_raw(s);
        CompactCowStr {
            // SAFETY: `Box::into_raw` never returns a null pointer.
            ptr: unsafe { NonNull::new_unchecked(raw as *mut u8) },
            tagged_len: len | OWNED_TAG,
            phantom: PhantomData,
        }
    }
}

impl<'a> CompactCowStr<'a> {
    /// Whether this string refers to borrowed memory
    /// (as opposed to owned, which would be freed when `CompactCowStr` goes out of scope).
    #[inline]
    pub fn is_borrowed(&self) -> bool {
        (self.tagged_len & OWNED_TAG) == 0
    }

    /// The length of this string, in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        self.tagged_len & !OWNED_TAG
    }

    // Intentionally private since it is easy to use incorrectly.
    //
    // Reassembles the fat pointer from its two components without creating an
    // intermediate reference, so the result keeps whatever permissions `ptr` has:
    // read-only for borrowed strings, full ownership for owned ones.
    #[inline]
    fn as_raw_str(&self) -> *mut str {
        ptr::slice_from_raw_parts_mut(self.ptr.as_ptr(), self.len()) as *mut str
    }

    /// If this string is borrowed, return a slice with the original lifetime,
    /// not borrowing `self`.
    ///
    /// Returns `None` if the string is owned.
    ///
    /// (`Deref` is implemented unconditionally, but returns a slice with a shorter lifetime.)
    #[inline]
    pub fn as_str(&self) -> Option<&'a str> {
        if self.is_borrowed() {
            // SAFETY: the tag says `ptr`/`len` came from a `&'a str`,
            // which is valid UTF-8 and live for all of `'a`.
            Some(unsafe { &*self.as_raw_str() })
        } else {
            None
        }
    }
}

impl<'a> Clone for CompactCowStr<'a> {
    /// Borrowed strings are copied bitwise; owned strings are duplicated into a new allocation.
    #[inline]
    fn clone(&self) -> Self {
        if self.is_borrowed() {
            CompactCowStr {
                ptr: self.ptr,
                tagged_len: self.tagged_len,
                phantom: PhantomData,
            }
        } else {
            Self::from(Box::<str>::from(&**self))
        }
    }
}

impl<'a> Drop for CompactCowStr<'a> {
    #[inline]
    fn drop(&mut self) {
        if !self.is_borrowed() {
            // SAFETY: the tag says `ptr`/`len` came from `Box::<str>::into_raw`,
            // and ownership has not been transferred elsewhere
            // (conversions that take the box back `mem::forget` the value first).
            unsafe { drop(Box::from_raw(self.as_raw_str())) }
        }
    }
}

impl<'a> Deref for CompactCowStr<'a> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        // SAFETY: in both states `ptr`/`len` describe valid UTF-8 that stays
        // alive at least as long as `self` is borrowed.
        unsafe { str::from_utf8_unchecked(slice::from_raw_parts(self.ptr.as_ptr(), self.len())) }
    }
}

impl<'a> From<CompactCowStr<'a>> for Cow<'a, str> {
    /// Converts back into a standard `Cow` without copying the string data.
    #[inline]
    fn from(cow: CompactCowStr<'a>) -> Self {
        let raw = cow.as_raw_str();
        let is_borrowed = cow.is_borrowed();
        // Ownership of the allocation (if any) moves into the returned `Cow`,
        // so the destructor must not run.
        mem::forget(cow);
        if is_borrowed {
            // SAFETY: `raw` was built from a `&'a str`.
            Cow::Borrowed(unsafe { &*raw })
        } else {
            // SAFETY: `raw` was built from `Box::<str>::into_raw` and, thanks to
            // the `mem::forget` above, nothing else will free it.
            Cow::Owned(String::from(unsafe { Box::from_raw(raw) }))
        }
    }
}

impl<'a> From<String> for CompactCowStr<'a> {
    /// Takes ownership of a `String`, dropping any excess capacity.
    #[inline]
    fn from(s: String) -> Self {
        Self::from(s.into_boxed_str())
    }
}

impl<'a> From<Cow<'a, str>> for CompactCowStr<'a> {
    /// Preserves the borrowed/owned state of the `Cow`.
    #[inline]
    fn from(s: Cow<'a, str>) -> Self {
        match s {
            Cow::Borrowed(s) => Self::from(s),
            Cow::Owned(s) => Self::from(s),
        }
    }
}

impl<'a> AsRef<str> for CompactCowStr<'a> {
    #[inline]
    fn as_ref(&self) -> &str {
        self
    }
}

impl<'a> Borrow<str> for CompactCowStr<'a> {
    #[inline]
    fn borrow(&self) -> &str {
        self
    }
}

impl<'a> Default for CompactCowStr<'a> {
    /// The empty string, borrowed.
    #[inline]
    fn default() -> Self {
        Self::from("")
    }
}

// Hashing, comparison and formatting all delegate to the `str` contents, so that
// a borrowed and an owned `CompactCowStr` with the same text behave identically
// (and consistently with `Borrow<str>`).

impl<'a> hash::Hash for CompactCowStr<'a> {
    #[inline]
    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
        hash::Hash::hash(&**self, hasher)
    }
}

impl<'a, T: AsRef<str>> PartialEq<T> for CompactCowStr<'a> {
    #[inline]
    fn eq(&self, other: &T) -> bool {
        &**self == other.as_ref()
    }
}

impl<'a, T: AsRef<str>> PartialOrd<T> for CompactCowStr<'a> {
    #[inline]
    fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
        (**self).partial_cmp(other.as_ref())
    }
}

impl<'a> Eq for CompactCowStr<'a> {}

impl<'a> Ord for CompactCowStr<'a> {
    #[inline]
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        (**self).cmp(&**other)
    }
}

impl<'a> fmt::Display for CompactCowStr<'a> {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&**self, formatter)
    }
}

impl<'a> fmt::Debug for CompactCowStr<'a> {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        fmt::Debug::fmt(&**self, formatter)
    }
}

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ pub use nth::parse_nth;
9191
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
9292
pub use parser::{Parser, Delimiter, Delimiters, SourcePosition, ParseError, BasicParseError, ParserInput};
9393
pub use unicode_range::UnicodeRange;
94+
pub use compact_cow_str::CompactCowStr;
9495

9596
// For macros
9697
#[doc(hidden)] pub use macros::_internal__to_lowercase;
@@ -116,6 +117,7 @@ mod color;
116117
mod nth;
117118
mod serializer;
118119
mod unicode_range;
120+
mod compact_cow_str;
119121

120122
#[cfg(test)] mod tests;
121123
#[cfg(test)] mod size_of_tests;

src/size_of_tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
* License, v. 2.0. If a copy of the MPL was not distributed with this
33
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
44

5+
use compact_cow_str::CompactCowStr;
56
use std::borrow::Cow;
67
use tokenizer::{Token, NumericValue, PercentageValue};
78

@@ -35,3 +36,4 @@ size_of_test!(token, Token, 56);
3536
size_of_test!(numeric_value, NumericValue, 16);
3637
size_of_test!(percentage_value, PercentageValue, 16);
3738
size_of_test!(std_cow_str, Cow<'static, str>, 32);
39+
size_of_test!(compact_cow_str, CompactCowStr, 16);

src/tokenizer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use self::Token::*;
1717

1818
/// One of the pieces the CSS input is broken into.
1919
///
20-
/// Some components use `CowString` in order to borrow from the original input string
20+
/// Some components use `Cow` in order to borrow from the original input string
2121
/// and avoid allocating/copying when possible.
2222
#[derive(PartialEq, Debug, Clone)]
2323
pub enum Token<'a> {

0 commit comments

Comments
 (0)