Replace Box<str> with Rc<String> in CompactCowStr. · servo/rust-cssparser@3b1906b · GitHub
Skip to content

Commit 3b1906b

Browse files
committed
Replace Box<str> with Rc<String> in CompactCowStr.
This makes the Clone impl never allocate.
1 parent e009794 commit 3b1906b

File tree

2 files changed

+86
-87
lines changed

2 files changed

+86
-87
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "cssparser"
4-
version = "0.17.0"
4+
version = "0.18.0"
55
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]
66

77
description = "Rust implementation of CSS Syntax Level 3"

src/compact_cow_str.rs

Lines changed: 85 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -9,106 +9,97 @@ use std::hash;
99
use std::marker::PhantomData;
1010
use std::mem;
1111
use std::ops::Deref;
12+
use std::rc::Rc;
1213
use std::slice;
1314
use std::str;
14-
15-
// All bits set except the highest
16-
const MAX_LEN: usize = !0 >> 1;
17-
18-
// Only the highest bit
19-
const OWNED_TAG: usize = MAX_LEN + 1;
20-
21-
/// Like `Cow<'a, str>`, but with smaller `std::mem::size_of`. (Two words instead of four.)
15+
use std::usize;
16+
17+
/// A string that is either shared (heap-allocated and reference-counted) or borrowed.
18+
///
19+
/// Equivalent to `enum { Borrowed(&'a str), Shared(Rc<String>) }`, but stored more compactly.
20+
///
21+
/// FIXME(https://github.com/rust-lang/rfcs/issues/1230): use an actual enum if/when
22+
/// the compiler can do this layout optimization.
2223
pub struct CompactCowStr<'a> {
23-
// `tagged_len` is a tag in its highest bit, and the string length in the rest of the bits.
24-
//
25-
// * If the tag is 1, the memory pointed to by `ptr` is owned
26-
// and the lifetime parameter is irrelevant.
27-
// `ptr` and `len` are the components of a `Box<str>`.
28-
//
29-
// * If the tag is 0, the memory is borrowed.
30-
// `ptr` and `len` are the components of a `&'a str`.
24+
/// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared
25+
/// In the meantime we abuse `&'static _` to get the effect of `NonZero<*const _>`.
26+
/// `ptr` doesn’t really have the 'static lifetime!
27+
ptr: &'static (),
28+
29+
/// * If `borrowed_len_or_max == usize::MAX`, then `ptr` represents `NonZero<*const String>`
30+
/// from `Rc::into_raw`.
31+
/// The lifetime parameter `'a` is irrelevant in this case.
32+
///
33+
/// * Otherwise, `ptr` represents the `NonZero<*const u8>` data component of `&'a str`,
34+
/// and `borrowed_len_or_max` its length.
35+
borrowed_len_or_max: usize,
3136

32-
// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared
33-
ptr: *const u8,
34-
tagged_len: usize,
35-
phantom: PhantomData<&'a str>,
37+
phantom: PhantomData<Result<&'a str, Rc<String>>>,
38+
}
39+
40+
fn _static_assert_same_size<'a>() {
41+
// "Instantiate" the generic function without calling it.
42+
let _ = mem::transmute::<CompactCowStr<'a>, Option<CompactCowStr<'a>>>;
3643
}
3744

3845
impl<'a> From<&'a str> for CompactCowStr<'a> {
3946
#[inline]
4047
fn from(s: &'a str) -> Self {
4148
let len = s.len();
42-
assert!(len <= MAX_LEN);
49+
assert!(len < usize::MAX);
4350
CompactCowStr {
44-
ptr: s.as_ptr(),
45-
tagged_len: len,
51+
ptr: unsafe { &*(s.as_ptr() as *const ()) },
52+
borrowed_len_or_max: len,
4653
phantom: PhantomData,
4754
}
4855
}
4956
}
5057

51-
impl<'a> From<Box<str>> for CompactCowStr<'a> {
58+
impl<'a> From<Rc<String>> for CompactCowStr<'a> {
5259
#[inline]
53-
fn from(s: Box<str>) -> Self {
54-
let ptr = s.as_ptr();
55-
let len = s.len();
56-
assert!(len <= MAX_LEN);
57-
mem::forget(s);
60+
fn from(s: Rc<String>) -> Self {
61+
let ptr = unsafe { &*(Rc::into_raw(s) as *const ()) };
5862
CompactCowStr {
5963
ptr: ptr,
60-
tagged_len: len | OWNED_TAG,
64+
borrowed_len_or_max: usize::MAX,
6165
phantom: PhantomData,
6266
}
6367
}
6468
}
6569

6670
impl<'a> CompactCowStr<'a> {
67-
/// Whether this string refers to borrowed memory
68-
/// (as opposed to owned, which would be freed when `CompactCowStr` goes out of scope).
69-
#[inline]
70-
pub fn is_borrowed(&self) -> bool {
71-
(self.tagged_len & OWNED_TAG) == 0
72-
}
73-
74-
/// The length of this string
75-
#[inline]
76-
pub fn len(&self) -> usize {
77-
self.tagged_len & !OWNED_TAG
78-
}
79-
80-
// Intentionally private since it is easy to use incorrectly.
8171
#[inline]
82-
fn as_raw_str(&self) -> *const str {
83-
unsafe {
84-
str::from_utf8_unchecked(slice::from_raw_parts(self.ptr, self.len()))
72+
fn unpack(&self) -> Result<&'a str, *const String> {
73+
if self.borrowed_len_or_max == usize::MAX {
74+
Err(self.ptr as *const () as *const String)
75+
} else {
76+
unsafe {
77+
Ok(str::from_utf8_unchecked(slice::from_raw_parts(
78+
self.ptr as *const () as *const u8,
79+
self.borrowed_len_or_max,
80+
)))
81+
}
8582
}
8683
}
8784

88-
/// If this string is borrowed, return a slice with the original lifetime,
89-
/// not borrowing `self`.
90-
///
91-
/// (`Deref` is implemented unconditionally, but returns a slice with a shorter lifetime.)
9285
#[inline]
93-
pub fn as_str(&self) -> Option<&'a str> {
94-
if self.is_borrowed() {
95-
Some(unsafe { &*self.as_raw_str() })
96-
} else {
97-
None
98-
}
86+
fn into_enum(self) -> Result<&'a str, Rc<String>> {
87+
self.unpack().map_err(|ptr| {
88+
mem::forget(self);
89+
unsafe {
90+
Rc::from_raw(ptr)
91+
}
92+
})
9993
}
10094

101-
/// Convert into `String`, re-using the memory allocation if it was already owned.
95+
/// Convert into `String`, re-using an existing memory allocation if possible.
10296
#[inline]
10397
pub fn into_owned(self) -> String {
104-
unsafe {
105-
let raw = self.as_raw_str();
106-
let is_borrowed = self.is_borrowed();
107-
mem::forget(self);
108-
if is_borrowed {
109-
String::from(&*raw)
110-
} else {
111-
Box::from_raw(raw as *mut str).into_string()
98+
match self.into_enum() {
99+
Ok(s) => s.to_owned(),
100+
Err(rc) => match Rc::try_unwrap(rc) {
101+
Ok(s) => s,
102+
Err(rc) => (*rc).clone()
112103
}
113104
}
114105
}
@@ -117,21 +108,29 @@ impl<'a> CompactCowStr<'a> {
117108
impl<'a> Clone for CompactCowStr<'a> {
118109
#[inline]
119110
fn clone(&self) -> Self {
120-
if self.is_borrowed() {
121-
CompactCowStr { ..*self }
122-
} else {
123-
Self::from(String::from(&**self).into_boxed_str())
111+
match self.unpack() {
112+
Err(ptr) => {
113+
let rc = unsafe {
114+
Rc::from_raw(ptr)
115+
};
116+
let new_rc = rc.clone();
117+
mem::forget(rc); // Don’t actually take ownership of this strong reference
118+
new_rc.into()
119+
}
120+
Ok(_) => {
121+
CompactCowStr { ..*self }
122+
}
124123
}
125124
}
126125
}
127126

128127
impl<'a> Drop for CompactCowStr<'a> {
129128
#[inline]
130129
fn drop(&mut self) {
131-
if !self.is_borrowed() {
132-
unsafe {
133-
Box::from_raw(self.as_raw_str() as *mut str);
134-
}
130+
if let Err(ptr) = self.unpack() {
131+
mem::drop(unsafe {
132+
Rc::from_raw(ptr)
133+
})
135134
}
136135
}
137136
}
@@ -141,23 +140,20 @@ impl<'a> Deref for CompactCowStr<'a> {
141140

142141
#[inline]
143142
fn deref(&self) -> &str {
144-
unsafe {
145-
&*self.as_raw_str()
146-
}
143+
self.unpack().unwrap_or_else(|ptr| unsafe {
144+
&**ptr
145+
})
147146
}
148147
}
149148

150149
impl<'a> From<CompactCowStr<'a>> for Cow<'a, str> {
151150
#[inline]
152151
fn from(cow: CompactCowStr<'a>) -> Self {
153-
unsafe {
154-
let raw = cow.as_raw_str();
155-
let is_borrowed = cow.is_borrowed();
156-
mem::forget(cow);
157-
if is_borrowed {
158-
Cow::Borrowed(&*raw)
159-
} else {
160-
Cow::Owned(Box::from_raw(raw as *mut str).into_string())
152+
match cow.into_enum() {
153+
Ok(s) => Cow::Borrowed(s),
154+
Err(rc) => match Rc::try_unwrap(rc) {
155+
Ok(s) => Cow::Owned(s),
156+
Err(rc) => Cow::Owned((*rc).clone())
161157
}
162158
}
163159
}
@@ -166,7 +162,7 @@ impl<'a> From<CompactCowStr<'a>> for Cow<'a, str> {
166162
impl<'a> From<String> for CompactCowStr<'a> {
167163
#[inline]
168164
fn from(s: String) -> Self {
169-
Self::from(s.into_boxed_str())
165+
Self::from(Rc::new(s))
170166
}
171167
}
172168

@@ -180,6 +176,9 @@ impl<'a> From<Cow<'a, str>> for CompactCowStr<'a> {
180176
}
181177
}
182178

179+
180+
// Boilerplate / trivial impls below.
181+
183182
impl<'a> AsRef<str> for CompactCowStr<'a> {
184183
#[inline]
185184
fn as_ref(&self) -> &str {

0 commit comments

Comments
 (0)