@@ -9,106 +9,97 @@ use std::hash;
9
9
use std:: marker:: PhantomData ;
10
10
use std:: mem;
11
11
use std:: ops:: Deref ;
12
+ use std:: rc:: Rc ;
12
13
use std:: slice;
13
14
use std:: str;
14
-
15
- // All bits set except the highest
16
- const MAX_LEN : usize = ! 0 >> 1 ;
17
-
18
- // Only the highest bit
19
- const OWNED_TAG : usize = MAX_LEN + 1 ;
20
-
21
- /// Like `Cow<'a, str>`, but with smaller `std::mem::size_of`. (Two words instead of four.)
15
+ use std :: usize ;
16
+
17
/// A string that is either shared (heap-allocated and reference-counted) or borrowed.
///
/// Equivalent to `enum { Borrowed(&'a str), Shared(Rc<String>) }`, but stored more compactly.
///
/// FIXME(https://github.com/rust-lang/rfcs/issues/1230): use an actual enum if/when
/// the compiler can do this layout optimization.
pub struct CompactCowStr<'a> {
    /// Type-erased, non-null pointer whose meaning depends on `borrowed_len_or_max`.
    ///
    /// `ptr` doesn't really have the `'static` lifetime! We abuse `&'static _`
    /// to get the effect of `NonZero<*const _>`.
    ///
    /// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared
    ptr: &'static (),

    /// * If `borrowed_len_or_max == usize::MAX`, then `ptr` represents `NonZero<*const String>`
    ///   from `Rc::into_raw`.
    ///   The lifetime parameter `'a` is irrelevant in this case.
    ///
    /// * Otherwise, `ptr` represents the `NonZero<*const u8>` data component of `&'a str`,
    ///   and `borrowed_len_or_max` its length.
    borrowed_len_or_max: usize,

    /// Stands in for the enum this struct is equivalent to: either a `&'a str`
    /// or an `Rc<String>`.
    phantom: PhantomData<Result<&'a str, Rc<String>>>,
}
39
+
40
+ fn _static_assert_same_size < ' a > ( ) {
41
+ // "Instantiate" the generic function without calling it.
42
+ let _ = mem:: transmute :: < CompactCowStr < ' a > , Option < CompactCowStr < ' a > > > ;
36
43
}
37
44
38
45
impl < ' a > From < & ' a str > for CompactCowStr < ' a > {
39
46
#[ inline]
40
47
fn from ( s : & ' a str ) -> Self {
41
48
let len = s. len ( ) ;
42
- assert ! ( len <= MAX_LEN ) ;
49
+ assert ! ( len < usize :: MAX ) ;
43
50
CompactCowStr {
44
- ptr : s. as_ptr ( ) ,
45
- tagged_len : len,
51
+ ptr : unsafe { & * ( s. as_ptr ( ) as * const ( ) ) } ,
52
+ borrowed_len_or_max : len,
46
53
phantom : PhantomData ,
47
54
}
48
55
}
49
56
}
50
57
51
- impl < ' a > From < Box < str > > for CompactCowStr < ' a > {
58
+ impl < ' a > From < Rc < String > > for CompactCowStr < ' a > {
52
59
#[ inline]
53
- fn from ( s : Box < str > ) -> Self {
54
- let ptr = s. as_ptr ( ) ;
55
- let len = s. len ( ) ;
56
- assert ! ( len <= MAX_LEN ) ;
57
- mem:: forget ( s) ;
60
+ fn from ( s : Rc < String > ) -> Self {
61
+ let ptr = unsafe { & * ( Rc :: into_raw ( s) as * const ( ) ) } ;
58
62
CompactCowStr {
59
63
ptr : ptr,
60
- tagged_len : len | OWNED_TAG ,
64
+ borrowed_len_or_max : usize :: MAX ,
61
65
phantom : PhantomData ,
62
66
}
63
67
}
64
68
}
65
69
66
70
impl < ' a > CompactCowStr < ' a > {
67
- /// Whether this string refers to borrowed memory
68
- /// (as opposed to owned, which would be freed when `CompactCowStr` goes out of scope).
69
- #[ inline]
70
- pub fn is_borrowed ( & self ) -> bool {
71
- ( self . tagged_len & OWNED_TAG ) == 0
72
- }
73
-
74
- /// The length of this string
75
- #[ inline]
76
- pub fn len ( & self ) -> usize {
77
- self . tagged_len & !OWNED_TAG
78
- }
79
-
80
- // Intentionally private since it is easy to use incorrectly.
81
71
#[ inline]
82
- fn as_raw_str ( & self ) -> * const str {
83
- unsafe {
84
- str:: from_utf8_unchecked ( slice:: from_raw_parts ( self . ptr , self . len ( ) ) )
72
+ fn unpack ( & self ) -> Result < & ' a str , * const String > {
73
+ if self . borrowed_len_or_max == usize:: MAX {
74
+ Err ( self . ptr as * const ( ) as * const String )
75
+ } else {
76
+ unsafe {
77
+ Ok ( str:: from_utf8_unchecked ( slice:: from_raw_parts (
78
+ self . ptr as * const ( ) as * const u8 ,
79
+ self . borrowed_len_or_max ,
80
+ ) ) )
81
+ }
85
82
}
86
83
}
87
84
88
- /// If this string is borrowed, return a slice with the original lifetime,
89
- /// not borrowing `self`.
90
- ///
91
- /// (`Deref` is implemented unconditionally, but returns a slice with a shorter lifetime.)
92
85
#[ inline]
93
- pub fn as_str ( & self ) -> Option < & ' a str > {
94
- if self . is_borrowed ( ) {
95
- Some ( unsafe { & * self . as_raw_str ( ) } )
96
- } else {
97
- None
98
- }
86
+ fn into_enum ( self ) -> Result < & ' a str , Rc < String > > {
87
+ self . unpack ( ) . map_err ( |ptr| {
88
+ mem:: forget ( self ) ;
89
+ unsafe {
90
+ Rc :: from_raw ( ptr)
91
+ }
92
+ } )
99
93
}
100
94
101
- /// Convert into `String`, re-using the memory allocation if it was already owned .
95
+ /// Convert into `String`, re-using an existing memory allocation if possible .
102
96
#[ inline]
103
97
pub fn into_owned ( self ) -> String {
104
- unsafe {
105
- let raw = self . as_raw_str ( ) ;
106
- let is_borrowed = self . is_borrowed ( ) ;
107
- mem:: forget ( self ) ;
108
- if is_borrowed {
109
- String :: from ( & * raw)
110
- } else {
111
- Box :: from_raw ( raw as * mut str ) . into_string ( )
98
+ match self . into_enum ( ) {
99
+ Ok ( s) => s. to_owned ( ) ,
100
+ Err ( rc) => match Rc :: try_unwrap ( rc) {
101
+ Ok ( s) => s,
102
+ Err ( rc) => ( * rc) . clone ( )
112
103
}
113
104
}
114
105
}
@@ -117,21 +108,29 @@ impl<'a> CompactCowStr<'a> {
117
108
impl < ' a > Clone for CompactCowStr < ' a > {
118
109
#[ inline]
119
110
fn clone ( & self ) -> Self {
120
- if self . is_borrowed ( ) {
121
- CompactCowStr { ..* self }
122
- } else {
123
- Self :: from ( String :: from ( & * * self ) . into_boxed_str ( ) )
111
+ match self . unpack ( ) {
112
+ Err ( ptr) => {
113
+ let rc = unsafe {
114
+ Rc :: from_raw ( ptr)
115
+ } ;
116
+ let new_rc = rc. clone ( ) ;
117
+ mem:: forget ( rc) ; // Don’t actually take ownership of this strong reference
118
+ new_rc. into ( )
119
+ }
120
+ Ok ( _) => {
121
+ CompactCowStr { ..* self }
122
+ }
124
123
}
125
124
}
126
125
}
127
126
128
127
impl < ' a > Drop for CompactCowStr < ' a > {
129
128
#[ inline]
130
129
fn drop ( & mut self ) {
131
- if ! self . is_borrowed ( ) {
132
- unsafe {
133
- Box :: from_raw ( self . as_raw_str ( ) as * mut str ) ;
134
- }
130
+ if let Err ( ptr ) = self . unpack ( ) {
131
+ mem :: drop ( unsafe {
132
+ Rc :: from_raw ( ptr )
133
+ } )
135
134
}
136
135
}
137
136
}
@@ -141,23 +140,20 @@ impl<'a> Deref for CompactCowStr<'a> {
141
140
142
141
#[ inline]
143
142
fn deref ( & self ) -> & str {
144
- unsafe {
145
- & * self . as_raw_str ( )
146
- }
143
+ self . unpack ( ) . unwrap_or_else ( |ptr| unsafe {
144
+ & * * ptr
145
+ } )
147
146
}
148
147
}
149
148
150
149
impl < ' a > From < CompactCowStr < ' a > > for Cow < ' a , str > {
151
150
#[ inline]
152
151
fn from ( cow : CompactCowStr < ' a > ) -> Self {
153
- unsafe {
154
- let raw = cow. as_raw_str ( ) ;
155
- let is_borrowed = cow. is_borrowed ( ) ;
156
- mem:: forget ( cow) ;
157
- if is_borrowed {
158
- Cow :: Borrowed ( & * raw)
159
- } else {
160
- Cow :: Owned ( Box :: from_raw ( raw as * mut str ) . into_string ( ) )
152
+ match cow. into_enum ( ) {
153
+ Ok ( s) => Cow :: Borrowed ( s) ,
154
+ Err ( rc) => match Rc :: try_unwrap ( rc) {
155
+ Ok ( s) => Cow :: Owned ( s) ,
156
+ Err ( rc) => Cow :: Owned ( ( * rc) . clone ( ) )
161
157
}
162
158
}
163
159
}
@@ -166,7 +162,7 @@ impl<'a> From<CompactCowStr<'a>> for Cow<'a, str> {
166
162
impl < ' a > From < String > for CompactCowStr < ' a > {
167
163
#[ inline]
168
164
fn from ( s : String ) -> Self {
169
- Self :: from ( s . into_boxed_str ( ) )
165
+ Self :: from ( Rc :: new ( s ) )
170
166
}
171
167
}
172
168
@@ -180,6 +176,9 @@ impl<'a> From<Cow<'a, str>> for CompactCowStr<'a> {
180
176
}
181
177
}
182
178
179
+
180
+ // Boilerplate / trivial impls below.
181
+
183
182
impl < ' a > AsRef < str > for CompactCowStr < ' a > {
184
183
#[ inline]
185
184
fn as_ref ( & self ) -> & str {
0 commit comments