diff --git a/Cargo.lock b/Cargo.lock
index 29d7aa7..8caaf96 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -370,6 +370,7 @@ checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
 name = "hstr"
 version = "0.2.10"
 dependencies = [
+ "cfg-if",
  "compact_str",
  "criterion",
  "dudy-malloc",
@@ -386,6 +387,7 @@ dependencies = [
  "serde",
  "smartstring",
  "smol_str",
+ "static_assertions",
  "string_cache",
  "triomphe",
 ]
diff --git a/crates/hstr/Cargo.toml b/crates/hstr/Cargo.toml
index 19a42ed..0268c00 100644
--- a/crates/hstr/Cargo.toml
+++ b/crates/hstr/Cargo.toml
@@ -21,6 +21,7 @@ atom_size_64 = []
 atom_size_128 = []
 
 [dependencies]
+cfg-if = "1.0.0"
 hashbrown = { version = "0.14.3", default-features = false }
 new_debug_unreachable = "1.0.4"
 once_cell = "1.18.0"
@@ -28,6 +29,7 @@ phf = "0.11.2"
 rkyv = { version = "0.7.42", optional = true }
 rustc-hash = "1.1.0"
 serde = { version = "1.0.192", optional = true }
+static_assertions = "1.1.0"
 triomphe = "0.1.11"
 
 [dev-dependencies]
diff --git a/crates/hstr/src/lib.rs b/crates/hstr/src/lib.rs
index 7c724e4..a190ca9 100644
--- a/crates/hstr/src/lib.rs
+++ b/crates/hstr/src/lib.rs
@@ -19,6 +19,7 @@ use crate::dynamic::Entry;
 
 mod dynamic;
 mod global_store;
+mod repr;
 mod tagged_value;
 #[cfg(test)]
 mod tests;
diff --git a/crates/hstr/src/repr/capacity.rs b/crates/hstr/src/repr/capacity.rs
new file mode 100644
index 0000000..3c35a51
--- /dev/null
+++ b/crates/hstr/src/repr/capacity.rs
@@ -0,0 +1,169 @@
+use crate::repr::HEAP_MASK;
+
+// how many bytes a `usize` occupies
+const USIZE_SIZE: usize = core::mem::size_of::<usize>();
+
+/// Builds the byte pattern used for [`CAPACITY_IS_ON_THE_HEAP`]
+#[allow(non_snake_case)]
+const fn CAP_ON_HEAP_FLAG() -> [u8; USIZE_SIZE] {
+    // all bytes 255, with the last being HEAP_MASK
+    let mut flag = [255; USIZE_SIZE];
+    flag[USIZE_SIZE - 1] = HEAP_MASK;
+    flag
+}
+
+/// State that describes the capacity as being stored on the heap.
+///
+/// All bytes `255`, with the last being [`HEAP_MASK`], using the same amount of
+/// bytes as `usize`. Example (64-bit): `[255, 255, 255, 255, 255, 255, 255,
+/// 254]`
+const CAPACITY_IS_ON_THE_HEAP: [u8; USIZE_SIZE] = CAP_ON_HEAP_FLAG();
+
+// how many bytes we can use for capacity
+const SPACE_FOR_CAPACITY: usize = USIZE_SIZE - 1;
+// the maximum value we're able to store, e.g. on 64-bit arch this is 2^56 - 2
+//
+// note: `pow` is evaluated in this `const` initializer, so no bit shifting is
+// needed. We subtract 2 rather than 1 so that the all-`255` byte pattern stays
+// reserved for `CAPACITY_IS_ON_THE_HEAP`
+pub const MAX_VALUE: usize = 2usize.pow(SPACE_FOR_CAPACITY as u32 * 8) - 2;
+
+/// An integer type that uses `core::mem::size_of::<usize>() - 1` bytes to store
+/// the capacity of a heap buffer.
+///
+/// Assuming a 64-bit arch, a [`super::BoxString`] uses 8 bytes for a pointer,
+/// 8 bytes for a length, and then needs 1 byte for a discriminant. We need to
+/// store the capacity somewhere, and we could store it on the heap, but we also
+/// have 7 unused bytes. [`Capacity`] handles storing a value in these 7 bytes,
+/// returning a marker value if it's not possible, at which point we'll store
+/// the capacity on the heap.
+///
+/// # Max Values
+/// * __64-bit:__ `(2 ^ (7 * 8)) - 2 = 72_057_594_037_927_934 ~= 64 petabytes`
+/// * __32-bit:__ `(2 ^ (3 * 8)) - 2 = 16_777_214 ~= 16 megabytes`
+///
+/// Practically speaking, on a 64-bit architecture we'll never need to store the
+/// capacity on the heap, because it's impossible to create a string that
+/// is 64 petabytes or larger. But for 32-bit architectures we need to be able
+/// to store a capacity larger than 16 megabytes, since a string larger than 16
+/// megabytes probably isn't that uncommon.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(target_pointer_width = "64", repr(align(8)))]
+#[cfg_attr(target_pointer_width = "32", repr(align(4)))]
+pub struct Capacity([u8; USIZE_SIZE]);
+
+static_assertions::assert_eq_size!(Capacity, usize);
+static_assertions::assert_eq_align!(Capacity, usize);
+
+impl Capacity {
+    #[inline]
+    pub const fn new(capacity: usize) -> Self {
+        cfg_if::cfg_if! {
+            if #[cfg(target_pointer_width = "64")] {
+                // on 64-bit arches we can always fit the capacity inline
+                debug_assert!(capacity <= MAX_VALUE);
+
+                let mut bytes = capacity.to_le_bytes();
+                bytes[core::mem::size_of::<usize>() - 1] = HEAP_MASK;
+                Capacity(bytes)
+            } else if #[cfg(target_pointer_width = "32")] {
+                // on 32-bit arches we might need to store the capacity on the heap
+                if capacity > MAX_VALUE {
+                    // if we need the last byte to encode this capacity then we need to put the capacity on
+                    // the heap. return the `CAPACITY_IS_ON_THE_HEAP` marker so the caller can tell
+                    Capacity(CAPACITY_IS_ON_THE_HEAP)
+                } else {
+                    // otherwise, we can store this capacity inline! Set the last byte to be our `HEAP_MASK`
+                    // for our discriminant, using the leading bytes to store the actual value
+                    let mut bytes = capacity.to_le_bytes();
+                    bytes[core::mem::size_of::<usize>() - 1] = HEAP_MASK;
+                    Capacity(bytes)
+                }
+            } else {
+                compile_error!("Unsupported target_pointer_width");
+            }
+        }
+    }
+
+    /// Re-interprets a [`Capacity`] as a `usize`
+    ///
+    /// # Safety
+    /// * `self` must be less than or equal to [`MAX_VALUE`]
+    #[inline(always)]
+    pub unsafe fn as_usize(&self) -> usize {
+        let mut usize_buf = [0u8; USIZE_SIZE];
+        // SAFETY:
+        // * `src` is valid for reads of `SPACE_FOR_CAPACITY` because it is less than
+        //   `USIZE_SIZE`
+        // * `dst` is valid for writes of `SPACE_FOR_CAPACITY` because it is less than
+        //   `USIZE_SIZE`
+        // * `src` and `dst` do not overlap because we created `usize_buf`
+        core::ptr::copy_nonoverlapping(self.0.as_ptr(), usize_buf.as_mut_ptr(), SPACE_FOR_CAPACITY);
+        usize::from_le_bytes(usize_buf)
+    }
+
+    /// Returns whether or not this [`Capacity`] has a value that indicates the
+    /// capacity is being stored on the heap
+    #[inline(always)]
+    pub fn is_heap(&self) -> bool {
+        self.0 == CAPACITY_IS_ON_THE_HEAP
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use rayon::prelude::*;
+
+    use super::Capacity;
+
+    #[test]
+    fn test_zero_roundtrips() {
+        let og = 0;
+        let cap = Capacity::new(og);
+        let after = unsafe { cap.as_usize() };
+
+        assert_eq!(og, after);
+    }
+
+    #[test]
+    fn test_max_value() {
+        let available_bytes = (core::mem::size_of::<usize>() - 1) as u32;
+        let max_value = 2usize.pow(available_bytes * 8) - 2;
+
+        #[cfg(target_pointer_width = "64")]
+        assert_eq!(max_value, 72057594037927934);
+        #[cfg(target_pointer_width = "32")]
+        assert_eq!(max_value, 16777214);
+
+        let cap = Capacity::new(max_value);
+        let after = unsafe { cap.as_usize() };
+
+        assert_eq!(max_value, after);
+    }
+
+    #[cfg(target_pointer_width = "32")]
+    #[test]
+
+    fn test_invalid_value() {
+        let invalid_val = usize::MAX;
+        let cap = Capacity::new(invalid_val);
+        let 
after = unsafe { cap.as_usize() };
+
+        // anything greater than or equal to 16777215, should "resolve" to 16777215
+        assert_eq!(16777215, after);
+    }
+
+    #[test]
+    #[cfg_attr(miri, ignore)]
+    fn test_all_valid_32bit_values() {
+        #[cfg(target_pointer_width = "32")]
+        assert_eq!(16_777_214, super::MAX_VALUE);
+
+        (0..=16_777_214).into_par_iter().for_each(|i| {
+            let cap = Capacity::new(i);
+            let val = unsafe { cap.as_usize() };
+
+            assert_eq!(val, i, "value roundtriped to wrong value?");
+        });
+    }
+}
diff --git a/crates/hstr/src/repr/heap.rs b/crates/hstr/src/repr/heap.rs
new file mode 100644
index 0000000..253060d
--- /dev/null
+++ b/crates/hstr/src/repr/heap.rs
@@ -0,0 +1,203 @@
+use std::{
+    mem,
+    ptr::{self, NonNull},
+};
+
+use super::{capacity::Capacity, Repr};
+
+#[repr(C)]
+pub struct HeapStr {
+    ptr: ptr::NonNull<u8>,
+    len: Capacity,
+}
+
+static_assertions::assert_eq_size!(HeapStr, Repr);
+
+impl HeapStr {
+    /// Copies `text` into a newly allocated buffer owned by the returned value.
+    ///
+    /// # Safety
+    /// * `text` must not be empty, because `inline_capacity::alloc` requires a
+    ///   capacity greater than zero
+    pub unsafe fn new(text: &str) -> Self {
+        let len = Capacity::new(text.len());
+        let ptr = if len.is_heap() {
+            // the length does not fit inline, so store it in front of the text,
+            // which is where `deallocate_ptr` and `len` read it back from
+            let base = heap_capacity::alloc(text.len());
+            let header = text.len().to_ne_bytes();
+            ptr::copy_nonoverlapping(header.as_ptr(), base.as_ptr(), header.len());
+            NonNull::new_unchecked(base.as_ptr().add(header.len()))
+        } else {
+            inline_capacity::alloc(text.len())
+        };
+        // the buffer was allocated above with room for `text.len()` bytes and
+        // cannot overlap `text`
+        ptr::copy_nonoverlapping(text.as_ptr(), ptr.as_ptr(), text.len());
+        Self { ptr, len }
+    }
+
+    pub fn len(&self) -> usize {
+        if self.len.is_heap() {
+            let mut buf = [0u8; mem::size_of::<usize>()];
+            // SAFETY: when the marker is set, `new` wrote the length directly
+            // in front of the text that `ptr` points to
+            unsafe {
+                let header = self.ptr.as_ptr().sub(buf.len());
+                ptr::copy_nonoverlapping(header, buf.as_mut_ptr(), buf.len());
+            }
+            usize::from_ne_bytes(buf)
+        } else {
+            // SAFETY: the marker is not set, so the length is stored inline
+            unsafe { self.len.as_usize() }
+        }
+    }
+
+    pub fn as_str(&self) -> &str {
+        unsafe {
+            std::str::from_utf8_unchecked(std::slice::from_raw_parts(self.ptr.as_ptr(), self.len()))
+        }
+    }
+
+    #[inline]
+    pub fn dealloc(&mut self) {
+        deallocate_ptr(self.ptr, self.len)
+    }
+}
+
+/// Deallocates a buffer on the heap, handling when the capacity is also stored
+/// on the heap
+#[inline]
+pub fn deallocate_ptr(ptr: ptr::NonNull<u8>, cap: Capacity) {
+    #[cold]
+    fn deallocate_with_capacity_on_heap(ptr: ptr::NonNull<u8>) {
+        // re-adjust the pointer to include the capacity that's on the heap
+        let adj_ptr = ptr.as_ptr().wrapping_sub(mem::size_of::<usize>());
+        // read the capacity from the heap so we know how much to deallocate
+        let mut buf = [0u8; mem::size_of::<usize>()];
+        // SAFETY: `src` and `dst` don't overlap, and are valid for usize number of
+        // bytes
+        unsafe {
+            ptr::copy_nonoverlapping(adj_ptr, buf.as_mut_ptr(), mem::size_of::<usize>());
+        }
+        let capacity = usize::from_ne_bytes(buf);
+        // SAFETY: We know the pointer is not null since we got it as a NonNull
+        let ptr = unsafe { ptr::NonNull::new_unchecked(adj_ptr) };
+        // SAFETY: We checked above that our capacity is on the heap, and we readjusted
+        // the pointer to reference the capacity
+        unsafe { heap_capacity::dealloc(ptr, capacity) }
+    }
+
+    if cap.is_heap() {
+        deallocate_with_capacity_on_heap(ptr);
+    } else {
+        // SAFETY: `cap.is_heap()` is false, so the capacity is stored inline
+        unsafe { inline_capacity::dealloc(ptr, cap.as_usize()) }
+    }
+}
+
+mod heap_capacity {
+    use core::ptr;
+    use std::alloc;
+
+    use super::HeapStr;
+
+    #[inline]
+    pub fn alloc(capacity: usize) -> ptr::NonNull<u8> {
+        let layout = layout(capacity);
+        debug_assert!(layout.size() > 0);
+
+        // SAFETY: `alloc(...)` has undefined behavior if the layout is zero-sized. We
+        // know the layout can't be zero-sized though because we're always at
+        // least allocating one `usize`
+        let raw_ptr = unsafe { alloc::alloc(layout) };
+
+        // Check to make sure our pointer is non-null, some allocators return null
+        // pointers instead of panicking
+        match ptr::NonNull::new(raw_ptr) {
+            Some(ptr) => ptr,
+            None => alloc::handle_alloc_error(layout),
+        }
+    }
+
+    /// Deallocates a pointer which references a `HeapBuffer` whose capacity is
+    /// on the heap
+    ///
+    /// # Safety
+    /// * `ptr` must point to the start of a `HeapBuffer` whose capacity is on
+    ///   the heap. i.e. we must have `ptr -> [cap ; string]`
+    pub unsafe fn dealloc(ptr: ptr::NonNull<u8>, capacity: usize) {
+        let layout = layout(capacity);
+        alloc::dealloc(ptr.as_ptr(), layout);
+    }
+
+    #[repr(C)]
+    struct HeapBufferInnerHeapCapacity {
+        capacity: usize,
+        buffer: HeapStr,
+    }
+
+    #[inline(always)]
+    pub fn layout(capacity: usize) -> alloc::Layout {
+        let buffer_layout = alloc::Layout::array::<u8>(capacity).expect("valid capacity");
+        alloc::Layout::new::<HeapBufferInnerHeapCapacity>()
+            .extend(buffer_layout)
+            .expect("valid layout")
+            .0
+            .pad_to_align()
+    }
+}
+
+mod inline_capacity {
+    use core::ptr;
+    use std::alloc;
+
+    use super::HeapStr;
+
+    /// # Safety
+    /// * `capacity` must be > 0
+    #[inline]
+    pub unsafe fn alloc(capacity: usize) -> ptr::NonNull<u8> {
+        let layout = layout(capacity);
+        debug_assert!(layout.size() > 0);
+
+        // SAFETY: `alloc(...)` has undefined behavior if the layout is zero-sized. We
+        // specify that `capacity` must be > 0 as a constraint to uphold the
+        // safety of this method. If capacity is greater than 0, then our layout
+        // will be non-zero-sized.
+        let raw_ptr = alloc::alloc(layout);
+
+        // Check to make sure our pointer is non-null, some allocators return null
+        // pointers instead of panicking
+        match ptr::NonNull::new(raw_ptr) {
+            Some(ptr) => ptr,
+            None => alloc::handle_alloc_error(layout),
+        }
+    }
+
+    /// Deallocates a pointer which references a `HeapBuffer` whose capacity is
+    /// stored inline
+    ///
+    /// # Safety
+    /// * `ptr` must point to the start of a `HeapBuffer` whose capacity is
+    ///   stored inline
+    pub unsafe fn dealloc(ptr: ptr::NonNull<u8>, capacity: usize) {
+        let layout = layout(capacity);
+        alloc::dealloc(ptr.as_ptr(), layout);
+    }
+
+    #[repr(C)]
+    struct HeapBufferInnerInlineCapacity {
+        buffer: HeapStr,
+    }
+
+    #[inline(always)]
+    pub fn layout(capacity: usize) -> alloc::Layout {
+        let buffer_layout = alloc::Layout::array::<u8>(capacity).expect("valid capacity");
+        alloc::Layout::new::<HeapBufferInnerInlineCapacity>()
+            .extend(buffer_layout)
+            .expect("valid layout")
+            .0
+            .pad_to_align()
+    }
+}
diff --git a/crates/hstr/src/repr/inline.rs b/crates/hstr/src/repr/inline.rs
new file mode 100644
index 0000000..b401f47
--- /dev/null
+++ b/crates/hstr/src/repr/inline.rs
@@ -0,0 +1,26 @@
+use std::mem::transmute;
+
+use super::{nonmax::NonMaxU8, Repr, MAX_SIZE};
+
+#[repr(C)]
+pub struct InlineBuffer(pub [u8; MAX_SIZE - 1], NonMaxU8);
+static_assertions::assert_eq_size!(InlineBuffer, Repr);
+
+impl InlineBuffer {
+    /// Safety: `text.len()` must be less than `MAX_SIZE`.
+    pub unsafe fn new(text: &str) -> Self {
+        let len = text.len();
+        // the caller guarantees `len < MAX_SIZE`, which keeps it out of the `255` niche
+        let mut buffer = InlineBuffer([0; MAX_SIZE - 1], transmute::<u8, NonMaxU8>(len as u8));
+        buffer.0[..len].copy_from_slice(text.as_bytes());
+        buffer
+    }
+
+    pub fn len(&self) -> usize {
+        self.1 as usize
+    }
+
+    pub fn as_str(&self) -> &str {
+        unsafe { std::str::from_utf8_unchecked(&self.0[..self.len()]) }
+    }
+}
diff --git a/crates/hstr/src/repr/interned.rs b/crates/hstr/src/repr/interned.rs
new file mode 100644
index 0000000..3af506c
--- /dev/null
+++ b/crates/hstr/src/repr/interned.rs
@@ -0,0 +1,9 @@
+pub struct Interned {
+    ptr: *const (),
+}
+
+impl Interned {
+    pub fn new(ptr: *const ()) -> Self {
+        Self { ptr }
+    }
+}
diff --git a/crates/hstr/src/repr/mod.rs b/crates/hstr/src/repr/mod.rs
new file mode 100644
index 0000000..2c51be9
--- /dev/null
+++ b/crates/hstr/src/repr/mod.rs
@@ -0,0 +1,199 @@
+use std::mem::{size_of, transmute};
+
+use debug_unreachable::debug_unreachable;
+
+use self::{
+    heap::HeapStr, inline::InlineBuffer, interned::Interned, nonmax::NonMaxUsize,
+    static_ref::StaticStr,
+};
+
+mod capacity;
+mod heap;
+mod inline;
+mod interned;
+mod nonmax;
+mod static_ref;
+
+const MAX_SIZE: usize = size_of::<Repr>();
+
+#[repr(C)]
+pub struct Repr(
+    // We have a pointer in the representation to properly carry provenance
+    *const u8,
+    NonMaxUsize,
+);
+
+unsafe impl Send for Repr {}
+unsafe impl Sync for Repr {}
+
+const KIND_INLINED: u8 = 0b00;
+const KIND_INTERNED: u8 = 0b01;
+const KIND_HEAP: u8 = 0b10;
+const KIND_STATIC: u8 = 0b11;
+const KIND_MASK: u8 = 0b11;
+
+/// Used as a discriminant to identify different variants
+const HEAP_MASK: u8 = 0b11111110;
+
+impl Repr {
+    /// Builds a [`Repr`] that borrows `text` for the `'static` lifetime.
+    ///
+    /// NOTE(review): `StaticStr::new` tags its last byte through `Capacity::new`
+    /// (`HEAP_MASK`, low bits `0b10`), so the `KIND_STATIC` assertion below
+    /// looks like it cannot hold yet; confirm the intended tagging.
+    #[inline]
+    pub fn new_static(text: &'static str) -> Self {
+        let repr = unsafe { StaticStr::new(text) };
+
+        debug_assert_eq!(repr.len(), text.len());
+
+        let repr = unsafe { std::mem::transmute::<StaticStr, Repr>(repr) };
+
+        debug_assert_eq!(repr.kind(), KIND_STATIC);
+        debug_assert_eq!(repr.len(), text.len());
+
+        if cfg!(feature = "debug") {
+            assert_eq!(repr.as_str(), text);
+        }
+
+        repr
+    }
+
+    /// Builds a [`Repr`] from `text`, storing it inline when it is shorter than
+    /// [`MAX_SIZE`] bytes and on the heap otherwise. An empty `text` is
+    /// represented by the static empty string.
+    #[inline]
+    pub fn new_dynamic(text: &str) -> Self {
+        let len = text.len();
+
+        if len == 0 {
+            return Self::new_static("");
+        }
+
+        if len < MAX_SIZE {
+            // NOTE(review): `InlineBuffer` keeps the length in the last byte, so
+            // `kind()` reads `len & KIND_MASK` here; confirm the tag layout.
+            let repr = unsafe { InlineBuffer::new(text) };
+
+            debug_assert_eq!(repr.len(), text.len());
+
+            let repr = unsafe { std::mem::transmute::<InlineBuffer, Repr>(repr) };
+
+            debug_assert_eq!(repr.kind(), KIND_INLINED);
+            debug_assert_eq!(repr.len(), text.len());
+
+            if cfg!(feature = "debug") {
+                assert_eq!(repr.as_str(), text);
+            }
+
+            repr
+        } else {
+            let repr = unsafe { HeapStr::new(text) };
+
+            debug_assert_eq!(repr.len(), text.len());
+
+            let repr = unsafe { std::mem::transmute::<HeapStr, Repr>(repr) };
+
+            debug_assert_eq!(repr.kind(), KIND_HEAP);
+            debug_assert_eq!(repr.len(), text.len());
+
+            if cfg!(feature = "debug") {
+                assert_eq!(repr.as_str(), text);
+            }
+
+            repr
+        }
+    }
+
+    // #[inline]
+    // pub fn new_interned(text: &str) -> Self {}
+
+    fn len(&self) -> usize {
+        match self.kind() {
+            KIND_INLINED => {
+                let repr = unsafe { std::mem::transmute::<&Repr, &InlineBuffer>(self) };
+                repr.len()
+            }
+            KIND_HEAP => {
+                let repr = unsafe { std::mem::transmute::<&Repr, &HeapStr>(self) };
+                repr.len()
+            }
+            KIND_STATIC => {
+                let repr = unsafe { std::mem::transmute::<&Repr, &StaticStr>(self) };
+                repr.len()
+            }
+            KIND_INTERNED => {
+                todo!("Repr::len() for interned strings")
+            }
+            _ => unsafe { debug_unreachable!("Invalid kind in Repr::len()") },
+        }
+    }
+
+    fn as_str(&self) -> &str {
+        match self.kind() {
+            KIND_INLINED => {
+                let repr = unsafe { std::mem::transmute::<&Repr, &InlineBuffer>(self) };
+                repr.as_str()
+            }
+            KIND_HEAP => {
+                let repr = unsafe { std::mem::transmute::<&Repr, &HeapStr>(self) };
+                repr.as_str()
+            }
+            KIND_STATIC => {
+                let repr = unsafe { std::mem::transmute::<&Repr, &StaticStr>(self) };
+                repr.as_str()
+            }
+            KIND_INTERNED => {
+                todo!("Repr::as_str() for interned strings")
+            }
+            _ => 
unsafe { debug_unreachable!("Invalid kind in Repr::as_str()") },
+        }
+    }
+
+    #[inline]
+    fn kind(&self) -> u8 {
+        self.last_byte() & KIND_MASK
+    }
+
+    fn last_byte(&self) -> u8 {
+        self.1.last_byte()
+    }
+}
+
+static_assertions::assert_eq_size!(Repr, Option<Repr>, [usize; 2]);
+
+impl Drop for Repr {
+    #[inline]
+    fn drop(&mut self) {
+        // By "outlining" the actual Drop code and only calling it for the heap and
+        // interned variants, dropping an inline or static variant stays as cheap
+        // as possible.
+        match self.kind() {
+            KIND_HEAP | KIND_INTERNED => outlined_drop(self),
+            _ => {}
+        }
+
+        #[cold]
+        fn outlined_drop(this: &mut Repr) {
+            match this.kind() {
+                KIND_HEAP => {
+                    let repr = unsafe {
+                        // SAFETY: We just checked the discriminant to make sure we're heap
+                        // allocated
+                        transmute::<&mut Repr, &mut HeapStr>(this)
+                    };
+                    repr.dealloc();
+                }
+                KIND_INTERNED => {
+                    let repr = unsafe {
+                        // SAFETY: We just checked the discriminant to make sure we're interned
+                        // NOTE(review): interned.rs in this patch defines no `dealloc` - confirm
+                        transmute::<&mut Repr, &mut Interned>(this)
+                    };
+                    repr.dealloc();
+                }
+                _ => unsafe { debug_unreachable!("Invalid kind in Repr::drop()") },
+            }
+        }
+    }
+}
diff --git a/crates/hstr/src/repr/nonmax.rs b/crates/hstr/src/repr/nonmax.rs
new file mode 100644
index 0000000..73d4c0d
--- /dev/null
+++ b/crates/hstr/src/repr/nonmax.rs
@@ -0,0 +1,316 @@
+use std::mem::transmute;
+
+#[repr(C)]
+#[derive(Copy, Clone, Debug)]
+pub struct NonMaxUsize(
+    // Then we need one `usize` (aka WORDs) of data
+    // ...but we break it up into multiple pieces...
+    #[cfg(target_pointer_width = "64")] u32,
+    u16,
+    u8,
+    // ...so that the last byte can be a NonMax, which allows the compiler to see a niche value
+    NonMaxU8,
+);
+
+static_assertions::assert_eq_size!(NonMaxUsize, Option<NonMaxUsize>, usize);
+
+impl NonMaxUsize {
+    /// Reinterprets `value` as a [`NonMaxUsize`].
+    ///
+    /// The last in-memory byte of `value` must not be `255`: it becomes a [`NonMaxU8`].
+    pub fn new(value: usize) -> Self {
+        let last = value.to_ne_bytes()[std::mem::size_of::<usize>() - 1];
+        debug_assert_ne!(last, u8::MAX, "NonMaxUsize::new: last byte must not be 255");
+        // SAFETY: sizes match (asserted above); the last byte must be in `0..=254`
+        unsafe { transmute(value) }
+    }
+
+    pub fn as_usize(self) -> usize {
+        unsafe { transmute(self) }
+    }
+
+    pub const fn last_byte(self) -> u8 {
+        cfg_if::cfg_if! {
+            if #[cfg(target_pointer_width = "64")] {
+                let last_byte = self.3;
+            } else if #[cfg(target_pointer_width = "32")] {
+                let last_byte = self.2;
+            } else {
+                compile_error!("Unsupported target_pointer_width");
+            }
+        };
+        last_byte as u8
+    }
+}
+
+/// [`NonMaxU8`] is an unsigned 8-bit integer data type that has a valid range
+/// of `[0, 254]`. Excluding `255` allows the Rust compiler to use `255` as a
+/// niche.
+///
+/// Specifically the compiler can use `255` to encode the `None` variant of
+/// `Option<NonMaxU8>` allowing `std::mem::size_of::<NonMaxU8>() ==
+/// std::mem::size_of::<Option<NonMaxU8>>()`
+#[allow(clippy::upper_case_acronyms)]
+#[allow(dead_code)]
+#[allow(non_camel_case_types)]
+#[derive(Copy, Clone, Debug)]
+#[repr(u8)]
+pub enum NonMaxU8 {
+    V0 = 0,
+    V1 = 1,
+    V2 = 2,
+    V3 = 3,
+    V4 = 4,
+    V5 = 5,
+    V6 = 6,
+    V7 = 7,
+    V8 = 8,
+    V9 = 9,
+    V10 = 10,
+    V11 = 11,
+    V12 = 12,
+    V13 = 13,
+    V14 = 14,
+    V15 = 15,
+    V16 = 16,
+    V17 = 17,
+    V18 = 18,
+    V19 = 19,
+    V20 = 20,
+    V21 = 21,
+    V22 = 22,
+    V23 = 23,
+    V24 = 24,
+    V25 = 25,
+    V26 = 26,
+    V27 = 27,
+    V28 = 28,
+    V29 = 29,
+    V30 = 30,
+    V31 = 31,
+    V32 = 32,
+    V33 = 33,
+    V34 = 34,
+    V35 = 35,
+    V36 = 36,
+    V37 = 37,
+    V38 = 38,
+    V39 = 39,
+    V40 = 40,
+    V41 = 41,
+    V42 = 42,
+    V43 = 43,
+    V44 = 44,
+    V45 = 45,
+    V46 = 46,
+    V47 = 47,
+    V48 = 48,
+    V49 = 49,
+    V50 = 50,
+    V51 = 51,
+    V52 = 52,
+    V53 = 53,
+    V54 = 54,
+    V55 = 55,
+    V56 = 56,
+    V57 = 57,
+    V58 = 58,
+    V59 = 59,
+    V60 = 60,
+ V61 = 61, + V62 = 62, + V63 = 63, + V64 = 64, + V65 = 65, + V66 = 66, + V67 = 67, + V68 = 68, + V69 = 69, + V70 = 70, + V71 = 71, + V72 = 72, + V73 = 73, + V74 = 74, + V75 = 75, + V76 = 76, + V77 = 77, + V78 = 78, + V79 = 79, + V80 = 80, + V81 = 81, + V82 = 82, + V83 = 83, + V84 = 84, + V85 = 85, + V86 = 86, + V87 = 87, + V88 = 88, + V89 = 89, + V90 = 90, + V91 = 91, + V92 = 92, + V93 = 93, + V94 = 94, + V95 = 95, + V96 = 96, + V97 = 97, + V98 = 98, + V99 = 99, + V100 = 100, + V101 = 101, + V102 = 102, + V103 = 103, + V104 = 104, + V105 = 105, + V106 = 106, + V107 = 107, + V108 = 108, + V109 = 109, + V110 = 110, + V111 = 111, + V112 = 112, + V113 = 113, + V114 = 114, + V115 = 115, + V116 = 116, + V117 = 117, + V118 = 118, + V119 = 119, + V120 = 120, + V121 = 121, + V122 = 122, + V123 = 123, + V124 = 124, + V125 = 125, + V126 = 126, + V127 = 127, + V128 = 128, + V129 = 129, + V130 = 130, + V131 = 131, + V132 = 132, + V133 = 133, + V134 = 134, + V135 = 135, + V136 = 136, + V137 = 137, + V138 = 138, + V139 = 139, + V140 = 140, + V141 = 141, + V142 = 142, + V143 = 143, + V144 = 144, + V145 = 145, + V146 = 146, + V147 = 147, + V148 = 148, + V149 = 149, + V150 = 150, + V151 = 151, + V152 = 152, + V153 = 153, + V154 = 154, + V155 = 155, + V156 = 156, + V157 = 157, + V158 = 158, + V159 = 159, + V160 = 160, + V161 = 161, + V162 = 162, + V163 = 163, + V164 = 164, + V165 = 165, + V166 = 166, + V167 = 167, + V168 = 168, + V169 = 169, + V170 = 170, + V171 = 171, + V172 = 172, + V173 = 173, + V174 = 174, + V175 = 175, + V176 = 176, + V177 = 177, + V178 = 178, + V179 = 179, + V180 = 180, + V181 = 181, + V182 = 182, + V183 = 183, + V184 = 184, + V185 = 185, + V186 = 186, + V187 = 187, + V188 = 188, + V189 = 189, + V190 = 190, + V191 = 191, + V192 = 192, + V193 = 193, + V194 = 194, + V195 = 195, + V196 = 196, + V197 = 197, + V198 = 198, + V199 = 199, + V200 = 200, + V201 = 201, + V202 = 202, + V203 = 203, + V204 = 204, + V205 = 205, + V206 = 206, + V207 = 207, + V208 = 208, + 
V209 = 209,
+    V210 = 210,
+    V211 = 211,
+    V212 = 212,
+    V213 = 213,
+    V214 = 214,
+    V215 = 215,
+    V216 = 216,
+    V217 = 217,
+    V218 = 218,
+    V219 = 219,
+    V220 = 220,
+    V221 = 221,
+    V222 = 222,
+    V223 = 223,
+    V224 = 224,
+    V225 = 225,
+    V226 = 226,
+    V227 = 227,
+    V228 = 228,
+    V229 = 229,
+    V230 = 230,
+    V231 = 231,
+    V232 = 232,
+    V233 = 233,
+    V234 = 234,
+    V235 = 235,
+    V236 = 236,
+    V237 = 237,
+    V238 = 238,
+    V239 = 239,
+    V240 = 240,
+    V241 = 241,
+    V242 = 242,
+    V243 = 243,
+    V244 = 244,
+    V245 = 245,
+    V246 = 246,
+    V247 = 247,
+    V248 = 248,
+    V249 = 249,
+    V250 = 250,
+    V251 = 251,
+    V252 = 252,
+    V253 = 253,
+    V254 = 254,
+}
+
+static_assertions::assert_eq_size!(NonMaxU8, Option<NonMaxU8>, u8);
diff --git a/crates/hstr/src/repr/static_ref.rs b/crates/hstr/src/repr/static_ref.rs
new file mode 100644
index 0000000..11e212b
--- /dev/null
+++ b/crates/hstr/src/repr/static_ref.rs
@@ -0,0 +1,37 @@
+use std::ptr;
+
+use super::{capacity::Capacity, Repr};
+
+#[repr(C)]
+pub(super) struct StaticStr {
+    ptr: ptr::NonNull<u8>,
+    len: Capacity,
+}
+
+static_assertions::assert_eq_size!(Repr, StaticStr);
+
+impl StaticStr {
+    /// Wraps `text` without copying it.
+    ///
+    /// NOTE(review): `Capacity::new` writes `HEAP_MASK` into the last byte, whose
+    /// low two bits equal `KIND_HEAP` rather than `KIND_STATIC`; confirm how the
+    /// static tag is meant to be applied before `Repr` relies on it.
+    pub unsafe fn new(text: &'static str) -> Self {
+        let len = Capacity::new(text.len());
+
+        Self {
+            ptr: ptr::NonNull::new_unchecked(text as *const str as *mut u8),
+            len,
+        }
+    }
+
+    pub fn len(&self) -> usize {
+        unsafe { self.len.as_usize() }
+    }
+
+    pub fn as_str(&self) -> &str {
+        unsafe {
+            std::str::from_utf8_unchecked(std::slice::from_raw_parts(self.ptr.as_ptr(), self.len()))
+        }
+    }
+}