From a9847eef656de437647bce896a1d65837c5c1758 Mon Sep 17 00:00:00 2001 From: bakaq Date: Sat, 26 Apr 2025 23:34:53 -0300 Subject: [PATCH] Migrate to strict and exposed provenance --- src/arena.rs | 4 +- src/atom_table.rs | 4 +- src/ffi.rs | 2 +- src/heap_print.rs | 2 +- src/machine/heap.rs | 2 +- src/machine/lib_machine/mod.rs | 2 +- src/machine/stack.rs | 85 ++++++++++++++++------------------ src/macros.rs | 10 ++-- src/offset_table.rs | 7 ++- src/raw_block.rs | 10 ++-- src/types.rs | 14 +++--- 11 files changed, 69 insertions(+), 73 deletions(-) diff --git a/src/arena.rs b/src/arena.rs index 47de6fa3..a9f9a76b 100644 --- a/src/arena.rs +++ b/src/arena.rs @@ -611,12 +611,12 @@ mod tests { let mut wam = MockWAM::new(); #[cfg(target_pointer_width = "32")] let const_value = HeapCellValue::from(ConsPtr::build_with( - 0x0000_0431 as *const _, + std::ptr::without_provenance(0x0000_0431), ConsPtrMaskTag::Cons, )); #[cfg(target_pointer_width = "64")] let const_value = HeapCellValue::from(ConsPtr::build_with( - 0x0000_5555_ff00_0431 as *const _, + std::ptr::without_provenance(0x0000_5555_ff00_0431), ConsPtrMaskTag::Cons, )); diff --git a/src/atom_table.rs b/src/atom_table.rs index 8b366549..12910753 100644 --- a/src/atom_table.rs +++ b/src/atom_table.rs @@ -516,12 +516,12 @@ impl AtomTable { } }; - let ptr_base = block_epoch.block.base as usize; + let ptr_base = block_epoch.block.base.addr(); write_to_ptr(string, len_ptr); let atom = AtomCell::new() - .with_name((STRINGS.len() + len_ptr as usize - ptr_base) as u64) + .with_name((STRINGS.len() + len_ptr.addr() - ptr_base) as u64) .with_arity(0) .with_f(false) .with_m(false) diff --git a/src/ffi.rs b/src/ffi.rs index 4832bad1..a3015b6d 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -517,7 +517,7 @@ impl Value { fn as_ptr(&mut self) -> Result<*mut c_void, FFIError> { match self { Value::CString(ref mut cstr) => Ok(&mut *cstr as *mut _ as *mut c_void), - Value::Int(n) => Ok(*n as *mut c_void), + Value::Int(n) => 
Ok(std::ptr::with_exposed_provenance_mut(*n as usize)), _ => Err(FFIError::ValueCast), } } diff --git a/src/heap_print.rs b/src/heap_print.rs index 7bd227e2..9f1efee7 100644 --- a/src/heap_print.rs +++ b/src/heap_print.rs @@ -984,7 +984,7 @@ impl<'a, Outputter: HCValueOutputter> HCPrinter<'a, Outputter> { #[inline] fn print_raw_ptr(&mut self, ptr: *const ArenaHeader) { - append_str!(self, &format!("0x{:x}", ptr as *const u8 as usize)); + append_str!(self, &format!("0x{:x}", ptr.addr())); } fn print_number(&mut self, max_depth: usize, n: NumberFocus, op: &Option) { diff --git a/src/machine/heap.rs b/src/machine/heap.rs index c12af9d1..8d832f29 100644 --- a/src/machine/heap.rs +++ b/src/machine/heap.rs @@ -97,7 +97,7 @@ unsafe fn scan_slice_to_str(heap_slice: &[u8]) -> HeapStringScan { .unwrap_or(heap_slice.len()); let zero_byte_addr = heap_slice.as_ptr().add(string_len); - let sentinel_len = pstr_sentinel_length(zero_byte_addr as usize); + let sentinel_len = pstr_sentinel_length(zero_byte_addr.addr()); let tail_idx = cell_index!( (string_len + sentinel_len).next_multiple_of(ALIGN) + if sentinel_len <= 1 { heap_index!(1) } else { 0 } diff --git a/src/machine/lib_machine/mod.rs b/src/machine/lib_machine/mod.rs index d11419e0..7e87dc00 100644 --- a/src/machine/lib_machine/mod.rs +++ b/src/machine/lib_machine/mod.rs @@ -297,7 +297,7 @@ impl Term { Term::atom(alias.as_str().to_string()) } else { Term::compound("$stream", [ - Term::integer(stream.as_ptr() as usize) + Term::integer(stream.as_ptr().addr()) ]) }; term_stack.push(stream_term); diff --git a/src/machine/stack.rs b/src/machine/stack.rs index f0b136fa..4247149f 100644 --- a/src/machine/stack.rs +++ b/src/machine/stack.rs @@ -59,9 +59,10 @@ impl Index for AndFrame { unsafe { let ptr = self as *const crate::machine::stack::AndFrame as *const u8; - let ptr = ptr as usize + prelude_offset + index_offset; - &*(ptr as *const HeapCellValue) + // This address falls outside the provenance for self, therefore we have to 
get it + // from exposed provenance. + &*std::ptr::with_exposed_provenance(ptr.addr() + prelude_offset + index_offset) } } } @@ -72,10 +73,11 @@ impl IndexMut for AndFrame { let index_offset = (index - 1) * mem::size_of::(); unsafe { - let ptr = self as *mut crate::machine::stack::AndFrame as *const u8; - let ptr = ptr as usize + prelude_offset + index_offset; + let ptr = self as *mut crate::machine::stack::AndFrame as *mut u8; - &mut *(ptr as *mut HeapCellValue) + // This address falls outside the provenance for self, therefore we have to get it + // from exposed provenance. + &mut *std::ptr::with_exposed_provenance_mut(ptr.addr() + prelude_offset + index_offset) } } } @@ -85,20 +87,14 @@ impl Index for Stack { #[inline] fn index(&self, index: usize) -> &Self::Output { - unsafe { - let ptr = self.buf.base as usize + index; - &*(ptr as *const HeapCellValue) - } + unsafe { &*self.buf.base.add(index).cast() } } } impl IndexMut for Stack { #[inline] fn index_mut(&mut self, index: usize) -> &mut Self::Output { - unsafe { - let ptr = self.buf.base as usize + index; - &mut *(ptr as *mut HeapCellValue) - } + unsafe { &mut *self.buf.base.add(index).cast_mut().cast() } } } @@ -132,9 +128,10 @@ impl Index for OrFrame { unsafe { let ptr = self as *const crate::machine::stack::OrFrame as *const u8; - let ptr = ptr as usize + prelude_offset + index_offset; - &*(ptr as *const HeapCellValue) + // This address falls outside the provenance for self, therefore we have to get it + // from exposed provenance. 
+ &*std::ptr::with_exposed_provenance(ptr.addr() + prelude_offset + index_offset) } } } @@ -146,10 +143,11 @@ impl IndexMut for OrFrame { let index_offset = index * mem::size_of::(); unsafe { - let ptr = self as *mut crate::machine::stack::OrFrame as *const u8; - let ptr = ptr as usize + prelude_offset + index_offset; + let ptr = self as *mut crate::machine::stack::OrFrame as *mut u8; - &mut *(ptr as *mut HeapCellValue) + // This address falls outside the provenance for self, therefore we have to get it + // from exposed provenance. + &mut *std::ptr::with_exposed_provenance_mut(ptr.addr() + prelude_offset + index_offset) } } } @@ -187,15 +185,19 @@ impl Stack { let frame_size = AndFrame::size_of(num_cells); unsafe { - let e = (*self.buf.ptr.get_mut()) as usize - self.buf.base as usize; + let e = (*self.buf.ptr.get_mut()).addr() - self.buf.base.addr(); let new_ptr = self.alloc(frame_size); let mut offset = prelude_size::(); for idx in 0..num_cells { - ptr::write( - new_ptr.add(offset) as *mut HeapCellValue, - stack_loc_as_cell!(AndFrame, e, idx + 1), - ); + let cell_ptr = new_ptr.add(offset) as *mut HeapCellValue; + ptr::write(cell_ptr, stack_loc_as_cell!(AndFrame, e, idx + 1)); + + // Because in the Index and IndexMut implementations we need to get this from + // exposed provenance, we need to expose the provenance here, even though we don't + // actually use the value for anything. This is a reminder that `expose_provenance` + // isn't just a cast from a pointer to an integer but has actual side effects. 
+ cell_ptr.expose_provenance(); offset += mem::size_of::(); } @@ -208,22 +210,26 @@ impl Stack { } pub(crate) fn top(&self) -> usize { - unsafe { (*self.buf.ptr.get()) as usize - self.buf.base as usize } + unsafe { (*self.buf.ptr.get()).addr() - self.buf.base.addr() } } pub(crate) fn allocate_or_frame(&mut self, num_cells: usize) -> usize { let frame_size = OrFrame::size_of(num_cells); unsafe { - let b = (*self.buf.ptr.get_mut()) as usize - self.buf.base as usize; + let b = (*self.buf.ptr.get_mut()).addr() - self.buf.base.addr(); let new_ptr = self.alloc(frame_size); let mut offset = prelude_size::(); for idx in 0..num_cells { - ptr::write( - new_ptr.byte_add(offset) as *mut HeapCellValue, - stack_loc_as_cell!(OrFrame, b, idx), - ); + let cell_ptr = new_ptr.byte_add(offset) as *mut HeapCellValue; + ptr::write(cell_ptr, stack_loc_as_cell!(OrFrame, b, idx)); + + // Because in the Index and IndexMut implementations we need to get this from + // exposed provenance, we need to expose the provenance here, even though we don't + // actually use the value for anything. This is a reminder that `expose_provenance` + // isn't just a cast from a pointer to an integer but has actual side effects. 
+ cell_ptr.expose_provenance(); offset += mem::size_of::(); } @@ -237,10 +243,7 @@ impl Stack { #[inline(always)] pub(crate) fn index_and_frame(&self, e: usize) -> &AndFrame { - unsafe { - let ptr = self.buf.base as usize + e; - &*(ptr as *const AndFrame) - } + unsafe { &*self.buf.base.add(e).cast() } } #[inline(always)] @@ -254,26 +257,20 @@ impl Stack { #[inline(always)] pub(crate) fn index_or_frame(&self, b: usize) -> &OrFrame { - unsafe { - let ptr = self.buf.base as usize + b; - &*(ptr as *const OrFrame) - } + unsafe { &*self.buf.base.add(b).cast() } } #[inline(always)] pub(crate) fn index_or_frame_mut(&mut self, b: usize) -> &mut OrFrame { - unsafe { - let ptr = self.buf.base as usize + b; - &mut *(ptr as *mut OrFrame) - } + unsafe { &mut *self.buf.base.add(b).cast_mut().cast() } } #[inline(always)] pub(crate) fn truncate(&mut self, b: usize) { - let base = self.buf.base as usize + b; + let base = unsafe { self.buf.base.add(b) }; - if base < (*self.buf.ptr.get_mut()) as usize { - *self.buf.ptr.get_mut() = base as *mut _; + if base < (*self.buf.ptr.get_mut()) { + *self.buf.ptr.get_mut() = base.cast_mut(); } } } diff --git a/src/macros.rs b/src/macros.rs index 7aeba996..5347af3c 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -146,11 +146,11 @@ macro_rules! typed_arena_ptr_as_cell { macro_rules! raw_ptr_as_cell { ($ptr:expr) => {{ // Cell is 64-bit, but raw ptr is 32-bit in 32-bit systems - // TODO use <*{const,mut} _>::addr instead of as when the strict_provenance feature is stable rust-lang/rust#95228 - // we might need <*{const,mut} _>::expose_provenance for strict provenance, depending on how we recreate a pointer later - let ptr : *const _ = $ptr; - debug_assert!(!$ptr.is_null()); - HeapCellValue::from_ptr_addr(ptr as usize) + let ptr: *const _ = $ptr; + // This needs to expose provenance because it needs to be turned back into a pointer + // in contexts where there is no available provenance locally. For example, in + // `ConsPtr::as_ptr`. 
+ HeapCellValue::from_ptr_addr(ptr.expose_provenance()) }}; } diff --git a/src/offset_table.rs b/src/offset_table.rs index 5afcf89b..7b47b96f 100644 --- a/src/offset_table.rs +++ b/src/offset_table.rs @@ -141,9 +141,8 @@ where ptr::write(ptr as *mut T, value); - let value = as OffsetTable>::Offset::from( - ptr as usize - block_epoch.base as usize, - ); + let value = + as OffsetTable>::Offset::from(ptr.addr() - block_epoch.base.addr()); // AtomTable would have to update the index table at this point // explicit drop to ensure we don't accidentally drop it early @@ -270,7 +269,7 @@ where #[inline(always)] pub fn as_offset(&self) -> as OffsetTable>::Offset { as OffsetTable>::Offset::from( - self.0.get() as usize - RcuRef::get_root(&self.0).base as usize, + self.0.get().addr() - RcuRef::get_root(&self.0).base.addr(), ) } } diff --git a/src/raw_block.rs b/src/raw_block.rs index da757415..3c39c77d 100644 --- a/src/raw_block.rs +++ b/src/raw_block.rs @@ -66,8 +66,8 @@ impl RawBlock { false } else { self.base = new_base; - self.top = (self.base as usize + size * 2) as *const _; - *self.ptr.get_mut() = (self.base as usize + size) as *mut _; + self.top = self.base.add(size * 2); + *self.ptr.get_mut() = self.base.add(size).cast_mut(); true } } @@ -83,7 +83,7 @@ impl RawBlock { // allocation failed None } else { - let allocated = (*self.ptr.get()) as usize - self.base as usize; + let allocated = (*self.ptr.get()).addr() - self.base.addr(); self.base.copy_to(new_block.base.cast_mut(), allocated); *new_block.ptr.get_mut() = new_block.base.add(allocated).cast_mut(); Some(new_block) @@ -93,7 +93,7 @@ impl RawBlock { #[inline] pub fn size(&self) -> usize { - self.top as usize - self.base as usize + self.top.addr() - self.base.addr() } #[inline(always)] @@ -105,7 +105,7 @@ impl RawBlock { self.base ); - self.top as usize - (*self.ptr.get()) as usize + self.top.addr() - (*self.ptr.get()).addr() } pub unsafe fn alloc(&self, size: usize) -> *mut u8 { diff --git a/src/types.rs 
b/src/types.rs index d68f605d..e6dda7d9 100644 --- a/src/types.rs +++ b/src/types.rs @@ -93,7 +93,7 @@ impl ConsPtr { #[inline(always)] pub fn build_with(ptr: *const ArenaHeader, tag: ConsPtrMaskTag) -> Self { ConsPtr::new() - .with_ptr(ptr as *const u8 as u64) + .with_ptr(ptr.expose_provenance() as u64) .with_f(false) .with_m(false) .with_tag(tag) @@ -102,7 +102,7 @@ impl ConsPtr { #[inline(always)] pub fn as_ptr(self) -> *mut u8 { let addr: u64 = self.ptr(); - addr as usize as *mut _ + std::ptr::with_exposed_provenance_mut(addr as usize) } #[inline(always)] @@ -377,7 +377,7 @@ where { #[inline] fn from(arena_ptr: TypedArenaPtr) -> HeapCellValue { - HeapCellValue::from(arena_ptr.header_ptr() as u64) + HeapCellValue::from(arena_ptr.header_ptr().expose_provenance() as u64) } } @@ -402,7 +402,7 @@ impl From for HeapCellValue { #[inline(always)] fn from(cons_ptr: ConsPtr) -> HeapCellValue { HeapCellValue::from_bytes( - ConsPtr::from(cons_ptr.as_ptr() as u64) + ConsPtr::from(cons_ptr.as_ptr().expose_provenance() as u64) .with_tag(ConsPtrMaskTag::Cons) .with_m(false) .into_bytes(), @@ -724,14 +724,14 @@ const_assert!(mem::size_of::() == 8); impl From<*const ArenaHeader> for UntypedArenaPtr { #[inline] fn from(ptr: *const ArenaHeader) -> UntypedArenaPtr { - UntypedArenaPtr::build_with(ptr as usize) + UntypedArenaPtr::build_with(ptr.expose_provenance()) } } impl From<*const IndexPtr> for UntypedArenaPtr { #[inline] fn from(ptr: *const IndexPtr) -> UntypedArenaPtr { - UntypedArenaPtr::build_with(ptr as usize) + UntypedArenaPtr::build_with(ptr.expose_provenance()) } } @@ -751,7 +751,7 @@ impl UntypedArenaPtr { #[inline] pub fn get_ptr(self) -> *const u8 { let addr: u64 = self.ptr(); - addr as usize as *const u8 + std::ptr::with_exposed_provenance(addr as usize) } #[inline] -- 2.54.0