From: Mark Thom Date: Wed, 16 Apr 2025 05:10:02 +0000 (-0700) Subject: generalize compare_pstr_segments X-Git-Tag: v0.10.0~35^2~37 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=e563fd8e4f8d2db271a685c0f768bd98bffebcb7;p=scryer-prolog.git generalize compare_pstr_segments --- diff --git a/src/machine/dispatch.rs b/src/machine/dispatch.rs index 4c9db0e8..3cb1a8e5 100644 --- a/src/machine/dispatch.rs +++ b/src/machine/dispatch.rs @@ -2815,8 +2815,6 @@ impl Machine { (HeapCellValueTag::Str | HeapCellValueTag::Lis | HeapCellValueTag::PStrLoc) => { - debug_assert!(store_v.is_ref()); - self.machine_st.heap[0] = store_v; let heap_pstr_iter = HeapPStrIter::new(&self.machine_st.heap, 0); @@ -2841,8 +2839,7 @@ impl Machine { self.machine_st.mode = MachineMode::Read; } None => { - self.machine_st.backtrack(); - continue; + self.machine_st.fail = true; } } } @@ -2862,8 +2859,7 @@ impl Machine { self.machine_st.mode = MachineMode::Write; } _ => { - self.machine_st.backtrack(); - continue; + self.machine_st.fail = true; } ); diff --git a/src/machine/heap.rs b/src/machine/heap.rs index 87f50732..fa554437 100644 --- a/src/machine/heap.rs +++ b/src/machine/heap.rs @@ -4,7 +4,6 @@ use crate::functor_macro::*; use crate::types::*; use std::alloc; -use std::cmp::Ordering; use std::convert::TryFrom; use std::ops::{Bound, Index, IndexMut, Range, RangeBounds}; use std::ptr; @@ -112,11 +111,94 @@ unsafe fn scan_slice_to_str(heap_slice: &[u8]) -> HeapStringScan { } } +#[derive(Debug, Clone, Copy)] +pub(crate) enum PStrContinuable { + PStrOffset(usize), + TailIndex(usize), +} + +impl PStrContinuable { + #[inline] + pub(crate) fn offset_by(&self, pstr_loc: usize) -> HeapCellValue { + match self { + Self::PStrOffset(pstr_offset) => pstr_loc_as_cell!(pstr_loc + pstr_offset), + Self::TailIndex(tail_idx) => heap_loc_as_cell!(tail_idx + cell_index!(pstr_loc)), + } + } +} + #[derive(Debug, Clone, Copy)] pub(crate) enum PStrSegmentCmpResult { Less, Greater, - Continue(HeapCellValue, HeapCellValue), + Continue(PStrContinuable, PStrContinuable), +} + +pub(crate) fn compare_pstr_slices(slice1: &[u8], slice2: &[u8]) -> PStrSegmentCmpResult { + use std::cmp::Ordering; + + debug_assert!(!slice1.is_empty() && !slice2.is_empty()); + let find_tail = |slice| unsafe { scan_slice_to_str(slice).tail_idx }; + + match slice1 + .iter() + .zip(slice2.iter()) + .position(|(b1, b2)| b1 != b2 || *b1 == 0 || *b2 == 0) + { + Some(pos) => { + if slice1[pos] == 0 { + // subtract 1 from pos to offset the increment of scan_slice_to_str if the + // string is "\0\". + let tail1_idx = find_tail(&slice1[pos..]); + + if slice2[pos] == 0 { + let tail2_idx = find_tail(&slice2[pos..]); + + PStrSegmentCmpResult::Continue( + PStrContinuable::TailIndex(tail1_idx + cell_index!(pos)), + PStrContinuable::TailIndex(tail2_idx + cell_index!(pos)), + ) + } else { + PStrSegmentCmpResult::Continue( + PStrContinuable::TailIndex(tail1_idx + cell_index!(pos)), + PStrContinuable::PStrOffset(pos), + ) + } + } else if slice2[pos] == 0 { + let tail2_idx = find_tail(&slice2[pos..]); + + PStrSegmentCmpResult::Continue( + PStrContinuable::PStrOffset(pos), + PStrContinuable::TailIndex(tail2_idx + cell_index!(pos)), + ) + } else { + // Compute 7-byte chunks with the mismatching character at pos in the middle of + // each. This way, the character of which the byte at pos is a part will be + // validated and reached eventually by the utf8_chunks() iterator. + + let slice1_range = pos.saturating_sub(3)..(pos + 4).min(slice1.len()); + let slice2_range = pos.saturating_sub(3)..(pos + 4).min(slice2.len()); + + let chars1_iter = slice1[slice1_range].utf8_chunks(); + let chars2_iter = slice2[slice2_range].utf8_chunks(); + + for (chunk1, chunk2) in chars1_iter.zip(chars2_iter) { + let result = chunk1.valid().cmp(chunk2.valid()); + + if result == Ordering::Greater { + return PStrSegmentCmpResult::Greater; + } else if result == Ordering::Less { + return PStrSegmentCmpResult::Less; + } + } + + unreachable!() + } + } + None => { + unreachable!() + } + } } #[derive(Debug)] @@ -609,6 +691,7 @@ impl Heap { }); } + #[inline] pub(crate) fn compare_pstr_segments( &self, pstr_loc1: usize, @@ -617,67 +700,7 @@ impl Heap { let slice1 = &self.as_slice()[pstr_loc1..]; let slice2 = &self.as_slice()[pstr_loc2..]; - let find_tail = |null_idx: usize| -> usize { self.scan_slice_to_str(null_idx).tail_idx }; - - match slice1 - .iter() - .zip(slice2.iter()) - .position(|(b1, b2)| b1 != b2 || *b1 == 0 || *b2 == 0) - { - Some(pos) => { - if slice1[pos] == 0 { - // subtract 1 from pos to offset the increment of scan_slice_to_str if the - // string is "\0\". - let tail1_idx = find_tail(pstr_loc1 + pos); - - if slice2[pos] == 0 { - let tail2_idx = find_tail(pstr_loc2 + pos); - - PStrSegmentCmpResult::Continue( - heap_loc_as_cell!(tail1_idx), - heap_loc_as_cell!(tail2_idx), - ) - } else { - PStrSegmentCmpResult::Continue( - heap_loc_as_cell!(tail1_idx), - pstr_loc_as_cell!(pstr_loc2 + pos), - ) - } - } else if slice2[pos] == 0 { - let tail2_idx = find_tail(pstr_loc2 + pos); - - PStrSegmentCmpResult::Continue( - pstr_loc_as_cell!(pstr_loc1 + pos), - heap_loc_as_cell!(tail2_idx), - ) - } else { - // Compute 7-byte chunks with the mismatching character at pos in the middle of - // each. This way, the character of which the byte at pos is a part will be - // validated and reached eventually by the utf8_chunks() iterator. - - let slice1_range = pos.saturating_sub(3)..(pos + 4).min(slice1.len()); - let slice2_range = pos.saturating_sub(3)..(pos + 4).min(slice2.len()); - - let chars1_iter = slice1[slice1_range].utf8_chunks(); - let chars2_iter = slice2[slice2_range].utf8_chunks(); - - for (chunk1, chunk2) in chars1_iter.zip(chars2_iter) { - let result = chunk1.valid().cmp(chunk2.valid()); - - if result == Ordering::Greater { - return PStrSegmentCmpResult::Greater; - } else if result == Ordering::Less { - return PStrSegmentCmpResult::Less; - } - } - - unreachable!() - } - } - None => { - unreachable!() - } - } + compare_pstr_slices(slice1, slice2) } #[inline] diff --git a/src/machine/machine_state_impl.rs b/src/machine/machine_state_impl.rs index d2dfc18d..45c689a5 100644 --- a/src/machine/machine_state_impl.rs +++ b/src/machine/machine_state_impl.rs @@ -25,7 +25,7 @@ impl MachineState { pub(crate) fn new() -> Self { let mut heap = Heap::with_cell_capacity(256 * 256).unwrap(); - // this is an interstitial cell reserved for use by the runtime. + // the cell at index 0 is an interstitial cell reserved for use by the runtime. heap.push_cell(empty_list_as_cell!()).unwrap(); heap.store_resource_error(); @@ -636,8 +636,8 @@ impl MachineState { match self.heap.compare_pstr_segments(l1, l2) { PStrSegmentCmpResult::Continue(v1, v2) => { - self.pdl.push(v1); - self.pdl.push(v2); + self.pdl.push(v1.offset_by(l1)); + self.pdl.push(v2.offset_by(l2)); } PStrSegmentCmpResult::Less => { return Some(Ordering::Less); diff --git a/src/machine/unify.rs b/src/machine/unify.rs index 7d451fc8..fdc996a0 100644 --- a/src/machine/unify.rs +++ b/src/machine/unify.rs @@ -136,8 +136,8 @@ pub(crate) trait Unifier: DerefMut { (HeapCellValueTag::PStrLoc, other_pstr_loc) => { match machine_st.heap.compare_pstr_segments(pstr_loc, other_pstr_loc) { PStrSegmentCmpResult::Continue(v1, v2) => { - machine_st.pdl.push(v1); - machine_st.pdl.push(v2); + machine_st.pdl.push(v1.offset_by(pstr_loc)); + machine_st.pdl.push(v2.offset_by(other_pstr_loc)); } _ => { machine_st.fail = true; @@ -161,18 +161,6 @@ pub(crate) trait Unifier: DerefMut { self.fail = !(arity == 0 && name == atom); } - /* - (HeapCellValueTag::CStr, cstr_atom) if atom == atom!("[]") => { - self.fail = cstr_atom != atom!(""); - } - (HeapCellValueTag::Char, c1) => { - if let Some(c2) = atom.as_char() { - self.fail = c1 != c2; - } else { - self.fail = true; - } - } - */ (HeapCellValueTag::AttrVar, h) => { Self::bind(self, Ref::attr_var(h), atom_as_cell!(atom)); } @@ -207,13 +195,6 @@ pub(crate) trait Unifier: DerefMut { self.fail = true; } } - /* - (HeapCellValueTag::Char, c2) => { - if c != c2 { - self.fail = true; - } - } - */ (HeapCellValueTag::AttrVar, h) => { Self::bind(self, Ref::attr_var(h), char_as_cell!(c)); } @@ -433,38 +414,6 @@ pub(crate) trait Unifier: DerefMut { tabu_list.insert((d1, d2)); } } - /* - (HeapCellValueTag::CStr) => { - read_heap_cell!(d2, - (HeapCellValueTag::AttrVar, h) => { - Self::bind(self, Ref::attr_var(h), d1); - continue; - } - (HeapCellValueTag::Var, h) => { - Self::bind(self, Ref::heap_cell(h), d1); - continue; - } - (HeapCellValueTag::StackVar, s) => { - Self::bind(self, Ref::stack_cell(s), d1); - continue; - } - (HeapCellValueTag::Str | - HeapCellValueTag::Lis | - HeapCellValueTag::PStrLoc) => { - } - (HeapCellValueTag::CStr) => { - self.fail = d1 != d2; - continue; - } - _ => { - self.fail = true; - return; - } - ); - - Self::unify_partial_string(self, d2, d1); - } - */ (HeapCellValueTag::F64, f1) => { Self::unify_f64(self, f1, d2); }