generalize compare_pstr_segments

author Mark Thom <[email protected]>

Wed, 16 Apr 2025 05:10:02 +0000 (22:10 -0700)

committer Mark Thom <[email protected]>

Tue, 8 Jul 2025 05:38:12 +0000 (22:38 -0700)
author Mark Thom <[email protected]>
Wed, 16 Apr 2025 05:10:02 +0000 (22:10 -0700)
committer Mark Thom <[email protected]>
Tue, 8 Jul 2025 05:38:12 +0000 (22:38 -0700)
diff --git a/src/machine/dispatch.rs b/src/machine/dispatch.rs

index 4c9db0e8680376378f7a93d4c6b7a01ef579341f..3cb1a8e5effdaa54989122ecc1b5db0b7d9fae41 100644 (file)
--- a/src/machine/dispatch.rs
+++ b/src/machine/dispatch.rs
@@ -2815,8 +2815,6 @@ impl Machine {
                              (HeapCellValueTag::Str |
                               HeapCellValueTag::Lis |
                               HeapCellValueTag::PStrLoc) => {
-                                debug_assert!(store_v.is_ref());
-
                                  self.machine_st.heap[0] = store_v;
                                  let heap_pstr_iter = HeapPStrIter::new(&self.machine_st.heap, 0);
  
@@ -2841,8 +2839,7 @@ impl Machine {
                                          self.machine_st.mode = MachineMode::Read;
                                      }
                                      None => {
-                                        self.machine_st.backtrack();
-                                        continue;
+                                        self.machine_st.fail = true;
                                      }
                                  }
                              }
@@ -2862,8 +2859,7 @@ impl Machine {
                                  self.machine_st.mode = MachineMode::Write;
                              }
                              _ => {
-                                self.machine_st.backtrack();
-                                continue;
+                                self.machine_st.fail = true;
                              }
                          );
  
diff --git a/src/machine/heap.rs b/src/machine/heap.rs

index 87f50732abdd788eb1b814aa98aa9ef05df39a4b..fa554437afc5fa4734c914639fc077d65de9253b 100644 (file)
--- a/src/machine/heap.rs
+++ b/src/machine/heap.rs
@@ -4,7 +4,6 @@ use crate::functor_macro::*;
  use crate::types::*;
  
  use std::alloc;
-use std::cmp::Ordering;
  use std::convert::TryFrom;
  use std::ops::{Bound, Index, IndexMut, Range, RangeBounds};
  use std::ptr;
@@ -112,11 +111,94 @@ unsafe fn scan_slice_to_str(heap_slice: &[u8]) -> HeapStringScan {
      }
  }
  
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum PStrContinuable {
+    PStrOffset(usize),
+    TailIndex(usize),
+}
+
+impl PStrContinuable {
+    #[inline]
+    pub(crate) fn offset_by(&self, pstr_loc: usize) -> HeapCellValue {
+        match self {
+            Self::PStrOffset(pstr_offset) => pstr_loc_as_cell!(pstr_loc + pstr_offset),
+            Self::TailIndex(tail_idx) => heap_loc_as_cell!(tail_idx + cell_index!(pstr_loc)),
+        }
+    }
+}
+
  #[derive(Debug, Clone, Copy)]
  pub(crate) enum PStrSegmentCmpResult {
      Less,
      Greater,
-    Continue(HeapCellValue, HeapCellValue),
+    Continue(PStrContinuable, PStrContinuable),
+}
+
+pub(crate) fn compare_pstr_slices(slice1: &[u8], slice2: &[u8]) -> PStrSegmentCmpResult {
+    use std::cmp::Ordering;
+
+    debug_assert!(!slice1.is_empty() && !slice2.is_empty());
+    let find_tail = |slice| unsafe { scan_slice_to_str(slice).tail_idx };
+
+    match slice1
+        .iter()
+        .zip(slice2.iter())
+        .position(|(b1, b2)| b1 != b2 || *b1 == 0 || *b2 == 0)
+    {
+        Some(pos) => {
+            if slice1[pos] == 0 {
+                // slice1[pos] is a null byte; scan from pos to find this segment's tail index.
+                // NOTE(review): pos is passed unadjusted; confirm no -1 is needed for "\0".
+                let tail1_idx = find_tail(&slice1[pos..]);
+
+                if slice2[pos] == 0 {
+                    let tail2_idx = find_tail(&slice2[pos..]);
+
+                    PStrSegmentCmpResult::Continue(
+                        PStrContinuable::TailIndex(tail1_idx + cell_index!(pos)),
+                        PStrContinuable::TailIndex(tail2_idx + cell_index!(pos)),
+                    )
+                } else {
+                    PStrSegmentCmpResult::Continue(
+                        PStrContinuable::TailIndex(tail1_idx + cell_index!(pos)),
+                        PStrContinuable::PStrOffset(pos),
+                    )
+                }
+            } else if slice2[pos] == 0 {
+                let tail2_idx = find_tail(&slice2[pos..]);
+
+                PStrSegmentCmpResult::Continue(
+                    PStrContinuable::PStrOffset(pos),
+                    PStrContinuable::TailIndex(tail2_idx + cell_index!(pos)),
+                )
+            } else {
+                // Compute 7-byte chunks with the mismatching character at pos in the middle of
+                // each. This way, the character of which the byte at pos is a part will be
+                // validated and reached eventually by the utf8_chunks() iterator.
+
+                let slice1_range = pos.saturating_sub(3)..(pos + 4).min(slice1.len());
+                let slice2_range = pos.saturating_sub(3)..(pos + 4).min(slice2.len());
+
+                let chars1_iter = slice1[slice1_range].utf8_chunks();
+                let chars2_iter = slice2[slice2_range].utf8_chunks();
+
+                for (chunk1, chunk2) in chars1_iter.zip(chars2_iter) {
+                    let result = chunk1.valid().cmp(chunk2.valid());
+
+                    if result == Ordering::Greater {
+                        return PStrSegmentCmpResult::Greater;
+                    } else if result == Ordering::Less {
+                        return PStrSegmentCmpResult::Less;
+                    }
+                }
+
+                unreachable!()
+            }
+        }
+        None => {
+            unreachable!()
+        }
+    }
  }
  
  #[derive(Debug)]
@@ -609,6 +691,7 @@ impl Heap {
          });
      }
  
+    #[inline]
      pub(crate) fn compare_pstr_segments(
          &self,
          pstr_loc1: usize,
@@ -617,67 +700,7 @@ impl Heap {
          let slice1 = &self.as_slice()[pstr_loc1..];
          let slice2 = &self.as_slice()[pstr_loc2..];
  
-        let find_tail = |null_idx: usize| -> usize { self.scan_slice_to_str(null_idx).tail_idx };
-
-        match slice1
-            .iter()
-            .zip(slice2.iter())
-            .position(|(b1, b2)| b1 != b2 || *b1 == 0 || *b2 == 0)
-        {
-            Some(pos) => {
-                if slice1[pos] == 0 {
-                    // subtract 1 from pos to offset the increment of scan_slice_to_str if the
-                    // string is "\0\".
-                    let tail1_idx = find_tail(pstr_loc1 + pos);
-
-                    if slice2[pos] == 0 {
-                        let tail2_idx = find_tail(pstr_loc2 + pos);
-
-                        PStrSegmentCmpResult::Continue(
-                            heap_loc_as_cell!(tail1_idx),
-                            heap_loc_as_cell!(tail2_idx),
-                        )
-                    } else {
-                        PStrSegmentCmpResult::Continue(
-                            heap_loc_as_cell!(tail1_idx),
-                            pstr_loc_as_cell!(pstr_loc2 + pos),
-                        )
-                    }
-                } else if slice2[pos] == 0 {
-                    let tail2_idx = find_tail(pstr_loc2 + pos);
-
-                    PStrSegmentCmpResult::Continue(
-                        pstr_loc_as_cell!(pstr_loc1 + pos),
-                        heap_loc_as_cell!(tail2_idx),
-                    )
-                } else {
-                    // Compute 7-byte chunks with the mismatching character at pos in the middle of
-                    // each. This way, the character of which the byte at pos is a part will be
-                    // validated and reached eventually by the utf8_chunks() iterator.
-
-                    let slice1_range = pos.saturating_sub(3)..(pos + 4).min(slice1.len());
-                    let slice2_range = pos.saturating_sub(3)..(pos + 4).min(slice2.len());
-
-                    let chars1_iter = slice1[slice1_range].utf8_chunks();
-                    let chars2_iter = slice2[slice2_range].utf8_chunks();
-
-                    for (chunk1, chunk2) in chars1_iter.zip(chars2_iter) {
-                        let result = chunk1.valid().cmp(chunk2.valid());
-
-                        if result == Ordering::Greater {
-                            return PStrSegmentCmpResult::Greater;
-                        } else if result == Ordering::Less {
-                            return PStrSegmentCmpResult::Less;
-                        }
-                    }
-
-                    unreachable!()
-                }
-            }
-            None => {
-                unreachable!()
-            }
-        }
+        compare_pstr_slices(slice1, slice2)
      }
  
      #[inline]
diff --git a/src/machine/machine_state_impl.rs b/src/machine/machine_state_impl.rs

index d2dfc18de08d325c693b201eabb99362a54af154..45c689a531c1c82c35c232981b9edb2b4991e931 100644 (file)
--- a/src/machine/machine_state_impl.rs
+++ b/src/machine/machine_state_impl.rs
@@ -25,7 +25,7 @@ impl MachineState {
      pub(crate) fn new() -> Self {
          let mut heap = Heap::with_cell_capacity(256 * 256).unwrap();
  
-        // this is an interstitial cell reserved for use by the runtime.
+        // the cell at index 0 is an interstitial cell reserved for use by the runtime.
          heap.push_cell(empty_list_as_cell!()).unwrap();
          heap.store_resource_error();
  
@@ -636,8 +636,8 @@ impl MachineState {
  
                                      match self.heap.compare_pstr_segments(l1, l2) {
                                          PStrSegmentCmpResult::Continue(v1, v2) => {
-                                            self.pdl.push(v1);
-                                            self.pdl.push(v2);
+                                            self.pdl.push(v1.offset_by(l1));
+                                            self.pdl.push(v2.offset_by(l2));
                                          }
                                          PStrSegmentCmpResult::Less => {
                                              return Some(Ordering::Less);
diff --git a/src/machine/unify.rs b/src/machine/unify.rs

index 7d451fc8e9d2c5bcd20ba216a65a3402b7dc1a79..fdc996a0e41e8c96a0138d6669aba0117add58e8 100644 (file)
--- a/src/machine/unify.rs
+++ b/src/machine/unify.rs
@@ -136,8 +136,8 @@ pub(crate) trait Unifier: DerefMut<Target = MachineState> {
              (HeapCellValueTag::PStrLoc, other_pstr_loc) => {
                  match machine_st.heap.compare_pstr_segments(pstr_loc, other_pstr_loc) {
                      PStrSegmentCmpResult::Continue(v1, v2) => {
-                        machine_st.pdl.push(v1);
-                        machine_st.pdl.push(v2);
+                        machine_st.pdl.push(v1.offset_by(pstr_loc));
+                        machine_st.pdl.push(v2.offset_by(other_pstr_loc));
                      }
                      _ => {
                          machine_st.fail = true;
@@ -161,18 +161,6 @@ pub(crate) trait Unifier: DerefMut<Target = MachineState> {
  
                  self.fail = !(arity == 0 && name == atom);
              }
-            /*
-            (HeapCellValueTag::CStr, cstr_atom) if atom == atom!("[]") => {
-                self.fail = cstr_atom != atom!("");
-            }
-            (HeapCellValueTag::Char, c1) => {
-                if let Some(c2) = atom.as_char() {
-                    self.fail = c1 != c2;
-                } else {
-                    self.fail = true;
-                }
-            }
-            */
              (HeapCellValueTag::AttrVar, h) => {
                  Self::bind(self, Ref::attr_var(h), atom_as_cell!(atom));
              }
@@ -207,13 +195,6 @@ pub(crate) trait Unifier: DerefMut<Target = MachineState> {
                      self.fail = true;
                  }
              }
-            /*
-            (HeapCellValueTag::Char, c2) => {
-                if c != c2 {
-                    self.fail = true;
-                }
-            }
-            */
              (HeapCellValueTag::AttrVar, h) => {
                  Self::bind(self, Ref::attr_var(h), char_as_cell!(c));
              }
@@ -433,38 +414,6 @@ pub(crate) trait Unifier: DerefMut<Target = MachineState> {
                              tabu_list.insert((d1, d2));
                          }
                      }
-                    /*
-                    (HeapCellValueTag::CStr) => {
-                        read_heap_cell!(d2,
-                            (HeapCellValueTag::AttrVar, h) => {
-                                Self::bind(self, Ref::attr_var(h), d1);
-                                continue;
-                            }
-                            (HeapCellValueTag::Var, h) => {
-                                Self::bind(self, Ref::heap_cell(h), d1);
-                                continue;
-                            }
-                            (HeapCellValueTag::StackVar, s) => {
-                                Self::bind(self, Ref::stack_cell(s), d1);
-                                continue;
-                            }
-                            (HeapCellValueTag::Str |
-                             HeapCellValueTag::Lis |
-                             HeapCellValueTag::PStrLoc) => {
-                            }
-                            (HeapCellValueTag::CStr) => {
-                                self.fail = d1 != d2;
-                                continue;
-                            }
-                            _ => {
-                                self.fail = true;
-                                return;
-                            }
-                        );
-
-                        Self::unify_partial_string(self, d2, d1);
-                    }
-                    */
                      (HeapCellValueTag::F64, f1) => {
                          Self::unify_f64(self, f1, d2);
                      }
author	Mark Thom <[email protected]>
	Wed, 16 Apr 2025 05:10:02 +0000 (22:10 -0700)
committer	Mark Thom <[email protected]>
	Tue, 8 Jul 2025 05:38:12 +0000 (22:38 -0700)
src/machine/dispatch.rs		patch \| blob \| history
src/machine/heap.rs		patch \| blob \| history
src/machine/machine_state_impl.rs		patch \| blob \| history
src/machine/unify.rs		patch \| blob \| history