Repositorios git - scryer-prolog.git/commitdiff
generalize compare_pstr_segments
author Mark Thom <[email protected]>
Wed, 16 Apr 2025 05:10:02 +0000 (22:10 -0700)
committer Mark Thom <[email protected]>
Wed, 23 Apr 2025 06:33:11 +0000 (23:33 -0700)
src/machine/dispatch.rs
src/machine/heap.rs
src/machine/machine_state_impl.rs
src/machine/unify.rs

index 4c9db0e8680376378f7a93d4c6b7a01ef579341f..3cb1a8e5effdaa54989122ecc1b5db0b7d9fae41 100644 (file)
@@ -2815,8 +2815,6 @@ impl Machine {
                             (HeapCellValueTag::Str |
                              HeapCellValueTag::Lis |
                              HeapCellValueTag::PStrLoc) => {
-                                debug_assert!(store_v.is_ref());
-
                                 self.machine_st.heap[0] = store_v;
                                 let heap_pstr_iter = HeapPStrIter::new(&self.machine_st.heap, 0);
 
@@ -2841,8 +2839,7 @@ impl Machine {
                                         self.machine_st.mode = MachineMode::Read;
                                     }
                                     None => {
-                                        self.machine_st.backtrack();
-                                        continue;
+                                        self.machine_st.fail = true;
                                     }
                                 }
                             }
@@ -2862,8 +2859,7 @@ impl Machine {
                                 self.machine_st.mode = MachineMode::Write;
                             }
                             _ => {
-                                self.machine_st.backtrack();
-                                continue;
+                                self.machine_st.fail = true;
                             }
                         );
 
index 87f50732abdd788eb1b814aa98aa9ef05df39a4b..fa554437afc5fa4734c914639fc077d65de9253b 100644 (file)
@@ -4,7 +4,6 @@ use crate::functor_macro::*;
 use crate::types::*;
 
 use std::alloc;
-use std::cmp::Ordering;
 use std::convert::TryFrom;
 use std::ops::{Bound, Index, IndexMut, Range, RangeBounds};
 use std::ptr;
@@ -112,11 +111,94 @@ unsafe fn scan_slice_to_str(heap_slice: &[u8]) -> HeapStringScan {
     }
 }
 
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum PStrContinuable {
+    PStrOffset(usize),
+    TailIndex(usize),
+}
+
+impl PStrContinuable {
+    #[inline]
+    pub(crate) fn offset_by(&self, pstr_loc: usize) -> HeapCellValue {
+        match self {
+            Self::PStrOffset(pstr_offset) => pstr_loc_as_cell!(pstr_loc + pstr_offset),
+            Self::TailIndex(tail_idx) => heap_loc_as_cell!(tail_idx + cell_index!(pstr_loc)),
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy)]
 pub(crate) enum PStrSegmentCmpResult {
     Less,
     Greater,
-    Continue(HeapCellValue, HeapCellValue),
+    Continue(PStrContinuable, PStrContinuable),
+}
+
+pub(crate) fn compare_pstr_slices(slice1: &[u8], slice2: &[u8]) -> PStrSegmentCmpResult {
+    use std::cmp::Ordering;
+
+    debug_assert!(!slice1.is_empty() && !slice2.is_empty());
+    let find_tail = |slice| unsafe { scan_slice_to_str(slice).tail_idx };
+
+    match slice1
+        .iter()
+        .zip(slice2.iter())
+        .position(|(b1, b2)| b1 != b2 || *b1 == 0 || *b2 == 0)
+    {
+        Some(pos) => {
+            if slice1[pos] == 0 {
+                // subtract 1 from pos to offset the increment of scan_slice_to_str if the
+                // string is "\0\".
+                let tail1_idx = find_tail(&slice1[pos..]);
+
+                if slice2[pos] == 0 {
+                    let tail2_idx = find_tail(&slice2[pos..]);
+
+                    PStrSegmentCmpResult::Continue(
+                        PStrContinuable::TailIndex(tail1_idx + cell_index!(pos)),
+                        PStrContinuable::TailIndex(tail2_idx + cell_index!(pos)),
+                    )
+                } else {
+                    PStrSegmentCmpResult::Continue(
+                        PStrContinuable::TailIndex(tail1_idx + cell_index!(pos)),
+                        PStrContinuable::PStrOffset(pos),
+                    )
+                }
+            } else if slice2[pos] == 0 {
+                let tail2_idx = find_tail(&slice2[pos..]);
+
+                PStrSegmentCmpResult::Continue(
+                    PStrContinuable::PStrOffset(pos),
+                    PStrContinuable::TailIndex(tail2_idx + cell_index!(pos)),
+                )
+            } else {
+                // Compute 7-byte chunks with the mismatching character at pos in the middle of
+                // each. This way, the character of which the byte at pos is a part will be
+                // validated and reached eventually by the utf8_chunks() iterator.
+
+                let slice1_range = pos.saturating_sub(3)..(pos + 4).min(slice1.len());
+                let slice2_range = pos.saturating_sub(3)..(pos + 4).min(slice2.len());
+
+                let chars1_iter = slice1[slice1_range].utf8_chunks();
+                let chars2_iter = slice2[slice2_range].utf8_chunks();
+
+                for (chunk1, chunk2) in chars1_iter.zip(chars2_iter) {
+                    let result = chunk1.valid().cmp(chunk2.valid());
+
+                    if result == Ordering::Greater {
+                        return PStrSegmentCmpResult::Greater;
+                    } else if result == Ordering::Less {
+                        return PStrSegmentCmpResult::Less;
+                    }
+                }
+
+                unreachable!()
+            }
+        }
+        None => {
+            unreachable!()
+        }
+    }
 }
 
 #[derive(Debug)]
@@ -609,6 +691,7 @@ impl Heap {
         });
     }
 
+    #[inline]
     pub(crate) fn compare_pstr_segments(
         &self,
         pstr_loc1: usize,
@@ -617,67 +700,7 @@ impl Heap {
         let slice1 = &self.as_slice()[pstr_loc1..];
         let slice2 = &self.as_slice()[pstr_loc2..];
 
-        let find_tail = |null_idx: usize| -> usize { self.scan_slice_to_str(null_idx).tail_idx };
-
-        match slice1
-            .iter()
-            .zip(slice2.iter())
-            .position(|(b1, b2)| b1 != b2 || *b1 == 0 || *b2 == 0)
-        {
-            Some(pos) => {
-                if slice1[pos] == 0 {
-                    // subtract 1 from pos to offset the increment of scan_slice_to_str if the
-                    // string is "\0\".
-                    let tail1_idx = find_tail(pstr_loc1 + pos);
-
-                    if slice2[pos] == 0 {
-                        let tail2_idx = find_tail(pstr_loc2 + pos);
-
-                        PStrSegmentCmpResult::Continue(
-                            heap_loc_as_cell!(tail1_idx),
-                            heap_loc_as_cell!(tail2_idx),
-                        )
-                    } else {
-                        PStrSegmentCmpResult::Continue(
-                            heap_loc_as_cell!(tail1_idx),
-                            pstr_loc_as_cell!(pstr_loc2 + pos),
-                        )
-                    }
-                } else if slice2[pos] == 0 {
-                    let tail2_idx = find_tail(pstr_loc2 + pos);
-
-                    PStrSegmentCmpResult::Continue(
-                        pstr_loc_as_cell!(pstr_loc1 + pos),
-                        heap_loc_as_cell!(tail2_idx),
-                    )
-                } else {
-                    // Compute 7-byte chunks with the mismatching character at pos in the middle of
-                    // each. This way, the character of which the byte at pos is a part will be
-                    // validated and reached eventually by the utf8_chunks() iterator.
-
-                    let slice1_range = pos.saturating_sub(3)..(pos + 4).min(slice1.len());
-                    let slice2_range = pos.saturating_sub(3)..(pos + 4).min(slice2.len());
-
-                    let chars1_iter = slice1[slice1_range].utf8_chunks();
-                    let chars2_iter = slice2[slice2_range].utf8_chunks();
-
-                    for (chunk1, chunk2) in chars1_iter.zip(chars2_iter) {
-                        let result = chunk1.valid().cmp(chunk2.valid());
-
-                        if result == Ordering::Greater {
-                            return PStrSegmentCmpResult::Greater;
-                        } else if result == Ordering::Less {
-                            return PStrSegmentCmpResult::Less;
-                        }
-                    }
-
-                    unreachable!()
-                }
-            }
-            None => {
-                unreachable!()
-            }
-        }
+        compare_pstr_slices(slice1, slice2)
     }
 
     #[inline]
index d2dfc18de08d325c693b201eabb99362a54af154..45c689a531c1c82c35c232981b9edb2b4991e931 100644 (file)
@@ -25,7 +25,7 @@ impl MachineState {
     pub(crate) fn new() -> Self {
         let mut heap = Heap::with_cell_capacity(256 * 256).unwrap();
 
-        // this is an interstitial cell reserved for use by the runtime.
+        // the cell at index 0 is an interstitial cell reserved for use by the runtime.
         heap.push_cell(empty_list_as_cell!()).unwrap();
         heap.store_resource_error();
 
@@ -636,8 +636,8 @@ impl MachineState {
 
                                     match self.heap.compare_pstr_segments(l1, l2) {
                                         PStrSegmentCmpResult::Continue(v1, v2) => {
-                                            self.pdl.push(v1);
-                                            self.pdl.push(v2);
+                                            self.pdl.push(v1.offset_by(l1));
+                                            self.pdl.push(v2.offset_by(l2));
                                         }
                                         PStrSegmentCmpResult::Less => {
                                             return Some(Ordering::Less);
index 7d451fc8e9d2c5bcd20ba216a65a3402b7dc1a79..fdc996a0e41e8c96a0138d6669aba0117add58e8 100644 (file)
@@ -136,8 +136,8 @@ pub(crate) trait Unifier: DerefMut<Target = MachineState> {
             (HeapCellValueTag::PStrLoc, other_pstr_loc) => {
                 match machine_st.heap.compare_pstr_segments(pstr_loc, other_pstr_loc) {
                     PStrSegmentCmpResult::Continue(v1, v2) => {
-                        machine_st.pdl.push(v1);
-                        machine_st.pdl.push(v2);
+                        machine_st.pdl.push(v1.offset_by(pstr_loc));
+                        machine_st.pdl.push(v2.offset_by(other_pstr_loc));
                     }
                     _ => {
                         machine_st.fail = true;
@@ -161,18 +161,6 @@ pub(crate) trait Unifier: DerefMut<Target = MachineState> {
 
                 self.fail = !(arity == 0 && name == atom);
             }
-            /*
-            (HeapCellValueTag::CStr, cstr_atom) if atom == atom!("[]") => {
-                self.fail = cstr_atom != atom!("");
-            }
-            (HeapCellValueTag::Char, c1) => {
-                if let Some(c2) = atom.as_char() {
-                    self.fail = c1 != c2;
-                } else {
-                    self.fail = true;
-                }
-            }
-            */
             (HeapCellValueTag::AttrVar, h) => {
                 Self::bind(self, Ref::attr_var(h), atom_as_cell!(atom));
             }
@@ -207,13 +195,6 @@ pub(crate) trait Unifier: DerefMut<Target = MachineState> {
                     self.fail = true;
                 }
             }
-            /*
-            (HeapCellValueTag::Char, c2) => {
-                if c != c2 {
-                    self.fail = true;
-                }
-            }
-            */
             (HeapCellValueTag::AttrVar, h) => {
                 Self::bind(self, Ref::attr_var(h), char_as_cell!(c));
             }
@@ -433,38 +414,6 @@ pub(crate) trait Unifier: DerefMut<Target = MachineState> {
                             tabu_list.insert((d1, d2));
                         }
                     }
-                    /*
-                    (HeapCellValueTag::CStr) => {
-                        read_heap_cell!(d2,
-                            (HeapCellValueTag::AttrVar, h) => {
-                                Self::bind(self, Ref::attr_var(h), d1);
-                                continue;
-                            }
-                            (HeapCellValueTag::Var, h) => {
-                                Self::bind(self, Ref::heap_cell(h), d1);
-                                continue;
-                            }
-                            (HeapCellValueTag::StackVar, s) => {
-                                Self::bind(self, Ref::stack_cell(s), d1);
-                                continue;
-                            }
-                            (HeapCellValueTag::Str |
-                             HeapCellValueTag::Lis |
-                             HeapCellValueTag::PStrLoc) => {
-                            }
-                            (HeapCellValueTag::CStr) => {
-                                self.fail = d1 != d2;
-                                continue;
-                            }
-                            _ => {
-                                self.fail = true;
-                                return;
-                            }
-                        );
-
-                        Self::unify_partial_string(self, d2, d1);
-                    }
-                    */
                     (HeapCellValueTag::F64, f1) => {
                         Self::unify_f64(self, f1, d2);
                     }