From: Mark Date: Sun, 22 Oct 2023 03:37:27 +0000 (-0600) Subject: use Deutsch-Schorr-Waite to implement acyclic_term/1 (#2128) X-Git-Tag: remove~14 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=6a913bc4ccc76a662392b7b1febb16e6ae4bf3da;p=scryer-prolog.git use Deutsch-Schorr-Waite to implement acyclic_term/1 (#2128) --- diff --git a/Cargo.lock b/Cargo.lock index 133b6524..e054b9db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2047,7 +2047,6 @@ dependencies = [ "to-syn-value", "to-syn-value_derive", "tokio", - "topo_sort", "walkdir", "warp", "wasm-bindgen", @@ -2577,12 +2576,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "topo_sort" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "156552d3c80df430aaac98c605a4e0eb7da8d06029cce2d40b4a6b095a34b37e" - [[package]] name = "tower-service" version = "0.3.2" diff --git a/Cargo.toml b/Cargo.toml index 6ed48fc0..5fb0db69 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,7 +70,6 @@ bytes = "1" dashu = "0.4.0" num-order = { version = "1.2.0" } rand = "0.8.5" -topo_sort = { version = "0.4.0" } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] libffi = { version = "3.2.0", optional = true } diff --git a/src/heap_iter.rs b/src/heap_iter.rs index 3c47c934..2e1bf6eb 100644 --- a/src/heap_iter.rs +++ b/src/heap_iter.rs @@ -1,6 +1,8 @@ -pub(crate) use crate::machine::gc::{CycleDetectorUMP, StacklessPreOrderHeapIter}; +#[cfg(test)] +pub(crate) use crate::machine::gc::StacklessPreOrderHeapIter; use crate::atom_table::*; +use crate::machine::cycle_detection::CycleDetectingIter; use crate::machine::heap::*; use crate::machine::stack::*; use crate::types::*; @@ -504,11 +506,11 @@ impl<'a, ElideLists: ListElisionPolicy> Iterator for StackfulPreOrderHeapIter<'a } #[inline(always)] -pub(crate) fn cycle_detecting_stackless_preorder_iter( - heap: &mut Vec, +pub(crate) fn cycle_detecting_stackless_preorder_iter<'a>( + heap: &'a mut [HeapCellValue], start: usize, -) -> StacklessPreOrderHeapIter { - StacklessPreOrderHeapIter::::new(heap, start) +) -> CycleDetectingIter<'a> { + CycleDetectingIter::new(heap, start) } #[inline(always)] diff --git a/src/machine/cycle_detection.rs b/src/machine/cycle_detection.rs new file mode 100644 index 00000000..8de3e8e6 --- /dev/null +++ b/src/machine/cycle_detection.rs @@ -0,0 +1,424 @@ +use crate::atom_table::*; +use crate::types::*; + +/* Use the pointer reversal technique of the Deutsch-Schorr-Waite + * algorithm to detect cycles in Prolog terms. + * + * Much of the structure and nomenclature of the GC marking algorithm + * is adapted here but there are a few significant changes: + * + * - Forwarded cells now form a trail of bread crumbs leading back to self.start + * - Cells are only marked during the backward phase + * - Visiting subterms of a visited compound does not immediately shift to the backward phase + * - The heads of LIS structures are both marked and forwarded rather + * than just forwarded to distinguish them from tails; + * continue_forwarding() checks for this before entering the forward + * phase + * + * Commonalities with the GC marking algorithm: + * - The contents of forwarded cells are modified only when they are unforwarded + * - Marked (but unforwarded!) cells immediately shift to the backward phase + */ + +#[derive(Debug)] +pub(crate) struct CycleDetectingIter<'a> { + pub(crate) heap: &'a mut [HeapCellValue], + start: usize, + current: usize, + next: u64, + cycle_found: bool, + mark_phase: bool, +} + +impl<'a> CycleDetectingIter<'a> { + pub(crate) fn new(heap: &'a mut [HeapCellValue], start: usize) -> Self { + heap[start].set_forwarding_bit(true); + let next = heap[start].get_value(); + + Self { + heap, + start, + current: start, + next, + cycle_found: false, + mark_phase: true, + } + } + + #[inline] + pub(crate) fn cycle_found(&self) -> bool { + self.cycle_found + } + + #[inline] + fn cycle_detection_active(&self) -> bool { + self.mark_phase && !self.cycle_found + } + + fn backward_and_return(&mut self) -> HeapCellValue { + let mut current = self.heap[self.current]; + current.set_value(self.next); + + if self.backward() { + // set the f and m bits on the heap cell at start + // so we invoke backward() and return None next call. + + self.heap[self.current].set_forwarding_bit(false); + self.heap[self.current].set_mark_bit(self.mark_phase); + } + + current + } + + fn traverse_subterm(&mut self, h: usize, arity: usize) -> Option { + let mut last_cell_loc = h + arity - 1; + + for idx in (h .. h + arity).rev() { + if self.heap[idx].get_forwarding_bit() { + if self.cycle_detection_active() { + self.cycle_found = true; + return None; + } + + last_cell_loc -= 1; + } else if self.heap[idx].get_mark_bit() == self.mark_phase { + last_cell_loc -= 1; + } else { + break; + } + } + + Some(last_cell_loc) + } + + #[inline] + fn continue_forwarding(&self) -> bool { + self.heap[self.current].get_mark_bit() != self.mark_phase || + self.heap[self.current].get_forwarding_bit() + } + + fn forward(&mut self) -> Option { + loop { + if self.continue_forwarding() { + match self.heap[self.current].get_tag() { + tag @ HeapCellValueTag::AttrVar | tag @ HeapCellValueTag::Var => { + let next = self.next as usize; + + if self.heap[next].get_forwarding_bit() { + if self.current != next { + return if self.cycle_detection_active() { + self.cycle_found = true; + None + } else { + Some(self.backward_and_return()) + }; + } else if self.backward() { + return None; + } + + continue; + } else if self.heap[next].get_mark_bit() == self.mark_phase { + return Some(self.backward_and_return()); + } + + self.heap[next].set_forwarding_bit(true); + + let temp = self.heap[next].get_value(); + + self.heap[next].set_value(self.current as u64); + self.current = next; + self.next = temp; + + if self.next < self.heap.len() as u64 { + if self.heap[self.next as usize].get_mark_bit() == self.mark_phase { + return Some(HeapCellValue::build_with(tag, next as u64)); + } + } + } + HeapCellValueTag::Str => { + let h = self.next as usize; + let cell = self.heap[h]; + let arity = cell_as_atom_cell!(self.heap[h]).get_arity(); + + let last_cell_loc = match self.traverse_subterm(h + 1, arity) { + Some(last_cell_loc) => last_cell_loc, + None => return None, + }; + + if last_cell_loc == h { + if self.backward() { + return None; + } + + continue; + } + + if self.cycle_detection_active() { + for idx in (h + 1 .. last_cell_loc).rev() { + if self.heap[idx].get_forwarding_bit() { + self.cycle_found = true; + return None; + } + } + } + + self.heap[last_cell_loc].set_forwarding_bit(true); + + self.next = self.heap[last_cell_loc].get_value(); + self.heap[last_cell_loc].set_value(self.current as u64); + self.current = last_cell_loc; + + return Some(cell); + } + HeapCellValueTag::Lis => { + let mut cell = self.heap[self.current]; + cell.set_value(self.next); + + let last_cell_loc = match self.traverse_subterm(self.next as usize, 2) { + Some(last_cell_loc) => last_cell_loc, + None => return None, + }; + + if (last_cell_loc + 1) as u64 == self.next { + if self.backward() { + return None; + } + + continue; + } else if last_cell_loc as u64 == self.next { + // car cells of lists are both marked and forwarded. + self.heap[last_cell_loc].set_mark_bit(self.mark_phase); + } + + self.heap[last_cell_loc].set_forwarding_bit(true); + + self.next = self.heap[last_cell_loc].get_value(); + self.heap[last_cell_loc].set_value(self.current as u64); + self.current = last_cell_loc; + + return Some(cell); + } + HeapCellValueTag::PStrLoc => { + let h = self.next as usize; + let cell = self.heap[h]; + let last_cell_loc = h + 1; + + if self.heap[last_cell_loc].get_forwarding_bit() { + if self.cycle_detection_active() { + self.cycle_found = true; + return None; + } else if self.backward() { + return None; + } + + continue; + } + + self.heap[last_cell_loc].set_forwarding_bit(true); + + self.next = self.heap[last_cell_loc].get_value(); + self.heap[last_cell_loc].set_value(self.current as u64); + self.current = last_cell_loc; + + return Some(cell); + } + HeapCellValueTag::PStrOffset => { + let h = self.next as usize; + let cell = self.heap[h]; + let last_cell_loc = h + 1; + + if self.heap[h].get_tag() == HeapCellValueTag::PStr { + if self.heap[last_cell_loc].get_forwarding_bit() { + if self.cycle_detection_active() { + self.cycle_found = true; + return None; + } else if self.backward() { + return None; + } + + continue; + } + + self.heap[last_cell_loc].set_forwarding_bit(true); + + self.next = self.heap[last_cell_loc].get_value(); + self.heap[last_cell_loc].set_value(self.current as u64); + self.current = last_cell_loc; + } else { + debug_assert!(self.heap[h].get_tag() == HeapCellValueTag::CStr); + + self.next = self.heap[h].get_value(); + self.heap[h].set_value(self.current as u64); + self.current = h; + } + + return Some(cell); + } + tag @ HeapCellValueTag::Atom => { + let cell = HeapCellValue::build_with(tag, self.next); + let arity = AtomCell::from_bytes(cell.into_bytes()).get_arity(); + + if arity == 0 { + return Some(self.backward_and_return()); + } else if self.backward() { + return None; + } + } + HeapCellValueTag::PStr => { + if self.backward() { + return None; + } + } + _ => { + return Some(self.backward_and_return()); + } + } + } else if self.backward() { + return None; + } + } + } + + fn pivot_subterm(&mut self) { + self.current -= 1; + + let temp = self.heap[self.current + 1].get_value(); + + self.heap[self.current + 1].set_value(self.next); + self.next = self.heap[self.current].get_value(); + self.heap[self.current].set_value(temp); + + self.heap[self.current].set_forwarding_bit(true); + } + + fn continue_backward(&mut self) -> bool { + self.heap[self.current].set_forwarding_bit(false); + + if self.current == self.start { + return false; + } + + let temp = self.heap[self.current].get_value(); + + match self.heap[temp as usize].get_tag() { + HeapCellValueTag::Str => { + let mut new_str_back_link = self.current; + + for idx in (0 .. self.current).rev() { + if self.heap[idx].get_tag() == HeapCellValueTag::Atom { + if cell_as_atom_cell!(self.heap[idx]).get_arity() > 0 { + new_str_back_link = idx; + break; + } + } + + if self.heap[idx].get_mark_bit() != self.mark_phase { + if !self.heap[idx].get_forwarding_bit() { + new_str_back_link = idx; + break; + } + } + } + + self.heap[self.current].set_mark_bit(self.mark_phase); + self.heap[self.current].set_value(self.next); + + let back_link_cell = self.heap[new_str_back_link]; + + self.next = back_link_cell.get_value(); + self.heap[new_str_back_link].set_value(temp); + self.current = new_str_back_link; + + read_heap_cell!(back_link_cell, + (HeapCellValueTag::Atom, (_name, arity)) => { + if arity > 0 { + self.heap[self.current].set_mark_bit(self.mark_phase); + return true; + } + } + _ => {} + ); + + self.heap[self.current].set_forwarding_bit(true); + false + } + HeapCellValueTag::Lis => { + if self.heap[self.current].get_mark_bit() == self.mark_phase { + true + } else { + self.heap[self.current - 1].set_mark_bit(self.mark_phase); + self.heap[self.current].set_mark_bit(self.mark_phase); + + if self.heap[self.current - 1].get_forwarding_bit() { + self.next = self.current as u64 - 1; + self.heap[self.current].set_value(self.next); + self.current = temp as usize; + + true + } else { + self.pivot_subterm(); + false + } + } + } + _ => { + self.heap[self.current].set_mark_bit(self.mark_phase); + true + } + } + } + + fn backward(&mut self) -> bool { + while self.continue_backward() { + let temp = self.heap[self.current].get_value(); + + self.heap[self.current].set_value(self.next); + self.next = self.current as u64; + self.current = temp as usize; + } + + if self.current == self.start { + return true; + } + + false + } + + fn invert_marker(&mut self) { + self.cycle_found = false; + + if self.heap[self.start].get_forwarding_bit() { + while !self.backward() {} + } + + self.mark_phase = false; + self.heap[self.start].set_forwarding_bit(true); + + self.next = self.heap[self.start].get_value(); + self.current = self.start; + + while let Some(_) = self.forward() {} + } +} + +impl<'a> Iterator for CycleDetectingIter<'a> { + type Item = HeapCellValue; + + #[inline] + fn next(&mut self) -> Option { + self.forward() + } +} + + +impl<'a> Drop for CycleDetectingIter<'a> { + fn drop(&mut self) { + self.invert_marker(); + + if self.current == self.start { + return; + } + + while !self.backward() {} + } +} diff --git a/src/machine/gc.rs b/src/machine/gc.rs index b7f257a5..715adde7 100644 --- a/src/machine/gc.rs +++ b/src/machine/gc.rs @@ -55,39 +55,6 @@ impl UnmarkPolicy for IteratorUMP { } } -pub(crate) struct CycleDetectorUMP { - mark_phase: bool, - focus: usize, -} - -impl UnmarkPolicy for CycleDetectorUMP { - #[inline(always)] - fn forward_attr_var(iter: &mut StacklessPreOrderHeapIter) -> Option { - iter.forward_var() - } - - #[inline] - fn invert_marker(iter: &mut StacklessPreOrderHeapIter) { - iter.iter_state.mark_phase = false; - invert_marker(iter); - } - - #[inline] - fn mark_phase(&self) -> bool { - self.mark_phase - } - - #[inline(always)] - fn report_var_link(_iter: &StacklessPreOrderHeapIter) -> bool { - true - } - - #[inline(always)] - fn record_focus(iter: &mut StacklessPreOrderHeapIter) { - iter.iter_state.focus = iter.current; - } -} - struct MarkerUMP {} impl UnmarkPolicy for MarkerUMP { @@ -177,31 +144,6 @@ impl<'a> StacklessPreOrderHeapIter<'a, IteratorUMP> { } } -impl<'a> StacklessPreOrderHeapIter<'a, CycleDetectorUMP> { - pub(crate) fn new(heap: &'a mut [HeapCellValue], start: usize) -> Self { - heap[start].set_forwarding_bit(true); - let next = heap[start].get_value(); - - Self { - heap, - start, - current: start, - next, - iter_state: CycleDetectorUMP { mark_phase: true, focus: 0 }, - } - } - - #[inline] - pub(crate) fn focus(&self) -> usize { - self.iter_state.focus - } - - #[inline(always)] - pub(crate) fn current(&self) -> usize { - self.current - } -} - impl<'a, UMP: UnmarkPolicy> StacklessPreOrderHeapIter<'a, UMP> { fn backward_and_return(&mut self) -> HeapCellValue { let mut current = self.heap[self.current]; diff --git a/src/machine/machine_state_impl.rs b/src/machine/machine_state_impl.rs index 6a7cb95d..5007e85a 100644 --- a/src/machine/machine_state_impl.rs +++ b/src/machine/machine_state_impl.rs @@ -1131,66 +1131,23 @@ impl MachineState { #[inline] pub fn is_cyclic_term(&mut self, value: HeapCellValue) -> bool { - use topo_sort::TopoSort; - let value = self.store(self.deref(value)); - if value.is_constant() || value.is_stack_var() { + if value.is_stack_var() || value.is_constant() { return false; } let h = self.heap.len(); self.heap.push(value); - let found_cycle = (|| { - let mut topo_graph = TopoSort::new(); + let cycle_found = { let mut iter = cycle_detecting_stackless_preorder_iter(&mut self.heap, h); - - while let Some(cell) = iter.next() { - let focus = iter.focus(); - - read_heap_cell!(cell, - (HeapCellValueTag::Atom, (_name, arity)) => { - if arity > 0 { - // focus is actually the location of Str(s) here. - let s = iter.current() - arity; - - topo_graph.insert(focus, vec![s]); - topo_graph.insert(s, (s + 1 .. s + arity + 1).collect::>()); - } - } - (HeapCellValueTag::Str, s) => { - topo_graph.insert(focus, vec![s]); - } - (HeapCellValueTag::Lis | HeapCellValueTag::PStrLoc | HeapCellValueTag::PStrOffset, l) => { - if l <= focus && focus < l + 2 { - // TopoSort doesn't consider focus -> - // focus to induce a cycle so in this case - // it must be checked manually. - return true; - } - - topo_graph.insert(focus, (l .. l + 2).collect::>()); - } - (HeapCellValueTag::AttrVar | HeapCellValueTag::Var, h) => { - if focus != h { - topo_graph.insert(focus, vec![h]); - } - } - _ => { - } - ); - - if topo_graph.cycle_detected() { - break; - } - } - - topo_graph.cycle_detected() - })(); + while let Some(_) = iter.next() {} + iter.cycle_found() + }; self.heap.pop(); - found_cycle + cycle_found } // arg(+N, +Term, ?Arg) diff --git a/src/machine/mod.rs b/src/machine/mod.rs index 665366c3..c44df762 100644 --- a/src/machine/mod.rs +++ b/src/machine/mod.rs @@ -6,6 +6,7 @@ pub mod code_walker; pub mod loader; pub mod compile; pub mod copier; +pub mod cycle_detection; pub mod disjuncts; pub mod dispatch; pub mod gc;