From: Mark Thom Date: Sat, 13 Dec 2025 02:05:47 +0000 (-0800) Subject: add variant_hash and is_non_variant to fix setof/3, bagof/3 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=6284aa3a3f2ffe99985f6c66601052fffc323a66;p=scryer-prolog.git add variant_hash and is_non_variant to fix setof/3, bagof/3 --- diff --git a/Cargo.lock b/Cargo.lock index 4fa61925..a605ccf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,6 +45,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -872,6 +878,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "foreign-types" version = "0.3.2" @@ -1112,6 +1124,17 @@ version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "headers" version = "0.3.9" @@ -1456,7 +1479,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - 
"hashbrown", + "hashbrown 0.15.3", ] [[package]] @@ -2734,6 +2757,7 @@ dependencies = [ "fxhash", "getrandom 0.2.16", "git-version", + "hashbrown 0.16.1", "hostname", "iai-callgrind", "indexmap", diff --git a/Cargo.toml b/Cargo.toml index 5971a229..88682b69 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,6 +88,7 @@ ego-tree = "0.10.0" serde_json = "1.0.122" serde = "1.0.204" parking_lot = "0.12.4" +hashbrown = "0.16.1" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] crossterm = { version = "0.28.1", optional = true } diff --git a/build/instructions_template.rs b/build/instructions_template.rs index 8971f505..eb67213a 100644 --- a/build/instructions_template.rs +++ b/build/instructions_template.rs @@ -649,6 +649,8 @@ enum SystemClauseType { Argv, #[strum_discriminants(strum(props(Arity = "2", Name = "$variant")))] IsVariant, + #[strum_discriminants(strum(props(Arity = "2", Name = "$group_by_variant")))] + GroupByVariant, Repl(ReplCodePtr), } diff --git a/src/forms.rs b/src/forms.rs index d6cc39d4..65939d58 100644 --- a/src/forms.rs +++ b/src/forms.rs @@ -44,12 +44,6 @@ impl AppendOrPrepend { } } -#[derive(Debug, Clone, Copy)] -pub enum VarComparison { - Indistinct, - Distinct, -} - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Level { Deep, diff --git a/src/heap_iter.rs b/src/heap_iter.rs index 05b91293..903fcbc2 100644 --- a/src/heap_iter.rs +++ b/src/heap_iter.rs @@ -47,6 +47,7 @@ impl<'a> ParallelHeapIter<'a> { } #[derive(Debug)] +#[allow(dead_code)] pub enum TermPair { Vars(usize, usize), Less(HeapCellValue, HeapCellValue), @@ -97,10 +98,6 @@ impl Iterator for ParallelHeapIter<'_> { let s2 = self.stack.pop().unwrap(); let s2 = heap_bound_deref(self.heap, s2); - if s1 == s2 { - continue; - } - let v1 = heap_bound_store(self.heap, s1); let v2 = heap_bound_store(self.heap, s2); @@ -392,7 +389,7 @@ pub struct EagerStackfulPreOrderHeapIter<'a> { start_value: HeapCellValue, iter_stack: Vec, mark_phase: bool, - heap: &'a mut Heap, + pub heap: &'a 
mut Heap, } impl<'a> Drop for EagerStackfulPreOrderHeapIter<'a> { diff --git a/src/lib/builtins.pl b/src/lib/builtins.pl index 76a53420..7dea6d3e 100644 --- a/src/lib/builtins.pl +++ b/src/lib/builtins.pl @@ -956,54 +956,6 @@ set_difference([], _, []) :- !. set_difference(Xs, [], Xs). -% variant/2 checks whether X is a variant of Y per the definition in -% 7.1.6.1 of the ISO standard. - -:- non_counted_backtracking variant/4. - -variant(X,Y,VPs,VPs0) :- - ( var(X) -> - var(Y), - VPs = [X-Y|VPs0] - ; var(Y) -> - false - ; X =.. [FX | XArgs], - Y =.. [FX | YArgs], - lists:foldl('$call'(builtins:variant), XArgs, YArgs, VPs, VPs0) - ). - -:- non_counted_backtracking variant/2. - -singleton([_]). - -variant(X, Y) :- - variant(X,Y, VPs, []), - keysort(VPs, SVPs), - pairs:group_pairs_by_key(SVPs, SVPKs), - pairs:pairs_values(SVPKs, Vals), - lists:maplist('$call'(builtins:term_variables), Vals, Vs), - lists:maplist('$call'(builtins:singleton), Vs), - term_variables(Vs, YVars), - lists:length(SVPKs, N), - lists:length(YVars, N). - - -:- non_counted_backtracking group_by_variant/4. - -group_by_variant([V2-S2 | Pairs], V1-S1, [S2 | Solutions], Pairs0) :- - variant(V1, V2), - !, - V1 = V2, - group_by_variant(Pairs, V2-S2, Solutions, Pairs0). -group_by_variant(Pairs, _, [], Pairs). - -:- non_counted_backtracking group_by_variants/2. - -group_by_variants([V-S|Pairs], [V-Solution|Solutions]) :- - group_by_variant([V-S|Pairs], V-S, Solution, Pairs0), - group_by_variants(Pairs0, Solutions). -group_by_variants([], []). - :- non_counted_backtracking iterate_variants/3. 
iterate_variants([V-Solution|GroupSolutions], V, Solution) :- @@ -1074,9 +1026,8 @@ bagof(Template, Goal, Solution) :- term_variables(Goal, GoalVars), term_variables(TemplateVars+GoalVars, TGVs), lists:append(TemplateVars, Witnesses0, TGVs), - findall_with_existential(Template, Goal, PairedSolutions0, Witnesses0, Witnesses), - keysort(PairedSolutions0, PairedSolutions), - group_by_variants(PairedSolutions, GroupedSolutions), + findall_with_existential(Template, Goal, PairedSolutions, Witnesses0, Witnesses), + '$group_by_variant'(PairedSolutions, GroupedSolutions), iterate_variants(GroupedSolutions, Witnesses, Solution). :- non_counted_backtracking iterate_variants_and_sort/3. @@ -1090,7 +1041,6 @@ iterate_variants_and_sort([V-Solution0|GroupSolutions], V, Solution) :- iterate_variants_and_sort([_|GroupSolutions], Ws, Solution) :- iterate_variants_and_sort(GroupSolutions, Ws, Solution). - :- meta_predicate(setof(?, 0, ?)). :- non_counted_backtracking setof/3. @@ -1112,9 +1062,8 @@ setof(Template, Goal, Solution) :- term_variables(Goal, GoalVars), term_variables(TemplateVars+GoalVars, TGVs), lists:append(TemplateVars, Witnesses0, TGVs), - findall_with_existential(Template, Goal, PairedSolutions0, Witnesses0, Witnesses), - '$keysort_with_constant_var_ordering'(PairedSolutions0, PairedSolutions), % see 7.2.1 - group_by_variants(PairedSolutions, GroupedSolutions), + findall_with_existential(Template, Goal, PairedSolutions, Witnesses0, Witnesses), + '$group_by_variant'(PairedSolutions, GroupedSolutions), iterate_variants_and_sort(GroupedSolutions, Witnesses, Solution). % Clause retrieval and information. 
diff --git a/src/machine/dispatch.rs b/src/machine/dispatch.rs index d68f8efe..900f55dc 100644 --- a/src/machine/dispatch.rs +++ b/src/machine/dispatch.rs @@ -156,7 +156,7 @@ impl MachineState { let heap_addr = resource_error_call_result!( self, - sized_iter_to_heap_list(&mut self.heap, list.len(), list.into_iter(),) + sized_iter_to_heap_list(&mut self.heap, list.len(), list.into_iter()) ); let target_addr = self.registers[2]; @@ -173,7 +173,7 @@ impl MachineState { let mut key_pairs = Vec::with_capacity(list.len()); for val in list { - let key = self.project_onto_key(val)?; + let (key, _) = self.key_val_pair(val)?; key_pairs.push((key, val)); } @@ -2544,26 +2544,6 @@ impl Machine { self.machine_st.p = self.machine_st.cp; } } - &Instruction::CallKeySortWithConstantVarOrdering => { - try_or_throw!(self.machine_st, self.machine_st.keysort(), continue); - - if self.machine_st.fail { - self.machine_st.backtrack(); - } else { - increment_call_count!(self.machine_st); - self.machine_st.p += 1; - } - } - &Instruction::ExecuteKeySortWithConstantVarOrdering => { - try_or_throw!(self.machine_st, self.machine_st.keysort(), continue); - - if self.machine_st.fail { - self.machine_st.backtrack(); - } else { - increment_call_count!(self.machine_st); - self.machine_st.p = self.machine_st.cp; - } - } &Instruction::CallIs(r, at) => { try_or_throw!(self.machine_st, self.machine_st.is(r, at), continue); @@ -4781,11 +4761,25 @@ impl Machine { step_or_fail!(self.machine_st, self.machine_st.p = self.machine_st.cp); } &Instruction::CallIsVariant => { - self.machine_st.fail = self.machine_st.is_not_variant(); + self.machine_st.fail = self.machine_st.is_non_variant( + self.machine_st.registers[1], + self.machine_st.registers[2], + ); step_or_fail!(self.machine_st, self.machine_st.p += 1); } &Instruction::ExecuteIsVariant => { - self.machine_st.fail = self.machine_st.is_not_variant(); + self.machine_st.fail = self.machine_st.is_non_variant( + self.machine_st.registers[1], + 
self.machine_st.registers[2], + ); + step_or_fail!(self.machine_st, self.machine_st.p = self.machine_st.cp); + } + &Instruction::CallGroupByVariant => { + try_or_throw!(self.machine_st, self.machine_st.group_by_variant(), continue); + step_or_fail!(self.machine_st, self.machine_st.p += 1); + } + &Instruction::ExecuteGroupByVariant => { + try_or_throw!(self.machine_st, self.machine_st.group_by_variant(), continue); step_or_fail!(self.machine_st, self.machine_st.p = self.machine_st.cp); } &Instruction::CallCurrentTime => { diff --git a/src/machine/machine_state_impl.rs b/src/machine/machine_state_impl.rs index 668a5bc0..393b646f 100644 --- a/src/machine/machine_state_impl.rs +++ b/src/machine/machine_state_impl.rs @@ -553,7 +553,7 @@ impl MachineState { } self.compare_term_test(h1, h2) - .map(|o| o != Ordering::Equal) + .map(|o| !o.is_eq()) .unwrap_or(true) } @@ -934,7 +934,7 @@ impl MachineState { } // see 8.4.4.3 of Draft Technical Corrigendum 2 for an error guide. - pub fn project_onto_key(&mut self, value: HeapCellValue) -> Result<HeapCellValue, MachineStub> { + pub fn key_val_pair(&mut self, value: HeapCellValue) -> Result<(HeapCellValue, HeapCellValue), MachineStub> { let stub_gen = || functor_stub(atom!("keysort"), 2); let store_v = self.store(self.deref(value)); @@ -948,7 +948,7 @@ impl MachineState { let (name, arity) = cell_as_atom_cell!(self.heap[s]).get_name_and_arity(); if name == atom!("-") && arity == 2 { - Ok(heap_loc_as_cell!(s + 1)) + Ok((heap_loc_as_cell!(s+1), heap_loc_as_cell!(s+2))) } else { let err = self.type_error(ValidType::Pair, self.heap[s]); Err(self.error_form(err, stub_gen())) diff --git a/src/machine/mock_wam.rs b/src/machine/mock_wam.rs index 082ed62c..b871e0fc 100644 --- a/src/machine/mock_wam.rs +++ b/src/machine/mock_wam.rs @@ -632,12 +632,12 @@ mod tests { }); assert_eq!( - wam.compare_term_test(heap_loc_as_cell!(0), heap_loc_as_cell!(0)), + wam.compare_term_test(str_loc_as_cell!(0), str_loc_as_cell!(0)), Some(Ordering::Equal) ); assert_eq!( - 
wam.compare_term_test(heap_loc_as_cell!(0), atom_as_cell!(atom!("a"))), + wam.compare_term_test(str_loc_as_cell!(0), atom_as_cell!(atom!("a"))), Some(Ordering::Greater) ); diff --git a/src/machine/mod.rs b/src/machine/mod.rs index 859290a4..42999e07 100644 --- a/src/machine/mod.rs +++ b/src/machine/mod.rs @@ -27,6 +27,7 @@ pub mod streams; pub mod system_calls; pub mod term_stream; pub mod unify; +pub mod variant_hashing; use crate::arena::*; use crate::arithmetic::*; diff --git a/src/machine/system_calls.rs b/src/machine/system_calls.rs index aff084db..3a6c321b 100644 --- a/src/machine/system_calls.rs +++ b/src/machine/system_calls.rs @@ -574,53 +574,6 @@ pub(crate) struct FindallCopyInfo { } impl MachineState { - // determine whether two terms are variants, i.e. if there exists - // a bijection between their variable sets such that applying it - // to h1 produces h2 (ISO Prolog standard section 7.1.6.1). - // return true on failure and false on success. - #[inline(always)] - pub fn is_not_variant(&self) -> bool { - let h1 = self.registers[1]; - let h2 = self.registers[2]; - - let mut a_to_b = IndexMap::with_hasher(FxBuildHasher::default()); - let mut b_to_a = IndexMap::with_hasher(FxBuildHasher::default()); - - for term_pair in ParallelHeapIter::from(self, h1, h2) { - match term_pair { - TermPair::Vars(v1_offset, v2_offset) => { - match a_to_b.entry(v1_offset) { - indexmap::map::Entry::Occupied(stored_v2_offset) => { - if v2_offset != *stored_v2_offset.get() { - return true; - } - } - indexmap::map::Entry::Vacant(entry) => { - entry.insert_entry(v2_offset); - } - } - - match b_to_a.entry(v2_offset) { - indexmap::map::Entry::Occupied(stored_v1_offset) => { - if v1_offset != *stored_v1_offset.get() { - return true; - } - } - indexmap::map::Entry::Vacant(entry) => { - entry.insert_entry(v1_offset); - } - } - } - TermPair::Less(..) => return true, - TermPair::Greater(..) 
=> return true, - TermPair::Unordered(cell_1, cell_2) if cell_1 != cell_2 => return true, - _ => {} - } - } - - false - } - fn copy_lifted_heap_from_offset(&mut self, offset: usize, lh_offset: usize) { let reserve_size = self.lifted_heap.cell_len() - lh_offset; let mut writer = step_or_resource_error!(self, self.heap.reserve(reserve_size)); diff --git a/src/machine/variant_hashing.rs b/src/machine/variant_hashing.rs new file mode 100644 index 00000000..42a7e18a --- /dev/null +++ b/src/machine/variant_hashing.rs @@ -0,0 +1,190 @@ +use crate::forms::*; +use crate::heap_iter::*; +use crate::types::*; +use crate::machine::*; +use crate::machine::heap::*; + +use fxhash::{FxHasher, FxBuildHasher}; +use hashbrown::{HashTable}; + +use std::hash::{Hash, Hasher}; + +impl MachineState { + // determine whether two terms are variants, i.e. if there exists + // a bijection between their variable sets such that applying it + // to h1 produces h2 (ISO Prolog standard section 7.1.6.1). + // return false on success and true on failure like eq_test. + #[inline(always)] + pub fn is_non_variant(&self, h1: HeapCellValue, h2: HeapCellValue) -> bool { + let mut a_to_b = IndexMap::with_hasher(FxBuildHasher::default()); + let mut b_to_a = IndexMap::with_hasher(FxBuildHasher::default()); + + for term_pair in ParallelHeapIter::from(self, h1, h2) { + match term_pair { + TermPair::Vars(v1_offset, v2_offset) => { + match a_to_b.entry(v1_offset) { + indexmap::map::Entry::Occupied(stored_v2_offset) => { + if v2_offset != *stored_v2_offset.get() { + return true; + } + } + indexmap::map::Entry::Vacant(entry) => { + entry.insert_entry(v2_offset); + } + } + + match b_to_a.entry(v2_offset) { + indexmap::map::Entry::Occupied(stored_v1_offset) => { + if v1_offset != *stored_v1_offset.get() { + return true; + } + } + indexmap::map::Entry::Vacant(entry) => { + entry.insert_entry(v1_offset); + } + } + } + TermPair::Less(..) => return true, + TermPair::Greater(..) 
=> return true, + TermPair::Unordered(cell_1, cell_2) if cell_1 != cell_2 => return true, + _ => {} + } + } + + false + } + + fn variant_hash(&mut self, cell: HeapCellValue) -> u64 { + let mut var_ids = IndexMap::with_hasher(FxBuildHasher::default()); + let mut hasher = FxHasher::default(); + let mut iter = eager_stackful_preorder_iter(&mut self.heap, cell); + let mut next_var_id = 0; + + while let Some(term) = iter.next() { + read_heap_cell!(term, + (HeapCellValueTag::Str, s) => { + let (name, arity) = cell_as_atom_cell!(iter.heap[s]).get_name_and_arity(); + (name.index, arity).hash(&mut hasher); + } + (HeapCellValueTag::Lis) => { + (atom!(".").index, 2).hash(&mut hasher); + } + (HeapCellValueTag::PStrLoc, l) => { + let string = iter.heap.scan_slice_to_str(l).string; + + for c in string.chars() { + (atom!(".").index, 2).hash(&mut hasher); + hasher.write_u64(AtomCell::new_char_inlined(c).get_name().index); + } + } + (HeapCellValueTag::Atom, (name, arity)) => { + debug_assert_eq!(arity, 0); + (name.index, arity).hash(&mut hasher); + } + (HeapCellValueTag::AttrVar | HeapCellValueTag::Var, h) => { + let canonical_id = var_ids.entry(h).or_insert_with(|| { + let id = next_var_id; + next_var_id += 1; + id + }); + + hasher.write_u64(*canonical_id); + } + _ => { + if let Some(n) = Number::try_from((term, &self.arena.f64_tbl)).ok() { + match n { + Number::Float(f) => f.hash(&mut hasher), + Number::Integer(n) => n.hash(&mut hasher), + Number::Rational(r) => r.hash(&mut hasher), + Number::Fixnum(f) => f.hash(&mut hasher), + } + } else { + term.hash(&mut hasher); + } + } + ); + } + + hasher.finish() + } + + pub fn group_by_variant(&mut self) -> CallResult { + let stub_gen = || functor_stub(atom!("$group_by_variant"), 2); + let list = self.try_from_list(self.registers[1], stub_gen)?; + + let mut key_pairs = Vec::with_capacity(list.len()); + + for val in list { + key_pairs.push(self.key_val_pair(val)?); + } + + // the first parameter is the hash. 
Rust forces us to store it + // because of non-lexical lifetime hell between + // HashTable::find_mut and HashTable::insert_unique. also + // avoid computing the same hash repeatedly + let mut table: HashTable<(u64, Vec<HeapCellValue>, Vec<HeapCellValue>)> = HashTable::new(); + + for (key, val) in key_pairs { + let hash = self.variant_hash(key); + + match table.find_mut(hash, |(_, keys, _)| !self.is_non_variant(key, keys[0])) { + Some((_, keys, vals)) => { + keys.push(key); + vals.push(val); + } + None => { + table.insert_unique(hash, (hash, vec![key], vec![val]), |(h, _, _)| *h); + } + } + } + + let mut list_of_lists = Vec::with_capacity(table.len()); + + for (_, keys, variants) in table { + if let None = keys.windows(2).try_for_each(|cells| { + unify_fn!(*self, cells[0], cells[1]); + if self.fail { None } else { Some(()) } + }) { + return Ok(()); + } + + let variant_list_cell = resource_error_call_result!( + self, + sized_iter_to_heap_list( + &mut self.heap, + variants.len(), + variants.into_iter(), + ) + ); + + let mut writer = resource_error_call_result!(self, self.heap.reserve(3)); + + let key_val_cell = writer.write_with(|section| { + let key_val_cell = str_loc_as_cell!(section.cell_len()); + + section.push_cell(atom_as_cell!(atom!("-"), 2)); + section.push_cell(keys[0]); + section.push_cell(variant_list_cell); + + key_val_cell + }).result; + + list_of_lists.push(key_val_cell); + } + + let variant_grouped_list = resource_error_call_result!( + self, + sized_iter_to_heap_list( + &mut self.heap, + list_of_lists.len(), + list_of_lists.into_iter(), + ) + ); + + let target_addr = self.registers[2]; + unify_fn!(*self, target_addr, variant_grouped_list); + Ok(()) + } +} + +