From: Mark Thom Date: Sun, 22 Jan 2017 18:24:29 +0000 (-0700) Subject: streamline code generation with iterators and traits X-Git-Tag: v0.8.110~786 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=ed2343be9f1a841cc4f7928c3a73e18c9615b8d1;p=scryer-prolog.git streamline code generation with iterators and traits --- diff --git a/README.md b/README.md index 948f9093..7d00f4ad 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,6 @@ l0> ?- p(z, w). no l0> ?- p(w, w). yes -l0> ?- p(Z, w). -yes l0> ?- p(Z, h(Z, W), f(W)). no l0> p(Z, h(Z, W), f(W)). @@ -42,4 +40,15 @@ yes l0> ?- p(z, h(Z, w), f(w)). yes l0> quit +``` + +## Occurs check + +There's no occurs check, so cyclic terms do unify: + +``` +l0> p(W, W). +Program stored. +l0> ?- p(f(f(W)), W). +yes ``` \ No newline at end of file diff --git a/src/l0/ast.rs b/src/l0/ast.rs index e84d3851..7c30f827 100644 --- a/src/l0/ast.rs +++ b/src/l0/ast.rs @@ -1,3 +1,4 @@ +use std::cell::{Cell}; use std::vec::{Vec}; pub type Var = String; @@ -12,24 +13,39 @@ pub enum TopLevel { #[derive(Debug)] pub enum Term { - Atom(Atom), - Clause(Atom, Vec>), - Var(Var) + Atom(Cell, Atom), + Clause(Cell, Atom, Vec>), + Var(Cell, Var) } -pub enum MachineInstruction { +pub enum FactInstruction { GetStructure(Atom, usize, usize), - PutStructure(Atom, usize, usize), - SetVariable(usize), - SetValue(usize), UnifyVariable(usize), UnifyValue(usize) } -pub type Program = Vec; +pub enum QueryInstruction { + PutStructure(Atom, usize, usize), + SetVariable(usize), + SetValue(usize), +} + +pub type CompiledFact = Vec; + +pub type CompiledQuery = Vec; #[derive(Clone, Copy, PartialEq)] pub enum Addr { HeapCell(usize), RegNum(usize) } + +impl Term { + pub fn set_cell(&self, cell_num: usize) { + match self { + &Term::Atom(ref cell, _) => cell.set(cell_num), + &Term::Clause(ref cell, _, _) => cell.set(cell_num), + &Term::Var(ref cell, _) => cell.set(cell_num) + }; + } +} diff --git a/src/l0/codegen.rs b/src/l0/codegen.rs index d41d8d72..75c630e7 100644 --- a/src/l0/codegen.rs +++ b/src/l0/codegen.rs @@ -1,178 +1,126 @@ -use l0::ast::{Atom, Term, MachineInstruction, Program, TopLevel, Var}; +use l0::ast::{Atom, Term, FactInstruction, QueryInstruction, Var}; +use l0::iterators::{BreadthFirstIterator, PostOrderIterator}; -use std::collections::{HashMap, VecDeque}; +use std::collections::{HashSet}; use std::fmt; use std::vec::{Vec}; -impl fmt::Display for MachineInstruction { +impl fmt::Display for QueryInstruction { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - &MachineInstruction::GetStructure(ref a, ref s, ref r) => - write!(f, "get_structure {}/{}, X{}", a, s, r), - &MachineInstruction::PutStructure(ref a, ref s, ref r) => + &QueryInstruction::PutStructure(ref a, ref s, ref r) => write!(f, "put_structure {}/{}, X{}", a, s, r), - &MachineInstruction::SetVariable(ref r) => + &QueryInstruction::SetVariable(ref r) => write!(f, "set_variable X{}", r), - &MachineInstruction::SetValue(ref r) => + &QueryInstruction::SetValue(ref r) => write!(f, "set_value X{}", r), - &MachineInstruction::UnifyVariable(ref r) => + } + } +} + +impl fmt::Display for FactInstruction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &FactInstruction::GetStructure(ref a, ref s, ref r) => + write!(f, "get_structure {}/{}, X{}", a, s, r), + &FactInstruction::UnifyVariable(ref r) => write!(f, "unify_variable X{}", r), - &MachineInstruction::UnifyValue(ref r) => + &FactInstruction::UnifyValue(ref r) => write!(f, "unify_value X{}", r) } } } -enum IntTerm<'a> { - FinishedClause(usize, usize, &'a Atom, &'a Vec>), - UnfinishedClause(usize, &'a Atom, &'a Vec>), - FinishedAtom(usize, &'a Atom) +pub trait CompilationTarget<'a> where Self : Sized { + type Iterator : Iterator; + + fn iter(term: &'a Term) -> Self::Iterator; + + fn to_structure(name: Atom, arity: usize, cell_num: usize) -> Self; + fn to_value(cell_num: usize) -> Self; + fn to_variable(cell_num: usize) -> Self; } -pub fn compile_query<'a>(t: &'a Term) -> Program -{ - let mut stack : Vec> = Vec::new(); - let mut variable_allocs : HashMap<&Var, (usize, bool)> = HashMap::new(); - let mut query : Program = Vec::new(); - - match t { - &Term::Clause(ref atom, ref terms) => { - stack.push(IntTerm::UnfinishedClause(1, atom, terms)); - variable_allocs.insert(atom, (1, true)); - }, - &Term::Atom(ref atom) => { - query.push(MachineInstruction::PutStructure(atom.clone(), 0, 1)); - return query; - }, - &Term::Var(_) => { - query.push(MachineInstruction::SetVariable(1)); - return query; - }, - }; - - let mut max_reg_used : usize = 1; - - while let Some(int_term) = stack.pop() { - match int_term { - IntTerm::UnfinishedClause(r, atom, terms) => { - stack.push(IntTerm::FinishedClause(r, max_reg_used, atom, terms)); - - let mut counter : usize = max_reg_used; // r + 1; - - for t in terms { - if let &Term::Var(ref var) = t.as_ref() { - if !variable_allocs.contains_key(var) { - counter += 1; - variable_allocs.insert(var, (counter, false)); - } - } else { - counter += 1; - } - } +impl<'a> CompilationTarget<'a> for FactInstruction { + type Iterator = BreadthFirstIterator<'a>; + + fn iter(term: &'a Term) -> Self::Iterator { + term.breadth_first_iter() + } - max_reg_used = counter; - - for t in terms.iter().rev() { - if let &Term::Var(_) = t.as_ref() { - counter -= 1; - continue; - } - - let r = { - let oc = counter; - counter -= 1; - oc - }; - - match t.as_ref() { - &Term::Atom(ref atom) => - stack.push(IntTerm::FinishedAtom(r, atom)), - &Term::Clause(ref atom, ref terms) => - stack.push(IntTerm::UnfinishedClause(r, atom, terms)), - _ => {} - }; - } - }, - IntTerm::FinishedAtom(r, atom) => - query.push(MachineInstruction::PutStructure(atom.clone(), 0, r)), - IntTerm::FinishedClause(r, mr, atom, terms) => { - query.push(MachineInstruction::PutStructure(atom.clone(), terms.len(), r)); - - let mut counter : usize = mr + 1; - - for t in terms { - if let &Term::Var(ref var) = t.as_ref() { - let &mut (reg, ref mut seen) = variable_allocs.get_mut(var).unwrap(); - - if !*seen { - query.push(MachineInstruction::SetVariable(reg)); - *seen = true; - } else { - query.push(MachineInstruction::SetValue(reg)); - } - - if reg == counter { - counter += 1; - } - } else { - query.push(MachineInstruction::SetValue(counter)); - counter += 1; - } - } + fn to_structure(name: Atom, arity: usize, cell_num: usize) -> Self { + FactInstruction::GetStructure(name, arity, cell_num) + } - max_reg_used = counter - 1; - } - }; + fn to_value(cell_num: usize) -> Self { + FactInstruction::UnifyValue(cell_num) } - query + fn to_variable(cell_num: usize) -> Self { + FactInstruction::UnifyVariable(cell_num) + } } -pub fn compile_fact<'a>(t: &'a Term) -> Program { - let mut reg : usize = 2; - let mut queue : VecDeque<(usize, &'a Term)> = VecDeque::new(); - let mut variable_allocs : HashMap<&Var, usize> = HashMap::new(); - let mut fact : Program = Vec::new(); - - queue.push_back((1, t)); - - while let Some(t) = queue.pop_front() { - match t { - (r, &Term::Clause(ref atom, ref terms)) => { - fact.push(MachineInstruction::GetStructure(atom.clone(), - terms.len(), - r)); - - let mut counter : usize = reg; - - for t in terms { - if let &Term::Var(ref var) = t.as_ref() { - if !variable_allocs.contains_key(var) { - variable_allocs.insert(var, counter); - - fact.push(MachineInstruction::UnifyVariable(counter)); - counter += 1; - } else { - let r = variable_allocs.get(var).unwrap(); - fact.push(MachineInstruction::UnifyValue(*r)); - } - } else { - fact.push(MachineInstruction::UnifyVariable(counter)); - queue.push_back((counter, t)); - counter += 1; - } - } +impl<'a> CompilationTarget<'a> for QueryInstruction { + type Iterator = PostOrderIterator<'a>; + + fn iter(term: &'a Term) -> Self::Iterator { + term.post_order_iter() + } - reg = counter; + fn to_structure(name: Atom, arity: usize, cell_num: usize) -> Self { + QueryInstruction::PutStructure(name, arity, cell_num) + } + + fn to_value(cell_num: usize) -> Self { + QueryInstruction::SetValue(cell_num) + } + + fn to_variable(cell_num: usize) -> Self { + QueryInstruction::SetVariable(cell_num) + } +} + +fn subterm_to_instr<'a, Target>(subterm: &'a Term, + bindings: &mut HashSet<&'a Var>) + -> Target + where Target: CompilationTarget<'a> +{ + match subterm { + &Term::Atom(ref cell_num, _) => + Target::to_value(cell_num.get()), + &Term::Var(ref cell_num, ref atom) if bindings.contains(atom) => + Target::to_value(cell_num.get()), + &Term::Var(ref cell_num, ref atom) => { + bindings.insert(atom); + Target::to_variable(cell_num.get()) + }, + &Term::Clause(ref cell_num, _, _) => + Target::to_value(cell_num.get()) + } +} + +pub fn compile_target<'a, Target>(term: &'a Term) -> Vec + where Target: CompilationTarget<'a> +{ + let mut iter = Target::iter(term); + let mut target = Vec::::new(); + let mut bindings = HashSet::new(); + + while let Some(term) = iter.next() { + match term { + &Term::Atom(ref cell_num, ref atom) => + target.push(Target::to_structure(atom.clone(), 0, cell_num.get())), + &Term::Clause(ref cell_num, ref atom, ref terms) => { + target.push(Target::to_structure(atom.clone(), 0, cell_num.get())); + + for subterm in terms { + target.push(subterm_to_instr(subterm.as_ref(), &mut bindings)); + } }, - (r, &Term::Atom(ref atom)) => - fact.push(MachineInstruction::GetStructure(atom.clone(), 0, r)), - (r, &Term::Var(_)) => { - fact.push(MachineInstruction::UnifyVariable(r)); - return fact; - } + _ => {}, }; } - fact + target } diff --git a/src/l0/iterators.rs b/src/l0/iterators.rs new file mode 100644 index 00000000..cb123d6f --- /dev/null +++ b/src/l0/iterators.rs @@ -0,0 +1,98 @@ +use l0::ast::{Term}; + +use std::collections::{VecDeque}; +use std::vec::{Vec}; + +enum DepthFirstIteratorState<'a> { + // child no., the containing clause, its vector. + Clause(usize, &'a Term, &'a Vec>), + NonClause(&'a Term) +} + +pub struct PostOrderIterator<'a> { + state_stack: Vec> +} + +impl<'a> PostOrderIterator<'a> { + fn push_clause(&mut self, + child_num: usize, + term: &'a Term, + child_terms: &'a Vec>) + { + self.state_stack.push(DepthFirstIteratorState::Clause(child_num, + term, + child_terms)); + } + + fn render_new_state(term: &'a Term) -> DepthFirstIteratorState<'a> { + match term { + &Term::Clause(_, _, ref child_terms) => + DepthFirstIteratorState::Clause(0, term, child_terms), + _ => DepthFirstIteratorState::NonClause(term) + } + } + + fn push_term(&mut self, term: &'a Term) { + self.state_stack.push(Self::render_new_state(term)); + } +} + +impl<'a> Iterator for PostOrderIterator<'a> { + type Item = &'a Term; + + fn next(&mut self) -> Option { + while let Some(iter_state) = self.state_stack.pop() { + match iter_state { + DepthFirstIteratorState::Clause(child_num, term, child_terms) => { + if child_num == child_terms.len() { + return Some(term); + } else { + self.push_clause(child_num + 1, term, child_terms); + self.push_term(child_terms[child_num].as_ref()); + } + }, + DepthFirstIteratorState::NonClause(term) => return Some(term), + }; + } + + None + } +} + +pub struct BreadthFirstIterator<'a> { + state_queue : VecDeque<&'a Term> +} + +impl<'a> Iterator for BreadthFirstIterator<'a> { + type Item = &'a Term; + + fn next(&mut self) -> Option { + if let Some(term) = self.state_queue.pop_front() { + if let &Term::Clause(_, _, ref child_terms) = term { + for term in child_terms { + self.state_queue.push_back(term); + } + + return Some(term); + } + + return Some(term); + } + + None + } +} + +impl<'a> Term { + pub fn post_order_iter(&'a self) -> PostOrderIterator<'a> { + let initial_state = PostOrderIterator::render_new_state(self); + PostOrderIterator { state_stack: vec![initial_state] } + } + + pub fn breadth_first_iter(&'a self) -> BreadthFirstIterator<'a> { + let mut queue = VecDeque::new(); + queue.push_back(self); + + BreadthFirstIterator { state_queue: queue } + } +} diff --git a/src/l0/l0_parser.lalrpop b/src/l0/l0_parser.lalrpop index 9743ade7..185b77ae 100644 --- a/src/l0/l0_parser.lalrpop +++ b/src/l0/l0_parser.lalrpop @@ -1,3 +1,4 @@ +use std::cell::{Cell}; use l0::ast::{Atom, Term, TopLevel, Var}; grammar; @@ -23,8 +24,8 @@ Term : Term = { "(" ",")*> ")" => { let mut ts = ts; ts.push(t); - Term::Clause(a, ts) + Term::Clause(Cell::new(0), a, ts) }, - => Term::Atom(<>), - => Term::Var(<>), + => Term::Atom(Cell::new(0), <>), + => Term::Var(Cell::new(0), <>), }; \ No newline at end of file diff --git a/src/l0/l0_parser.rs b/src/l0/l0_parser.rs index cd9c7803..21a15081 100644 --- a/src/l0/l0_parser.rs +++ b/src/l0/l0_parser.rs @@ -1,9 +1,11 @@ +use std::cell::{Cell}; use l0::ast::{Atom, Term, TopLevel, Var}; extern crate lalrpop_util as __lalrpop_util; mod __parse__TopLevel { #![allow(non_snake_case, non_camel_case_types, unused_mut, unused_variables, unused_imports)] + use std::cell::{Cell}; use l0::ast::{Atom, Term, TopLevel, Var}; extern crate lalrpop_util as __lalrpop_util; #[allow(dead_code)] @@ -1384,7 +1386,7 @@ pub fn __action6< { let mut ts = ts; ts.push(t); - Term::Clause(a, ts) + Term::Clause(Cell::new(0), a, ts) } } @@ -1396,7 +1398,7 @@ pub fn __action7< (_, __0, _): (usize, Atom, usize), ) -> Term { - Term::Atom(__0) + Term::Atom(Cell::new(0), __0) } #[allow(unused_variables)] @@ -1407,7 +1409,7 @@ pub fn __action8< (_, __0, _): (usize, Var, usize), ) -> Term { - Term::Var(__0) + Term::Var(Cell::new(0), __0) } #[allow(unused_variables)] diff --git a/src/l0/machine.rs b/src/l0/machine.rs index b98547ac..39099e07 100644 --- a/src/l0/machine.rs +++ b/src/l0/machine.rs @@ -1,6 +1,5 @@ -use l0::ast::{Addr, Atom, MachineInstruction, Program, Term, TopLevel, Var}; +use l0::ast::{Addr, Atom, CompiledFact, FactInstruction, QueryInstruction}; -use std::fmt; use std::vec::{Vec}; #[derive(Clone)] @@ -26,7 +25,7 @@ pub struct Machine { pub fail : bool, heap : Heap, mode : MachineMode, - pub program : Option, + pub program : Option, registers : Registers } @@ -111,10 +110,33 @@ impl Machine { } } } + + pub fn execute_query_instr<'a, 'b : 'a>(&'a mut self, instr: &'b QueryInstruction) { + match instr { + &QueryInstruction::PutStructure(ref name, arity, reg) => { + self.heap.push(HeapCell::Str(self.h + 1)); + self.heap.push(HeapCell::NamedStr(arity, name.clone())); + + self.registers[reg] = self.heap[self.h].clone(); + + self.h += 2; + }, + &QueryInstruction::SetVariable(reg) => { + self.heap.push(HeapCell::Ref(self.h)); + self.registers[reg] = self.heap[self.h].clone(); + + self.h += 1; + }, + &QueryInstruction::SetValue(reg) => { + self.heap.push(self.registers[reg].clone()); + self.h += 1; + }, + } + } - pub fn execute<'a, 'b : 'a>(&'a mut self, instr: &'b MachineInstruction) { + pub fn execute_fact_instr<'a, 'b : 'a>(&'a mut self, instr: &'b FactInstruction) { match instr { - &MachineInstruction::GetStructure(ref name, arity, reg) => { + &FactInstruction::GetStructure(ref name, arity, reg) => { let addr = self.deref(Addr::RegNum(reg)); match self.lookup(addr) { @@ -146,25 +168,7 @@ impl Machine { } }; }, - &MachineInstruction::PutStructure(ref name, arity, reg) => { - self.heap.push(HeapCell::Str(self.h + 1)); - self.heap.push(HeapCell::NamedStr(arity, name.clone())); - - self.registers[reg] = self.heap[self.h].clone(); - - self.h += 2; - }, - &MachineInstruction::SetVariable(reg) => { - self.heap.push(HeapCell::Ref(self.h)); - self.registers[reg] = self.heap[self.h].clone(); - - self.h += 1; - }, - &MachineInstruction::SetValue(reg) => { - self.heap.push(self.registers[reg].clone()); - self.h += 1; - }, - &MachineInstruction::UnifyVariable(reg) => { + &FactInstruction::UnifyVariable(reg) => { match self.mode { MachineMode::Read => self.registers[reg] = self.heap[self.s].clone(), MachineMode::Write => { @@ -176,7 +180,7 @@ impl Machine { self.s += 1; }, - &MachineInstruction::UnifyValue(reg) => { + &FactInstruction::UnifyValue(reg) => { let s = self.s; match self.mode { @@ -197,38 +201,5 @@ impl Machine { *self = Machine::new(); self.program = program; - } - - pub fn dump_registers_and_heap(&self) { - let mut c = 0; - - let printer = |contents, c| { - match contents { - &HeapCell::NamedStr(ref arity, ref atom) => { - println!("{} = NAME({}, {})", c, arity, atom); - }, - &HeapCell::Ref(hc) => { - println!("{} = REF({})", c, hc); - }, - &HeapCell::Str(hc) => { - println!("{} = STR({})", c, hc); - } - }; - }; - - for contents in &self.registers { - print!("X"); - printer(contents, c); - c += 1; - } - - println!(""); - - c = 0; - - for contents in &self.heap { - printer(contents, c); - c += 1; - } - } + } } diff --git a/src/l0/mod.rs b/src/l0/mod.rs index 933bebe5..5ef53dfd 100644 --- a/src/l0/mod.rs +++ b/src/l0/mod.rs @@ -1,4 +1,7 @@ +mod l0_parser; + pub mod ast; -pub mod l0_parser; +pub mod iterators; +pub mod parser; pub mod codegen; pub mod machine; diff --git a/src/l0/parser.rs b/src/l0/parser.rs new file mode 100644 index 00000000..e155c1d5 --- /dev/null +++ b/src/l0/parser.rs @@ -0,0 +1,52 @@ +use l0::ast::{Term, TopLevel, Var}; +use l0::l0_parser::{parse_TopLevel}; + +use std::collections::{HashMap}; + +extern crate lalrpop_util as __lalrpop_util; + +pub type ParseResult<'a> = + Result>; + +pub fn parse_top_level<'a>(input: &'a str) -> ParseResult { + let result = parse_TopLevel(&*input); + + if let Ok(result) = result { + return Ok(mark_cells(result)); + } + + result +} + +#[inline] +fn mark_cells(tl: TopLevel) -> TopLevel { + match tl { + TopLevel::Fact(term) => TopLevel::Fact(mark_term_cells(term)), + TopLevel::Query(term) => TopLevel::Query(mark_term_cells(term)) + } +} + +fn mark_term_cells(term: Term) -> Term { + let mut cell_num = 1; + + { + let mut bindings: HashMap<&Var, usize> = HashMap::new(); + let mut iter = term.breadth_first_iter(); + + while let Some(term) = iter.next() { + if let &Term::Var(ref cell, ref var) = term { + let cell_num_in_map = bindings.entry(var).or_insert(cell_num); + + if *cell_num_in_map != cell_num { + cell.set(*cell_num_in_map); + continue; + } + } + + term.set_cell(cell_num); + cell_num += 1; + } + } + + term +} diff --git a/src/main.rs b/src/main.rs index e405207c..f9006ac7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,28 +1,23 @@ mod l0; -use l0::ast::{Atom, Program, Term, TopLevel, Var}; -use l0::codegen::{compile_fact, compile_query}; +use l0::ast::{TopLevel}; +use l0::codegen::{compile_target}; use l0::machine::{Machine}; use std::io::{self, Write}; -fn print_instructions(program : &Program) { - for instruction in program { - println!("{:}", instruction); - } -} - fn l0_repl() { let mut ms = Machine::new(); loop { print!("l0> "); - io::stdout().flush(); - + + let _ = io::stdout().flush(); let mut buffer = String::new(); + io::stdin().read_line(&mut buffer).unwrap(); - let result = l0::l0_parser::parse_TopLevel(&*buffer); + let result = l0::parser::parse_top_level(&*buffer); if &*buffer == "quit\n" { break; @@ -32,8 +27,8 @@ fn l0_repl() { match result { Ok(TopLevel::Fact(fact)) => { - let program = compile_fact(&fact); - + let program = compile_target(&fact); + ms = Machine::new(); ms.program = Some(program); @@ -41,14 +36,14 @@ fn l0_repl() { }, Ok(TopLevel::Query(query)) => { if let Some(program) = ms.program.take() { - let query = compile_query(&query); + let query = compile_target(&query); for instruction in &query { - ms.execute(instruction); + ms.execute_query_instr(instruction); } for instruction in &program { - ms.execute(instruction); + ms.execute_fact_instr(instruction); if ms.fail { break; @@ -60,7 +55,7 @@ fn l0_repl() { } else { println!("yes"); } - + ms.reset_heap(); ms.program = Some(program); } else {