From f861b7a80e7cb35e0c72b4908da3844024757e04 Mon Sep 17 00:00:00 2001 From: Mark Thom Date: Wed, 22 Aug 2018 00:26:48 -0600 Subject: [PATCH] update to handle strings as lists. --- README.md | 5 +- src/prolog/ast.rs | 2 +- src/prolog/codegen.rs | 9 ++-- src/prolog/compile.rs | 43 ++++++++------- src/prolog/heap_print.rs | 69 +++++++++++++++++------- src/prolog/indexing.rs | 10 +++- src/prolog/machine/machine_state.rs | 16 ++++-- src/prolog/machine/machine_state_impl.rs | 46 +++++++++++++++- src/prolog/machine/mod.rs | 5 ++ src/prolog/parser | 2 +- src/prolog/read.rs | 3 +- src/prolog/string_list.rs | 23 ++++++-- src/prolog/toplevel.rs | 10 ++-- src/tests.rs | 4 +- 14 files changed, 187 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index b085e609..7df97d23 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,10 @@ Extend rusty-wam to include the following, among other features: * Built-in predicates for list processing and top-level declarative control (`setup_call_control/3`, `call_with_inference_limit/3`, etc.) (_done_) -* Definite Clause Grammars +* Default representation of strings as list of chars, using a packed + internal representation (_in progress_). +* `term_expansion/2` and `goal_expansion/2`. +* Definite Clause Grammars. * Attributed variables using the SICStus Prolog interface and semantics. Adding coroutines like `dif/2`, `freeze/2`, etc. is straightforward with attributed variables. diff --git a/src/prolog/ast.rs b/src/prolog/ast.rs index c1f98da0..63aefed6 100644 --- a/src/prolog/ast.rs +++ b/src/prolog/ast.rs @@ -877,7 +877,7 @@ impl<'a> From<&'a TabledRc> for ClauseName { } } -impl ClauseName { +impl ClauseName { pub fn as_str(&self) -> &str { match self { &ClauseName::BuiltIn(s) => s, diff --git a/src/prolog/codegen.rs b/src/prolog/codegen.rs index 741c1eb7..a4e3c4fe 100644 --- a/src/prolog/codegen.rs +++ b/src/prolog/codegen.rs @@ -4,6 +4,7 @@ use prolog::ast::*; use prolog::fixtures::*; use prolog::indexing::*; use prolog::iterators::*; +use prolog::machine::machine_state::MachineFlags; use prolog::targets::*; use std::cell::Cell; @@ -12,6 +13,7 @@ use std::rc::Rc; use std::vec::Vec; pub struct CodeGenerator { + flags: MachineFlags, marker: TermMarker, var_count: HashMap, usize>, non_counted_bt: bool @@ -44,10 +46,11 @@ impl<'a> ConjunctInfo<'a> impl<'a, TermMarker: Allocator<'a>> CodeGenerator { - pub fn new(non_counted_bt: bool) -> Self { + pub fn new(non_counted_bt: bool, flags: MachineFlags) -> Self { CodeGenerator { marker: Allocator::new(), var_count: HashMap::new(), - non_counted_bt } + non_counted_bt, + flags } } pub fn take_vars(self) -> AllocVarDict { @@ -705,7 +708,7 @@ impl<'a, TermMarker: Allocator<'a>> CodeGenerator -> Result { let mut code_body = Vec::new(); - let mut code_offsets = CodeOffsets::new(); + let mut code_offsets = CodeOffsets::new(self.flags); let num_clauses = clauses.len(); diff --git a/src/prolog/compile.rs b/src/prolog/compile.rs index d751a03c..5d1f6f67 100644 --- a/src/prolog/compile.rs +++ b/src/prolog/compile.rs @@ -2,6 +2,7 @@ use prolog::ast::*; use prolog::debray_allocator::*; use prolog::codegen::*; use prolog::machine::*; +use prolog::machine::machine_state::MachineFlags; use prolog::toplevel::*; use std::collections::{HashMap, HashSet, VecDeque}; @@ -39,20 +40,22 @@ pub fn parse_code(wam: &mut Machine, buffer: &str) -> Result Result +fn compile_relation(tl: &TopLevel, non_counted_bt: bool, flags: MachineFlags) -> Result { - let mut cg = CodeGenerator::::new(non_counted_bt); + let mut cg = CodeGenerator::::new(non_counted_bt, flags); match tl { &TopLevel::Declaration(_) | &TopLevel::Query(_) => @@ -84,22 +87,25 @@ fn set_first_index(code: &mut Code) } } -fn compile_appendix(code: &mut Code, queue: Vec, non_counted_bt: bool) -> Result<(), ParserError> +fn compile_appendix(code: &mut Code, queue: Vec, non_counted_bt: bool, flags: MachineFlags) + -> Result<(), ParserError> { for tl in queue.iter() { set_first_index(code); - code.append(&mut compile_relation(tl, non_counted_bt)?); + code.append(&mut compile_relation(tl, non_counted_bt, flags)?); } Ok(()) } -fn compile_query(terms: Vec, queue: Vec) -> Result<(Code, AllocVarDict), ParserError> +fn compile_query(terms: Vec, queue: Vec, flags: MachineFlags) + -> Result<(Code, AllocVarDict), ParserError> { - let mut cg = CodeGenerator::::new(false); // count backtracking inferences. + // count backtracking inferences. + let mut cg = CodeGenerator::::new(false, flags); let mut code = try!(cg.compile_query(&terms)); - compile_appendix(&mut code, queue, false)?; + compile_appendix(&mut code, queue, false, flags)?; Ok((code, cg.take_vars())) } @@ -124,8 +130,8 @@ fn compile_decl(wam: &mut Machine, tl: TopLevel, queue: Vec) -> EvalSe Err(SessionError::NamelessEntry) }); - let mut code = try_eval_session!(compile_relation(&tl, false)); - try_eval_session!(compile_appendix(&mut code, queue, false)); + let mut code = try_eval_session!(compile_relation(&tl, false, wam.machine_flags())); + try_eval_session!(compile_appendix(&mut code, queue, false, wam.machine_flags())); if !code.is_empty() { wam.add_user_code(name, tl.arity(), code, tl.as_predicate().ok().unwrap()) @@ -140,7 +146,7 @@ pub fn compile_packet(wam: &mut Machine, tl: TopLevelPacket) -> EvalSession { match tl { TopLevelPacket::Query(terms, queue) => - match compile_query(terms, queue) { + match compile_query(terms, queue, wam.machine_flags()) { Ok((mut code, vars)) => wam.submit_query(code, vars), Err(e) => EvalSession::from(e) }, @@ -182,17 +188,15 @@ impl<'a> ListingCompiler<'a> { let non_counted_bt = self.non_counted_bt_preds.contains(&(name.clone(), arity)); let p = code.len() + self.wam.code_size(); - let mut decl_code = compile_relation(&TopLevel::Predicate(decl), non_counted_bt)?; - - compile_appendix(&mut decl_code, Vec::from(queue), non_counted_bt)?; + let mut decl_code = compile_relation(&TopLevel::Predicate(decl), non_counted_bt, + self.wam.machine_flags())?; -// println!("\n{}/{}:\n", name.as_str(), arity); + compile_appendix(&mut decl_code, Vec::from(queue), non_counted_bt, + self.wam.machine_flags())?; let idx = code_dir.entry((name, arity)).or_insert(CodeIndex::default()); set_code_index!(idx, IndexPtr::Index(p), self.get_module_name()); -// print_code(&decl_code); - code.extend(decl_code.into_iter()); } @@ -241,7 +245,8 @@ fn use_qualified_module(module: &mut Option, submodule: &Module, exports pub fn compile_listing(wam: &mut Machine, src_str: &str, mut indices: MachineCodeIndices) -> EvalSession { - let mut worker = TopLevelBatchWorker::new(src_str.as_bytes(), wam.atom_tbl(), wam.string_tbl()); + let mut worker = TopLevelBatchWorker::new(src_str.as_bytes(), wam.atom_tbl(), wam.string_tbl(), + wam.machine_flags()); let mut compiler = ListingCompiler::new(wam); while let Some(decl) = try_eval_session!(worker.consume(&mut indices)) { diff --git a/src/prolog/heap_print.rs b/src/prolog/heap_print.rs index ba4f5d9d..2ae1f57c 100644 --- a/src/prolog/heap_print.rs +++ b/src/prolog/heap_print.rs @@ -1,8 +1,9 @@ use prolog::ast::*; use prolog::num::*; use prolog::heap_iter::*; -use prolog::machine::machine_state::MachineState; +use prolog::machine::machine_state::{DoubleQuotes, MachineState}; use prolog::ordered_float::OrderedFloat; +use prolog::string_list::*; use std::cell::Cell; use std::collections::{HashMap, HashSet}; @@ -11,6 +12,7 @@ use std::rc::Rc; #[derive(Clone)] pub enum TokenOrRedirect { Atom(ClauseName), + Char(char), NumberedVar(String), Redirect, Open, @@ -19,7 +21,6 @@ pub enum TokenOrRedirect { OpenList(Rc>), CloseList(Rc>), HeadTailSeparator, -// Space } pub trait HCValueFormatter { @@ -201,6 +202,10 @@ fn reverse_heap_locs<'a>(machine_st: &'a MachineState, heap_locs: &'a HeapVarDic }).collect() } +fn non_quoted_token(c: char) -> bool { + graphic_token_char!(c) || alpha_numeric_char!(c) +} + impl<'a, Formatter: HCValueFormatter, Outputter: HCValueOutputter> HCPrinter<'a, Formatter, Outputter> { @@ -275,19 +280,45 @@ impl<'a, Formatter: HCValueFormatter, Outputter: HCValueOutputter> } fn print_atom(&mut self, atom: &ClauseName) { - let non_quoted_token = |c| { - graphic_token_char!(c) || alpha_numeric_char!(c) - }; - match atom.as_str() { ";" | "!" => self.outputter.append(atom.as_str()), s => if s.chars().all(non_quoted_token) { self.outputter.append(atom.as_str()); } else { - self.outputter.append(&("'".to_owned() + atom.as_str() + "'")); + self.outputter.push_char('\''); + self.outputter.append(atom.as_str()); + self.outputter.push_char('\''); } } } + + fn expand_char_list(&mut self, s: StringList) { + let cell = Rc::new(Cell::new(true)); + let cursor = s.cursor(); + + self.state_stack.push(TokenOrRedirect::CloseList(cell.clone())); + + if !s.is_empty() { + for c in s.borrow()[cursor ..].chars().rev() { + self.state_stack.push(TokenOrRedirect::Char(c)); + self.state_stack.push(TokenOrRedirect::Comma); + } + + self.state_stack.pop(); + } + + self.state_stack.push(TokenOrRedirect::OpenList(cell)); + } + + fn print_char(&mut self, c: char) { + if non_quoted_token(c) { + self.outputter.push_char(c); + } else { + self.outputter.push_char('\''); + self.outputter.push_char(c); + self.outputter.push_char('\''); + } + } fn print_constant(&mut self, c: Constant) { match c { @@ -307,11 +338,8 @@ impl<'a, Formatter: HCValueFormatter, Outputter: HCValueOutputter> // self.outputter.append("\a"), // Constant::Char(c) if c == '\\v' => // self.outputter.append("\\v"), - Constant::Char(c) => { - self.outputter.append("'"); - self.outputter.push_char(c); - self.outputter.append("'"); - }, + Constant::Char(c) => + self.print_char(c), Constant::EmptyList => self.outputter.append("[]"), Constant::Number(Number::Float(fl)) => @@ -322,11 +350,14 @@ impl<'a, Formatter: HCValueFormatter, Outputter: HCValueOutputter> }, Constant::Number(n) => self.outputter.append(&format!("{}", n)), - Constant::String(s) => { - self.outputter.append("\""); - self.outputter.append(s.borrow().as_str()); - self.outputter.append("\""); - }, + Constant::String(s) => + if let DoubleQuotes::Chars = self.machine_st.machine_flags().double_quotes { + self.expand_char_list(s); + } else { // for now, == DoubleQuotes::Atom + self.outputter.append("\""); + self.outputter.append(s.borrow().as_str()); + self.outputter.append("\""); + }, Constant::Usize(i) => self.outputter.append(&format!("u{}", i)) } @@ -384,10 +415,10 @@ impl<'a, Formatter: HCValueFormatter, Outputter: HCValueOutputter> loop { if let Some(loc_data) = self.state_stack.pop() { match loc_data { -// TokenOrRedirect::Space => -// self.outputter.append(" "), TokenOrRedirect::Atom(atom) => self.outputter.append(atom.as_str()), + TokenOrRedirect::Char(c) => + self.print_char(c), TokenOrRedirect::NumberedVar(num_var) => self.outputter.append(num_var.as_str()), TokenOrRedirect::Redirect => diff --git a/src/prolog/indexing.rs b/src/prolog/indexing.rs index 48751fad..84fbb8f3 100644 --- a/src/prolog/indexing.rs +++ b/src/prolog/indexing.rs @@ -1,4 +1,5 @@ use prolog::ast::*; +use prolog::machine::machine_state::MachineFlags; use std::collections::{HashMap, VecDeque}; use std::hash::Hash; @@ -10,14 +11,16 @@ enum IntIndex { } pub struct CodeOffsets { + flags: MachineFlags, pub constants: HashMap, pub lists: ThirdLevelIndex, pub structures: HashMap<(ClauseName, usize), ThirdLevelIndex> } impl CodeOffsets { - pub fn new() -> Self { + pub fn new(flags: MachineFlags) -> Self { CodeOffsets { + flags, constants: HashMap::new(), lists: Vec::new(), structures: HashMap::new() @@ -56,6 +59,11 @@ impl CodeOffsets { let is_initial_index = self.lists.is_empty(); self.lists.push(Self::add_index(is_initial_index, index)); }, + &Term::Constant(_, Constant::String(_)) + if self.flags.double_quotes.is_chars() => { // strings are lists in this case. + let is_initial_index = self.lists.is_empty(); + self.lists.push(Self::add_index(is_initial_index, index)); + }, &Term::Constant(_, ref constant) => { let code = self.constants.entry(constant.clone()) .or_insert(Vec::new()); diff --git a/src/prolog/machine/machine_state.rs b/src/prolog/machine/machine_state.rs index fe406fcc..950ae36b 100644 --- a/src/prolog/machine/machine_state.rs +++ b/src/prolog/machine/machine_state.rs @@ -266,10 +266,20 @@ pub(super) enum MachineMode { } #[derive(Clone, Copy)] -pub(super) enum DoubleQuotes { +pub enum DoubleQuotes { Atom, Chars, // Codes } +impl DoubleQuotes { + pub fn is_chars(self) -> bool { + if let DoubleQuotes::Chars = self { + true + } else { + false + } + } +} + impl Default for DoubleQuotes { fn default() -> Self { DoubleQuotes::Chars @@ -277,8 +287,8 @@ impl Default for DoubleQuotes { } #[derive(Clone, Copy)] -pub(super) struct MachineFlags { - pub(super) double_quotes: DoubleQuotes +pub struct MachineFlags { + pub double_quotes: DoubleQuotes } impl Default for MachineFlags { diff --git a/src/prolog/machine/machine_state_impl.rs b/src/prolog/machine/machine_state_impl.rs index e5218063..f6100893 100644 --- a/src/prolog/machine/machine_state_impl.rs +++ b/src/prolog/machine/machine_state_impl.rs @@ -56,6 +56,11 @@ impl MachineState { } } + #[inline] + pub fn machine_flags(&self) -> MachineFlags { + self.flags + } + fn next_global_index(&self) -> usize { max(if self.and_stack.len() > 0 { self.and_stack[self.e].global_index } else { 0 }, if self.b > 0 { self.or_stack[self.b - 1].global_index } else { 0 }) + 1 @@ -185,6 +190,30 @@ impl MachineState { self.fail = true; }, + (Addr::Con(Constant::Char(c)), Addr::Con(Constant::Atom(atom))) + | (Addr::Con(Constant::Atom(atom)), Addr::Con(Constant::Char(c))) => { + let s = atom.as_str(); + if c.len_utf8() != s.len() || Some(c) != s.chars().next() { + self.fail = true; + } + }, + (Addr::Lis(a1), Addr::Con(Constant::String(ref s))) + | (Addr::Con(Constant::String(ref s)), Addr::Lis(a1)) + if self.flags.double_quotes.is_chars() => + if let Some(c) = s.head() { + pdl.push(Addr::Con(Constant::String(s.tail()))); + pdl.push(Addr::HeapCell(a1 + 1)); + + pdl.push(Addr::Con(Constant::Char(c))); + pdl.push(Addr::HeapCell(a1)); + } else { + self.fail = true; + }, + (Addr::Con(Constant::EmptyList), Addr::Con(Constant::String(ref s))) + | (Addr::Con(Constant::String(ref s)), Addr::Con(Constant::EmptyList)) + if self.flags.double_quotes.is_chars() => { + self.fail = !s.is_empty(); + }, (Addr::Lis(a1), Addr::Lis(a2)) => { pdl.push(Addr::HeapCell(a1)); pdl.push(Addr::HeapCell(a2)); @@ -750,6 +779,20 @@ impl MachineState { let addr = self.store(self.deref(self[reg].clone())); match addr { + Addr::Con(Constant::String(ref s)) + if self.flags.double_quotes.is_chars() => { + if let Some(c) = s.head() { + let h = self.heap.h; + + self.heap.push(HeapCellValue::Addr(Addr::Con(Constant::Char(c)))); + self.heap.push(HeapCellValue::Addr(Addr::Con(Constant::String(s.tail())))); + + self.s = h; + self.mode = MachineMode::Read; + } else { + self.fail = true; + } + }, Addr::HeapCell(hc) => { let h = self.heap.h; @@ -907,6 +950,7 @@ impl MachineState { let offset = match addr { Addr::HeapCell(_) | Addr::StackCell(_, _) => v, + Addr::Con(Constant::String(_)) if self.flags.double_quotes.is_chars() => l, Addr::Con(_) => c, Addr::Lis(_) => l, Addr::Str(_) => s @@ -1400,7 +1444,7 @@ impl MachineState { let d = self.store(self.deref(self[r1].clone())); match d { - Addr::Con(Constant::Atom(_)) => self.p += 1, + Addr::Con(Constant::Atom(_)) | Addr::Con(Constant::Char(_)) => self.p += 1, _ => self.fail = true }; }, diff --git a/src/prolog/machine/mod.rs b/src/prolog/machine/mod.rs index 62c17f5c..73c10428 100644 --- a/src/prolog/machine/mod.rs +++ b/src/prolog/machine/mod.rs @@ -122,6 +122,11 @@ impl Machine { wam } + #[inline] + pub fn machine_flags(&self) -> MachineFlags { + self.ms.flags + } + fn remove_module(&mut self, module_name: ClauseName) { let iter = if let Some(submodule) = self.modules.get(&module_name) { submodule.module_decl.exports.iter().cloned() diff --git a/src/prolog/parser b/src/prolog/parser index 6d995645..b5eafac5 160000 --- a/src/prolog/parser +++ b/src/prolog/parser @@ -1 +1 @@ -Subproject commit 6d995645d57849f0f16f8ce50c124e8875c9e76b +Subproject commit b5eafac5128ce9a4831619deb2f2d075f23f8781 diff --git a/src/prolog/read.rs b/src/prolog/read.rs index 713152db..1e37d799 100644 --- a/src/prolog/read.rs +++ b/src/prolog/read.rs @@ -35,8 +35,9 @@ impl<'a> Reader<'a> { let atom_tbl = self.machine_st.atom_tbl.clone(); let string_tbl = self.machine_st.string_tbl.clone(); + let flags = self.machine_st.machine_flags(); - let mut parser = Parser::new(buffer.as_bytes(), atom_tbl, string_tbl); + let mut parser = Parser::new(buffer.as_bytes(), atom_tbl, string_tbl, flags); Ok(self.write_term_to_heap(parser.read_term(op_dir)?)) } diff --git a/src/prolog/string_list.rs b/src/prolog/string_list.rs index c8956aed..d2acb5c7 100644 --- a/src/prolog/string_list.rs +++ b/src/prolog/string_list.rs @@ -18,7 +18,7 @@ impl Hash for StringListWrapper { pub struct StringList { body: TabledRc, cursor: usize, // use this to generate a chars() iterator on the fly, - // and skip over the first cursor chars. + // and skip over the first cursor chars. expandable: bool } @@ -63,17 +63,32 @@ impl StringList { } } + #[inline] + pub fn cursor(&self) -> usize { + self.cursor + } + + #[inline] + pub fn head(&self) -> Option { + self.borrow()[self.cursor ..].chars().next() + } + #[inline] pub fn tail(&self) -> Self { let mut new_string_list = self.clone(); - - if let Some(c) = self.borrow()[self.cursor ..].chars().next() { + + if let Some(c) = self.head() { new_string_list.cursor += c.len_utf8(); } - + new_string_list } + #[inline] + pub fn is_empty(&self) -> bool { + self.borrow().len() == self.cursor + } + #[inline] pub fn borrow(&self) -> Ref { self.body.0.borrow() diff --git a/src/prolog/toplevel.rs b/src/prolog/toplevel.rs index 8eb80c31..00fabb7d 100644 --- a/src/prolog/toplevel.rs +++ b/src/prolog/toplevel.rs @@ -1,5 +1,6 @@ use prolog::ast::*; use prolog::machine::*; +use prolog::machine::machine_state::MachineFlags; use prolog::num::*; use prolog::parser::parser::*; use prolog::string_list::*; @@ -608,10 +609,10 @@ pub struct TopLevelWorker<'a, R: Read> { impl<'a, R: Read> TopLevelWorker<'a, R> { pub fn new(inner: R, atom_tbl: TabledData, string_tbl: TabledData, - indices: MachineCodeIndices<'a>) + flags: MachineFlags, indices: MachineCodeIndices<'a>) -> Self { - TopLevelWorker { parser: Parser::new(inner, atom_tbl, string_tbl), indices } + TopLevelWorker { parser: Parser::new(inner, atom_tbl, string_tbl, flags), indices } } pub fn parse_code(&mut self) -> Result @@ -640,10 +641,11 @@ pub struct TopLevelBatchWorker { } impl TopLevelBatchWorker { - pub fn new(inner: R, atom_tbl: TabledData, string_tbl: TabledData) + pub fn new(inner: R, atom_tbl: TabledData, string_tbl: TabledData, + flags: MachineFlags) -> Self { - TopLevelBatchWorker { parser: Parser::new(inner, atom_tbl, string_tbl), + TopLevelBatchWorker { parser: Parser::new(inner, atom_tbl, string_tbl, flags), rel_worker: RelationWorker::new(), source_mod: clause_name!("user"), results: vec![] } diff --git a/src/tests.rs b/src/tests.rs index 48a06593..cedef261 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1357,7 +1357,7 @@ fn test_queries_on_builtins() assert_prolog_success!(&mut wam, "?- functor(Func, f, 4).", [["Func = f(_2, _3, _4, _5)"]]); assert_prolog_success!(&mut wam, "?- catch(functor(F, \"sdf\", 3), error(E, _), true).", - [["E = type_error(atom, \"sdf\")", "F = _1"]]); + [["E = type_error(atom, [s, d, f])", "F = _1"]]); assert_prolog_success!(&mut wam, "?- catch(functor(Func, F, 3), error(E, _), true).", [["E = instantiation_error", "Func = _1", "F = _2"]]); assert_prolog_success!(&mut wam, "?- catch(functor(Func, f, N), error(E, _), true).", @@ -1370,7 +1370,7 @@ fn test_queries_on_builtins() assert_prolog_success!(&mut wam, "?- X is 3 + 3.5, \\+ integer(X)."); assert_prolog_success!(&mut wam, "?- Func =.. [atom].", [["Func = atom"]]); - assert_prolog_success!(&mut wam, "?- Func =.. [\"sdf\"].", [["Func = \"sdf\""]]); + assert_prolog_success!(&mut wam, "?- Func =.. [\"sdf\"].", [["Func = [s, d, f]"]]); assert_prolog_success!(&mut wam, "?- Func =.. [1].", [["Func = 1"]]); assert_prolog_success!(&mut wam, "?- catch(Func =.. [1,2], error(type_error(atom, 1), _), true)."); assert_prolog_success!(&mut wam, "?- f(1,2,3) =.. List.", [["List = [f, 1, 2, 3]"]]); -- 2.54.0