From a15ebd20c18b3213137ed7a26388dec1d688b831 Mon Sep 17 00:00:00 2001 From: Mark Thom Date: Sun, 31 Mar 2019 19:48:40 -0600 Subject: [PATCH] add support for char codes --- Cargo.toml | 4 +- src/prolog/heap_iter.rs | 38 ++-- src/prolog/heap_print.rs | 52 +++-- src/prolog/lib/builtins.pl | 4 +- src/prolog/machine/machine_state_impl.rs | 243 +++++++++++++++-------- src/prolog/machine/system_calls.rs | 6 +- 6 files changed, 227 insertions(+), 120 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 72b42125..f59f62b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scryer-prolog" -version = "0.8.31" +version = "0.8.32" authors = ["Mark Thom "] repository = "https://github.com/mthom/scryer-prolog" description = "A modern Prolog implementation written mostly in Rust." @@ -14,7 +14,7 @@ cfg-if = "0.1.7" downcast = "0.10.0" num = "0.2" ordered-float = "0.5.0" -prolog_parser = "0.8.9" +prolog_parser = "0.8.10" readline_rs_compat = { version = "0.1.7", optional = true } ref_thread_local = "0.0.0" diff --git a/src/prolog/heap_iter.rs b/src/prolog/heap_iter.rs index 3353c323..add9b7cc 100644 --- a/src/prolog/heap_iter.rs +++ b/src/prolog/heap_iter.rs @@ -47,18 +47,28 @@ impl<'a> HCPreOrderIterator<'a> { { let da = self.machine_st.store(self.machine_st.deref(addr)); - match da { - Addr::Con(Constant::String(ref s)) - if self.machine_st.machine_flags().double_quotes.is_chars() => { - if let Some(c) = s.head() { - let tail = s.tail(); - - self.state_stack.push(Addr::Con(Constant::String(tail))); - self.state_stack.push(Addr::Con(Constant::Char(c))); - } + match da { + Addr::Con(Constant::String(ref s)) => { + match self.machine_st.machine_flags().double_quotes { + DoubleQuotes::Chars => + if let Some(c) = s.head() { + let tail = s.tail(); + + self.state_stack.push(Addr::Con(Constant::String(tail))); + self.state_stack.push(Addr::Con(Constant::Char(c))); + }, + DoubleQuotes::Codes => + if let Some(c) = s.head() { + let tail = s.tail(); + + 
self.state_stack.push(Addr::Con(Constant::String(tail))); + self.state_stack.push(Addr::Con(Constant::CharCode(c as u8))); + }, + _ => {} + } - Addr::Con(Constant::String(s.clone())) - }, + Addr::Con(Constant::String(s.clone())) + }, Addr::Con(_) | Addr::DBRef(_) => da, Addr::Lis(a) => { self.state_stack.push(Addr::HeapCell(a + 1)); @@ -147,7 +157,7 @@ impl> Iterator for HCPostOrderIterator = HCPostOrderIterator>; -impl MachineState { +impl MachineState { pub fn pre_order_iter<'a>(&'a self, a: Addr) -> HCPreOrderIterator<'a> { HCPreOrderIterator::new(self, a) } @@ -189,7 +199,7 @@ impl HCAcyclicIterator impl Deref for HCAcyclicIterator { type Target = HCIter; - + fn deref(&self) -> &Self::Target { &self.iter } @@ -258,5 +268,5 @@ impl Iterator for HCZippedAcyclicIterator }, _ => None } - } + } } diff --git a/src/prolog/heap_print.rs b/src/prolog/heap_print.rs index c74509b8..38429884 100644 --- a/src/prolog/heap_print.rs +++ b/src/prolog/heap_print.rs @@ -1,4 +1,5 @@ use prolog_parser::ast::*; +use prolog_parser::string_list::*; use prolog::clause_types::*; use prolog::heap_iter::*; @@ -652,37 +653,46 @@ impl<'a, Outputter: HCValueOutputter> HCPrinter<'a, Outputter> } else { self.print_char(c); }, + Constant::CharCode(c) => + self.append_str(&format!("{}", c)), Constant::EmptyList => self.append_str("[]"), Constant::Number(n) => self.print_number(n, op), Constant::String(s) => - if self.machine_st.machine_flags().double_quotes.is_chars() { - if !s.is_empty() { - if self.ignore_ops { - self.format_struct(2, clause_name!(".")); - } else { - self.push_list(); - } - } else if s.is_expandable() { - if !self.at_cdr(" | _") { - self.push_char('_'); - } - } else if !self.at_cdr("") { - self.append_str("[]"); - } - } else { // for now, == DoubleQuotes::Atom - let borrowed_str = s.borrow(); - - self.push_char('"'); - self.append_str(&borrowed_str[s.cursor() ..]); - self.push_char('"'); - }, + self.print_string(s), Constant::Usize(i) => self.append_str(&format!("u{}", i)) } } 
+ fn print_string(&mut self, s: StringList) { + match self.machine_st.machine_flags().double_quotes { + DoubleQuotes::Chars | DoubleQuotes::Codes => { + if !s.is_empty() { + if self.ignore_ops { + self.format_struct(2, clause_name!(".")); + } else { + self.push_list(); + } + } else if s.is_expandable() { + if !self.at_cdr(" | _") { + self.push_char('_'); + } + } else if !self.at_cdr("") { + self.append_str("[]"); + } + }, + DoubleQuotes::Atom => { + let borrowed_str = s.borrow(); + + self.push_char('"'); + self.append_str(&borrowed_str[s.cursor() ..]); + self.push_char('"'); + } + } + } + fn push_list(&mut self) { let cell = Rc::new(Cell::new(true)); diff --git a/src/prolog/lib/builtins.pl b/src/prolog/lib/builtins.pl index 59997a6f..fb3933b8 100644 --- a/src/prolog/lib/builtins.pl +++ b/src/prolog/lib/builtins.pl @@ -107,7 +107,9 @@ set_prolog_flag(integer_rounding_function, Value) :- set_prolog_flag(double_quotes, chars) :- !, '$set_double_quotes'(chars). % 7.11.2.5, list of one-char atoms. set_prolog_flag(double_quotes, atom) :- - !, '$set_double_quotes'(atom). % 7.11.2.5, list of one-char atoms. + !, '$set_double_quotes'(atom). % 7.11.2.5, an atom. +set_prolog_flag(double_quotes, codes) :- + !, '$set_double_quotes'(codes). % 7.11.2.5, list of char codes (UTF8). set_prolog_flag(double_quotes, Value) :- throw(error(domain_error(flag_value, double_quotes + Value), set_prolog_flag/2)). 
% 8.17.1.3 e diff --git a/src/prolog/machine/machine_state_impl.rs b/src/prolog/machine/machine_state_impl.rs index dd3935e3..3e44b735 100644 --- a/src/prolog/machine/machine_state_impl.rs +++ b/src/prolog/machine/machine_state_impl.rs @@ -188,15 +188,15 @@ impl MachineState { where Outputter: HCValueOutputter { let mut printer = HCPrinter::from_heap_locs(&self, output, var_dict); - + printer.see_all_locs(); printer.quoted = true; - + printer.print(addr) } pub(super) - fn unify_string(&mut self, pdl: &mut Vec, s1: &mut StringList, s2: &mut StringList) -> bool + fn unify_strings(&mut self, pdl: &mut Vec, s1: &mut StringList, s2: &mut StringList) -> bool { if let Some(c1) = s1.head() { if let Some(c2) = s2.head() { @@ -240,6 +240,86 @@ impl MachineState { false } + fn deconstruct_chars(&mut self, s: &mut StringList, offset: usize, pdl: &mut Vec) -> bool + { + if let Some(c) = s.head() { + pdl.push(Addr::Con(Constant::String(s.tail()))); + pdl.push(Addr::HeapCell(offset + 1)); + + pdl.push(Addr::Con(Constant::Char(c))); + pdl.push(Addr::HeapCell(offset)); + + return true; + } else if s.is_expandable() { + let prev_s = s.clone(); + + let mut stepper = |c| { + let new_s = s.push_char(c); + + pdl.push(Addr::HeapCell(offset + 1)); + pdl.push(Addr::Con(Constant::String(new_s))); + }; + + match self.heap[offset].clone() { + HeapCellValue::Addr(Addr::Con(Constant::Char(c))) => { + self.pstr_trail(prev_s); + stepper(c); + return true; + }, + HeapCellValue::Addr(Addr::Con(Constant::Atom(ref a, _))) => + if let Some(c) = a.as_str().chars().next() { + if c.len_utf8() == a.as_str().len() { + self.pstr_trail(prev_s); + stepper(c); + return true; + } + }, + _ => {} + } + } + + false + } + + fn deconstruct_codes(&mut self, s: &mut StringList, offset: usize, pdl: &mut Vec) -> bool + { + if let Some(c) = s.head() { + pdl.push(Addr::Con(Constant::String(s.tail()))); + pdl.push(Addr::HeapCell(offset + 1)); + + pdl.push(Addr::Con(Constant::CharCode(c as u8))); + 
pdl.push(Addr::HeapCell(offset)); + + return true; + } else if s.is_expandable() { + let prev_s = s.clone(); + + let mut stepper = |c| { + let new_s = s.push_char(c); + + pdl.push(Addr::HeapCell(offset + 1)); + pdl.push(Addr::Con(Constant::String(new_s))); + }; + + match self.heap[offset].clone() { + HeapCellValue::Addr(Addr::Con(Constant::CharCode(c))) => { + self.pstr_trail(prev_s); + stepper(c as char); + return true; + }, + HeapCellValue::Addr(Addr::Con(Constant::Number(Number::Integer(n)))) => + if let Some(c) = n.to_u8() { + self.pstr_trail(prev_s); + stepper(c as char); + return true; + }, + _ => {} + } + } + + false + } + pub(super) fn unify(&mut self, a1: Addr, a2: Addr) { let mut pdl = vec![a1, a2]; @@ -277,49 +357,20 @@ impl MachineState { self.fail = true; }, (Addr::Lis(a1), Addr::Con(Constant::String(ref mut s))) - | (Addr::Con(Constant::String(ref mut s)), Addr::Lis(a1)) - if self.flags.double_quotes.is_chars() => { - if let Some(c) = s.head() { - pdl.push(Addr::Con(Constant::String(s.tail()))); - pdl.push(Addr::HeapCell(a1 + 1)); - - pdl.push(Addr::Con(Constant::Char(c))); - pdl.push(Addr::HeapCell(a1)); - - continue; - } else if s.is_expandable() { - let prev_s = s.clone(); - - let mut stepper = |c| { - let new_s = s.push_char(c); - - pdl.push(Addr::HeapCell(a1 + 1)); - pdl.push(Addr::Con(Constant::String(new_s))); - }; - - match self.heap[a1].clone() { - HeapCellValue::Addr(Addr::Con(Constant::Char(c))) => { - self.pstr_trail(prev_s); - stepper(c); - continue; - }, - HeapCellValue::Addr(Addr::Con(Constant::Atom(ref a, _))) => - if let Some(c) = a.as_str().chars().next() { - if c.len_utf8() == a.as_str().len() { - self.pstr_trail(prev_s); - stepper(c); - continue; - } - }, - _ => {} - }; - } - - self.fail = true; - }, + | (Addr::Con(Constant::String(ref mut s)), Addr::Lis(a1)) => { + if match self.flags.double_quotes { + DoubleQuotes::Chars => self.deconstruct_chars(s, a1, &mut pdl), + DoubleQuotes::Codes => self.deconstruct_codes(s, a1, &mut pdl), 
+ DoubleQuotes::Atom => false + } { + continue; + } + + self.fail = true; + }, (Addr::Con(Constant::EmptyList), Addr::Con(Constant::String(ref s))) - | (Addr::Con(Constant::String(ref s)), Addr::Con(Constant::EmptyList)) - if self.flags.double_quotes.is_chars() => { + | (Addr::Con(Constant::String(ref s)), Addr::Con(Constant::EmptyList)) + if !self.flags.double_quotes.is_atom() => { if s.is_expandable() && s.is_empty() { self.pstr_trail(s.clone()); s.set_expandable(false); @@ -337,8 +388,8 @@ impl MachineState { }, (Addr::Con(Constant::String(ref mut s1)), Addr::Con(Constant::String(ref mut s2))) => - self.fail = !(self.unify_string(&mut pdl, s1, s2) - || self.unify_string(&mut pdl, s2, s1)), + self.fail = !(self.unify_strings(&mut pdl, s1, s2) + || self.unify_strings(&mut pdl, s2, s1)), (Addr::Con(ref c1), Addr::Con(ref c2)) => if c1 != c2 { self.fail = true; @@ -523,51 +574,81 @@ impl MachineState { false } - pub(super) fn write_constant_to_var(&mut self, addr: Addr, c: Constant) { - match self.store(self.deref(addr)) { - Addr::Con(Constant::String(ref mut s)) => - self.fail = match c { - Constant::EmptyList if self.flags.double_quotes.is_chars() => - !s.is_empty(), - Constant::String(ref s2) - if s.is_expandable() && s2.starts_with(s) => { - self.pstr_trail(s.clone()); - s.append_suffix(s2); - s.set_expandable(s2.is_expandable()); - false - }, - Constant::String(s2) => *s != s2, - Constant::Atom(ref a, _) - if a.as_str().starts_with(&s.borrow()[s.cursor() ..]) => - if let Some(c) = a.as_str().chars().next() { - if c.len_utf8() == a.as_str().len() { - // detect chars masquerading as atoms. 
- if s.is_empty() { - self.write_char_to_string(s, c); - } + fn write_constant_to_string(&mut self, s: &mut StringList, c: Constant) -> bool { + match c { + Constant::EmptyList if !self.flags.double_quotes.is_atom() => + !s.is_empty(), + Constant::String(ref s2) + if s.is_expandable() && s2.starts_with(s) => { + self.pstr_trail(s.clone()); + s.append_suffix(s2); + s.set_expandable(s2.is_expandable()); + false + }, + Constant::String(s2) => + s.borrow()[s.cursor() ..] != s2.borrow()[s2.cursor() ..], + Constant::Atom(ref a, _) + if a.as_str().starts_with(&s.borrow()[s.cursor() ..]) => + if let Some(c) = a.as_str().chars().next() { + if c.len_utf8() == a.as_str().len() { + // detect chars masquerading as atoms. + if s.is_empty() { + self.write_char_to_string(s, c); + } - false - } else { - true - } - } else { - true - }, - Constant::Char(ref c) if s.is_empty() && s.is_expandable() => - self.write_char_to_string(s, *c), - Constant::Char(ref c) => + false + } else { + true + } + } else { + true + }, + Constant::Char(ref c) if s.is_empty() && s.is_expandable() => + match self.flags.double_quotes { + DoubleQuotes::Chars => self.write_char_to_string(s, *c), + _ => false + }, + Constant::Char(ref c) => + match self.flags.double_quotes { + DoubleQuotes::Chars => if s.borrow().chars().next() == Some(*c) && c.len_utf8() == s.len() { s.set_expandable(false); false } else { true }, - _ => true + _ => false + }, + Constant::CharCode(ref c) if s.is_empty() && s.is_expandable() => + match self.flags.double_quotes { + DoubleQuotes::Codes => self.write_char_to_string(s, *c as char), + _ => false }, + Constant::CharCode(ref c) => + match self.flags.double_quotes { + DoubleQuotes::Codes => + if s.borrow().chars().next() == Some(*c as char) && 1 == s.len() { + s.set_expandable(false); + false + } else { + true + }, + _ => false + }, + _ => true + } + } + + pub(super) fn write_constant_to_var(&mut self, addr: Addr, c: Constant) { + match self.store(self.deref(addr)) { + 
Addr::Con(Constant::String(ref mut s)) => + self.fail = self.write_constant_to_string(s, c), Addr::Con(c1) => if c1 != c { self.fail = true; }, + Addr::Lis(l) => + self.unify(Addr::Lis(l), Addr::Con(c)), addr => if let Some(r) = addr.as_var() { self.bind(r, Addr::Con(c)); } else { @@ -1207,7 +1288,7 @@ impl MachineState { let offset = match addr { Addr::HeapCell(_) | Addr::StackCell(..) | Addr::AttrVar(..) => v, - Addr::Con(Constant::String(_)) if self.flags.double_quotes.is_chars() => l, + Addr::Con(Constant::String(_)) if !self.flags.double_quotes.is_atom() => l, Addr::Con(_) => c, Addr::Lis(_) => l, Addr::Str(_) => s, diff --git a/src/prolog/machine/system_calls.rs b/src/prolog/machine/system_calls.rs index e05d20b0..2f295504 100644 --- a/src/prolog/machine/system_calls.rs +++ b/src/prolog/machine/system_calls.rs @@ -736,7 +736,9 @@ impl MachineState { DoubleQuotes::Chars => self.unify(a1, Addr::Con(atom!("chars"))), DoubleQuotes::Atom => - self.unify(a1, Addr::Con(atom!("atom"))) + self.unify(a1, Addr::Con(atom!("atom"))), + DoubleQuotes::Codes => + self.unify(a1, Addr::Con(atom!("codes"))) } }, &SystemClauseType::GetSCCCleaner => { @@ -985,6 +987,8 @@ impl MachineState { self.flags.double_quotes = DoubleQuotes::Chars, Addr::Con(Constant::Atom(ref atom, _)) if atom.as_str() == "atom" => self.flags.double_quotes = DoubleQuotes::Atom, + Addr::Con(Constant::Atom(ref atom, _)) if atom.as_str() == "codes" => + self.flags.double_quotes = DoubleQuotes::Codes, _ => self.fail = true }, &SystemClauseType::InferenceLevel => { -- 2.54.0