From: Mark Thom Date: Mon, 2 May 2022 05:13:48 +0000 (-0600) Subject: compact lists of characters to partial strings in read_term_from_heap (#1452, #1453) X-Git-Tag: v0.9.1~41 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=0f502fff841bcc6876a9f3c77e462cc426855794;p=scryer-prolog.git compact lists of characters to partial strings in read_term_from_heap (#1452, #1453) --- diff --git a/src/arithmetic.rs b/src/arithmetic.rs index 432926c8..77274fbd 100644 --- a/src/arithmetic.rs +++ b/src/arithmetic.rs @@ -79,7 +79,7 @@ impl<'a> ArithInstructionIterator<'a> { )), }?, Term::Literal(cell, cons) => TermIterState::Literal(Level::Shallow, cell, cons), - Term::Cons(..) | Term::PartialString(..) => { + Term::Cons(..) | Term::PartialString(..) | Term::CompleteString(..) => { return Err(ArithmeticError::NonEvaluableFunctor( Literal::Atom(atom!(".")), 2, diff --git a/src/codegen.rs b/src/codegen.rs index 0224d298..5d67e5f8 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -324,7 +324,8 @@ impl<'b> CodeGenerator<'b> { } &Term::Cons(ref cell, ..) | &Term::Clause(ref cell, ..) | - Term::PartialString(ref cell, ..) => { + Term::PartialString(ref cell, ..) | + Term::CompleteString(ref cell, ..) => { self.marker.mark_non_var::(Level::Deep, term_loc, cell, target); target.push(Target::clause_arg_to_instr(cell.get())); } @@ -383,13 +384,14 @@ impl<'b> CodeGenerator<'b> { } TermRef::PartialString(lvl, cell, string, tail) => { self.marker.mark_non_var::(lvl, term_loc, cell, &mut target); + let atom = self.atom_tbl.build_with(&string); - if let Some(tail) = tail { - target.push(Target::to_pstr(lvl, string, cell.get(), true)); - self.subterm_to_instr::(tail, term_loc, is_exposed, &mut target); - } else { - target.push(Target::to_pstr(lvl, string, cell.get(), false)); - } + target.push(Target::to_pstr(lvl, atom, cell.get(), true)); + self.subterm_to_instr::(tail, term_loc, is_exposed, &mut target); + } + TermRef::CompleteString(lvl, cell, atom) => { + self.marker.mark_non_var::(lvl, term_loc, cell, &mut target); + target.push(Target::to_pstr(lvl, atom, cell.get(), false)); } TermRef::Var(lvl @ Level::Shallow, cell, ref var) if var.as_str() == "!" => { if self.marker.is_unbound(var.clone()) { @@ -550,7 +552,8 @@ impl<'b> CodeGenerator<'b> { &Term::AnonVar | &Term::Clause(..) | &Term::Cons(..) | - &Term::PartialString(..) => { + &Term::PartialString(..) | + &Term::CompleteString(..) => { code.push(instr!("$fail", 0)); } &Term::Literal(_, Literal::String(_)) => { @@ -577,6 +580,7 @@ impl<'b> CodeGenerator<'b> { &Term::Clause(..) | &Term::Cons(..) | &Term::PartialString(..) | + &Term::CompleteString(..) | &Term::Literal(_, Literal::String(..)) => { code.push(instr!("$succeed", 0)); } @@ -702,7 +706,8 @@ impl<'b> CodeGenerator<'b> { &Term::Literal(..) | &Term::Clause(..) | &Term::Cons(..) | - &Term::PartialString(..) => { + &Term::PartialString(..) | + &Term::CompleteString(..) => { code.push(instr!("$fail", 0)); } &Term::AnonVar => { diff --git a/src/iterators.rs b/src/iterators.rs index 422ad667..f6ee98f2 100644 --- a/src/iterators.rs +++ b/src/iterators.rs @@ -17,7 +17,8 @@ pub(crate) enum TermRef<'a> { Cons(Level, &'a Cell, &'a Term, &'a Term), Literal(Level, &'a Cell, &'a Literal), Clause(Level, &'a Cell, ClauseType, &'a Vec), - PartialString(Level, &'a Cell, Atom, &'a Option>), + PartialString(Level, &'a Cell, &'a String, &'a Box), + CompleteString(Level, &'a Cell, Atom), Var(Level, &'a Cell, Rc), } @@ -28,8 +29,9 @@ impl<'a> TermRef<'a> { | TermRef::Cons(lvl, ..) | TermRef::Literal(lvl, ..) | TermRef::Var(lvl, ..) - | TermRef::Clause(lvl, ..) => lvl, - TermRef::PartialString(lvl, ..) => lvl, + | TermRef::Clause(lvl, ..) + | TermRef::CompleteString(lvl, ..) + | TermRef::PartialString(lvl, ..) => lvl, } } } @@ -41,8 +43,9 @@ pub(crate) enum TermIterState<'a> { Clause(Level, usize, &'a Cell, ClauseType, &'a Vec), InitialCons(Level, &'a Cell, &'a Term, &'a Term), FinalCons(Level, &'a Cell, &'a Term, &'a Term), - InitialPartialString(Level, &'a Cell, Atom, &'a Option>), - FinalPartialString(Level, &'a Cell, Atom, &'a Option>), + InitialPartialString(Level, &'a Cell, &'a String, &'a Box), + FinalPartialString(Level, &'a Cell, &'a String, &'a Box), + CompleteString(Level, &'a Cell, Atom), Var(Level, &'a Cell, Rc), } @@ -59,7 +62,10 @@ impl<'a> TermIterState<'a> { } Term::Literal(cell, constant) => TermIterState::Literal(lvl, cell, constant), Term::PartialString(cell, string_buf, tail) => { - TermIterState::InitialPartialString(lvl, cell, *string_buf, tail) + TermIterState::InitialPartialString(lvl, cell, string_buf, tail) + } + Term::CompleteString(cell, atom) => { + TermIterState::CompleteString(lvl, cell, *atom) } Term::Var(cell, var) => TermIterState::Var(lvl, cell, var.clone()), } @@ -89,7 +95,8 @@ impl<'a> QueryIterator<'a> { fn from_term(term: &'a Term) -> Self { let state = match term { - Term::AnonVar | Term::Cons(..) | Term::Literal(..) | Term::PartialString(..) => { + Term::AnonVar | Term::Cons(..) | Term::Literal(..) | + Term::PartialString(..) | Term::CompleteString(..) => { return QueryIterator { state_stack: vec![], } @@ -196,13 +203,13 @@ impl<'a> Iterator for QueryIterator<'a> { } TermIterState::InitialPartialString(lvl, cell, string, tail) => { self.state_stack.push(TermIterState::FinalPartialString(lvl, cell, string, tail)); - - if let Some(tail) = tail { - self.push_subterm(lvl.child_level(), tail); - } + self.push_subterm(lvl.child_level(), tail); } - TermIterState::FinalPartialString(lvl, cell, string, tail) => { - return Some(TermRef::PartialString(lvl, cell, string, tail)); + TermIterState::FinalPartialString(lvl, cell, atom, tail) => { + return Some(TermRef::PartialString(lvl, cell, atom, tail)); + } + TermIterState::CompleteString(lvl, cell, atom) => { + return Some(TermRef::CompleteString(lvl, cell, atom)); } TermIterState::FinalCons(lvl, cell, head, tail) => { return Some(TermRef::Cons(lvl, cell, head, tail)); @@ -259,12 +266,19 @@ impl<'a> FactIterator<'a> { head.as_ref(), tail.as_ref(), )], - Term::PartialString(cell, string_buf, tail_opt) => { + Term::PartialString(cell, string_buf, tail) => { vec![TermIterState::InitialPartialString( Level::Root, cell, - *string_buf, - tail_opt, + string_buf, + tail, + )] + } + Term::CompleteString(cell, atom) => { + vec![TermIterState::CompleteString( + Level::Root, + cell, + *atom, )] } Term::Literal(cell, constant) => { @@ -307,12 +321,12 @@ impl<'a> Iterator for FactIterator<'a> { return Some(TermRef::Cons(lvl, cell, head, tail)); } - TermIterState::InitialPartialString(lvl, cell, string_buf, tail_opt) => { - if let Some(tail) = tail_opt { - self.push_subterm(Level::Deep, tail); - } - - return Some(TermRef::PartialString(lvl, cell, string_buf, tail_opt)); + TermIterState::InitialPartialString(lvl, cell, string_buf, tail) => { + self.push_subterm(Level::Deep, tail); + return Some(TermRef::PartialString(lvl, cell, string_buf, tail)); + } + TermIterState::CompleteString(lvl, cell, atom) => { + return Some(TermRef::CompleteString(lvl, cell, atom)); } TermIterState::Literal(lvl, cell, constant) => { return Some(TermRef::Literal(lvl, cell, constant)) diff --git a/src/lib/dcgs.pl b/src/lib/dcgs.pl index 488c7a92..4fe172f7 100644 --- a/src/lib/dcgs.pl +++ b/src/lib/dcgs.pl @@ -8,6 +8,7 @@ ]). :- use_module(library(error)). +:- use_module(library(iso_ext)). :- use_module(library(lists), [append/3, member/2]). :- use_module(library(loader), [strip_module/3]). diff --git a/src/machine/loader.rs b/src/machine/loader.rs index bf7456be..e8f0688d 100644 --- a/src/machine/loader.rs +++ b/src/machine/loader.rs @@ -485,10 +485,21 @@ impl<'a, LS: LoadState<'a>> Loader<'a, LS> { read_heap_cell!(addr, (HeapCellValueTag::Lis) => { + use crate::parser::parser::as_partial_string; + let tail = term_stack.pop().unwrap(); let head = term_stack.pop().unwrap(); - term_stack.push(Term::Cons(Cell::default(), Box::new(head), Box::new(tail))); + match as_partial_string(head, tail) { + Ok((string, Some(tail))) => { + term_stack.push(Term::PartialString(Cell::default(), string, tail)); + } + Ok((string, None)) => { + let atom = machine_st.atom_tbl.build_with(&string); + term_stack.push(Term::CompleteString(Cell::default(), atom)); + } + Err(cons_term) => term_stack.push(cons_term), + } } (HeapCellValueTag::Var | HeapCellValueTag::AttrVar | HeapCellValueTag::StackVar, h) => { let offset_string = format!("_{}", h); @@ -509,31 +520,27 @@ impl<'a, LS: LoadState<'a>> Loader<'a, LS> { term_stack.push(Term::Clause(Cell::default(), name, subterms)); } } - (HeapCellValueTag::PStr, string) => { + (HeapCellValueTag::PStr, atom) => { let tail = term_stack.pop().unwrap(); if let Term::Literal(_, Literal::Atom(atom!("[]"))) = &tail { - term_stack.push(Term::PartialString( - Cell::default(), - string, - None, - )); + term_stack.push(Term::CompleteString(Cell::default(), atom)); } else { term_stack.push(Term::PartialString( Cell::default(), - string, - Some(Box::new(tail)), + atom.as_str().to_owned(), + Box::new(tail), )); } } (HeapCellValueTag::PStrLoc, h) => { - let string = cell_as_atom_cell!(iter.heap[h]).get_name(); + let atom = cell_as_atom_cell!(iter.heap[h]).get_name(); let tail = term_stack.pop().unwrap(); term_stack.push(Term::PartialString( Cell::default(), - string, - Some(Box::new(tail)), + atom.as_str().to_owned(), + Box::new(tail), )); } _ => { diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 9f728867..3cc1ccbb 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -541,11 +541,7 @@ impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Literal::Atom(ref atom) => { - // if atom.as_str().chars().any(|c| "`.$'\" ".contains(c)) { - // write!(f, "'{}'", atom) - // } else { write!(f, "{}", atom.flat_index()) - // } } Literal::Char(c) => write!(f, "'{}'", *c as u32), Literal::Fixnum(n) => write!(f, "{}", n.get_num()), @@ -553,7 +549,6 @@ impl fmt::Display for Literal { Literal::Rational(ref n) => write!(f, "{}", n), Literal::Float(ref n) => write!(f, "{}", *n), Literal::String(ref s) => write!(f, "\"{}\"", s.as_str()), - // Literal::Usize(integer) => write!(f, "u{}", integer), } } } @@ -573,7 +568,10 @@ pub enum Term { Clause(Cell, Atom, Vec), Cons(Cell, Box, Box), Literal(Cell, Literal), - PartialString(Cell, Atom, Option>), + // PartialString wraps a String in anticipation of it absorbing + // other PartialString variants in as_partial_string. + PartialString(Cell, String, Box), + CompleteString(Cell, Atom), Var(Cell, Rc), } diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 431f3799..ecdc2ab0 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -49,11 +49,10 @@ struct TokenDesc { spec: u32, } -fn is_partial_string( +pub(crate) fn as_partial_string( head: Term, mut tail: Term, - atom_tbl: &mut AtomTable, -) -> Result<(Atom, Option>), Term> { +) -> Result<(String, Option>), Term> { let mut string = match &head { Term::Literal(_, Literal::Atom(atom)) => { if let Some(c) = atom.as_char() { @@ -92,6 +91,15 @@ fn is_partial_string( tail_ref = succ; } + Term::PartialString(_, pstr, tail) => { + string += &pstr; + tail_ref = tail; + } + Term::CompleteString(_, cstr) => { + string += cstr.as_str(); + tail = Term::Literal(Cell::default(), Literal::Atom(atom!("[]"))); + break; + } tail_ref => { tail = mem::replace(tail_ref, Term::AnonVar); break; @@ -101,21 +109,17 @@ fn is_partial_string( match &tail { Term::AnonVar | Term::Var(..) => { - let pstr_atom = atom_tbl.build_with(&string); - Ok((pstr_atom, Some(Box::new(tail)))) + Ok((string, Some(Box::new(tail)))) } Term::Literal(_, Literal::Atom(atom!("[]"))) => { - let pstr_atom = atom_tbl.build_with(&string); - Ok((pstr_atom, None)) + Ok((string, None)) } Term::Literal(_, Literal::String(tail)) => { string += tail.as_str(); - let pstr_atom = atom_tbl.build_with(&string); - Ok((pstr_atom, None)) + Ok((string, None)) } _ => { - let pstr_atom = atom_tbl.build_with(&string); - Ok((pstr_atom, Some(Box::new(tail)))) + Ok((string, Some(Box::new(tail)))) } } } @@ -412,7 +416,7 @@ impl<'a, R: CharRead> Parser<'a, R> { TokenType::Term } Token::Literal(Literal::String(s)) if self.lexer.machine_st.flags.double_quotes.is_chars() => { - self.terms.push(Term::PartialString(Cell::default(), s, None)); + self.terms.push(Term::CompleteString(Cell::default(), s)); TokenType::Term } Token::Literal(c) => { @@ -564,24 +568,19 @@ impl<'a, R: CharRead> Parser<'a, R> { let head = subterms.pop().unwrap(); self.terms.push( - match is_partial_string(head, tail, &mut self.lexer.machine_st.atom_tbl) { - Ok((string_buf, tail_opt)) => { - Term::PartialString(Cell::default(), string_buf, tail_opt) + match as_partial_string(head, tail) { + Ok((string_buf, Some(tail))) => { + Term::PartialString(Cell::default(), string_buf, tail) + } + Ok((string_buf, None)) => { + let atom = self.lexer.machine_st.atom_tbl.build_with(&string_buf); + Term::CompleteString(Cell::default(), atom) } Err(term) => term, }, ); - - /* - self.terms.push(Term::Cons( - Cell::default(), - Box::new(head), - Box::new(tail), - )); - */ } else { - self.terms - .push(Term::Clause(Cell::default(), name, subterms)); + self.terms.push(Term::Clause(Cell::default(), name, subterms)); } if let Some(&mut TokenDesc { @@ -741,9 +740,13 @@ impl<'a, R: CharRead> Parser<'a, R> { self.terms.push(match list { Term::Cons(_, head, tail) => { - match is_partial_string(*head, *tail, &mut self.lexer.machine_st.atom_tbl) { - Ok((string_buf, tail_opt)) => { - Term::PartialString(Cell::default(), string_buf, tail_opt) + match as_partial_string(*head, *tail) { + Ok((string_buf, Some(tail))) => { + Term::PartialString(Cell::default(), string_buf, tail) + } + Ok((string_buf, None)) => { + let atom = self.lexer.machine_st.atom_tbl.build_with(&string_buf); + Term::CompleteString(Cell::default(), atom) } Err(term) => term, } diff --git a/src/read.rs b/src/read.rs index 20af4836..c58e99f4 100644 --- a/src/read.rs +++ b/src/read.rs @@ -286,7 +286,7 @@ impl<'a, 'b> TermWriter<'a, 'b> { match term { &TermRef::Cons(..) => list_loc_as_cell!(h), &TermRef::AnonVar(_) | &TermRef::Var(..) => heap_loc_as_cell!(h), - &TermRef::PartialString(_, _, ref src, None) => + &TermRef::CompleteString(_, _, ref src) => if src.as_str().is_empty() { empty_list_as_cell!() } else if self.heap[h].get_tag() == HeapCellValueTag::CStr { @@ -369,20 +369,17 @@ impl<'a, 'b> TermWriter<'a, 'b> { continue; } - &TermRef::PartialString(lvl, _, ref src, tail) => { - if tail.is_some() { - allocate_pstr(self.heap, src.as_str(), self.atom_tbl); - } else { - put_complete_string(self.heap, src.as_str(), self.atom_tbl); - } + &TermRef::CompleteString(_, _, ref src) => { + put_complete_string(self.heap, src.as_str(), self.atom_tbl); + } + &TermRef::PartialString(lvl, _, ref src, _) => { + allocate_pstr(self.heap, src.as_str(), self.atom_tbl); - if tail.is_some() { - let h = self.heap.len(); - self.queue.push_back((1, h - 1)); + let h = self.heap.len(); + self.queue.push_back((1, h - 1)); - if let Level::Root = lvl { - continue; - } + if let Level::Root = lvl { + continue; } } &TermRef::Var(_, _, ref var) => {