From: Mark Date: Fri, 30 Jun 2023 23:13:38 +0000 (-0600) Subject: use lexer to detect remaining layout in parse_number_from_string (#1773) X-Git-Tag: v0.9.2~97 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=b0566e41503a6c8d29b792b560defca8ca028cf5;p=scryer-prolog.git use lexer to detect remaining layout in parse_number_from_string (#1773) --- diff --git a/src/machine/system_calls.rs b/src/machine/system_calls.rs index 6a676ae9..4c9c0f2f 100644 --- a/src/machine/system_calls.rs +++ b/src/machine/system_calls.rs @@ -886,75 +886,92 @@ impl MachineState { indices: &IndexStore, stub_gen: impl Fn() -> FunctorStub, ) -> CallResult { + use crate::parser::lexer::*; + let nx = self.store(self.deref(self.registers[2])); + let add_dot = !string.ends_with("."); + let cursor = std::io::Cursor::new(string); - let mut charcode_space = false; - let mut cs = string.chars(); + let iter = std::io::Read::chain( + cursor, + { + let mut dot_buf: [u8; '.'.len_utf8()] = [0u8]; - loop { - let c = cs.next(); + if add_dot { + '.'.encode_utf8(&mut dot_buf); + } - if c == None { - break; - } + std::io::Cursor::new(dot_buf) + }, + ); - if c == Some('0') - && cs.next() == Some('\'') - && cs.next() == Some(' ') - && cs.next() == None { - charcode_space = true; - break; - } - } + let mut lexer = Lexer::new(CharReader::new(iter), self); + let mut tokens = vec![]; - if !charcode_space { - if let Some(c) = string.chars().last() { - if layout_char!(c) { - let (line_num, col_num) = string.chars().fold((0, 0), |(line_num, col_num), c| { - if new_line_char!(c) { - (1 + line_num, 0) - } else { - (line_num, col_num + 1) - } - }); - let err = ParserError::UnexpectedChar(c, line_num, col_num); - let err = self.syntax_error(err); + match lexer.next_token() { + Ok(token @ Token::Literal(Literal::Atom(atom!("-")) | Literal::Char('-'))) => { + tokens.push(token); - return Err(self.error_form(err, stub_gen())); + if let Ok(token) = lexer.next_token() { + tokens.push(token); } } + Ok(token) => { + 
tokens.push(token); + } + Err(err) => { + let err = self.syntax_error(err); + return Err(self.error_form(err, stub_gen())); + } } - let mut dot_buf: [u8; '.'.len_utf8()] = [0u8]; - '.'.encode_utf8(&mut dot_buf); + loop { + match lexer.lookahead_char() { + Err(ParserError::UnexpectedEOF) => { + let mut parser = Parser::from_lexer(lexer); + let op_dir = CompositeOpDir::new(&indices.op_dir, None); - let cursor = std::io::Cursor::new(string); - let iter = std::io::Read::chain(cursor, std::io::Cursor::new(dot_buf)); + tokens.reverse(); - let mut parser = Parser::new(CharReader::new(iter), self); + match parser.read_term(&op_dir, Tokens::Provided(tokens)) { + Err(err) => { + let err = self.syntax_error(err); + return Err(self.error_form(err, stub_gen())); + } + Ok(Term::Literal(_, Literal::Rational(n))) => { + self.unify_rational(n, nx); + } + Ok(Term::Literal(_, Literal::Float(n))) => { + self.unify_f64(n.as_ptr(), nx); + } + Ok(Term::Literal(_, Literal::Integer(n))) => { + self.unify_big_int(n, nx); + } + Ok(Term::Literal(_, Literal::Fixnum(n))) => { + self.unify_fixnum(n, nx); + } + _ => { + let err = ParserError::ParseBigInt(0, 0); + let err = self.syntax_error(err); - match parser.read_term(&CompositeOpDir::new(&indices.op_dir, None)) { - Err(err) => { - let err = self.syntax_error(err); - return Err(self.error_form(err, stub_gen())); - } - Ok(Term::Literal(_, Literal::Rational(n))) => { - self.unify_rational(n, nx); - } - Ok(Term::Literal(_, Literal::Float(n))) => { - self.unify_f64(n.as_ptr(), nx); - } - Ok(Term::Literal(_, Literal::Integer(n))) => { - self.unify_big_int(n, nx); - } - Ok(Term::Literal(_, Literal::Fixnum(n))) => { - self.unify_fixnum(n, nx); - } - _ => { - let err = ParserError::ParseBigInt(0, 0); - let err = self.syntax_error(err); + return Err(self.error_form(err, stub_gen())); + } + } - return Err(self.error_form(err, stub_gen())); + break; + } + Ok('.') => { + lexer.skip_char('.'); + } + Ok(c) => { + let (line_num, col_num) = (lexer.line_num, 
lexer.col_num); + + let err = ParserError::UnexpectedChar(c, line_num, col_num); + let err = self.syntax_error(err); + + return Err(self.error_form(err, stub_gen())); + } + Err(_) => unreachable!(), } } @@ -5730,8 +5747,9 @@ impl Machine { if let Some(atom_or_string) = self.machine_st.value_to_str_like(self.machine_st.registers[1]) { let chars = CharReader::new(ByteStream::from_string(atom_or_string.to_string())); let mut parser = Parser::new(chars, &mut self.machine_st); + let op_dir = CompositeOpDir::new(&self.indices.op_dir, None); - let term_write_result = parser.read_term(&CompositeOpDir::new(&self.indices.op_dir, None)) + let term_write_result = parser.read_term(&op_dir, Tokens::Default) .map_err(CompilationError::from) .and_then(|term| { write_term_to_heap( diff --git a/src/machine/term_stream.rs b/src/machine/term_stream.rs index bb92c8d4..98d77627 100644 --- a/src/machine/term_stream.rs +++ b/src/machine/term_stream.rs @@ -52,7 +52,7 @@ impl<'a> TermStream for BootstrappingTermStream<'a> { fn next(&mut self, op_dir: &CompositeOpDir) -> Result<Term, CompilationError> { self.parser.reset(); self.parser - .read_term(op_dir) + .read_term(op_dir, Tokens::Default) .map_err(CompilationError::from) } diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index 236585d9..7f5d73ed 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -110,7 +110,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { self.reader.put_back_char(c); } - fn skip_char(&mut self, c: char) { + pub fn skip_char(&mut self, c: char) { self.reader.consume(c.len_utf8()); if new_line_char!(c) { diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 1d7e035d..5bb600da 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -24,6 +24,16 @@ enum TokenType { End, } +/* +Specifies whether the token sequence should be read from the lexer or +provided via the Provided variant. 
+*/ +#[derive(Debug)] +pub enum Tokens { + Default, + Provided(Vec<Token>), +} + impl TokenType { fn is_sep(self) -> bool { matches!( @@ -302,8 +312,17 @@ impl<'a, R: CharRead> Parser<'a, R> { Parser { lexer: Lexer::new(stream, machine_st), tokens: vec![], - stack: Vec::new(), - terms: Vec::new(), + stack: vec![], + terms: vec![], + } + } + + pub fn from_lexer(lexer: Lexer<'a, R>) -> Self { + Parser { + lexer, + tokens: vec![], + stack: vec![], + terms: vec![], } } @@ -1048,8 +1067,11 @@ impl<'a, R: CharRead> Parser<'a, R> { } // on success, returns the parsed term and the number of lines read. - pub fn read_term(&mut self, op_dir: &CompositeOpDir) -> Result<Term, ParserError> { - self.tokens = read_tokens(&mut self.lexer)?; + pub fn read_term(&mut self, op_dir: &CompositeOpDir, tokens: Tokens) -> Result<Term, ParserError> { + self.tokens = match tokens { + Tokens::Default => read_tokens(&mut self.lexer)?, + Tokens::Provided(tokens) => tokens, + }; while let Some(token) = self.tokens.pop() { self.shift_token(token, op_dir)?; diff --git a/src/read.rs b/src/read.rs index e42e3439..085c867e 100644 --- a/src/read.rs +++ b/src/read.rs @@ -45,10 +45,11 @@ impl MachineState { let (term, num_lines_read) = { let prior_num_lines_read = inner.lines_read(); let mut parser = Parser::new(inner, self); + let op_dir = CompositeOpDir::new(op_dir, None); parser.add_lines_read(prior_num_lines_read); - let term = parser.read_term(&CompositeOpDir::new(op_dir, None)) + let term = parser.read_term(&op_dir, Tokens::Default) .map_err(CompilationError::from)?; (term, parser.lines_read() - prior_num_lines_read)