Repositorios git - scryer-prolog.git/commitdiff
use lexer to detect remaining layout in parse_number_from_string (#1773)
author Mark <[email protected]>
Fri, 30 Jun 2023 23:13:38 +0000 (17:13 -0600)
committer Mark <[email protected]>
Fri, 30 Jun 2023 23:18:10 +0000 (17:18 -0600)
src/machine/system_calls.rs
src/machine/term_stream.rs
src/parser/lexer.rs
src/parser/parser.rs
src/read.rs

index 6a676ae9593c4b381d552bc01c3dc1de64bdd3b4..4c9c0f2f1b826d039de99a3b846d8bff59dd0457 100644 (file)
@@ -886,75 +886,92 @@ impl MachineState {
         indices: &IndexStore,
         stub_gen: impl Fn() -> FunctorStub,
     ) -> CallResult {
+        use crate::parser::lexer::*;
+
         let nx = self.store(self.deref(self.registers[2]));
+        let add_dot = !string.ends_with(".");
+        let cursor = std::io::Cursor::new(string);
 
-        let mut charcode_space = false;
-        let mut cs = string.chars();
+        let iter = std::io::Read::chain(
+            cursor,
+            {
+                let mut dot_buf: [u8; '.'.len_utf8()] = [0u8];
 
-        loop {
-            let c = cs.next();
+                if add_dot {
+                    '.'.encode_utf8(&mut dot_buf);
+                }
 
-            if c == None {
-                break;
-            }
+                std::io::Cursor::new(dot_buf)
+            },
+        );
 
-            if c == Some('0')
-                && cs.next() == Some('\'')
-                && cs.next() == Some(' ')
-                && cs.next() == None {
-                charcode_space = true;
-                break;
-            }
-        }
+        let mut lexer = Lexer::new(CharReader::new(iter), self);
+        let mut tokens = vec![];
 
-        if !charcode_space {
-            if let Some(c) = string.chars().last() {
-                if layout_char!(c) {
-                    let (line_num, col_num) = string.chars().fold((0, 0), |(line_num, col_num), c| {
-                        if new_line_char!(c) {
-                            (1 + line_num, 0)
-                        } else {
-                            (line_num, col_num + 1)
-                        }
-                    });
-                    let err = ParserError::UnexpectedChar(c, line_num, col_num);
-                    let err = self.syntax_error(err);
+        match lexer.next_token() {
+            Ok(token @ Token::Literal(Literal::Atom(atom!("-")) | Literal::Char('-'))) => {
+                tokens.push(token);
 
-                    return Err(self.error_form(err, stub_gen()));
+                if let Ok(token) = lexer.next_token() {
+                    tokens.push(token);
                 }
             }
+            Ok(token) => {
+                tokens.push(token);
+            }
+            Err(err) => {
+                let err = self.syntax_error(err);
+                return Err(self.error_form(err, stub_gen()));
+            }
         }
 
-        let mut dot_buf: [u8; '.'.len_utf8()] = [0u8];
-        '.'.encode_utf8(&mut dot_buf);
+        loop {
+            match lexer.lookahead_char() {
+                Err(ParserError::UnexpectedEOF) => {
+                    let mut parser = Parser::from_lexer(lexer);
+                    let op_dir = CompositeOpDir::new(&indices.op_dir, None);
 
-        let cursor = std::io::Cursor::new(string);
-        let iter = std::io::Read::chain(cursor, std::io::Cursor::new(dot_buf));
+                    tokens.reverse();
 
-        let mut parser = Parser::new(CharReader::new(iter), self);
+                    match parser.read_term(&op_dir, Tokens::Provided(tokens)) {
+                        Err(err) => {
+                            let err = self.syntax_error(err);
+                            return Err(self.error_form(err, stub_gen()));
+                        }
+                        Ok(Term::Literal(_, Literal::Rational(n))) => {
+                            self.unify_rational(n, nx);
+                        }
+                        Ok(Term::Literal(_, Literal::Float(n))) => {
+                            self.unify_f64(n.as_ptr(), nx);
+                        }
+                        Ok(Term::Literal(_, Literal::Integer(n))) => {
+                            self.unify_big_int(n, nx);
+                        }
+                        Ok(Term::Literal(_, Literal::Fixnum(n))) => {
+                            self.unify_fixnum(n, nx);
+                        }
+                        _ => {
+                            let err = ParserError::ParseBigInt(0, 0);
+                            let err = self.syntax_error(err);
 
-        match parser.read_term(&CompositeOpDir::new(&indices.op_dir, None)) {
-            Err(err) => {
-                let err = self.syntax_error(err);
-                return Err(self.error_form(err, stub_gen()));
-            }
-            Ok(Term::Literal(_, Literal::Rational(n))) => {
-                self.unify_rational(n, nx);
-            }
-            Ok(Term::Literal(_, Literal::Float(n))) => {
-                self.unify_f64(n.as_ptr(), nx);
-            }
-            Ok(Term::Literal(_, Literal::Integer(n))) => {
-                self.unify_big_int(n, nx);
-            }
-            Ok(Term::Literal(_, Literal::Fixnum(n))) => {
-                self.unify_fixnum(n, nx);
-            }
-            _ => {
-                let err = ParserError::ParseBigInt(0, 0);
-                let err = self.syntax_error(err);
+                            return Err(self.error_form(err, stub_gen()));
+                        }
+                    }
 
-                return Err(self.error_form(err, stub_gen()));
+                    break;
+                }
+                Ok('.') => {
+                    lexer.skip_char('.');
+                }
+                Ok(c) => {
+                    let (line_num, col_num) = (lexer.line_num, lexer.col_num);
+
+                    let err = ParserError::UnexpectedChar(c, line_num, col_num);
+                    let err = self.syntax_error(err);
+
+                    return Err(self.error_form(err, stub_gen()));
+                }
+                Err(_) => unreachable!(),
             }
         }
 
@@ -5730,8 +5747,9 @@ impl Machine {
         if let Some(atom_or_string) = self.machine_st.value_to_str_like(self.machine_st.registers[1]) {
             let chars = CharReader::new(ByteStream::from_string(atom_or_string.to_string()));
             let mut parser = Parser::new(chars, &mut self.machine_st);
+            let op_dir = CompositeOpDir::new(&self.indices.op_dir, None);
 
-            let term_write_result = parser.read_term(&CompositeOpDir::new(&self.indices.op_dir, None))
+            let term_write_result = parser.read_term(&op_dir, Tokens::Default)
                 .map_err(CompilationError::from)
                 .and_then(|term| {
                     write_term_to_heap(
index bb92c8d4f8e8c19f10a3718d20e93b4274883187..98d776275e02ee4bcb2c74416ecda9495637b50f 100644 (file)
@@ -52,7 +52,7 @@ impl<'a> TermStream for BootstrappingTermStream<'a> {
     fn next(&mut self, op_dir: &CompositeOpDir) -> Result<Term, CompilationError> {
         self.parser.reset();
         self.parser
-            .read_term(op_dir)
+            .read_term(op_dir, Tokens::Default)
             .map_err(CompilationError::from)
     }
 
index 236585d9cd16e6b8d57f20dfa0d7509f75e62c24..7f5d73ed656dbb0d60dc3174b6662016c5da9048 100644 (file)
@@ -110,7 +110,7 @@ impl<'a, R: CharRead> Lexer<'a, R> {
         self.reader.put_back_char(c);
     }
 
-    fn skip_char(&mut self, c: char) {
+    pub fn skip_char(&mut self, c: char) {
         self.reader.consume(c.len_utf8());
 
         if new_line_char!(c) {
index 1d7e035db4be35b3c672b96875bffadb823a9b00..5bb600daf29a5afea04f2cb37482a3fc9f6ff6f6 100644 (file)
@@ -24,6 +24,16 @@ enum TokenType {
     End,
 }
 
+/*
+Specifies whether the token sequence should be read from the lexer or
+provided via the Provided variant.
+*/
+#[derive(Debug)]
+pub enum Tokens {
+    Default,
+    Provided(Vec<Token>),
+}
+
 impl TokenType {
     fn is_sep(self) -> bool {
         matches!(
@@ -302,8 +312,17 @@ impl<'a, R: CharRead> Parser<'a, R> {
         Parser {
             lexer: Lexer::new(stream, machine_st),
             tokens: vec![],
-            stack: Vec::new(),
-            terms: Vec::new(),
+            stack: vec![],
+            terms: vec![],
+        }
+    }
+
+    pub fn from_lexer(lexer: Lexer<'a, R>) -> Self {
+        Parser {
+            lexer,
+            tokens: vec![],
+            stack: vec![],
+            terms: vec![],
         }
     }
 
@@ -1048,8 +1067,11 @@ impl<'a, R: CharRead> Parser<'a, R> {
     }
 
     // on success, returns the parsed term and the number of lines read.
-    pub fn read_term(&mut self, op_dir: &CompositeOpDir) -> Result<Term, ParserError> {
-        self.tokens = read_tokens(&mut self.lexer)?;
+    pub fn read_term(&mut self, op_dir: &CompositeOpDir, tokens: Tokens) -> Result<Term, ParserError> {
+        self.tokens = match tokens {
+            Tokens::Default => read_tokens(&mut self.lexer)?,
+            Tokens::Provided(tokens) => tokens,
+        };
 
         while let Some(token) = self.tokens.pop() {
             self.shift_token(token, op_dir)?;
index e42e34397dbe2bed43f505514d969b4000ce749c..085c867e837d80cd56038b50446ec4a31457d833 100644 (file)
@@ -45,10 +45,11 @@ impl MachineState {
         let (term, num_lines_read) = {
             let prior_num_lines_read = inner.lines_read();
             let mut parser = Parser::new(inner, self);
+            let op_dir = CompositeOpDir::new(op_dir, None);
 
             parser.add_lines_read(prior_num_lines_read);
 
-            let term = parser.read_term(&CompositeOpDir::new(op_dir, None))
+            let term = parser.read_term(&op_dir, Tokens::Default)
                 .map_err(CompilationError::from)?;
 
             (term, parser.lines_read() - prior_num_lines_read)