From: Mark Date: Sun, 9 Jul 2023 04:25:30 +0000 (-0600) Subject: refine EOF handling X-Git-Tag: v0.9.2~76 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=b8a6882a276c0c4723f013c69a66a002eb8ed3c5;p=scryer-prolog.git refine EOF handling --- diff --git a/scryer-prolog.wxs b/scryer-prolog.wxs index b69b1dff..fef55e49 100644 --- a/scryer-prolog.wxs +++ b/scryer-prolog.wxs @@ -1,28 +1,28 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/machine/system_calls.rs b/src/machine/system_calls.rs index 11a724cc..c6cf9254 100644 --- a/src/machine/system_calls.rs +++ b/src/machine/system_calls.rs @@ -7469,17 +7469,21 @@ impl Machine { #[inline(always)] pub(crate) fn devour_whitespace(&mut self) -> CallResult { - let stream = self.machine_st.get_stream_or_alias( + let mut stream = self.machine_st.get_stream_or_alias( self.machine_st.registers[1], &self.indices.stream_aliases, atom!("$devour_whitespace"), 1, )?; - match self.machine_st.devour_whitespace(stream) { + let mut parser = Parser::new(stream, &mut self.machine_st); + + match devour_whitespace(&mut parser) { Ok(false) => { // not at EOF. + stream.add_lines_read(parser.lines_read()); } Ok(true) => { + stream.add_lines_read(parser.lines_read()); self.machine_st.fail = true; } Err(err) => { diff --git a/src/machine/term_stream.rs b/src/machine/term_stream.rs index 079f5c49..8c6b055d 100644 --- a/src/machine/term_stream.rs +++ b/src/machine/term_stream.rs @@ -5,6 +5,7 @@ use crate::machine::loader::*; use crate::machine::machine_errors::*; use crate::parser::ast::*; use crate::parser::parser::*; +use crate::read::devour_whitespace; use crate::predicate_queue; @@ -58,8 +59,8 @@ impl<'a> TermStream for BootstrappingTermStream<'a> { #[inline] fn eof(&mut self) -> Result { - self.parser.devour_whitespace()?; // eliminate dangling comments before checking for EOF. - Ok(self.parser.eof()?) + devour_whitespace(&mut self.parser) // eliminate dangling comments before checking for EOF. + .map_err(CompilationError::from) } #[inline] @@ -111,7 +112,7 @@ impl TermStream for LiveTermStream { #[inline] fn eof(&mut self) -> Result { - return Ok(self.term_queue.is_empty()); + Ok(self.term_queue.is_empty()) } #[inline] diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index e3a81e6f..1eacc80b 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -10,20 +10,6 @@ use crate::parser::rug::Integer; use std::convert::TryFrom; use std::fmt; -macro_rules! is_not_eof { - ($parser:expr, $c:expr) => { - match $c { - Ok('\u{0}') => { - $parser.consume('\u{0}'.len_utf8()); - return Ok(true); - } - Ok(c) => c, - Err(e) if e.is_unexpected_eof() => return Ok(true), - Err(e) => return Err(e), - } - }; -} - macro_rules! consume_chars_with { ($token:expr, $e:expr) => { loop { @@ -37,6 +23,12 @@ macro_rules! consume_chars_with { }; } +#[derive(Debug, Default)] +pub struct LayoutInfo { + pub inserted: bool, + pub more: bool, +} + #[derive(Debug, PartialEq)] pub enum Token { Literal(Literal), @@ -121,18 +113,6 @@ impl<'a, R: CharRead> Lexer<'a, R> { } } - pub fn eof(&mut self) -> Result { - let mut c = is_not_eof!(self.reader, self.lookahead_char()); - - while layout_char!(c) { - self.skip_char(c); - - c = is_not_eof!(self.reader, self.lookahead_char()); - } - - Ok(false) - } - fn single_line_comment(&mut self) -> Result<(), ParserError> { loop { if self.reader.peek_char().is_none() { @@ -929,38 +909,48 @@ impl<'a, R: CharRead> Lexer<'a, R> { } } - pub fn scan_for_layout(&mut self) -> Result { - let mut layout_inserted = false; - let mut more_layout = true; + pub fn consume_layout( + &mut self, + c: Option, + layout_info: &mut LayoutInfo, + ) -> Result<(), ParserError> { + match c { + Some(c) if layout_char!(c) => { + self.skip_char(c); + layout_info.inserted = true; + } + Some(c) if end_line_comment_char!(c) => { + self.single_line_comment()?; + layout_info.inserted = true; + } + Some(c) if comment_1_char!(c) => { + if self.bracketed_comment()? { + layout_info.inserted = true; + } else { + layout_info.more = false; + } + } + _ => { + layout_info.more = false; + } + } + + Ok(()) + } + + fn scan_for_layout(&mut self) -> Result { + let mut layout_info = LayoutInfo { inserted: false, more: true }; loop { let cr = self.lookahead_char(); + self.consume_layout(cr.ok(), &mut layout_info)?; - match cr { - Ok(c) if layout_char!(c) => { - self.skip_char(c); - layout_inserted = true; - } - Ok(c) if end_line_comment_char!(c) => { - self.single_line_comment()?; - layout_inserted = true; - } - Ok(c) if comment_1_char!(c) => { - if self.bracketed_comment()? { - layout_inserted = true; - } else { - more_layout = false; - } - } - _ => more_layout = false, - }; - - if !more_layout { + if !layout_info.more { break; } } - Ok(layout_inserted) + Ok(layout_info.inserted) } pub fn next_token(&mut self) -> Result { diff --git a/src/parser/parser.rs b/src/parser/parser.rs index c170cfe0..41cc108c 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -621,7 +621,26 @@ impl<'a, R: CharRead> Parser<'a, R> { } pub fn devour_whitespace(&mut self) -> Result<(), ParserError> { - self.lexer.scan_for_layout()?; + match self.lexer.lookahead_char() { + Err(e) => { // if e.is_unexpected_eof() => { + return Err(e); + } + Ok(c) => { + let mut layout_info = LayoutInfo { inserted: false, more: true }; + let mut cr = Some(c); + + loop { + self.lexer.consume_layout(cr, &mut layout_info)?; + + if !layout_info.more { + break; + } + + cr = self.lexer.lookahead_char().ok(); + } + } + } + Ok(()) } @@ -1051,11 +1070,6 @@ impl<'a, R: CharRead> Parser<'a, R> { Ok(()) } - #[inline] - pub fn eof(&mut self) -> Result { - self.lexer.eof() - } - #[inline] pub fn add_lines_read(&mut self, lines_read: usize) { self.lexer.line_num += lines_read; diff --git a/src/read.rs b/src/read.rs index 86fdcdbd..6094b38e 100644 --- a/src/read.rs +++ b/src/read.rs @@ -24,19 +24,19 @@ use std::io::{Cursor, Error, ErrorKind, Read}; type SubtermDeque = VecDeque<(usize, usize)>; -impl MachineState { - pub(crate) fn devour_whitespace( - &mut self, - mut inner: Stream, - ) -> Result { - let mut parser = Parser::new(inner, self); - - parser.devour_whitespace()?; - inner.add_lines_read(parser.lines_read()); - - parser.eof() +pub(crate) fn devour_whitespace<'a, R: CharRead>(parser: &mut Parser<'a, R>) -> Result { + match parser.devour_whitespace() { + Err(e) if e.is_unexpected_eof() => { + Ok(true) + } + Err(e) => Err(e), + Ok(()) => { + Ok(false) + } } +} +impl MachineState { pub(crate) fn read( &mut self, mut inner: Stream,