From: Mark Date: Sat, 8 Jul 2023 19:38:32 +0000 (-0600) Subject: clarify EOF error across stream types and predicates (#1867, #1870) X-Git-Tag: v0.9.2~78 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=067b5998ee557899247faab3914abcfdc8e75684;p=scryer-prolog.git clarify EOF error across stream types and predicates (#1867, #1870) --- diff --git a/src/machine/dispatch.rs b/src/machine/dispatch.rs index 11e6d95d..047d5071 100644 --- a/src/machine/dispatch.rs +++ b/src/machine/dispatch.rs @@ -198,7 +198,7 @@ impl Machine { let value = self.machine_st.registers[2]; unify_fn!(&mut self.machine_st, value, heap_loc_as_cell!(offset.heap_loc)); } - Err(CompilationError::ParserError(ParserError::UnexpectedEOF)) => { + Err(CompilationError::ParserError(e)) if e.is_unexpected_eof() => { let value = self.machine_st.registers[2]; self.machine_st.unify_atom(atom!("end_of_file"), value); } diff --git a/src/machine/machine_errors.rs b/src/machine/machine_errors.rs index dc9f8f13..4422fdae 100644 --- a/src/machine/machine_errors.rs +++ b/src/machine/machine_errors.rs @@ -680,7 +680,6 @@ impl CompilationError { functor!(atom!("no_such_module"), [atom(module_name)]) } &CompilationError::InvalidRuleHead => { - functor!(atom!("invalid_head_of_rule")) // TODO: type_error(callable, _). } &CompilationError::InvalidUseModuleDecl => { diff --git a/src/machine/machine_state.rs b/src/machine/machine_state.rs index 2d62d368..7b074a64 100644 --- a/src/machine/machine_state.rs +++ b/src/machine/machine_state.rs @@ -634,21 +634,24 @@ impl MachineState { return Ok(unify_fn!(*self, var_names_offset, var_names_addr)); } Err(err) => { - if let CompilationError::ParserError(ParserError::UnexpectedEOF) = err { - self.eof_action( - self.registers[2], - stream, - atom!("read_term"), - 3, - )?; - - if stream.options().eof_action() == EOFAction::Reset { - if self.fail == false { - continue; + match err { + CompilationError::ParserError(e) if e.is_unexpected_eof() => { + self.eof_action( + self.registers[2], + stream, + atom!("read_term"), + 3, + )?; + + if stream.options().eof_action() == EOFAction::Reset { + if self.fail == false { + continue; + } } - } - return Ok(()); + return Ok(()); + } + _ => {} } let stub = functor_stub(atom!("read_term"), 3); diff --git a/src/machine/streams.rs b/src/machine/streams.rs index 27942cb2..80c51537 100644 --- a/src/machine/streams.rs +++ b/src/machine/streams.rs @@ -1523,20 +1523,10 @@ impl MachineState { } } - pub(crate) fn open_parsing_stream( - &mut self, - mut stream: Stream, - stub_name: Atom, - stub_arity: usize, - ) -> Result { + pub(crate) fn open_parsing_stream(&mut self, mut stream: Stream) -> Result { match stream.peek_char() { None => Ok(stream), // empty stream is handled gracefully by Lexer::eof - Some(Err(e)) => { - let err = self.session_error(SessionError::from(e)); - let stub = functor_stub(stub_name, stub_arity); - - Err(self.error_form(err, stub)) - } + Some(Err(e)) => Err(ParserError::IO(e)), Some(Ok(c)) => { if c == '\u{feff}' { // skip UTF-8 BOM diff --git a/src/machine/system_calls.rs b/src/machine/system_calls.rs index 13045f8c..da2314d3 100644 --- a/src/machine/system_calls.rs +++ b/src/machine/system_calls.rs @@ -925,7 +925,7 @@ impl MachineState { loop { match lexer.lookahead_char() { - Err(ParserError::UnexpectedEOF) => { + Err(e) if e.is_unexpected_eof() => { let mut parser = Parser::from_lexer(lexer); let op_dir = CompositeOpDir::new(&indices.op_dir, None); @@ -3377,7 +3377,7 @@ impl Machine { } let stub_gen = || functor_stub(atom!("get_char"), 2); - let mut iter = self.machine_st.open_parsing_stream(stream, atom!("get_char"), 2)?; + let result = self.machine_st.open_parsing_stream(stream); let addr = if addr.is_var() { addr @@ -3396,11 +3396,26 @@ impl Machine { ) }; - loop { - let result = iter.read_char(); + let mut iter = match result { + Ok(iter) => iter, + Err(e) => { + if e.is_unexpected_eof() { + self.machine_st.unify_atom(atom!("end_of_file"), addr); + return Ok(()); + } else { + let err = self.machine_st.session_error(SessionError::from(e)); + return Err(self.machine_st.error_form(err, stub_gen())); + } + } + }; - match result { - Some(Ok('\u{0}')) | Some(Err(_)) | None => { + loop { + match iter.read_char() { + Some(Ok(c)) => { + self.machine_st.unify_char(c, addr); + break; + } + _ => { self.machine_st.eof_action( self.machine_st.registers[2], stream, @@ -3414,10 +3429,6 @@ impl Machine { break; } } - Some(Ok(c)) => { - self.machine_st.unify_char(c, addr); - break; - } } } @@ -3459,7 +3470,13 @@ impl Machine { string.push(c as char); } } else { - let mut iter = self.machine_st.open_parsing_stream(stream, atom!("get_n_chars"), 2)?; + let mut iter = self.machine_st.open_parsing_stream(stream) + .map_err(|e| { + let err = self.machine_st.session_error(SessionError::from(e)); + let stub = functor_stub(atom!("get_n_chars"), 2); + + self.machine_st.error_form(err, stub) + })?; for _ in 0..num { let result = iter.read_char(); @@ -3557,7 +3574,13 @@ impl Machine { } }; - let mut iter = self.machine_st.open_parsing_stream(stream.clone(), atom!("get_code"), 2)?; + let mut iter = self.machine_st.open_parsing_stream(stream) + .map_err(|e| { + let err = self.machine_st.session_error(SessionError::from(e)); + let stub = functor_stub(atom!("get_code"), 2); + + self.machine_st.error_form(err, stub) + })?; loop { let result = iter.read_char(); diff --git a/src/machine/term_stream.rs b/src/machine/term_stream.rs index 98d77627..079f5c49 100644 --- a/src/machine/term_stream.rs +++ b/src/machine/term_stream.rs @@ -125,15 +125,15 @@ pub struct InlineTermStream { impl TermStream for InlineTermStream { fn next(&mut self, _: &CompositeOpDir) -> Result { - Err(CompilationError::from(ParserError::UnexpectedEOF)) + Err(CompilationError::from(ParserError::unexpected_eof())) } fn eof(&mut self) -> Result { - Ok(true) + Ok(true) } fn listing_src(&self) -> &ListingSource { - &ListingSource::User + &ListingSource::User } } diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 6ac5d05e..caed5915 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -7,7 +7,7 @@ use crate::types::HeapCellValueTag; use std::cell::{Cell, Ref, RefCell, RefMut}; use std::fmt; use std::hash::{Hash, Hasher}; -use std::io::{Error as IOError}; +use std::io::{Error as IOError, ErrorKind}; use std::ops::{Deref, Neg}; use std::rc::Rc; use std::vec::Vec; @@ -380,7 +380,7 @@ pub enum ParserError { NonPrologChar(usize, usize), ParseBigInt(usize, usize), UnexpectedChar(char, usize, usize), - UnexpectedEOF, + // UnexpectedEOF, Utf8Error(usize, usize), } @@ -403,16 +403,30 @@ impl ParserError { ParserError::BackQuotedString(..) => atom!("back_quoted_string"), ParserError::IncompleteReduction(..) => atom!("incomplete_reduction"), ParserError::InvalidSingleQuotedCharacter(..) => atom!("invalid_single_quoted_character"), + ParserError::IO(e) if e.kind() == ErrorKind::UnexpectedEof => atom!("unexpected_end_of_file"), ParserError::IO(_) => atom!("input_output_error"), - ParserError::LexicalError(_) => atom!("lexical_error"), // TODO: ? + ParserError::LexicalError(_) => atom!("lexical_error"), ParserError::MissingQuote(..) => atom!("missing_quote"), ParserError::NonPrologChar(..) => atom!("non_prolog_character"), ParserError::ParseBigInt(..) => atom!("cannot_parse_big_int"), ParserError::UnexpectedChar(..) => atom!("unexpected_char"), - ParserError::UnexpectedEOF => atom!("unexpected_end_of_file"), ParserError::Utf8Error(..) => atom!("utf8_conversion_error"), } } + + #[inline] + pub fn unexpected_eof() -> Self { + ParserError::IO(std::io::Error::from(ErrorKind::UnexpectedEof)) + } + + #[inline] + pub fn is_unexpected_eof(&self) -> bool { + if let ParserError::IO(e) = self { + e.kind() == ErrorKind::UnexpectedEof + } else { + false + } + } } impl From for ParserError { diff --git a/src/parser/char_reader.rs b/src/parser/char_reader.rs index 4b08e008..32854b07 100644 --- a/src/parser/char_reader.rs +++ b/src/parser/char_reader.rs @@ -117,8 +117,6 @@ impl CharReader { // Branch using `>=` instead of the more correct `==` // to tell the compiler that the pos..cap slice is always valid. if self.pos >= self.buf.len() { - debug_assert!(self.pos >= self.buf.len()); - self.buf.clear(); let mut word = [0u8; std::mem::size_of::()]; diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index 47ff0a22..e3a81e6f 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -18,7 +18,7 @@ macro_rules! is_not_eof { return Ok(true); } Ok(c) => c, - Err($crate::parser::ast::ParserError::UnexpectedEOF) => return Ok(true), + Err(e) if e.is_unexpected_eof() => return Ok(true), Err(e) => return Err(e), } }; @@ -94,14 +94,14 @@ impl<'a, R: CharRead> Lexer<'a, R> { pub fn lookahead_char(&mut self) -> Result { match self.reader.peek_char() { Some(Ok(c)) => Ok(c), - _ => Err(ParserError::UnexpectedEOF) + _ => Err(ParserError::unexpected_eof()) } } pub fn read_char(&mut self) -> Result { match self.reader.read_char() { Some(Ok(c)) => Ok(c), - _ => Err(ParserError::UnexpectedEOF) + _ => Err(ParserError::unexpected_eof()) } } @@ -168,7 +168,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { let mut c = self.lookahead_char()?; - let mut comment_loop = || { + let mut comment_loop = || -> Result<(), ParserError> { loop { while !comment_2_char!(c) { self.skip_char(c); @@ -187,7 +187,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { }; match comment_loop() { - Err(ParserError::UnexpectedEOF) => { + Err(e) if e.is_unexpected_eof() => { return Err(ParserError::IncompleteReduction(self.line_num, self.col_num)); } Err(e) => { @@ -1003,7 +1003,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { return Ok(Token::End); } - Err(ParserError::UnexpectedEOF) => { + Err(e) if e.is_unexpected_eof() => { return Ok(Token::End); } _ => { @@ -1055,7 +1055,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { } if c == '\u{0}' { - return Err(ParserError::UnexpectedEOF); + return Err(ParserError::unexpected_eof()) } self.name_token(c) diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 5bb600da..c170cfe0 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -275,7 +275,7 @@ fn read_tokens(lexer: &mut Lexer) -> Result, ParserEr break; } } - Err(ParserError::UnexpectedEOF) if !tokens.is_empty() => { + Err(e) if e.is_unexpected_eof() && !tokens.is_empty() => { return Err(ParserError::IncompleteReduction( lexer.line_num, lexer.col_num, @@ -883,7 +883,7 @@ impl<'a, R: CharRead> Parser<'a, R> { }) = get_op_desc(name, op_dir) { if (pre > 0 && inf + post > 0) || is_negate!(spec) { - match self.tokens.last().ok_or(ParserError::UnexpectedEOF)? { + match self.tokens.last().ok_or(ParserError::unexpected_eof())? { // do this when layout hasn't been inserted, // ie. why we don't match on Token::Open. Token::OpenCT => { diff --git a/src/read.rs b/src/read.rs index 085c867e..86fdcdbd 100644 --- a/src/read.rs +++ b/src/read.rs @@ -150,7 +150,7 @@ impl ReadlineStream { Ok(self.pending_input.get_ref().get_ref().len()) } - Err(ReadlineError::Eof) => Ok(0), + Err(ReadlineError::Eof) => Err(Error::from(ErrorKind::UnexpectedEof)), Err(e) => Err(Error::new(ErrorKind::InvalidInput, e)), } } @@ -178,9 +178,6 @@ impl ReadlineStream { loop { match byte { - Some(0) => { - return Ok(0); - } Some(b) => { return Ok(b); } @@ -188,10 +185,6 @@ impl ReadlineStream { Err(e) => { return Err(e); } - Ok(0) => { - self.pending_input.get_mut().get_mut().push('\u{0}'); - return Ok(0); - } _ => { set_prompt(false); } @@ -218,9 +211,6 @@ impl CharRead for ReadlineStream { fn peek_char(&mut self) -> Option> { loop { match self.pending_input.peek_char() { - Some(Ok('\u{0}')) => { - return Some(Ok('\u{0}')); - } Some(Ok(c)) => { return Some(Ok(c)); } @@ -229,10 +219,6 @@ impl CharRead for ReadlineStream { Err(e) => { return Some(Err(e)); } - Ok(0) => { - self.pending_input.get_mut().get_mut().push('\u{0}'); - return Some(Ok('\u{0}')); - } _ => { set_prompt(false); }