From: Mark Date: Mon, 17 Jul 2023 02:42:40 +0000 (-0600) Subject: do a better job handling EOF in read_term (#1887) X-Git-Tag: v0.9.2~46^2~1 X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=86c90d77dd116694c1c8ec442ff227aa26c0e6ae;p=scryer-prolog.git do a better job handling EOF in read_term (#1887) --- diff --git a/src/machine/machine_state.rs b/src/machine/machine_state.rs index 499dce55..c8a9cbe0 100644 --- a/src/machine/machine_state.rs +++ b/src/machine/machine_state.rs @@ -616,7 +616,7 @@ impl MachineState { unreachable!("Stream must be a Stream::Readline(_)") } - pub fn read_term(&mut self, stream: Stream, indices: &mut IndexStore) -> CallResult { + pub fn read_term(&mut self, mut stream: Stream, indices: &mut IndexStore) -> CallResult { self.check_stream_properties( stream, StreamType::Text, @@ -637,22 +637,27 @@ impl MachineState { match self.read(stream, &indices.op_dir) { Ok(term_write_result) => return self.read_term_body(term_write_result), Err(err) => { - match err { + match &err { CompilationError::ParserError(e) if e.is_unexpected_eof() => { - self.eof_action( - self.registers[2], - stream, - atom!("read_term"), - 3, - )?; - - if stream.options().eof_action() == EOFAction::Reset { - if self.fail == false { - continue; + if stream.at_end_of_stream() { + unify!(self, self.registers[2], atom_as_cell!(atom!("end_of_file"))); + return Ok(()); + } else if stream.past_end_of_stream() { + self.eof_action( + self.registers[2], + stream, + atom!("read_term"), + 3, + )?; + + if stream.options().eof_action() == EOFAction::Reset { + if self.fail == false { + continue; + } } - } - return Ok(()); + return Ok(()); + } } _ => {} } diff --git a/src/machine/streams.rs b/src/machine/streams.rs index c1b4678e..0ea8591d 100644 --- a/src/machine/streams.rs +++ b/src/machine/streams.rs @@ -884,19 +884,38 @@ impl PartialEq for Stream { impl Eq for Stream {} +fn cursor_position(past_end_of_stream: &mut bool, cursor: &Cursor, cursor_len: u64) -> AtEndOfStream { + let position = cursor.position(); + + let at_end_of_stream = match position.cmp(&cursor_len) { + Ordering::Equal => AtEndOfStream::At, + Ordering::Greater => { + *past_end_of_stream = true; + AtEndOfStream::Past + } + Ordering::Less => AtEndOfStream::Not, + }; + + at_end_of_stream +} + impl Stream { #[inline] pub(crate) fn position(&mut self) -> Option<(u64, usize)> { // returns lines_read, position. let result = match self { + Stream::Byte(byte_stream_layout) => { + Some(byte_stream_layout.stream.get_ref().0.position()) + } + Stream::StaticString(string_stream_layout) => { + Some(string_stream_layout.stream.stream.position()) + } Stream::InputFile(file_stream) => { file_stream.position() } - Stream::NamedTcp(..) - | Stream::NamedTls(..) - | Stream::Readline(..) - | Stream::StaticString(..) - | Stream::Byte(..) => Some(0), + Stream::NamedTcp(..) | Stream::NamedTls(..) | Stream::Readline(..) => { + Some(0) + } _ => None, }; @@ -971,38 +990,61 @@ impl Stream { return AtEndOfStream::Past; } - if let Stream::InputFile(stream_layout) = self { - let position = stream_layout.position(); - - let StreamLayout { - past_end_of_stream, - stream, - .. - } = &mut **stream_layout; - - match stream.get_ref().file.metadata() { - Ok(metadata) => { - if let Some(position) = position { - return match position.cmp(&metadata.len()) { - Ordering::Equal => AtEndOfStream::At, - Ordering::Less => AtEndOfStream::Not, - Ordering::Greater => { - *past_end_of_stream = true; - AtEndOfStream::Past + match self { + Stream::Byte(stream_layout) => { + let StreamLayout { + past_end_of_stream, + stream, + .. + } = &mut **stream_layout; + + let cursor_len = stream.get_ref().0.get_ref().len() as u64; + cursor_position(past_end_of_stream, &stream.get_ref().0, cursor_len) + } + Stream::StaticString(stream_layout) => { + let StreamLayout { + past_end_of_stream, + stream, + .. + } = &mut **stream_layout; + + let cursor_len = stream.stream.get_ref().len() as u64; + cursor_position(past_end_of_stream, &stream.stream, cursor_len) + } + Stream::InputFile(stream_layout) => { + let position = stream_layout.position(); + + let StreamLayout { + past_end_of_stream, + stream, + .. + } = &mut **stream_layout; + + match stream.get_ref().file.metadata() { + Ok(metadata) => { + if let Some(position) = position { + match position.cmp(&metadata.len()) { + Ordering::Equal => AtEndOfStream::At, + Ordering::Less => AtEndOfStream::Not, + Ordering::Greater => { + *past_end_of_stream = true; + AtEndOfStream::Past + } } - }; - } else { + } else { + *past_end_of_stream = true; + AtEndOfStream::Past + } + } + _ => { *past_end_of_stream = true; AtEndOfStream::Past } } - _ => { - *past_end_of_stream = true; - AtEndOfStream::Past - } } - } else { - AtEndOfStream::Not + _ => { + AtEndOfStream::Not + } } } @@ -1306,7 +1348,7 @@ impl MachineState { match eof_action { EOFAction::Error => { stream.set_past_end_of_stream(true); - return Err(self.open_past_eos_error(stream, caller, arity)); + Err(self.open_past_eos_error(stream, caller, arity)) } EOFAction::EOFCode => { let end_of_stream = if stream.options().stream_type() == StreamType::Binary { diff --git a/src/machine/system_calls.rs b/src/machine/system_calls.rs index f18a5464..86c4c1bd 100644 --- a/src/machine/system_calls.rs +++ b/src/machine/system_calls.rs @@ -5797,71 +5797,70 @@ impl Machine { } #[inline(always)] - pub(crate) fn read_from_chars(&mut self) -> CallResult { - if let Some(atom_or_string) = self.machine_st.value_to_str_like(self.machine_st.registers[1]) { - let chars = CharReader::new(ByteStream::from_string(atom_or_string.to_string())); - let mut parser = Parser::new(chars, &mut self.machine_st); - let op_dir = CompositeOpDir::new(&self.indices.op_dir, None); - - let term_write_result = parser.read_term(&op_dir, Tokens::Default) - .map_err(CompilationError::from) - .and_then(|term| { - write_term_to_heap( - &term, - &mut self.machine_st.heap, - &mut self.machine_st.atom_tbl, - ) - }); + fn read_term_and_write_to_heap( + &mut self, + atom_or_string: AtomOrString, + ) -> Result, MachineStub> { + let string = match atom_or_string { + AtomOrString::Atom(atom) if atom == atom!("[]") => "".to_owned(), + _ => atom_or_string.to_string(), + }; - let term_write_result = match term_write_result { - Ok(term_write_result) => term_write_result, - Err(e) => { - let stub = functor_stub(atom!("read_from_chars"), 2); - let e = self.machine_st.session_error(SessionError::from(e)); + let chars = CharReader::new(ByteStream::from_string(string)); + let mut parser = Parser::new(chars, &mut self.machine_st); + let op_dir = CompositeOpDir::new(&self.indices.op_dir, None); - return Err(self.machine_st.error_form(e, stub)); - } - }; + let term_write_result = parser.read_term(&op_dir, Tokens::Default) + .map_err(|err| error_after_read_term(err, 0, &parser)) + .and_then(|term| { + write_term_to_heap( + &term, + &mut self.machine_st.heap, + &mut self.machine_st.atom_tbl, + ) + }); - let result = heap_loc_as_cell!(term_write_result.heap_loc); - let var = self.deref_register(2).as_var().unwrap(); + match term_write_result { + Ok(term_write_result) => Ok(Some(term_write_result)), + Err(CompilationError::ParserError(e)) if e.is_unexpected_eof() => { + let value = self.machine_st.registers[2]; + self.machine_st.unify_atom(atom!("end_of_file"), value); - self.machine_st.bind(var, result); - } else { - unreachable!() - } + Ok(None) + } + Err(e) => { + let stub = functor_stub(atom!("read_term_from_chars"), 3); + let e = self.machine_st.session_error(SessionError::from(e)); - Ok(()) + Err(self.machine_st.error_form(e, stub)) + } + } } #[inline(always)] - pub(crate) fn read_term_from_chars(&mut self) -> CallResult { + pub(crate) fn read_from_chars(&mut self) -> CallResult { if let Some(atom_or_string) = self.machine_st.value_to_str_like(self.machine_st.registers[1]) { - let chars = CharReader::new(ByteStream::from_string(atom_or_string.to_string())); - let mut parser = Parser::new(chars, &mut self.machine_st); - let op_dir = CompositeOpDir::new(&self.indices.op_dir, None); - - let term_write_result = parser.read_term(&op_dir, Tokens::Default) - .map_err(CompilationError::from) - .and_then(|term| { - write_term_to_heap( - &term, - &mut self.machine_st.heap, - &mut self.machine_st.atom_tbl, - ) - }); + if let Some(term_write_result) = self.read_term_and_write_to_heap(atom_or_string)? { + let result = heap_loc_as_cell!(term_write_result.heap_loc); + let var = self.deref_register(2).as_var().unwrap(); - let term_write_result = match term_write_result { - Ok(term_write_result) => term_write_result, - Err(e) => { - let stub = functor_stub(atom!("read_term_from_chars"), 3); - let e = self.machine_st.session_error(SessionError::from(e)); + self.machine_st.bind(var, result); + } - return Err(self.machine_st.error_form(e, stub)); - } - }; + Ok(()) + } else { + unreachable!() + } + } - self.machine_st.read_term_body(term_write_result) + #[inline(always)] + pub(crate) fn read_term_from_chars(&mut self) -> CallResult { + if let Some(atom_or_string) = self.machine_st.value_to_str_like(self.machine_st.registers[1]) { + if let Some(term_write_result) = self.read_term_and_write_to_heap(atom_or_string)? { + self.machine_st.read_term_body(term_write_result) + } else { + Ok(()) + } } else { unreachable!() } diff --git a/src/read.rs b/src/read.rs index 9a191dad..dfba5147 100644 --- a/src/read.rs +++ b/src/read.rs @@ -36,6 +36,25 @@ pub(crate) fn devour_whitespace<'a, R: CharRead>(parser: &mut Parser<'a, R>) -> } } +pub(crate) fn error_after_read_term( + err: ParserError, + prior_num_lines_read: usize, + parser: &Parser, +) -> CompilationError { + if err.is_unexpected_eof() { + let line_num = parser.lexer.line_num; + let col_num = parser.lexer.col_num; + + // rough overlap with errors 8.14.1.3 k) & l) of the ISO standard here + if !(line_num == prior_num_lines_read && col_num == 0) { + return CompilationError::from(ParserError::IncompleteReduction(line_num, col_num)); + } + } + + CompilationError::from(err) +} + + impl MachineState { pub(crate) fn read( &mut self, @@ -50,7 +69,7 @@ impl MachineState { parser.add_lines_read(prior_num_lines_read); let term = parser.read_term(&op_dir, Tokens::Default) - .map_err(CompilationError::from)?; + .map_err(|err| error_after_read_term(err, prior_num_lines_read, &parser))?; // CompilationError::from (term, parser.lines_read() - prior_num_lines_read) };