Repositorios git - scryer-prolog.git/commitdiff
throw errors from char_reader.rs and get_n_chars when reading bad UTF8 data (#2244)
authorMark <[email protected]>
Tue, 26 Dec 2023 19:30:46 +0000 (12:30 -0700)
committerMark <[email protected]>
Tue, 26 Dec 2023 19:31:04 +0000 (12:31 -0700)
src/machine/system_calls.rs
src/parser/ast.rs
src/parser/char_reader.rs

index c5beaf9aa25b7ae9d4b3e556d97fdd894cca3ef9..631cf6266ffdf455a1e18f6fe94f43827a6f99ba 100644 (file)
@@ -3492,6 +3492,12 @@ impl Machine {
                     Some(Ok(c)) => {
                         string.push(c);
                     }
+                    Some(Err(e)) => {
+                        let stub = functor_stub(atom!("$get_n_chars"), 3);
+                        let err = self.machine_st.session_error(SessionError::from(e));
+
+                        return Err(self.machine_st.error_form(err, stub));
+                    }
                     _ => {
                         break;
                     }
index ec46a74fc32c8702ba70b270743f83554fc4ef93..04d6fc99cde2213fcdfe8d00dab6579e2326da73 100644 (file)
@@ -417,6 +417,9 @@ impl ParserError {
             ParserError::IO(e) if e.kind() == ErrorKind::UnexpectedEof => {
                 atom!("unexpected_end_of_file")
             }
+            ParserError::IO(e) if e.kind() == ErrorKind::InvalidData => {
+                atom!("invalid_data")
+            }
             ParserError::IO(_) => atom!("input_output_error"),
             ParserError::LexicalError(_) => atom!("lexical_error"),
             ParserError::MissingQuote(..) => atom!("missing_quote"),
index bf3b8f8e78b81da8b794a41c38e05df09e627656..8553905df75244b7aca7fcfef61427a1a2d07f65 100644 (file)
@@ -144,6 +144,35 @@ impl<R: Read> CharRead for CharReader<R> {
             Err(e) => return Some(Err(e)),
         }
 
+        let bad_bytes_error = |buf: &[u8]| {
+            // If we have 4 bytes that still don't make up
+            // a valid code point, then we have garbage.
+
+            // We have bad data in the buffer. Remove
+            // leading bytes until either the buffer is
+            // empty, or we have a valid code point.
+
+            let mut split_point = 1;
+            let mut badbytes = vec![];
+
+            loop {
+                let (bad, rest) = buf.split_at(split_point);
+
+                if rest.is_empty() || str::from_utf8(rest).is_ok() {
+                    badbytes.extend_from_slice(bad);
+                    break;
+                }
+
+                split_point += 1;
+            }
+
+            // Raise the error. If we still have data in
+            // the buffer, it will be returned on the next
+            // loop.
+
+            io::Error::new(io::ErrorKind::InvalidData, BadUtf8Error { bytes: badbytes })
+        };
+
         loop {
             let buf = &self.buf[self.pos..];
 
@@ -159,35 +188,7 @@ impl<R: Read> CharRead for CharReader<R> {
                 };
 
                 if buf.len() - e.valid_up_to() >= 4 {
-                    // If we have 4 bytes that still don't make up
-                    // a valid code point, then we have garbage.
-
-                    // We have bad data in the buffer. Remove
-                    // leading bytes until either the buffer is
-                    // empty, or we have a valid code point.
-
-                    let mut split_point = 1;
-                    let mut badbytes = vec![];
-
-                    loop {
-                        let (bad, rest) = buf.split_at(split_point);
-
-                        if rest.is_empty() || str::from_utf8(rest).is_ok() {
-                            badbytes.extend_from_slice(bad);
-                            break;
-                        }
-
-                        split_point += 1;
-                    }
-
-                    // Raise the error. If we still have data in
-                    // the buffer, it will be returned on the next
-                    // loop.
-
-                    return Some(Err(io::Error::new(
-                        io::ErrorKind::InvalidData,
-                        BadUtf8Error { bytes: badbytes },
-                    )));
+                    return Some(Err(bad_bytes_error(buf)));
                 } else if self.pos >= self.buf.len() {
                     return None;
                 } else if self.buf.len() - self.pos >= 4 {
@@ -223,6 +224,7 @@ impl<R: Read> CharRead for CharReader<R> {
 
                     match self.inner.read(word_slice) {
                         Err(e) => return Some(Err(e)),
+                        Ok(nread) if nread == 0 => return Some(Err(bad_bytes_error(&self.buf))),
                         Ok(nread) => {
                             self.buf.extend_from_slice(&word_slice[0..nread]);
                         }