From: Mark Thom
Date: Wed, 15 Apr 2020 05:14:47 +0000 (-0600)
Subject: properly iterate through UTF-8 strings (#350)
X-Git-Tag: v0.8.123~159
X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=c9074d8d9650af979b479d563e96dac1e64db606;p=scryer-prolog.git

properly iterate through UTF-8 strings (#350)
---

diff --git a/src/prolog/machine/partial_string.rs b/src/prolog/machine/partial_string.rs
index 700358eb..eaaad181 100644
--- a/src/prolog/machine/partial_string.rs
+++ b/src/prolog/machine/partial_string.rs
@@ -48,13 +48,15 @@ fn scan_for_terminator<Iter: Iterator<Item = char>>(iter: Iter) -> usize {
 
 pub struct PStrIter {
     buf: *const u8,
+    len: usize,
 }
 
 impl PStrIter {
     #[inline]
-    fn from(buf: *const u8, idx: usize) -> Self {
+    fn from(buf: *const u8, len: usize, idx: usize) -> Self {
         PStrIter {
-            buf: (buf as usize + idx) as *const _
+            buf: (buf as usize + idx) as *const _,
+            len: len - idx,
         }
     }
 }
@@ -64,27 +66,13 @@ impl Iterator for PStrIter {
 
     fn next(&mut self) -> Option<Self::Item> {
         unsafe {
-            let mut byte_count = 0;
-
-            for n in 0 .. mem::size_of::<char>() {
-                let b = ptr::read((self.buf as usize + n) as *const u8);
-
-                if b == 0u8 {
-                    break;
-                } else {
-                    byte_count += 1;
-                }
-            }
-
-            if byte_count == 0 {
-                return None;
-            }
-
-            let slice = slice::from_raw_parts(self.buf, byte_count);
+            let slice = slice::from_raw_parts(self.buf, self.len);
             let s = str::from_utf8(slice).unwrap();
 
             if let Some(c) = s.chars().next() {
                 self.buf = self.buf.offset(c.len_utf8() as isize);
+                self.len -= c.len_utf8();
+
                 Some(c)
             } else {
                 None
@@ -212,7 +200,7 @@ impl PartialString {
 
     #[inline]
     pub fn range_from(&self, index: RangeFrom<usize>) -> PStrIter {
-        PStrIter::from(self.buf, index.start)
+        PStrIter::from(self.buf, self.len, index.start)
     }
 
     #[inline]