From ce56b5ed6b28745b202ef939dd227aa9491b2721 Mon Sep 17 00:00:00 2001 From: Mark Thom Date: Sat, 5 Jul 2025 00:58:28 -0700 Subject: [PATCH] modify number_token to produce partial lexing results (#2986) --- src/machine/system_calls.rs | 17 +- src/parser/lexer.rs | 337 +++++++++++++++--------------------- 2 files changed, 144 insertions(+), 210 deletions(-) diff --git a/src/machine/system_calls.rs b/src/machine/system_calls.rs index 9d056ff9..f14645e7 100644 --- a/src/machine/system_calls.rs +++ b/src/machine/system_calls.rs @@ -911,27 +911,16 @@ impl MachineState { use crate::parser::lexer::*; let nx = self.store(self.deref(self.registers[2])); - let add_dot = !string.ends_with('.') || string.ends_with("'."); - let cursor = std::io::Cursor::new(string); - - let iter = std::io::Read::chain(cursor, { - let mut dot_buf: [u8; '.'.len_utf8()] = [0u8]; - - if add_dot && !string.ends_with('\'') { - '.'.encode_utf8(&mut dot_buf); - } - - std::io::Cursor::new(dot_buf) - }); + let iter = std::io::Cursor::new(string); let mut lexer = Lexer::new(CharReader::new(iter), self); let mut tokens = vec![]; - match lexer.next_token() { + match lexer.next_number_token() { Ok(token @ Token::Literal(Literal::Atom(atom!("-")) | Literal::Char('-'))) => { tokens.push(token); - if let Ok(token) = lexer.next_token() { + if let Ok(token) = lexer.next_number_token() { tokens.push(token); } } diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index 06f8a326..75700f38 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -1,3 +1,5 @@ +use crate::arena::F64Ptr; +use crate::arena::TypedArenaPtr; use crate::atom_table::*; pub use crate::machine::machine_state::*; use crate::parser::ast::*; @@ -49,6 +51,55 @@ impl Token { } } +#[derive(Debug)] +enum Number { + BigInt(TypedArenaPtr), + Fixnum(Fixnum), + Float(F64Ptr), +} + +impl Number { + #[inline] + fn to_literal(self) -> Literal { + match self { + Number::BigInt(ibig) => Literal::Integer(ibig), + Number::Fixnum(fixnum) => Literal::Fixnum(fixnum), + Number::Float(f) => Literal::Float(f.as_offset()), + } + } +} + +#[derive(Debug)] +enum NumberToken { + Number(Number), + Partial(String), +} + +impl NumberToken { + #[inline] + fn to_token(self) -> Option { + match self { + NumberToken::Number(number) => Some(Token::Literal(number.to_literal())), + NumberToken::Partial(_) => None, + } + } +} + +macro_rules! try_nt { + ($token:expr, $e: expr) => {{ + match $e { + Ok(c) => c, + Err(e) => { + return if e.is_unexpected_eof() { + Ok(NumberToken::Partial($token)) + } else { + Err(e) + } + } + } + }}; +} + pub struct Lexer<'a, R> { pub(crate) reader: R, pub(crate) machine_st: &'a mut MachineState, @@ -443,7 +494,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { } } - fn hexadecimal_constant(&mut self, start: char) -> Result { + fn hexadecimal_constant(&mut self, start: char) -> Result { self.skip_char(start); let mut c = self.lookahead_char()?; @@ -454,31 +505,21 @@ impl<'a, R: CharRead> Lexer<'a, R> { if hexadecimal_digit_char!(c) { self.skip_char(c); token.push(c); - c = self.lookahead_char()?; + c = try_nt!(token, self.lookahead_char()); } else { break; } } - i64::from_str_radix(&token, 16) - .map(|n| Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena))) - .or_else(|_| { - Integer::from_str_radix(&token, 16) - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| ParserError::ParseBigInt(self.line_num, self.col_num)) - }) + self.parse_integer_by_radix(&token, 16) + .map(NumberToken::Number) } else { self.return_char(start); Err(ParserError::ParseBigInt(self.line_num, self.col_num)) } } - fn octal_constant(&mut self, start: char) -> Result { + fn octal_constant(&mut self, start: char) -> Result { self.skip_char(start); let mut c = self.lookahead_char()?; @@ -489,31 +530,21 @@ impl<'a, R: CharRead> Lexer<'a, R> { if octal_digit_char!(c) { self.skip_char(c); token.push(c); - c = self.lookahead_char()?; + c = try_nt!(token, self.lookahead_char()); } else { break; } } - i64::from_str_radix(&token, 8) - .map(|n| Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena))) - .or_else(|_| { - Integer::from_str_radix(&token, 8) - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| ParserError::ParseBigInt(self.line_num, self.col_num)) - }) + self.parse_integer_by_radix(&token, 8) + .map(NumberToken::Number) } else { self.return_char(start); Err(ParserError::ParseBigInt(self.line_num, self.col_num)) } } - fn binary_constant(&mut self, start: char) -> Result { + fn binary_constant(&mut self, start: char) -> Result { self.skip_char(start); let mut c = self.lookahead_char()?; @@ -524,24 +555,14 @@ impl<'a, R: CharRead> Lexer<'a, R> { if binary_digit_char!(c) { self.skip_char(c); token.push(c); - c = self.lookahead_char()?; + c = try_nt!(token, self.lookahead_char()); } else { break; } } - i64::from_str_radix(&token, 2) - .map(|n| Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena))) - .or_else(|_| { - Integer::from_str_radix(&token, 2) - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| ParserError::ParseBigInt(self.line_num, self.col_num)) - }) + self.parse_integer_by_radix(&token, 2) + .map(NumberToken::Number) } else { self.return_char(start); Err(ParserError::ParseBigInt(self.line_num, self.col_num)) @@ -634,15 +655,10 @@ impl<'a, R: CharRead> Lexer<'a, R> { } } - fn vacate_with_float(&mut self, mut token: String) -> Result { + fn vacate_with_float(&mut self, mut token: String) -> Result { self.return_char(token.pop().unwrap()); - let n = parse_float_lossy(&token)?; - - Ok(Token::Literal(Literal::from(float_alloc!( - n, - self.machine_st.arena - )))) + Ok(Number::Float(float_alloc!(n, self.machine_st.arena))) } fn skip_underscore_in_number(&mut self) -> Result { @@ -663,17 +679,43 @@ impl<'a, R: CharRead> Lexer<'a, R> { } } - pub fn number_token(&mut self, leading_c: char) -> Result { + fn parse_integer_by_radix( + &mut self, + token: &String, + radix: u32, + ) -> Result { + i64::from_str_radix(&token, radix) + .map(|n| { + Fixnum::build_with_checked(n) + .map(Number::Fixnum) + .unwrap_or_else(|_| { + Number::BigInt(arena_alloc!(Integer::from(n), &mut self.machine_st.arena)) + }) + }) + .or_else(|_| { + token + .parse::() + .map(|n| Number::BigInt(arena_alloc!(n, &mut self.machine_st.arena))) + .map_err(|_| ParserError::ParseBigInt(self.line_num, self.col_num)) + }) + } + + #[inline] + fn parse_integer(&mut self, token: &String) -> Result { + self.parse_integer_by_radix(token, 10) + } + + fn number_token(&mut self, leading_c: char) -> Result { let mut token = String::with_capacity(16); self.skip_char(leading_c); token.push(leading_c); - let mut c = self.skip_underscore_in_number()?; + let mut c = try_nt!(token, self.skip_underscore_in_number()); while decimal_digit_char!(c) { token.push(c); self.skip_char(c); - c = self.skip_underscore_in_number()?; + c = try_nt!(token, self.skip_underscore_in_number()); } if decimal_point_char!(c) { @@ -681,31 +723,17 @@ impl<'a, R: CharRead> Lexer<'a, R> { if self.reader.peek_char().is_none() { self.return_char('.'); - - token - .parse::() - .map(|n| Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena))) - .or_else(|_| { - token - .parse::() - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| ParserError::ParseBigInt(self.line_num, self.col_num)) - }) + self.parse_integer(&token).map(NumberToken::Number) } else if decimal_digit_char!(self.lookahead_char()?) { token.push('.'); token.push(self.read_char()?); - let mut c = self.lookahead_char()?; + let mut c = try_nt!(token, self.lookahead_char()); while decimal_digit_char!(c) { token.push(c); self.skip_char(c); - c = self.lookahead_char()?; + c = try_nt!(token, self.lookahead_char()); } if exponent_char!(c) { @@ -713,12 +741,12 @@ impl<'a, R: CharRead> Lexer<'a, R> { token.push(c); let c = match self.lookahead_char() { - Err(_) => return self.vacate_with_float(token), + Err(_) => return self.vacate_with_float(token).map(NumberToken::Number), Ok(c) => c, }; if !sign_char!(c) && !decimal_digit_char!(c) { - return self.vacate_with_float(token); + return self.vacate_with_float(token).map(NumberToken::Number); } if sign_char!(c) { @@ -728,14 +756,14 @@ impl<'a, R: CharRead> Lexer<'a, R> { let c = match self.lookahead_char() { Err(_) => { self.return_char(token.pop().unwrap()); - return self.vacate_with_float(token); + return self.vacate_with_float(token).map(NumberToken::Number); } Ok(c) => c, }; if !decimal_digit_char!(c) { self.return_char(token.pop().unwrap()); - return self.vacate_with_float(token); + return self.vacate_with_float(token).map(NumberToken::Number); } } @@ -746,7 +774,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { token.push(c); loop { - c = self.lookahead_char()?; + c = try_nt!(token, self.lookahead_char()); if decimal_digit_char!(c) { self.skip_char(c); @@ -757,60 +785,29 @@ impl<'a, R: CharRead> Lexer<'a, R> { } let n = parse_float_lossy(&token)?; - Ok(Token::Literal(Literal::from(float_alloc!( + Ok(NumberToken::Number(Number::Float(float_alloc!( n, self.machine_st.arena )))) } else { - return self.vacate_with_float(token); + return self.vacate_with_float(token).map(NumberToken::Number); } } else { let n = parse_float_lossy(&token)?; - Ok(Token::Literal(Literal::from(float_alloc!( + Ok(NumberToken::Number(Number::Float(float_alloc!( n, self.machine_st.arena )))) } } else { self.return_char('.'); - - token - .parse::() - .map(|n| Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena))) - .or_else(|_| { - token - .parse::() - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| ParserError::ParseBigInt(self.line_num, self.col_num)) - }) + self.parse_integer(&token).map(NumberToken::Number) } } else if token.starts_with('0') && token.len() == 1 { if c == 'x' { self.hexadecimal_constant(c).or_else(|e| { if let ParserError::ParseBigInt(..) = e { - token - .parse::() - .map(|n| { - Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena)) - }) - .or_else(|_| { - token - .parse::() - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| { - ParserError::ParseBigInt(self.line_num, self.col_num) - }) - }) + self.parse_integer(&token).map(NumberToken::Number) } else { Err(e) } @@ -818,24 +815,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { } else if c == 'o' { self.octal_constant(c).or_else(|e| { if let ParserError::ParseBigInt(..) = e { - token - .parse::() - .map(|n| { - Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena)) - }) - .or_else(|_| { - token - .parse::() - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| { - ParserError::ParseBigInt(self.line_num, self.col_num) - }) - }) + self.parse_integer(&token).map(NumberToken::Number) } else { Err(e) } @@ -843,24 +823,7 @@ impl<'a, R: CharRead> Lexer<'a, R> { } else if c == 'b' { self.binary_constant(c).or_else(|e| { if let ParserError::ParseBigInt(..) = e { - token - .parse::() - .map(|n| { - Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena)) - }) - .or_else(|_| { - token - .parse::() - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| { - ParserError::ParseBigInt(self.line_num, self.col_num) - }) - }) + self.parse_integer(&token).map(NumberToken::Number) } else { Err(e) } @@ -877,14 +840,14 @@ impl<'a, R: CharRead> Lexer<'a, R> { self.skip_char(c); self.return_char('\''); - return Ok(Token::Literal(Literal::Fixnum(Fixnum::build_with(0)))); + return Ok(NumberToken::Number(Number::Fixnum(Fixnum::build_with(0)))); } else { self.return_char('\\'); } } self.get_single_quoted_char() - .map(|c| Token::Literal(Literal::Fixnum(Fixnum::build_with(c as i64)))) + .map(|c| NumberToken::Number(Number::Fixnum(Fixnum::build_with(c as i64)))) .or_else(|err| { match err { ParserError::UnexpectedChar('\'', ..) => {} @@ -892,57 +855,13 @@ impl<'a, R: CharRead> Lexer<'a, R> { } self.return_char(c); - - token - .parse::() - .map(|n| { - Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena)) - }) - .or_else(|_| { - token - .parse::() - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| { - ParserError::ParseBigInt(self.line_num, self.col_num) - }) - }) + self.parse_integer(&token).map(NumberToken::Number) }) } else { - token - .parse::() - .map(|n| Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena))) - .or_else(|_| { - token - .parse::() - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| ParserError::ParseBigInt(self.line_num, self.col_num)) - }) + self.parse_integer(&token).map(NumberToken::Number) } } else { - token - .parse::() - .map(|n| Token::Literal(fixnum!(Literal, n, &mut self.machine_st.arena))) - .or_else(|_| { - token - .parse::() - .map(|n| { - Token::Literal(Literal::Integer(arena_alloc!( - n, - &mut self.machine_st.arena - ))) - }) - .map_err(|_| ParserError::ParseBigInt(self.line_num, self.col_num)) - }) + self.parse_integer(&token).map(NumberToken::Number) } } @@ -1001,6 +920,29 @@ impl<'a, R: CharRead> Lexer<'a, R> { } } + pub fn next_number_token(&mut self) -> Result { + self.scan_for_layout()?; + let c = self.lookahead_char()?; + + if !decimal_digit_char!(c) { + return self.name_token(c); + } + + match self.number_token(c) { + Ok(NumberToken::Partial(token_string)) => match self.parse_integer(&token_string) { + Ok(n) => Ok(Token::Literal(n.to_literal())), + Err(_) => { + let n = parse_float_lossy(&token_string)?; + Ok(Token::Literal(Literal::Float( + float_alloc!(n, self.machine_st.arena).as_offset(), + ))) + } + }, + Ok(NumberToken::Number(n)) => return Ok(Token::Literal(n.to_literal())), + Err(e) => return Err(e), + } + } + pub fn next_token(&mut self) -> Result { let layout_inserted = self.scan_for_layout()?; let cr = self.lookahead_char(); @@ -1053,7 +995,10 @@ impl<'a, R: CharRead> Lexer<'a, R> { } if decimal_digit_char!(c) { - return self.number_token(c); + return self.number_token(c).and_then(|nt| match nt.to_token() { + Some(token) => Ok(token), + None => Err(ParserError::unexpected_eof()), + }); } if c == ']' { -- 2.54.0