From cf1e315d72c224fc64d80e38e5493bcc1b846496 Mon Sep 17 00:00:00 2001
From: Mark Thom
Date: Thu, 6 Jan 2022 21:29:15 -0700
Subject: [PATCH] add parser tests

---
 src/tests/bom.rs          |  43 +++++++++++++
 src/tests/parse_tokens.rs | 124 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+)
 create mode 100644 src/tests/bom.rs
 create mode 100644 src/tests/parse_tokens.rs

diff --git a/src/tests/bom.rs b/src/tests/bom.rs
new file mode 100644
index 00000000..9a394d63
--- /dev/null
+++ b/src/tests/bom.rs
@@ -0,0 +1,43 @@
+use crate::atom_table::*;
+use crate::parser::ast::*;
+use crate::parser::lexer::{Lexer, Token};
+
+// Checks that plain text without a byte-order mark yields a stream.
+#[test]
+fn valid_token() {
+    let stream = parsing_stream("valid text".as_bytes());
+    assert!(stream.is_ok());
+}
+
+// Checks that an empty byte slice still yields a stream.
+#[test]
+fn empty_stream() {
+    let bytes: &[u8] = &[];
+    assert!(parsing_stream(bytes).is_ok());
+}
+
+// Checks that a leading UTF-8 BOM (EF BB BF) is skipped, so the first
+// token is the integer that follows it.
+#[test]
+fn skip_utf8_bom() {
+    let mut machine_st = MachineState::new();
+    let bytes: &[u8] = &[0xEF, 0xBB, 0xBF, b'4', b'\n'];
+    let stream = parsing_stream(bytes).expect("valid stream");
+    let mut lexer = Lexer::new(stream, &mut machine_st);
+    // Compare by value: `Fixnum::build_with(4)` is a function call and
+    // cannot appear inside a `match` pattern.
+    let token = match lexer.next_token() {
+        Ok(token) => token,
+        Err(_) => panic!("expected a token after the UTF-8 BOM"),
+    };
+    assert_eq!(token, Token::Literal(Literal::Fixnum(Fixnum::build_with(4))));
+}
+
+// Checks that input starting with FF FE (the UTF-16 little-endian BOM) is
+// rejected with `ParserError::Utf8Error(0, 0)`.
+#[test]
+fn invalid_utf16_bom() {
+    let bytes: &[u8] = &[0xFF, 0xFE, b'a', b'\n'];
+    let stream = parsing_stream(bytes);
+    assert!(matches!(stream, Err(ParserError::Utf8Error(0, 0))));
+}
diff --git a/src/tests/parse_tokens.rs b/src/tests/parse_tokens.rs
new file mode 100644
index 00000000..a4763b4d
--- /dev/null
+++ b/src/tests/parse_tokens.rs
@@ -0,0 +1,124 @@
+use crate::atom_table::*;
+use crate::parser::ast::*;
+use crate::parser::lexer::{Lexer, Token};
+
+// Lexes all of `text` and returns the tokens in order. Any error from
+// `parsing_stream`, `Lexer::eof` or `Lexer::next_token` is propagated.
+fn read_all_tokens(text: &str) -> Result<Vec<Token>, ParserError> {
+    let mut machine_st = MachineState::new();
+    let stream = parsing_stream(text.as_bytes())?;
+    let mut lexer = Lexer::new(stream, &mut machine_st);
+
+    let mut tokens = Vec::new();
+    while !lexer.eof()? {
+        tokens.push(lexer.next_token()?);
+    }
+    Ok(tokens)
+}
+
+// Checks that an empty block comment is skipped before the integer.
+#[test]
+fn empty_multiline_comment() -> Result<(), ParserError> {
+    let tokens = read_all_tokens("/**/ 4\n")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Fixnum(Fixnum::build_with(4)))]);
+    Ok(())
+}
+
+// Checks that a block comment may contain non-ASCII characters.
+#[test]
+fn any_char_multiline_comment() -> Result<(), ParserError> {
+    let tokens = read_all_tokens("/* █╗╚═══╝ © */ 4\n")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Fixnum(Fixnum::build_with(4)))]);
+    Ok(())
+}
+
+// Checks that a quoted single character lexes to a `Literal::Char`.
+#[test]
+fn simple_char() -> Result<(), ParserError> {
+    let tokens = read_all_tokens("'a'\n")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Char('a'))]);
+    Ok(())
+}
+
+// Checks the escapes for backslash, single quote, double quote and backquote.
+#[test]
+fn char_with_meta_seq() -> Result<(), ParserError> {
+    // Raw string literal: the backslashes reach the lexer unmodified.
+    let tokens = read_all_tokens(r#"'\\' '\'' '\"' '\`' "#)?;
+    assert_eq!(
+        tokens,
+        [
+            Token::Literal(Literal::Char('\\')),
+            Token::Literal(Literal::Char('\'')),
+            Token::Literal(Literal::Char('"')),
+            Token::Literal(Literal::Char('`'))
+        ]
+    );
+    Ok(())
+}
+
+// Checks the control escapes. `\a`, `\b`, `\f` and `\v` have no Rust char
+// escape, so the expected values are spelled as `\u{..}`.
+#[test]
+fn char_with_control_seq() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r"'\a' '\b' '\r' '\f' '\t' '\n' '\v' ")?;
+    assert_eq!(
+        tokens,
+        [
+            Token::Literal(Literal::Char('\u{07}')),
+            Token::Literal(Literal::Char('\u{08}')),
+            Token::Literal(Literal::Char('\r')),
+            Token::Literal(Literal::Char('\u{0c}')),
+            Token::Literal(Literal::Char('\t')),
+            Token::Literal(Literal::Char('\n')),
+            Token::Literal(Literal::Char('\u{0b}')),
+        ]
+    );
+    Ok(())
+}
+
+// Checks an octal escape: 0o60433 is U+611B.
+#[test]
+fn char_with_octseq() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r"'\60433\' ")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Char('愛'))]); // U+611B, CJK ideograph
+    Ok(())
+}
+
+// Checks that the octal escape `\0\` produces the NUL character.
+#[test]
+fn char_with_octseq_0() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r"'\0\' ")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Char('\u{0000}'))]);
+    Ok(())
+}
+
+// Checks a hexadecimal escape: 0x2124 is the double-struck capital Z.
+#[test]
+fn char_with_hexseq() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r"'\x2124\' ")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Char('ℤ'))]); // U+2124
+    Ok(())
+}
+
+// Checks that a hexadecimal escape with no digits is rejected.
+#[test]
+fn char_with_hexseq_invalid() {
+    assert!(read_all_tokens(r"'\x\' ").is_err());
+}
+
+// Checks that empty input yields no tokens.
+#[test]
+fn empty() -> Result<(), ParserError> {
+    let tokens = read_all_tokens("")?;
+    assert!(tokens.is_empty());
+    Ok(())
+}
+
+// Expects an error for input that consists solely of a line comment with
+// no trailing newline.
+#[test]
+fn comment_then_eof() -> Result<(), ParserError> {
+    assert!(read_all_tokens("% only a comment").is_err());
+    Ok(())
+}
-- 
2.54.0