]> Repositorios git - scryer-prolog.git/commitdiff
add parser tests
authorMark Thom <[email protected]>
Fri, 7 Jan 2022 04:29:15 +0000 (21:29 -0700)
committerMark Thom <[email protected]>
Fri, 7 Jan 2022 05:09:38 +0000 (22:09 -0700)
src/tests/bom.rs [new file with mode: 0644]
src/tests/parse_tokens.rs [new file with mode: 0644]

diff --git a/src/tests/bom.rs b/src/tests/bom.rs
new file mode 100644 (file)
index 0000000..9a394d6
--- /dev/null
@@ -0,0 +1,37 @@
+use crate::atom_table::*;
+use crate::parser::ast::*;
+use crate::parser::lexer::{Lexer, Token};
+
+#[test]
+fn valid_token() {
+    let stream = parsing_stream("valid text".as_bytes());
+    assert!(stream.is_ok());
+}
+
+#[test]
+fn empty_stream() {
+    let bytes: &[u8] = &[];
+    assert!(parsing_stream(bytes).is_ok());
+}
+
+#[test]
+fn skip_utf8_bom() {
+    let mut machine_st = MachineState::new();
+    let bytes: &[u8] = &[0xEF, 0xBB, 0xBF, '4' as u8, '\n' as u8];
+    let stream = parsing_stream(bytes).expect("valid stream");
+    let mut lexer = Lexer::new(stream, &mut machine_st);
+    match lexer.next_token() {
+        Ok(Token::Literal(Literal::Fixnum(Fixnum::build_with(4)))) => (),
+        _ => assert!(false),
+    }
+}
+
+#[test]
+fn invalid_utf16_bom() {
+    let bytes: &[u8] = &[0xFF, 0xFE, 'a' as u8, '\n' as u8];
+    let stream = parsing_stream(bytes);
+    match stream {
+        Err(ParserError::Utf8Error(0, 0)) => (),
+        _ => assert!(false),
+    }
+}
diff --git a/src/tests/parse_tokens.rs b/src/tests/parse_tokens.rs
new file mode 100644 (file)
index 0000000..a4763b4
--- /dev/null
@@ -0,0 +1,109 @@
+use crate::atom_table::*;
+use crate::parser::ast::*;
+use crate::parser::lexer::{Lexer, Token};
+
+fn read_all_tokens(text: &str) -> Result<Vec<Token>, ParserError> {
+    let mut machine_st = MachineState::new();
+    let stream = parsing_stream(text.as_bytes())?;
+    let mut lexer = Lexer::new(stream, &mut machine_st);
+
+    let mut tokens = Vec::new();
+    while !lexer.eof()? {
+        let token = lexer.next_token()?;
+        tokens.push(token);
+    }
+    Ok(tokens)
+}
+
+#[test]
+fn empty_multiline_comment() -> Result<(), ParserError> {
+    let tokens = read_all_tokens("/**/ 4\n")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Fixnum(Fixnum::build_with(4)))]);
+    Ok(())
+}
+
+#[test]
+fn any_char_multiline_comment() -> Result<(), ParserError> {
+    let tokens = read_all_tokens("/* █╗╚═══╝ © */ 4\n")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Fixnum(4))]);
+    Ok(())
+}
+
+#[test]
+fn simple_char() -> Result<(), ParserError> {
+    let tokens = read_all_tokens("'a'\n")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Char('a'))]);
+    Ok(())
+}
+
+#[test]
+fn char_with_meta_seq() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r#"'\\' '\'' '\"' '\`' "#)?; // use literal string so \ are escaped
+    assert_eq!(
+        tokens,
+        [
+            Token::Literal(Literal::Char('\\')),
+            Token::Literal(Literal::Char('\'')),
+            Token::Literal(Literal::Char('"')),
+            Token::Literal(Literal::Char('`'))
+        ]
+    );
+    Ok(())
+}
+
+#[test]
+fn char_with_control_seq() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r"'\a' '\b' '\r' '\f' '\t' '\n' '\v' ")?;
+    assert_eq!(
+        tokens,
+        [
+            Token::Literal(Literal::Char('\u{07}')),
+            Token::Literal(Literal::Char('\u{08}')),
+            Token::Literal(Literal::Char('\r')),
+            Token::Literal(Literal::Char('\u{0c}')),
+            Token::Literal(Literal::Char('\t')),
+            Token::Literal(Literal::Char('\n')),
+            Token::Literal(Literal::Char('\u{0b}')),
+        ]
+    );
+    Ok(())
+}
+
+#[test]
+fn char_with_octseq() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r"'\60433\' ")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Char('愛'))]); // Japanese character
+    Ok(())
+}
+
+#[test]
+fn char_with_octseq_0() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r"'\0\' ")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Char('\u{0000}'))]);
+    Ok(())
+}
+
+#[test]
+fn char_with_hexseq() -> Result<(), ParserError> {
+    let tokens = read_all_tokens(r"'\x2124\' ")?;
+    assert_eq!(tokens, [Token::Literal(Literal::Char('ℤ'))]); // Z math symbol
+    Ok(())
+}
+
+#[test]
+fn char_with_hexseq_invalid() {
+    assert!(read_all_tokens(r"'\x\' ").is_err());
+}
+
+#[test]
+fn empty() -> Result<(), ParserError> {
+    let tokens = read_all_tokens("")?;
+    assert!(tokens.is_empty());
+    Ok(())
+}
+
+#[test]
+fn comment_then_eof() -> Result<(), ParserError> {
+    assert!(read_all_tokens("% only a comment").is_err());
+    Ok(())
+}