From 0198fe90b6ba38e7a2cad9cd3a2739740c9086c6 Mon Sep 17 00:00:00 2001 From: panasenco Date: Tue, 27 Apr 2021 22:48:03 -0700 Subject: [PATCH] Added ABNF grammar to library(charsio) as well as octet character type that it depends on. Modified library(json) to use the new ABNF grammar. --- src/lib/charsio.pl | 128 +++++++++++++++++++++++- src/lib/json.pl | 28 ++---- src/machine/system_calls.rs | 3 +- src/tests/json/pass_everything.min.json | 2 +- src/tests/json/test_json.pl | 5 +- 5 files changed, 139 insertions(+), 27 deletions(-) diff --git a/src/lib/charsio.pl b/src/lib/charsio.pl index abd30311..599b892d 100644 --- a/src/lib/charsio.pl +++ b/src/lib/charsio.pl @@ -4,7 +4,23 @@ read_line_to_chars/3, read_term_from_chars/2, write_term_to_chars/3, - chars_base64/3]). + chars_base64/3, + abnf_alpha//1, + abnf_bit//1, + abnf_char//1, + abnf_cr//0, + abnf_crlf//0, + abnf_ctl//1, + abnf_digit//1, + abnf_dquote//0, + abnf_hexdig//1, + abnf_htab//0, + abnf_lf//0, + abnf_lwsp//0, + abnf_octet//1, + abnf_sp//0, + abnf_vchar//1, + abnf_wsp//0 ]). :- use_module(library(dcgs)). :- use_module(library(iso_ext)). @@ -97,6 +113,7 @@ ctype(lower). ctype(meta). ctype(numeric). ctype(octal_digit). +ctype(octet). ctype(prolog). ctype(sign). ctype(solo). @@ -241,3 +258,112 @@ chars_base64(Cs, Bs, Options) :- maplist(must_be(character), Cs), '$chars_base64'(Cs, Bs, Padding, Charset) ). + +/* [Core Rules](https://tools.ietf.org/html/rfc5234#appendix-B.1) of the + Augmented Backus-Naur Form specification (ABNF - RFC 5234). ABNF commonly + serves as the definition language for IETF communication protocols, so + having these DCGs can be extremely useful for reasoning about most IETF + syntaxes. The DCGs are presented in the order they appear in the RFC. + While some DCGs below use `char_type/2`, the most common ones are defined + manually in order to take advantage of Prolog's first-argument indexing. */ +abnf_alpha('a') --> "a". +abnf_alpha('b') --> "b". +abnf_alpha('c') --> "c". 
+abnf_alpha('d') --> "d". +abnf_alpha('e') --> "e". +abnf_alpha('f') --> "f". +abnf_alpha('g') --> "g". +abnf_alpha('h') --> "h". +abnf_alpha('i') --> "i". +abnf_alpha('j') --> "j". +abnf_alpha('k') --> "k". +abnf_alpha('l') --> "l". +abnf_alpha('m') --> "m". +abnf_alpha('n') --> "n". +abnf_alpha('o') --> "o". +abnf_alpha('p') --> "p". +abnf_alpha('q') --> "q". +abnf_alpha('r') --> "r". +abnf_alpha('s') --> "s". +abnf_alpha('t') --> "t". +abnf_alpha('u') --> "u". +abnf_alpha('v') --> "v". +abnf_alpha('w') --> "w". +abnf_alpha('x') --> "x". +abnf_alpha('y') --> "y". +abnf_alpha('z') --> "z". +abnf_alpha('A') --> "A". +abnf_alpha('B') --> "B". +abnf_alpha('C') --> "C". +abnf_alpha('D') --> "D". +abnf_alpha('E') --> "E". +abnf_alpha('F') --> "F". +abnf_alpha('G') --> "G". +abnf_alpha('H') --> "H". +abnf_alpha('I') --> "I". +abnf_alpha('J') --> "J". +abnf_alpha('K') --> "K". +abnf_alpha('L') --> "L". +abnf_alpha('M') --> "M". +abnf_alpha('N') --> "N". +abnf_alpha('O') --> "O". +abnf_alpha('P') --> "P". +abnf_alpha('Q') --> "Q". +abnf_alpha('R') --> "R". +abnf_alpha('S') --> "S". +abnf_alpha('T') --> "T". +abnf_alpha('U') --> "U". +abnf_alpha('V') --> "V". +abnf_alpha('W') --> "W". +abnf_alpha('X') --> "X". +abnf_alpha('Y') --> "Y". +abnf_alpha('Z') --> "Z". + +abnf_bit(0) --> "0". /* abnf_bit(B)//1: B is the integer 0 or 1 for the character "0" or "1". */ +abnf_bit(1) --> "1". + +abnf_char(C) --> [C], { dif(C, '\x0000\'), char_type(C, ascii) }. /* abnf_char(C)//1: one character C that differs from NUL and has char type ascii. */ %' + +abnf_cr --> "\r". /* abnf_cr//0: a single carriage return. */ + +abnf_crlf --> "\r\n". /* abnf_crlf//0: carriage return followed by line feed. */ + +abnf_ctl(C) --> [C], { char_type(C, ascii), char_type(C, control) }. /* abnf_ctl(C)//1: one character C that has both the ascii and the control char types. */ + +abnf_digit(0) --> "0". /* abnf_digit(D)//1: D is the integer value 0..9 of one decimal digit character. */ +abnf_digit(1) --> "1". +abnf_digit(2) --> "2". +abnf_digit(3) --> "3". +abnf_digit(4) --> "4". +abnf_digit(5) --> "5". +abnf_digit(6) --> "6". +abnf_digit(7) --> "7". +abnf_digit(8) --> "8". +abnf_digit(9) --> "9". + +abnf_dquote --> "\"". /* abnf_dquote//0: a single double-quote character. */ + +abnf_hexdig(Digit) --> abnf_digit(Digit). /* abnf_hexdig(V)//1: V is the integer value 0..15; only the upper-case letters A-F are accepted by these clauses. */ +abnf_hexdig(10) --> "A". +abnf_hexdig(11) --> "B". +abnf_hexdig(12) --> "C". +abnf_hexdig(13) --> "D". +abnf_hexdig(14) --> "E". +abnf_hexdig(15) --> "F". 
+ +abnf_htab --> "\t". + +abnf_lf --> "\n". + +abnf_lwsp --> "". /* abnf_lwsp//0: LWSP = *(WSP / CRLF WSP), i.e. zero or more WSP, each optionally preceded by CRLF. */ +abnf_lwsp --> abnf_wsp, abnf_lwsp. +abnf_lwsp --> abnf_crlf, abnf_wsp, abnf_lwsp. + +abnf_octet(C) --> [C], { char_type(C, octet) }. /* abnf_octet(C)//1: one character C with char type octet; char_type/2 is a plain goal, so it is wrapped in {}/1. */ + +abnf_sp --> " ". + +abnf_vchar(C) --> [C], { char_type(C, ascii_graphic) }. /* abnf_vchar(C)//1: one character C with char type ascii_graphic; char_type/2 is a plain goal, so it is wrapped in {}/1. */ + +abnf_wsp --> abnf_sp. /* abnf_wsp//0: a single space or horizontal tab. */ +abnf_wsp --> abnf_htab. diff --git a/src/lib/json.pl b/src/lib/json.pl index 2d7787d8..ff980d7f 100644 --- a/src/lib/json.pl +++ b/src/lib/json.pl @@ -37,6 +37,7 @@ json_chars//1 ]). +:- use_module(library(charsio)). :- use_module(library(dcgs)). :- use_module(library(dif)). :- use_module(library(lists)). @@ -161,19 +162,13 @@ json_character(EscapeChar) --> H4 is (EscapeCharCode // 16^0) mod 16 ) }. -json_hex(Digit) --> json_digit(Digit). +json_hex(Hex) --> abnf_hexdig(Hex). json_hex(10) --> "a". json_hex(11) --> "b". json_hex(12) --> "c". json_hex(13) --> "d". json_hex(14) --> "e". json_hex(15) --> "f". -json_hex(10) --> "A". -json_hex(11) --> "B". -json_hex(12) --> "C". -json_hex(13) --> "D". -json_hex(14) --> "E". -json_hex(15) --> "F". /* I can't think of any alternatives to using `number_chars/2` when generating, though this leads to under-reporting of correct solutions. At least matching solutions unify when both are instantiated... @@ -205,31 +200,20 @@ json_number(Number) --> NumberChars ). -json_integer(Digit) --> json_digit(Digit). +json_integer(Digit) --> abnf_digit(Digit). json_integer(TotalValue) --> json_onenine(FirstDigit), json_digits(RemainingValue, Power), { TotalValue is FirstDigit * 10 ^ (Power + 1) + RemainingValue }. -json_digits(Digit, 0) --> json_digit(Digit). +json_digits(Digit, 0) --> abnf_digit(Digit). json_digits(Value, Power) --> - json_digit(FirstDigit), + abnf_digit(FirstDigit), json_digits(RemainingValue, NextPower), { Power is NextPower + 1, Value is FirstDigit * 10^Power + RemainingValue }. -json_digit(0) --> "0". -json_digit(Digit) --> json_onenine(Digit). - -json_onenine(1) --> "1". -json_onenine(2) --> "2". -json_onenine(3) --> "3". 
-json_onenine(4) --> "4". -json_onenine(5) --> "5". -json_onenine(6) --> "6". -json_onenine(7) --> "7". -json_onenine(8) --> "8". -json_onenine(9) --> "9". +json_onenine(Digit) --> abnf_digit(Digit), { dif(Digit, 0) }. json_fraction(0) --> "". json_fraction(Fraction) --> diff --git a/src/machine/system_calls.rs b/src/machine/system_calls.rs index 428230ca..00aa6d6e 100644 --- a/src/machine/system_calls.rs +++ b/src/machine/system_calls.rs @@ -3,7 +3,7 @@ use prolog_parser::parser::*; use prolog_parser::{ alpha_char, alpha_numeric_char, binary_digit_char, clause_name, decimal_digit_char, exponent_char, graphic_char, graphic_token_char, hexadecimal_digit_char, layout_char, - meta_char, new_line_char, octal_digit_char, prolog_char, sign_char, solo_char, + meta_char, new_line_char, octal_digit_char, octet_char, prolog_char, sign_char, solo_char, symbolic_control_char, symbolic_hexadecimal_char, temp_v, }; @@ -1803,6 +1803,7 @@ impl MachineState { // macro_check!(new_line_char, "new_line"); method_check!(is_numeric, "numeric"); macro_check!(octal_digit_char, "octal_digit"); + macro_check!(octet_char, "octet"); macro_check!(prolog_char, "prolog"); // macro_check!(semicolon_char, "semicolon"); macro_check!(sign_char, "sign"); diff --git a/src/tests/json/pass_everything.min.json b/src/tests/json/pass_everything.min.json index e474c5dc..5d5659ea 100644 --- a/src/tests/json/pass_everything.min.json +++ b/src/tests/json/pass_everything.min.json @@ -1 +1 @@ -["JSON Test Pattern pass1",{"object with 1 member":["array with 1 element"]},{},[],-42,true,false,null,{"integer":1234567890,"real":-9876.54321,"e":0.000000000000123456789,"E":12345678900000000000000000000000000.0,"":23456789012000000000000000000000000000000000000000000000000000000000000000000,"zero":0,"one":1,"space":" ","quote":"\"","backslash":"\\","controls":"\b\f\n\r\t","slash":"\/ & 
\/","alpha":"abcdefghijklmnopqrstuvwyz","ALPHA":"ABCDEFGHIJKLMNOPQRSTUVWYZ","digit":"0123456789","special":"`1~!@#$%^&*()_+-={':[,]}|;.<\/>?","hex":"ģ䕧覫\u0001췯ꯍ\u001a","true":true,"false":false,"null":null,"array":[],"object":{},"address":"50 St. James Street","url":"http:\/\/www.JSON.org\/","comment":"\/\/ \/* *\/":" "," s p a c e d ":[1,2,3,4,5,6,7],"compact":[1,2,3,4,5,6,7],"jsontext":"{\"object with 1 member\":[\"array with 1 element\"]}","quotes":"" \" %22 0x22 034 "","\/\\\"쫾몾ꮘﳞ볚\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',.\/<>?":"A key can be any string"},0.5,98.6,99.44,1066,"rosebud"] \ No newline at end of file +["JSON Test Pattern pass1",{"object with 1 member":["array with 1 element"]},{},[],-42,true,false,null,{"integer":1234567890,"real":-9876.54321,"e":0.000000000000123456789,"E":12345678900000000000000000000000000.0,"":23456789012000000000000000000000000000000000000000000000000000000000000000000,"zero":0,"one":1,"space":" ","quote":"\"","backslash":"\\","controls":"\b\f\n\r\t","slash":"\/ & \/","alpha":"abcdefghijklmnopqrstuvwyz","ALPHA":"ABCDEFGHIJKLMNOPQRSTUVWYZ","digit":"0123456789","special":"`1~!@#$%^&*()_+-={':[,]}|;.<\/>?","hex":"ģ䕧覫\u0001췯ꯍ\u001A","true":true,"false":false,"null":null,"array":[],"object":{},"address":"50 St. 
James Street","url":"http:\/\/www.JSON.org\/","comment":"\/\/ \/* *\/":" "," s p a c e d ":[1,2,3,4,5,6,7],"compact":[1,2,3,4,5,6,7],"jsontext":"{\"object with 1 member\":[\"array with 1 element\"]}","quotes":"" \" %22 0x22 034 "","\/\\\"쫾몾ꮘﳞ볚\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',.\/<>?":"A key can be any string"},0.5,98.6,99.44,1066,"rosebud"] diff --git a/src/tests/json/test_json.pl b/src/tests/json/test_json.pl index e41688ca..441e7cd5 100644 --- a/src/tests/json/test_json.pl +++ b/src/tests/json/test_json.pl @@ -36,7 +36,7 @@ minify_sample_json :- test_path("pass_everything.min.json", MinPath), setup_call_cleanup( open(MinPath, write, Stream), - format(Stream, "~s", [MinChars]), + format(Stream, "~s~n", [MinChars]), close(Stream) ). @@ -45,7 +45,8 @@ test_json_minify :- once(phrase_from_file(seq(RefChars), MinPath)), name_parse("pass_everything.json", Json), time(once(phrase(json_chars(Json), MinChars))), - RefChars = MinChars. + append(MinChars, "\n", MinFileChars), + RefChars = MinFileChars. test_json_int_float :- once(phrase(json_chars(number(ZeroInt)), "0")), -- 2.54.0