From c87e3eac080a526964597de8e2cda96d899f450a Mon Sep 17 00:00:00 2001 From: panasenco Date: Tue, 20 Apr 2021 16:58:33 -0700 Subject: [PATCH] Removed CLP(Z) from library(json), achieving a 100x speedup in JSON parsing. --- Dockerfile | 1 - src/lib/json.pl | 30 ++++++++++++++---------------- src/tests/json/README.md | 13 +++++++++++++ src/tests/json/test_json.pl | 27 +++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 17 deletions(-) create mode 100644 src/tests/json/README.md create mode 100644 src/tests/json/test_json.pl diff --git a/Dockerfile b/Dockerfile index a2469ebc..87cf1aee 100755 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,6 @@ COPY --from=cacher $CARGO_HOME $CARGO_HOME RUN cargo build --release --bin scryer-prolog FROM debian:stable-slim -WORKDIR scryer-prolog COPY --from=builder /scryer-prolog/target/release/scryer-prolog /usr/local/bin ENV RUST_BACKTRACE=1 ENTRYPOINT ["/usr/local/bin/scryer-prolog"] diff --git a/src/lib/json.pl b/src/lib/json.pl index a134d30a..218152e2 100644 --- a/src/lib/json.pl +++ b/src/lib/json.pl @@ -40,7 +40,6 @@ :- use_module(library(assoc)). :- use_module(library(between)). :- use_module(library(charsio)). -:- use_module(library(clpz)). :- use_module(library(dcgs)). :- use_module(library(dif)). :- use_module(library(error)). @@ -164,7 +163,7 @@ json_character(PrintChar) --> { dif(PrintChar, '"'), dif(PrintChar, '\\'), char_code(PrintChar, PrintCharCode), - PrintCharCode in 32..1114111 /* 20.10FFFF */ }. + PrintCharCode >= 32 /* 20.10FFFF */ }. json_character(EscapeChar) --> "\\", json_escape(EscapeChar). json_escape(EscapeChar) --> @@ -173,29 +172,28 @@ json_escape(EscapeChar) --> member(EscapeChar-PrintChar, EscapeMap) }. json_escape(EscapeChar) --> "u", - /* Logic: Define the domain of the escape character as well as the relationship between the escape character - and the four hexes */ - { [H1, H2, H3, H4] ins 0..15, - EscapeCharCode in 0..65535, - EscapeCharCode #= H1 * 16^3 + H2 * 16^2 + H3 * 16 + H4, - /* Control: Get the code of the escape character if we can. Otherwise we'll end up backtracking over 65,536 + { /* Control: Get the code of the escape character if we can. Otherwise we'll end up backtracking over 65,536 possible hex values. Logic: Only the first 32 Unicode characters not escaped in the escape map are eligible for \u-escaping when generating. However, we want to be able to parse any of the 65,536 \u-escaped values when parsing. */ ( nonvar(EscapeChar) -> char_code(EscapeChar, EscapeCharCode), - EscapeCharCode in 0..31, + EscapeCharCode < 32, escape_map(EscapeMap), - \+ member(EscapeChar-_, EscapeMap) + \+ member(EscapeChar-_, EscapeMap), + H1 = 0, + H2 = 0, + H3 is EscapeCharCode // 16, + H4 is EscapeCharCode mod 16 ; true - ) - }, + ) }, json_hex(H1), json_hex(H2), json_hex(H3), json_hex(H4), /* Control + Logic: Get the escape character atom from the character code computed from the hexes. */ { ( var(EscapeChar) -> + EscapeCharCode is H1 * 16^3 + H2 * 16^2 + H3 * 16 + H4, char_code(EscapeChar, EscapeCharCode) ; true ) }. @@ -234,14 +232,14 @@ json_integer(Digit) --> json_digit(Digit). json_integer(TotalValue) --> json_onenine(FirstDigit), json_digits(RemainingValue, Power), - { TotalValue #= FirstDigit * 10 ^ (Power + 1) + RemainingValue }. + { TotalValue is FirstDigit * 10 ^ (Power + 1) + RemainingValue }. json_digits(Digit, 0) --> json_digit(Digit). json_digits(Value, Power) --> json_digit(FirstDigit), json_digits(RemainingValue, NextPower), - { Power #= NextPower + 1, - Value #= FirstDigit * 10^Power + RemainingValue }. + { Power is NextPower + 1, + Value is FirstDigit * 10^Power + RemainingValue }. json_digit(0) --> "0". json_digit(Digit) --> json_onenine(Digit). @@ -267,7 +265,7 @@ json_exponent(Exponent) --> json_exponent_signifier, json_sign(Sign), json_digits(Value, _), - { Exponent #= Sign * Value }. + { Exponent is Sign * Value }. json_exponent_signifier --> "E". json_exponent_signifier --> "e". diff --git a/src/tests/json/README.md b/src/tests/json/README.md new file mode 100644 index 00000000..a8347528 --- /dev/null +++ b/src/tests/json/README.md @@ -0,0 +1,13 @@ +## Benchmarks + +### With CLP(Z): +``` +?- test_json_read. + % CPU time: 41.522 seconds +``` + +### After removing CLP(Z): +``` +?- test_json_read. + % CPU time: 0.444 seconds +``` diff --git a/src/tests/json/test_json.pl b/src/tests/json/test_json.pl new file mode 100644 index 00000000..ff196911 --- /dev/null +++ b/src/tests/json/test_json.pl @@ -0,0 +1,27 @@ +:- module(test_json, [test_json_read/0]). + +:- use_module(library(json)). +:- use_module(library(lists)). +:- use_module(library(os)). +:- use_module(library(pio)). +:- use_module(library(time)). + +test_path(TestName, TestPath) :- + getenv("SCRYER_JSON_TESTS_PATH", JsonPath), + append(JsonPath, TestName, TestPathChars), + atom_chars(TestPath, TestPathChars). + +name_parse(Name, Json) :- + test_path(Name, Path), + once(phrase_from_file(json_chars(Json), Path)). + +test_json_read :- + name_parse("pass_null.json", _), + name_parse("pass_alnum.json", _), + name_parse("pass_special.json", _), + name_parse("pass_mandatory_escapes.json", _), + name_parse("pass_forward_slash.json", _), + name_parse("pass_hex.json", _), + name_parse("pass_smallfloat.json", _), + name_parse("pass_bigfloat.json", _), + time(name_parse("pass_everything.json", _)). -- 2.54.0