From cf5960afad00e17c6a3fc2c5352706547f7b9e3e Mon Sep 17 00:00:00 2001 From: Caden Haustein Date: Sun, 31 Jan 2021 11:40:20 -0600 Subject: [PATCH] Add prolog_parser to scryer_prolog --- Cargo.lock | 540 +++++++-------- Cargo.toml | 5 +- prolog_parser/Cargo.toml | 21 + prolog_parser/src/ast.rs | 876 +++++++++++++++++++++++++ prolog_parser/src/lexer.rs | 898 +++++++++++++++++++++++++ prolog_parser/src/lib.rs | 15 + prolog_parser/src/macros.rs | 187 ++++++ prolog_parser/src/parser.rs | 983 ++++++++++++++++++++++++++++ prolog_parser/src/put_back_n.rs | 71 ++ prolog_parser/src/tabled_rc.rs | 153 +++++ prolog_parser/tests/bom.rs | 43 ++ prolog_parser/tests/parse_tokens.rs | 107 +++ 12 files changed, 3585 insertions(+), 314 deletions(-) create mode 100644 prolog_parser/Cargo.toml create mode 100644 prolog_parser/src/ast.rs create mode 100644 prolog_parser/src/lexer.rs create mode 100644 prolog_parser/src/lib.rs create mode 100644 prolog_parser/src/macros.rs create mode 100644 prolog_parser/src/parser.rs create mode 100644 prolog_parser/src/put_back_n.rs create mode 100644 prolog_parser/src/tabled_rc.rs create mode 100644 prolog_parser/tests/bom.rs create mode 100644 prolog_parser/tests/parse_tokens.rs diff --git a/Cargo.lock b/Cargo.lock index 0b2ca779..21afb6e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,17 +1,5 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-[[package]] -name = "arc-swap" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b585a98a234c46fc563103e9278c9391fde1f4e6850334da895d27edb9580f62" - -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - [[package]] name = "arrayvec" version = "0.4.12" @@ -21,12 +9,6 @@ dependencies = [ "nodrop", ] -[[package]] -name = "arrayvec" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" - [[package]] name = "autocfg" version = "0.1.7" @@ -35,21 +17,15 @@ checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" [[package]] name = "autocfg" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "az" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41a6b78289a33e09b00818ca8c90ab17c5dabb6e74f4b29a6de679c0e0886ade" - -[[package]] -name = "base64" -version = "0.11.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" +checksum = "e9bcd47d94aa4eb8c076b50fc61a75020789394ffb9bd74a180b3379130f6569" [[package]] name = "base64" @@ -68,9 +44,9 @@ dependencies = [ [[package]] name = "bit-vec" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0dc55f2d8a1a85650ac47858bb001b4c0dd73d79e3c455a842925e68d29cd3" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" @@ -90,17 
+66,6 @@ dependencies = [ "opaque-debug", ] -[[package]] -name = "blake2b_simd" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a" -dependencies = [ - "arrayref", - "arrayvec 0.5.1", - "constant_time_eq", -] - [[package]] name = "block-buffer" version = "0.7.3" @@ -124,9 +89,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.2.1" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ae9db68ad7fac5fe51304d20f016c911539251075a214f8e663babefa35187" +checksum = "099e596ef14349721d9016f6b80dd3419ea1bf289ab9b44df8e4dfd3a005d5d9" [[package]] name = "byte-tools" @@ -136,15 +101,15 @@ checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" [[package]] name = "byteorder" -version = "1.3.4" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" [[package]] name = "cc" -version = "1.0.52" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d87b23d6a92cd03af510a5ade527033f6aa6fa92161e2d5863a907d4c5e31d" +checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" [[package]] name = "cfg-if" @@ -160,13 +125,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.11" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" dependencies = [ + "libc", "num-integer", - "num-traits 0.2.11", + "num-traits 0.2.14", "time", + "winapi 0.3.9", ] [[package]] @@ -178,17 +145,11 @@ dependencies = [ 
"bitflags", ] -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - [[package]] name = "core-foundation" -version = "0.7.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d24c7a13c43e870e37c1556b74555437870a04514f7685f5b354e090567171" +checksum = "0a89e2ae426ea83155dccf10c0fa6b1463ef6d5fcb44cee0b224a408fa640a62" dependencies = [ "core-foundation-sys", "libc", @@ -196,9 +157,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.7.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3a71ab494c0b5b860bdc8407ae08978052417070c2ced38573a9157ad75b8ac" +checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" [[package]] name = "cpu-time" @@ -207,18 +168,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded" dependencies = [ "libc", - "winapi 0.3.8", -] - -[[package]] -name = "crossbeam-utils" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" -dependencies = [ - "autocfg 1.0.0", - "cfg-if 0.1.10", - "lazy_static", + "winapi 0.3.9", ] [[package]] @@ -234,16 +184,16 @@ dependencies = [ "mio", "parking_lot", "signal-hook", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] name = "crossterm_winapi" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "057b7146d02fb50175fd7dbe5158f6097f33d02831f43b4ee8ae4ddf67b68f5c" +checksum = "c2265c3f8e080075d9b6417aa72293fc71662f34b4af2612d8d1b074d29510db" dependencies = [ - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -265,15 +215,6 @@ dependencies = [ "generic-array", ] 
-[[package]] -name = "dirs" -version = "3.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "142995ed02755914747cc6ca76fc7e4583cd18578746716d0508ea6ed558b9ff" -dependencies = [ - "dirs-sys", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -284,26 +225,15 @@ dependencies = [ "dirs-sys-next", ] -[[package]] -name = "dirs-sys" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e93d7f5705de3e49895a2b5e0b8855a1c27f080192ae9c32a6432d50741a57a" -dependencies = [ - "libc", - "redox_users", - "winapi 0.3.8", -] - [[package]] name = "dirs-sys-next" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99de365f605554ae33f115102a02057d4fc18b01f3284d6870be0938743cfe7d" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", "redox_users", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -340,7 +270,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" dependencies = [ "libc", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -386,11 +316,11 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.1.14" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" +checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "libc", "wasi", ] @@ -412,21 +342,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34a97a52fdee1870a34fa6e4b77570cba531b27d1838874fef4429a791a3d657" dependencies = [ "proc-macro-hack", - "proc-macro2 1.0.10", - "quote 1.0.3", - "syn 1.0.18", + "proc-macro2 1.0.24", + "quote 1.0.8", + "syn 1.0.60", ] [[package]] name = "gmp-mpfr-sys" -version = "1.2.2" 
+version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63d7f805cf9df081683d463f62864bda8b8e3ce7162a8e11cd0c49f27b8ce89b" +checksum = "a57fdb339d49833021b1fded600ed240ae907e33909d5511a61dff884df7f16e" dependencies = [ "libc", - "winapi 0.3.8", + "winapi 0.3.9", ] +[[package]] +name = "hashbrown" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" + [[package]] name = "hostname" version = "0.3.1" @@ -435,7 +371,7 @@ checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" dependencies = [ "libc", "match_cfg", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -454,11 +390,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.3.2" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "076f042c5b7b98f31d205f1249267e12a6518c1481e9dae9764af19b707d2292" +checksum = "4fb1fa934250de4de8aef298d81c729a7d33d8c239daa3a7575e6b92bfc7313b" dependencies = [ - "autocfg 1.0.0", + "autocfg 1.0.1", + "hashbrown", ] [[package]] @@ -472,15 +409,15 @@ dependencies = [ [[package]] name = "itoa" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "js-sys" -version = "0.3.39" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa5a448de267e7358beaf4a5d849518fe9a0c13fce7afd44b06e68550e5562a7" +checksum = "5cfb73131c35423a367daf8cbd24100af0d077668c8c2943f0e7dd775fef0f65" dependencies = [ "wasm-bindgen", ] @@ -524,7 +461,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304bccb228c4b020f3a4835d247df0a02a7c4686098d4167762cfbbe4c5cb14" dependencies = 
[ - "arrayvec 0.4.12", + "arrayvec", "cfg-if 0.1.10", "rustc_version", "ryu", @@ -533,9 +470,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.81" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" +checksum = "1cca32fa0182e8c0989459524dc356b8f2b5c10f1b9eb521b7d182c03cf8c5ff" [[package]] name = "libsodium-sys" @@ -559,11 +496,11 @@ dependencies = [ [[package]] name = "log" -version = "0.4.8" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", ] [[package]] @@ -595,23 +532,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - [[package]] name = "memchr" -version = "2.3.3" +version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" [[package]] name = "mio" -version = "0.6.21" +version = "0.6.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302dec22bcf6bae6dfb69c647187f4b4d0fb6f535521f7bc022430ce8e12008f" +checksum = "4afd66f5b91bf2a3bc13fad0e21caedac168ca4c707504e75585648ae80e4cc4" dependencies = [ "cfg-if 0.1.10", "fuchsia-zircon", @@ -628,9 +559,9 @@ dependencies = [ [[package]] name = "miow" -version = "0.2.1" +version = "0.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1f2f3b1cf331de6896aabf6e9d55dca90356cc9960cca7eaaf408a355ae919" +checksum = "ebd808424166322d4a38da87083bfddd3ac4c131334ed55856112eb06d46944d" dependencies = [ "kernel32-sys", "net2", @@ -640,9 +571,9 @@ dependencies = [ [[package]] name = "native-tls" -version = "0.2.4" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d88c06fe90d5ee94048ba40409ef1d9315d86f6f38c2efdaad4fb50c58b2d" +checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" dependencies = [ "lazy_static", "libc", @@ -658,13 +589,13 @@ dependencies = [ [[package]] name = "net2" -version = "0.2.33" +version = "0.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42550d9fb7b6684a6d404d9fa7250c2eb2646df731d1c06afc06dcee9e1bcf88" +checksum = "391630d12b68002ae1e25e8f974306474966550ad82dac6886fb8910c19568ae" dependencies = [ "cfg-if 0.1.10", "libc", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -710,19 +641,19 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "090c7f9998ee0ff65aa5b723e4009f7b217707f1fb5ea551329cc4d6231fb304" dependencies = [ - "autocfg 1.0.0", + "autocfg 1.0.1", "num-integer", - "num-traits 0.2.11", + "num-traits 0.2.14", ] [[package]] name = "num-integer" -version = "0.1.42" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f6ea62e9d81a77cd3ee9a2a5b9b609447857f3d358704331e4ef39eb247fcba" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" dependencies = [ - "autocfg 1.0.0", - "num-traits 0.2.11", + "autocfg 1.0.1", + "num-traits 0.2.14", ] [[package]] @@ -731,10 +662,10 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c000134b5dbf44adc5cb772486d335293351644b801551abe8f75c84cfa4aef" dependencies = [ - "autocfg 1.0.0", + "autocfg 1.0.1", 
"num-bigint", "num-integer", - "num-traits 0.2.11", + "num-traits 0.2.14", ] [[package]] @@ -747,7 +678,7 @@ dependencies = [ "num-bigint", "num-integer", "num-rational", - "num-traits 0.2.11", + "num-traits 0.2.14", ] [[package]] @@ -756,23 +687,23 @@ version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31" dependencies = [ - "num-traits 0.2.11", + "num-traits 0.2.14", ] [[package]] name = "num-traits" -version = "0.2.11" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" dependencies = [ - "autocfg 1.0.0", + "autocfg 1.0.1", ] [[package]] name = "once_cell" -version = "1.3.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c601810575c99596d4afc46f78a678c80105117c379eb3650cf99b8a21ce5b" +checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" [[package]] name = "opaque-debug" @@ -782,12 +713,12 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" [[package]] name = "openssl" -version = "0.10.29" +version = "0.10.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee6d85f4cb4c4f59a6a85d5b68a233d280c82e29e822913b9c8b129fbf20bdd" +checksum = "038d43985d1ddca7a9900630d8cd031b56e4794eecc2e9ea39dd17aa04399a70" dependencies = [ "bitflags", - "cfg-if 0.1.10", + "cfg-if 1.0.0", "foreign-types", "lazy_static", "libc", @@ -802,20 +733,20 @@ checksum = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" [[package]] name = "openssl-src" -version = "111.9.0+1.1.1g" +version = "111.13.0+1.1.1i" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2dbe10ddd1eb335aba3780eb2eaa13e1b7b441d2562fd962398740927f39ec4" 
+checksum = "045e4dc48af57aad93d665885789b43222ae26f4886494da12d1ed58d309dcb6" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.58" +version = "0.9.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a842db4709b604f0fe5d1170ae3565899be2ad3d9cbc72dedc789ac0511f78de" +checksum = "921fc71883267538946025deffb622905ecad223c28efbfdef9bb59a0175f3e6" dependencies = [ - "autocfg 1.0.0", + "autocfg 1.0.1", "cc", "libc", "openssl-src", @@ -852,9 +783,9 @@ dependencies = [ "cfg-if 0.1.10", "cloudabi", "libc", - "redox_syscall", - "smallvec 1.4.0", - "winapi 0.3.8", + "redox_syscall 0.1.57", + "smallvec", + "winapi 0.3.9", ] [[package]] @@ -897,15 +828,15 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.17" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677" +checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" [[package]] name = "ppv-lite86" -version = "0.2.8" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" +checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "precomputed-hash" @@ -915,9 +846,9 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro-hack" -version = "0.5.15" +version = "0.5.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d659fe7c6d27f25e9d80a1a094c223f5246f6a6596453e09d7229bf42750b63" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" @@ -930,18 +861,16 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.10" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"df246d292ff63439fea9bc8c0a270bed0e390d5ebd4db4ba15aba81111b5abe3" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" dependencies = [ - "unicode-xid 0.2.0", + "unicode-xid 0.2.1", ] [[package]] name = "prolog_parser" version = "0.8.68" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520bf98dcd386ef320ef11239415c9a11856d3b28fab0d8dc0b61b0d7e65ffe5" dependencies = [ "lexical", "num-rug-adapter", @@ -961,11 +890,11 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.3" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" dependencies = [ - "proc-macro2 1.0.10", + "proc-macro2 1.0.24", ] [[package]] @@ -984,20 +913,19 @@ dependencies = [ "rand_os", "rand_pcg", "rand_xorshift", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] name = "rand" -version = "0.7.3" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" dependencies = [ - "getrandom", "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc 0.2.0", + "rand_chacha 0.3.0", + "rand_core 0.6.1", + "rand_hc 0.3.0", ] [[package]] @@ -1012,12 +940,12 @@ dependencies = [ [[package]] name = "rand_chacha" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" dependencies = [ "ppv-lite86", - "rand_core 0.5.1", + "rand_core 0.6.1", ] [[package]] @@ -1037,9 +965,9 @@ checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" [[package]] name = "rand_core" 
-version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +checksum = "c026d7df8b298d90ccbbc5190bd04d85e159eaf5576caeacf8741da93ccbd2e5" dependencies = [ "getrandom", ] @@ -1055,11 +983,11 @@ dependencies = [ [[package]] name = "rand_hc" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" dependencies = [ - "rand_core 0.5.1", + "rand_core 0.6.1", ] [[package]] @@ -1079,7 +1007,7 @@ checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" dependencies = [ "libc", "rand_core 0.4.2", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -1093,7 +1021,7 @@ dependencies = [ "libc", "rand_core 0.4.2", "rdrand", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -1126,19 +1054,27 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.1.56" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + +[[package]] +name = "redox_syscall" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" +checksum = "05ec8ca9416c5ea37062b502703cd7fcb207736bc294f6e0cf367ac6fc234570" +dependencies = [ + "bitflags", +] [[package]] name = "redox_users" -version = "0.3.4" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b23093265f8d200fa7b4c2c76297f47e681c655f6f1285a8780d6a022f7431" +checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ "getrandom", - "redox_syscall", - "rust-argon2", + "redox_syscall 0.2.4", ] [[package]] @@ 
-1149,18 +1085,18 @@ checksum = "d813022b2e00774a48eaf43caaa3c20b45f040ba8cbf398e2e8911a06668dbe6" [[package]] name = "remove_dir_all" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" dependencies = [ - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] name = "ring" -version = "0.16.13" +version = "0.16.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "703516ae74571f24b465b4a1431e81e2ad51336cb0ded733a55a1aa3eccac196" +checksum = "024a1e66fea74c66c66624ee5622a7ff0e4b73a13b4f5c326ddb50c708944226" dependencies = [ "cc", "libc", @@ -1168,7 +1104,7 @@ dependencies = [ "spin", "untrusted", "web-sys", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -1193,27 +1129,15 @@ dependencies = [ [[package]] name = "rug" -version = "1.8.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72315b6d9cb7d886fb99724330c47ceb29e923df657c31da3849fe88c0ded710" +checksum = "e538d00da450a8e48aac7e6322e67b2dc86ec71a1feeac0e3954c4f07f01bc45" dependencies = [ "az", "gmp-mpfr-sys", "libc", ] -[[package]] -name = "rust-argon2" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bc8af4bda8e1ff4932523b94d3dd20ee30a87232323eda55903ffd71d2fb017" -dependencies = [ - "base64 0.11.0", - "blake2b_simd", - "constant_time_eq", - "crossbeam-utils", -] - [[package]] name = "rustc_version" version = "0.2.3" @@ -1225,13 +1149,13 @@ dependencies = [ [[package]] name = "rustyline" -version = "7.0.0" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a5f54deba50e65ee4cf786dbc37e8b3c63bdccccbcf9d3a8a9fd0c1bb7e1984" +checksum = "8227301bfc717136f0ecbd3d064ba8199e44497a0bdd46bb01ede4387cfd2cec" dependencies = [ "bitflags", "cfg-if 
1.0.0", - "dirs", + "dirs-next", "fs2", "libc", "log", @@ -1241,14 +1165,14 @@ dependencies = [ "unicode-segmentation", "unicode-width", "utf8parse", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] name = "ryu" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" [[package]] name = "schannel" @@ -1257,7 +1181,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75" dependencies = [ "lazy_static", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -1270,7 +1194,7 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" name = "scryer-prolog" version = "0.8.127" dependencies = [ - "base64 0.12.3", + "base64", "blake2", "chrono", "cpu-time", @@ -1303,9 +1227,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "0.4.4" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64808902d7d99f78eaddd2b4e2509713babc3dc3c85ad6f4c447680f3c01e535" +checksum = "c1759c2e3c8580017a484a7ac56d3abc5a6c1feadf88db2f3633f12ae4268c69" dependencies = [ "bitflags", "core-foundation", @@ -1316,9 +1240,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "0.4.3" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17bf11d99252f512695eb468de5516e5cf75455521e69dfe343f3b74e4748405" +checksum = "f99b9d5e26d2a71633cc4f2ebae7cc9f874044e0c351a27e17892d76dce5678b" dependencies = [ "core-foundation-sys", "libc", @@ -1351,26 +1275,26 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.114" +version = "1.0.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3" +checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae" [[package]] name = "serde_derive" -version = "1.0.113" +version = "1.0.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93c5eaa17d0954cb481cdcfffe9d84fcfa7a1a9f2349271e678677be4c26ae31" +checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31" dependencies = [ - "proc-macro2 1.0.10", - "quote 1.0.3", - "syn 1.0.18", + "proc-macro2 1.0.24", + "quote 1.0.8", + "syn 1.0.60", ] [[package]] name = "serde_json" -version = "1.0.55" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec2c5d7e739bc07a3e73381a39d61fdb5f671c60c1df26a130690665803d8226" +checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a" dependencies = [ "itoa", "ryu", @@ -1392,9 +1316,9 @@ dependencies = [ [[package]] name = "signal-hook" -version = "0.1.13" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b9f3a1686a29f53cfd91ee5e3db3c12313ec02d33765f02c1a9645a1811e2c" +checksum = "7e31d442c16f047a671b5a71e2161d6e68814012b7f5379d269ebd915fac2729" dependencies = [ "libc", "mio", @@ -1403,11 +1327,10 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94f478ede9f64724c5d173d7bb56099ec3e2d9fc2774aac65d34b8b890405f41" +checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6" dependencies = [ - "arc-swap", "libc", ] @@ -1425,18 +1348,9 @@ checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" [[package]] name = "smallvec" -version = "0.6.13" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b0758c52e15a8b5e3691eae6cc559f08eee9406e548a4477ba4e67770a82b6" -dependencies = [ - 
"maybe-uninit", -] - -[[package]] -name = "smallvec" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" [[package]] name = "sodiumoxide" @@ -1484,8 +1398,8 @@ checksum = "f0f45ed1b65bf9a4bf2f7b7dc59212d1926e9eaf00fa998988e420fd124467c6" dependencies = [ "phf_generator", "phf_shared", - "proc-macro2 1.0.10", - "quote 1.0.3", + "proc-macro2 1.0.24", + "quote 1.0.8", "string_cache_shared", ] @@ -1514,34 +1428,34 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.18" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "410a7488c0a728c7ceb4ad59b9567eb4053d02e8cc7f5c0e0eeeb39518369213" +checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" dependencies = [ - "proc-macro2 1.0.10", - "quote 1.0.3", - "unicode-xid 0.2.0", + "proc-macro2 1.0.24", + "quote 1.0.8", + "unicode-xid 0.2.1", ] [[package]] name = "tempfile" -version = "3.1.0" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "libc", - "rand 0.7.3", - "redox_syscall", + "rand 0.8.3", + "redox_syscall 0.2.4", "remove_dir_all", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] name = "tendril" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" +checksum = "a9ef557cb397a4f0a5a3a628f06515f78563f2209e64d47055d9dc6052bf5e33" dependencies = [ "futf", "mac", @@ -1555,7 +1469,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" dependencies = [ "libc", - "winapi 0.3.8", + "winapi 0.3.9", ] [[package]] @@ -1566,15 +1480,15 @@ checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" [[package]] name = "unicode-segmentation" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" +checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" [[package]] name = "unicode-width" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" [[package]] name = "unicode-xid" @@ -1584,17 +1498,17 @@ checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" [[package]] name = "unicode-xid" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" [[package]] name = "unicode_reader" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f886d405a2be65db804cb1088f969dbd99528a9eec9bcf941584b17de4cd3034" +checksum = "5b639121690b27acd92c97ed2b52c5e5e8d3d39482e943b4559695cef62f771a" dependencies = [ - "smallvec 0.6.13", + "smallvec", "unicode-segmentation", ] @@ -1627,9 +1541,9 @@ checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" [[package]] name = "vcpkg" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55d1e41d56121e07f1e223db0a4def204e45c85425f6a16d462fd07c8d10d74c" +checksum = 
"b00bca6106a5e23f3eee943593759b7fcddb00554332e856d990c893966879fb" [[package]] name = "void" @@ -1639,69 +1553,69 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" +version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasm-bindgen" -version = "0.2.62" +version = "0.2.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c7d40d09cdbf0f4895ae58cf57d92e1e57a9dd8ed2e8390514b54a47cc5551" +checksum = "55c0f7123de74f0dab9b7d00fd614e7b19349cd1e2f5252bbe9b1754b59433be" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.62" +version = "0.2.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3972e137ebf830900db522d6c8fd74d1900dcfc733462e9a12e942b00b4ac94" +checksum = "7bc45447f0d4573f3d65720f636bbcc3dd6ce920ed704670118650bcd47764c7" dependencies = [ "bumpalo", "lazy_static", "log", - "proc-macro2 1.0.10", - "quote 1.0.3", - "syn 1.0.18", + "proc-macro2 1.0.24", + "quote 1.0.8", + "syn 1.0.60", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.62" +version = "0.2.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cd85aa2c579e8892442954685f0d801f9129de24fa2136b2c6a539c76b65776" +checksum = "3b8853882eef39593ad4174dd26fc9865a64e84026d223f63bb2c42affcbba2c" dependencies = [ - "quote 1.0.3", + "quote 1.0.8", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.62" +version = "0.2.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8eb197bd3a47553334907ffd2f16507b4f4f01bbec3ac921a7719e0decdfe72a" +checksum = "4133b5e7f2a531fa413b3a1695e925038a05a71cf67e87dafa295cb645a01385" dependencies = [ - "proc-macro2 1.0.10", - "quote 1.0.3", - "syn 1.0.18", + "proc-macro2 1.0.24", + "quote 1.0.8", + "syn 1.0.60", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.62" +version = "0.2.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a91c2916119c17a8e316507afaaa2dd94b47646048014bbdf6bef098c1bb58ad" +checksum = "dd4945e4943ae02d15c13962b38a5b1e81eadd4b71214eee75af64a4d6a4fd64" [[package]] name = "web-sys" -version = "0.3.39" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bc359e5dd3b46cb9687a051d50a2fdd228e4ba7cf6fcf861a5365c3d671a642" +checksum = "c40dc691fc48003eba817c38da7113c15698142da971298003cac3ef175680b3" dependencies = [ "js-sys", "wasm-bindgen", @@ -1715,9 +1629,9 @@ checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" [[package]] name = "winapi" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", @@ -1753,6 +1667,6 @@ dependencies = [ [[package]] name = "xmlparser" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52613e655f6f11f63c0fe7d1c3b5ef69e44d96df9b65dab296b441ed0e1125f5" +checksum = "114ba2b24d2167ef6d67d7d04c8cc86522b87f490025f39f0303b7db5bf5e3d8" diff --git a/Cargo.toml b/Cargo.toml index 6d10a3ae..f473e6f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,9 @@ keywords = ["prolog", "prolog-interpreter", "prolog-system"] categories = ["command-line-utilities"] build = 
"build.rs" +[workspace] +members = ["prolog_parser"] + [build-dependencies] indexmap = "1.0.2" @@ -32,7 +35,7 @@ libc = "0.2.62" nix = "0.15.0" num-rug-adapter = { optional = true, version = "0.1.4" } ordered-float = "0.5.0" -prolog_parser = { version = "0.8.68", default-features = false } +prolog_parser = { path = "./prolog_parser", default-features = false } ref_thread_local = "0.0.0" rug = { version = "1.4.0", optional = true } rustyline = "7.0.0" diff --git a/prolog_parser/Cargo.toml b/prolog_parser/Cargo.toml new file mode 100644 index 00000000..37ce6e31 --- /dev/null +++ b/prolog_parser/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "prolog_parser" +version = "0.8.68" +authors = ["Mark Thom "] +repository = "https://github.com/mthom/prolog_parser" +description = " An operator precedence parser for rusty-wam, an up and coming ISO Prolog implementation." +license = "BSD-3-Clause" + +[dependencies] +lexical = "2.1.0" +ordered-float = "0.5.0" +rug = { optional = true, version = "1.4.0" } +num-rug-adapter = { optional = true, version = "0.1.3" } +unicode_reader = "1.0.0" + +[lib] +path = "src/lib.rs" + +[features] +num = ["num-rug-adapter"] +default = ["rug"] diff --git a/prolog_parser/src/ast.rs b/prolog_parser/src/ast.rs new file mode 100644 index 00000000..5c31e0e4 --- /dev/null +++ b/prolog_parser/src/ast.rs @@ -0,0 +1,876 @@ +use rug::{Integer, Rational}; +use ordered_float::*; +use tabled_rc::*; + +use put_back_n::*; + +use std::cell::Cell; +use std::cmp::Ordering; +use std::collections::HashMap; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::io::{Bytes, Error as IOError, Read}; +use std::rc::Rc; +use std::vec::Vec; + +use unicode_reader::CodePoints; + +pub type Atom = String; + +pub type Var = String; + +pub type Specifier = u32; + +pub const MAX_ARITY: usize = 1023; + +pub const XFX: u32 = 0x0001; +pub const XFY: u32 = 0x0002; +pub const YFX: u32 = 0x0004; +pub const XF: u32 = 0x0010; +pub const YF: u32 = 0x0020; +pub const FX: u32 = 0x0040; +pub 
const FY: u32 = 0x0080; +pub const DELIMITER: u32 = 0x0100; +pub const TERM: u32 = 0x1000; +pub const LTERM: u32 = 0x3000; + +pub const NEGATIVE_SIGN: u32 = 0x0200; + +#[macro_export] +macro_rules! clause_name { + ($name: expr, $tbl: expr) => ( + ClauseName::User(TabledRc::new($name, $tbl.clone())) + ) ; + ($name: expr) => ( + ClauseName::BuiltIn($name) + ) +} + +#[macro_export] +macro_rules! atom { + ($e:expr, $tbl:expr) => ( + Constant::Atom(ClauseName::User(tabled_rc!($e, $tbl)), None) + ); + ($e:expr) => ( + Constant::Atom(clause_name!($e), None) + ) +} + +#[macro_export] +macro_rules! rc_atom { + ($e:expr) => ( + Rc::new(String::from($e)) + ) +} +macro_rules! is_term { + ($x:expr) => ( ($x & TERM) != 0 ) +} + +macro_rules! is_lterm { + ($x:expr) => ( ($x & LTERM) != 0 ) +} + +macro_rules! is_op { + ($x:expr) => ( $x & (XF | YF | FX | FY | XFX | XFY | YFX) != 0 ) +} + +macro_rules! is_negate { + ($x:expr) => ( ($x & NEGATIVE_SIGN) != 0 ) +} + +#[macro_export] +macro_rules! is_prefix { + ($x:expr) => ( $x & (FX | FY) != 0 ) +} + +#[macro_export] +macro_rules! is_postfix { + ($x:expr) => ( $x & (XF | YF) != 0 ) +} + +#[macro_export] +macro_rules! is_infix { + ($x:expr) => ( ($x & (XFX | XFY | YFX)) != 0 ) +} + +#[macro_export] +macro_rules! is_xfx { + ($x:expr) => ( ($x & XFX) != 0 ) +} + +#[macro_export] +macro_rules! is_xfy { + ($x:expr) => ( ($x & XFY) != 0 ) +} + +#[macro_export] +macro_rules! is_yfx { + ($x:expr) => ( ($x & YFX) != 0 ) +} + +#[macro_export] +macro_rules! is_yf { + ($x:expr) => ( ($x & YF) != 0 ) +} + +#[macro_export] +macro_rules! is_xf { + ($x:expr) => ( ($x & XF) != 0 ) +} + +#[macro_export] +macro_rules! is_fx { + ($x:expr) => ( ($x & FX) != 0 ) +} + +#[macro_export] +macro_rules! 
is_fy { + ($x:expr) => ( ($x & FY) != 0 ) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum RegType { + Perm(usize), + Temp(usize) +} + +impl Default for RegType { + fn default() -> Self { + RegType::Temp(0) + } +} + +impl RegType { + pub fn reg_num(self) -> usize { + match self { + RegType::Perm(reg_num) | RegType::Temp(reg_num) => reg_num + } + } + + pub fn is_perm(self) -> bool { + match self { + RegType::Perm(_) => true, + _ => false + } + } +} + +impl fmt::Display for RegType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &RegType::Perm(val) => write!(f, "Y{}", val), + &RegType::Temp(val) => write!(f, "X{}", val) + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum VarReg { + ArgAndNorm(RegType, usize), + Norm(RegType) +} + +impl VarReg { + pub fn norm(self) -> RegType { + match self { + VarReg::ArgAndNorm(reg, _) | VarReg::Norm(reg) => reg + } + } +} + +impl fmt::Display for VarReg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &VarReg::Norm(RegType::Perm(reg)) => write!(f, "Y{}", reg), + &VarReg::Norm(RegType::Temp(reg)) => write!(f, "X{}", reg), + &VarReg::ArgAndNorm(RegType::Perm(reg), arg) => + write!(f, "Y{} A{}", reg, arg), + &VarReg::ArgAndNorm(RegType::Temp(reg), arg) => + write!(f, "X{} A{}", reg, arg) + } + } +} + +impl Default for VarReg { + fn default() -> Self { + VarReg::Norm(RegType::default()) + } +} + +#[macro_export] +macro_rules! temp_v { + ($x:expr) => ( + RegType::Temp($x) + ) +} + +#[macro_export] +macro_rules! 
perm_v { + ($x:expr) => ( + RegType::Perm($x) + ) +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum GenContext { + Head, Mid(usize), Last(usize) // Mid & Last: chunk_num +} + +impl GenContext { + pub fn chunk_num(self) -> usize { + match self { + GenContext::Head => 0, + GenContext::Mid(cn) | GenContext::Last(cn) => cn + } + } +} + +pub type OpDirKey = (ClauseName, Fixity); + +#[derive(Debug, Clone)] +pub struct OpDirValue(pub SharedOpDesc, pub ClauseName); + +impl OpDirValue { + pub fn new(spec: Specifier, priority: usize, module_name: ClauseName) -> Self { + OpDirValue(SharedOpDesc::new(priority, spec), module_name) + } + + #[inline] + pub fn shared_op_desc(&self) -> SharedOpDesc { + self.0.clone() + } + + #[inline] + pub fn owning_module(&self) -> ClauseName { + self.1.clone() + } +} + +// name and fixity -> operator type and precedence. +pub type OpDir = HashMap; + +#[derive(Debug, Clone, Copy)] +pub struct MachineFlags { + pub double_quotes: DoubleQuotes +} + +impl Default for MachineFlags { + fn default() -> Self { + MachineFlags { double_quotes: DoubleQuotes::default() } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum DoubleQuotes { + Atom, Chars, Codes +} + +impl DoubleQuotes { + pub fn is_chars(self) -> bool { + if let DoubleQuotes::Chars = self { + true + } else { + false + } + } + + pub fn is_atom(self) -> bool { + if let DoubleQuotes::Atom = self { + true + } else { + false + } + } + + pub fn is_codes(self) -> bool { + if let DoubleQuotes::Codes = self { + true + } else { + false + } + } +} + +impl Default for DoubleQuotes { + fn default() -> Self { + DoubleQuotes::Chars + } +} + +pub fn default_op_dir() -> OpDir { + let module_name = clause_name!("builtins"); + let mut op_dir = OpDir::new(); + + op_dir.insert((clause_name!(":-"), Fixity::In), OpDirValue::new(XFX, 1200, module_name.clone())); + op_dir.insert((clause_name!(":-"), Fixity::Pre), OpDirValue::new(FX, 1200, module_name.clone())); + 
op_dir.insert((clause_name!("?-"), Fixity::Pre), OpDirValue::new(FX, 1200, module_name.clone())); + op_dir.insert((clause_name!(","), Fixity::In), OpDirValue::new(XFY, 1000, module_name.clone())); + + op_dir +} + +#[derive(Debug, Clone)] +pub enum ArithmeticError { + NonEvaluableFunctor(Constant, usize), + UninstantiatedVar +} + +#[derive(Debug)] +pub enum ParserError { + Arithmetic(ArithmeticError), + BackQuotedString(usize, usize), + BadPendingByte, + CannotParseCyclicTerm, + UnexpectedChar(char, usize, usize), + UnexpectedEOF, + IO(IOError), + ExpectedRel, + ExpectedTopLevelTerm, + InadmissibleFact, + InadmissibleQueryTerm, + IncompleteReduction(usize, usize), + InconsistentEntry, + InvalidDoubleQuotesDecl, + InvalidHook, + InvalidModuleDecl, + InvalidModuleExport, + InvalidRuleHead, + InvalidUseModuleDecl, + InvalidModuleResolution, + InvalidSingleQuotedCharacter(char), + MissingQuote(usize, usize), + NonPrologChar(usize, usize), + ParseBigInt(usize, usize), + ParseFloat(usize, usize), + Utf8Error(usize, usize) +} + +impl ParserError { + pub fn line_and_col_num(&self) -> Option<(usize, usize)> { + match self { + &ParserError::BackQuotedString(line_num, col_num) + | &ParserError::UnexpectedChar(_, line_num, col_num) + | &ParserError::IncompleteReduction(line_num, col_num) + | &ParserError::MissingQuote(line_num, col_num) + | &ParserError::NonPrologChar(line_num, col_num) + | &ParserError::ParseBigInt(line_num, col_num) + | &ParserError::ParseFloat(line_num, col_num) + | &ParserError::Utf8Error(line_num, col_num) => + Some((line_num, col_num)), + _ => + None + } + } + + pub fn as_str(&self) -> &'static str { + match self { + &ParserError::Arithmetic(..) => + "arithmetic_error", + &ParserError::BackQuotedString(..) => + "back_quoted_string", + &ParserError::BadPendingByte => + "bad_pending_byte", + &ParserError::UnexpectedChar(..) 
=> + "unexpected_char", + &ParserError::UnexpectedEOF => + "unexpected_end_of_file", + &ParserError::ExpectedRel => + "expected_relation", + &ParserError::ExpectedTopLevelTerm => + "expected_atom_or_cons_or_clause", + &ParserError::InadmissibleFact => + "inadmissible_fact", + &ParserError::InadmissibleQueryTerm => + "inadmissible_query_term", + &ParserError::IncompleteReduction(..) => + "incomplete_reduction", + &ParserError::InconsistentEntry => + "inconsistent_entry", + &ParserError::InvalidDoubleQuotesDecl => + "invalid_double_quotes_declaration", + &ParserError::InvalidHook => + "invalid_hook", + &ParserError::InvalidModuleDecl => + "invalid_module_declaration", + &ParserError::InvalidModuleExport => + "invalid_module_export", + &ParserError::InvalidModuleResolution => + "invalid_module_resolution", + &ParserError::InvalidRuleHead => + "invalid_head_of_rule", + &ParserError::InvalidUseModuleDecl => + "invalid_use_module_declaration", + &ParserError::InvalidSingleQuotedCharacter(..) => + "invalid_single_quoted_character", + &ParserError::IO(_) => + "input_output_error", + &ParserError::MissingQuote(..) => + "missing_quote", + &ParserError::NonPrologChar(..) => + "non_prolog_character", + &ParserError::ParseBigInt(..) => + "cannot_parse_big_int", + &ParserError::ParseFloat(..) => + "cannot_parse_float", + &ParserError::Utf8Error(..) 
=> + "utf8_conversion_error", + &ParserError::CannotParseCyclicTerm => + "cannot_parse_cyclic_term" + } + } +} + +impl From for ParserError { + fn from(err: ArithmeticError) -> ParserError { + ParserError::Arithmetic(err) + } +} + +impl From for ParserError { + fn from(err: IOError) -> ParserError { + ParserError::IO(err) + } +} + +impl From<&IOError> for ParserError { + fn from(error: &IOError) -> ParserError { + if error.get_ref().filter(|e| e.is::()).is_some() { + ParserError::Utf8Error(0, 0) + } else { + ParserError::IO(error.kind().into()) + } + } +} + + +#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub enum Fixity { + In, Post, Pre +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct SharedOpDesc(Rc>); + +impl SharedOpDesc { + #[inline] + pub fn new(priority: usize, spec: Specifier) -> Self { + SharedOpDesc(Rc::new(Cell::new((priority, spec)))) + } + + #[inline] + pub fn ptr_eq(lop_desc: &SharedOpDesc, rop_desc: &SharedOpDesc) -> bool { + Rc::ptr_eq(&lop_desc.0, &rop_desc.0) + } + + #[inline] + pub fn arity(&self) -> usize { + if self.get().1 & (XFX | XFY | YFX) == 0 { + 1 + } else { + 2 + } + } + + #[inline] + pub fn get(&self) -> (usize, Specifier) { + self.0.get() + } + + #[inline] + pub fn set(&self, prec: usize, spec: Specifier) { + self.0.set((prec, spec)); + } + + #[inline] + pub fn prec(&self) -> usize { + self.0.get().0 + } + + #[inline] + pub fn assoc(&self) -> Specifier { + self.0.get().1 + } +} + +// this ensures that SharedOpDesc (which is not consistently placed in +// every atom!) doesn't affect the value of an atom hash. If +// SharedOpDesc values are to be indexed, a BTreeMap or BTreeSet +// should be used, obviously. 
+impl Hash for SharedOpDesc { + fn hash(&self, state: &mut H) { + 0.hash(state) + } +} + +#[derive(Debug, Clone, Hash)] +pub enum Constant { + Atom(ClauseName, Option), + Char(char), + EmptyList, + Fixnum(isize), + Integer(Rc), + Rational(Rc), + Float(OrderedFloat), + String(Rc), + Usize(usize), +} + +impl fmt::Display for Constant { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &Constant::Atom(ref atom, _) => + if atom.as_str().chars().any(|c| "`.$'\" ".contains(c)) { + write!(f, "'{}'", atom.as_str()) + } else { + write!(f, "{}", atom.as_str()) + }, + &Constant::Char(c) => + write!(f, "'{}'", c as u32), + &Constant::EmptyList => + write!(f, "[]"), + &Constant::Fixnum(n) => + write!(f, "{}", n), + &Constant::Integer(ref n) => + write!(f, "{}", n), + &Constant::Rational(ref n) => + write!(f, "{}", n), + &Constant::Float(ref n) => + write!(f, "{}", n), + &Constant::String(ref s) => + write!(f, "\"{}\"", &s), + &Constant::Usize(integer) => + write!(f, "u{}", integer), + } + } +} + +impl PartialEq for Constant { + fn eq(&self, other: &Constant) -> bool { + match (self, other) { + (&Constant::Atom(ref atom, _), &Constant::Char(c)) + | (&Constant::Char(c), &Constant::Atom(ref atom, _)) => { + atom.is_char() && Some(c) == atom.as_str().chars().next() + }, + (&Constant::Atom(ref a1, _), &Constant::Atom(ref a2, _)) => + a1.as_str() == a2.as_str(), + (&Constant::Char(c1), &Constant::Char(c2)) => + c1 == c2, + (&Constant::Fixnum(n1), &Constant::Fixnum(n2)) => + n1 == n2, + (&Constant::Fixnum(n1), &Constant::Integer(ref n2)) | + (&Constant::Integer(ref n2), &Constant::Fixnum(n1)) => { + if let Some(n2) = n2.to_isize() { + n1 == n2 + } else { + false + } + } + (&Constant::Integer(ref n1), &Constant::Integer(ref n2)) => + n1 == n2, + (&Constant::Rational(ref n1), &Constant::Rational(ref n2)) => + n1 == n2, + (&Constant::Float(ref n1), &Constant::Float(ref n2)) => + n1 == n2, + (&Constant::String(ref s1), &Constant::String(ref s2)) => { + &s1 == &s2 + 
} + (&Constant::EmptyList, &Constant::EmptyList) => + true, + (&Constant::Usize(u1), &Constant::Usize(u2)) => + u1 == u2, + _ => false + } + } +} + +impl Eq for Constant {} + +impl Constant { + pub fn to_atom(self) -> Option { + match self { + Constant::Atom(a, _) => Some(a.defrock_brackets()), + _ => None + } + } +} + +#[derive(Debug, Clone)] +pub enum ClauseName { + BuiltIn(&'static str), + User(TabledRc) +} + +impl fmt::Display for ClauseName { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl Hash for ClauseName { + fn hash(&self, state: &mut H) { + (*self.as_str()).hash(state) + } +} + +impl PartialEq for ClauseName { + fn eq(&self, other: &ClauseName) -> bool { + *self.as_str() == *other.as_str() + } +} + +impl Eq for ClauseName {} + +impl Ord for ClauseName { + fn cmp(&self, other: &ClauseName) -> Ordering { + (*self.as_str()).cmp(other.as_str()) + } +} + +impl PartialOrd for ClauseName { + fn partial_cmp(&self, other: &ClauseName) -> Option { + Some(self.cmp(other)) + } +} + +impl<'a> From<&'a TabledRc> for ClauseName { + fn from(name: &'a TabledRc) -> ClauseName { + ClauseName::User(name.clone()) + } +} + +impl ClauseName { + #[inline] + pub fn owning_module(&self) -> Self { + match self { + &ClauseName::User(ref name) => { + let module = name.owning_module(); + ClauseName::User(TabledRc { atom: module.clone(), + table: TabledData::new(module) }) + }, + _ => clause_name!("user") + } + } + + #[inline] + pub fn to_rc(&self) -> Rc { + match self { + &ClauseName::BuiltIn(s) => Rc::new(s.to_string()), + &ClauseName::User(ref rc) => rc.inner() + } + } + + #[inline] + pub fn with_table(self, atom_tbl: TabledData) -> Self { + match self { + ClauseName::BuiltIn(_) => self, + ClauseName::User(mut name) => { + name.table = atom_tbl; + ClauseName::User(name) + } + } + } + + #[inline] + pub fn has_table(&self, atom_tbl: &TabledData) -> bool { + match self { + ClauseName::BuiltIn(_) => false, + ClauseName::User(ref 
name) => &name.table == atom_tbl, + } + } + + #[inline] + pub fn has_table_of(&self, other: &ClauseName) -> bool { + match self { + ClauseName::BuiltIn(_) => { + if let ClauseName::BuiltIn(_) = other { + true + } else { + false + } + } + ClauseName::User(ref name) => { + other.has_table(&name.table) + } + } + } + + #[inline] + pub fn as_str(&self) -> &str { + match self { + &ClauseName::BuiltIn(s) => s, + &ClauseName::User(ref name) => name.as_ref() + } + } + + #[inline] + pub fn is_char(&self) -> bool { + !self.as_str().is_empty() && self.as_str().chars().skip(1).next().is_none() + } + + pub fn defrock_brackets(self) -> Self { + fn defrock_brackets(s: &str) -> &str { + if s.starts_with('(') && s.ends_with(')') { + &s[1 .. s.len() - 1] + } else { + s + } + } + + match self { + ClauseName::BuiltIn(s) => + ClauseName::BuiltIn(defrock_brackets(s)), + ClauseName::User(s) => + ClauseName::User(tabled_rc!(defrock_brackets(s.as_str()).to_owned(), s.table)) + } + } +} + +impl AsRef for ClauseName { + #[inline] + fn as_ref(self: &Self) -> &str { + self.as_str() + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Term { + AnonVar, + Clause(Cell, ClauseName, Vec>, Option), + Cons(Cell, Box, Box), + Constant(Cell, Constant), + Var(Cell, Rc) +} + +impl Term { + pub fn shared_op_desc(&self) -> Option { + match self { + &Term::Clause(_, _, _, ref spec) => spec.clone(), + &Term::Constant(_, Constant::Atom(_, ref spec)) => spec.clone(), + _ => None + } + } + + pub fn to_constant(self) -> Option { + match self { + Term::Constant(_, c) => Some(c), + _ => None + } + } + + pub fn first_arg(&self) -> Option<&Term> { + match self { + &Term::Clause(_, _, ref terms, _) => + terms.first().map(|bt| bt.as_ref()), + _ => None + } + } + + pub fn set_name(&mut self, new_name: ClauseName) { + match self { + Term::Constant(_, Constant::Atom(ref mut atom, _)) + | Term::Clause(_, ref mut atom, ..) 
=> { + *atom = new_name; + } + _ => {} + } + } + + pub fn name(&self) -> Option { + match self { + &Term::Constant(_, Constant::Atom(ref atom, _)) + | &Term::Clause(_, ref atom, ..) => Some(atom.clone()), + _ => None + } + } + + pub fn arity(&self) -> usize { + match self { + &Term::Clause(_, _, ref child_terms, ..) => child_terms.len(), + _ => 0 + } + } +} + +#[derive(Debug, Clone, Copy)] +pub struct CompositeOp<'a, 'b> { + pub op_dir: &'a OpDir, + pub static_op_dir: Option<&'b OpDir> +} + +#[macro_export] +macro_rules! composite_op { + ($include_machine_p:expr, $op_dir:expr, $machine_op_dir:expr) => ( + CompositeOp { op_dir: $op_dir, + static_op_dir: if !$include_machine_p { + Some($machine_op_dir) + } else { + None + }} + ); + ($op_dir:expr) => ( + CompositeOp { op_dir: $op_dir, static_op_dir: None } + ) +} + +impl<'a, 'b> CompositeOp<'a, 'b> +{ + #[inline] + pub(crate) + fn get(&self, name: ClauseName, fixity: Fixity) -> Option + { + let entry = + if let Some(ref static_op_dir) = &self.static_op_dir { + static_op_dir.get(&(name.clone(), fixity)) + } else { + None + }; + + entry.or_else(move || self.op_dir.get(&(name, fixity))) + .cloned() + } +} + +fn unfold_by_str_once(term: &mut Term, s: &str) -> Option<(Term, Term)> { + if let &mut Term::Clause(_, ref name, ref mut subterms, _) = term { + if name.as_str() == s && subterms.len() == 2 { + let snd = *subterms.pop().unwrap(); + let fst = *subterms.pop().unwrap(); + + return Some((fst, snd)); + } + } + + None +} + +pub fn unfold_by_str(mut term: Term, s: &str) -> Vec { + let mut terms = vec![]; + + while let Some((fst, snd)) = unfold_by_str_once(&mut term, s) { + terms.push(fst); + term = snd; + } + + terms.push(term); + terms +} + +pub type ParsingStream = PutBackN>>; + +use unicode_reader::BadUtf8Error; + +#[inline] +pub fn parsing_stream(src: R) -> Result, ParserError> { + let mut stream = put_back_n(CodePoints::from(src.bytes())); + match stream.peek() { + None => Ok(stream), // empty stream is handled 
gracefully by Lexer::eof + Some(Err(error)) => Err(ParserError::from(error)), + Some(Ok(c)) => { + if *c == '\u{feff}' { + // skip UTF-8 BOM + stream.next(); + } + Ok(stream) + } + } +} diff --git a/prolog_parser/src/lexer.rs b/prolog_parser/src/lexer.rs new file mode 100644 index 00000000..b1656a55 --- /dev/null +++ b/prolog_parser/src/lexer.rs @@ -0,0 +1,898 @@ +use crate::lexical::parse_lossy; +use crate::ordered_float::*; +use crate::rug::Integer; + +use ast::*; +use tabled_rc::*; + +use std::convert::TryFrom; +use std::fmt; +use std::io::Read; +use std::rc::Rc; + +macro_rules! is_not_eof { + ($c:expr) => ( + match $c { + Ok(c) => c, + Err(ParserError::UnexpectedEOF) => return Ok(true), + Err(e) => return Err(e) + } + ) +} + +macro_rules! consume_chars_with { + ($token:expr, $e:expr) => { + loop { + match $e { + Ok(Some(c)) => $token.push(c), + Ok(None) => continue, + Err(ParserError::UnexpectedChar(..)) => break, + Err(e) => return Err(e) + } + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Token { + Constant(Constant), + Var(Rc), + Open, // '(' + OpenCT, // '(' + Close, // ')' + OpenList, // '[' + CloseList, // ']' + OpenCurly, // '{' + CloseCurly, // '}' + HeadTailSeparator, // '|' + Comma, // ',' + End +} + +pub struct Lexer<'a, R: Read> { + pub(crate) atom_tbl: TabledData, + pub(crate) reader: &'a mut ParsingStream, + pub(crate) flags: MachineFlags, + pub(crate) line_num: usize, + pub(crate) col_num: usize +} + +impl<'a, R: Read + fmt::Debug> fmt::Debug for Lexer<'a, R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Lexer") + .field("atom_tbl", &self.atom_tbl) + .field("reader", &"&'a mut ParsingStream") // Hacky solution. 
+ .field("line_num", &self.line_num) + .field("col_num", &self.col_num) + .finish() + } +} + +impl<'a, R: Read> Lexer<'a, R> { + pub fn new( + atom_tbl: TabledData, + flags: MachineFlags, + src: &'a mut ParsingStream, + ) -> Self { + Lexer { atom_tbl, flags, reader: src, line_num: 0, col_num: 0 } + } + + fn return_char(&mut self, c: char) { + if new_line_char!(c) { + self.line_num -= 1; + self.col_num = 0; + } + + self.reader.put_back(Ok(c)); + } + + fn skip_char(&mut self) -> Result { + if let Some(Ok(c)) = self.reader.next() { + self.col_num += 1; + + if new_line_char!(c) { + self.line_num += 1; + self.col_num = 0; + } + + Ok(c) + } else { + Err(ParserError::UnexpectedEOF) + } + } + + pub fn eof(&mut self) -> Result { + if self.reader.peek().is_none() { + return Ok(true); + } + + let mut c = is_not_eof!(self.lookahead_char()); + + while layout_char!(c) { + self.skip_char()?; + + if self.reader.peek().is_none() { + return Ok(true); + } + + c = is_not_eof!(self.lookahead_char()); + } + + Ok(false) + } + + pub fn lookahead_char(&mut self) -> Result { + match self.reader.peek() { + Some(&Ok(c)) => Ok(c), + _ => Err(ParserError::UnexpectedEOF), + } + } + + fn single_line_comment(&mut self) -> Result<(), ParserError> + { + loop { + if self.reader.peek().is_none() || new_line_char!(self.skip_char()?) { + break; + } + } + + Ok(()) + } + + fn bracketed_comment(&mut self) -> Result { + // we have already checked that the current lookahead_char is comment_1_char, just skip it + let c = self.skip_char()?; + + if comment_2_char!(self.lookahead_char()?) { + self.skip_char()?; + + // Keep reading until we find characters '*' and '/' + // Deliberately skip checks for prolog_char to allow comments to contain any characters, + // including so-called "extended characters", without having to explicitly add them to a character class. 
+ let mut c = self.lookahead_char()?; + loop { + while !comment_2_char!(c) { + self.skip_char()?; + c = self.lookahead_char()?; + } + + self.skip_char()?; + + c = self.lookahead_char()?; + if comment_1_char!(c) { + break; + } + } + + if prolog_char!(c) { + self.skip_char()?; + Ok(true) + } else { + Err(ParserError::NonPrologChar(self.line_num, self.col_num)) + } + } else { + self.return_char(c); + Ok(false) + } + } + + fn get_back_quoted_char(&mut self) -> Result { + if back_quote_char!(self.lookahead_char()?) { + let c = self.skip_char()?; + + if !back_quote_char!(self.lookahead_char()?) { + self.return_char(c); + Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) + } else { + self.skip_char() + } + } else if single_quote_char!(self.lookahead_char()?) { + self.skip_char() + } else { + self.get_non_quote_char() + } + } + + fn get_back_quoted_item(&mut self) -> Result, ParserError> { + if backslash_char!(self.lookahead_char()?) { + let c = self.skip_char()?; + + if new_line_char!(self.lookahead_char()?) { + self.skip_char()?; + Ok(None) + } else { + self.return_char(c); + Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) + } + } else { + self.get_back_quoted_char().map(Some) + } + } + + fn get_back_quoted_string(&mut self) -> Result { + let c = self.lookahead_char()?; + + if back_quote_char!(c) { + self.skip_char()?; + + let mut token = String::new(); + consume_chars_with!(token, self.get_back_quoted_item()); + + if back_quote_char!(self.lookahead_char()?) { + self.skip_char()?; + Ok(token) + } else { + Err(ParserError::MissingQuote(self.line_num, self.col_num)) + } + } else { + Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) + } + } + + fn get_single_quoted_item(&mut self) -> Result, ParserError> + { + if backslash_char!(self.lookahead_char()?) { + let c = self.skip_char()?; + + if new_line_char!(self.lookahead_char()?) 
{ + self.skip_char()?; + return Ok(None); + } else { + self.return_char(c); + } + } + + self.get_single_quoted_char().map(Some) + } + + fn get_single_quoted_char(&mut self) -> Result { + let c = self.lookahead_char()?; + + if single_quote_char!(c) { + self.skip_char()?; + + if !single_quote_char!(self.lookahead_char()?) { + self.return_char(c); + Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) + } else { + self.skip_char() + } + } else if double_quote_char!(c) || back_quote_char!(c) { + self.skip_char() + } else { + self.get_non_quote_char() + } + } + + fn get_double_quoted_item(&mut self) -> Result, ParserError> + { + if backslash_char!(self.lookahead_char()?) { + let c = self.skip_char()?; + + if new_line_char!(self.lookahead_char()?) { + self.skip_char()?; + return Ok(None) + } else { + self.return_char(c); + } + } + + self.get_double_quoted_char().map(Some) + } + + fn get_double_quoted_char(&mut self) -> Result { + if double_quote_char!(self.lookahead_char()?) { + let c = self.skip_char()?; + + if !double_quote_char!(self.lookahead_char()?) { + self.return_char(c); + Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) + } else { + self.skip_char() + } + } else if single_quote_char!(self.lookahead_char()?) { + self.skip_char() + } else if back_quote_char!(self.lookahead_char()?) { + self.skip_char() + } else { + self.get_non_quote_char() + } + } + + fn get_control_escape_sequence(&mut self) -> Result + { + let escaped = match self.lookahead_char()? 
{ + 'a' => '\u{07}', // UTF-8 alert + 'b' => '\u{08}', // UTF-8 backspace + 'v' => '\u{0b}', // UTF-8 vertical tab + 'f' => '\u{0c}', // UTF-8 form feed + 't' => '\t', + 'n' => '\n', + 'r' => '\r', + c => return Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) + }; + + self.skip_char()?; + return Ok(escaped); + } + + fn get_octal_escape_sequence(&mut self) -> Result + { + self.escape_sequence_to_char(|c| octal_digit_char!(c), 8) + } + + fn get_hexadecimal_escape_sequence(&mut self) -> Result + { + self.skip_char()?; + let c = self.lookahead_char()?; + + if hexadecimal_digit_char!(c) { + self.escape_sequence_to_char(|c| hexadecimal_digit_char!(c), 16) + } else { + Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) + } + } + + fn escape_sequence_to_char( + &mut self, + accept_char: impl Fn(char) -> bool, + radix: u32, + ) -> Result { + let mut c = self.lookahead_char()?; + let mut token = String::new(); + + loop { + token.push(c); + + self.skip_char()?; + c = self.lookahead_char()?; + + if !accept_char(c) { + break; + } + } + + if backslash_char!(c) { + self.skip_char()?; + u32::from_str_radix(&token, radix) + .map_or_else( + |_| Err(ParserError::ParseBigInt(self.line_num, self.col_num)), + |n| char::try_from(n) + .map_err(|_| ParserError::Utf8Error(self.line_num, self.col_num)) + ) + } else { + // on failure, restore the token characters and backslash. 
+ self.reader.put_back_all(token.chars().map(Ok)); + self.reader.put_back(Ok('\\')); + + Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) + } + } + + fn get_non_quote_char(&mut self) -> Result { + let c = self.lookahead_char()?; + + if graphic_char!(c) || alpha_numeric_char!(c) || solo_char!(c) || space_char!(c) { + self.skip_char() + } else { + if !backslash_char!(c) { + return Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)); + } + + self.skip_char()?; + + let c = self.lookahead_char()?; + + if meta_char!(c) { + self.skip_char() + } else if octal_digit_char!(c) { + self.get_octal_escape_sequence() + } else if symbolic_hexadecimal_char!(c) { + self.get_hexadecimal_escape_sequence() + } else { + self.get_control_escape_sequence() + } + } + } + + fn char_code_list_token(&mut self) -> Result { + let mut token = String::new(); + + self.skip_char()?; + consume_chars_with!(token, self.get_double_quoted_item()); + + if double_quote_char!(self.lookahead_char()?) { + self.skip_char()?; + Ok(token) + } else { + Err(ParserError::MissingQuote(self.line_num, self.col_num)) + } + } + + fn hexadecimal_constant(&mut self) -> Result { + self.skip_char()?; + + if hexadecimal_digit_char!(self.lookahead_char()?) { + let mut token = String::new(); + + while hexadecimal_digit_char!(self.lookahead_char()?) { + token.push(self.skip_char()?); + } + + isize::from_str_radix(&token, 16) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + Integer::from_str_radix(&token, 16) + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } else { + self.return_char('x'); + Err(ParserError::ParseBigInt(self.line_num, self.col_num)) + } + } + + fn octal_constant(&mut self) -> Result { + self.skip_char()?; + + if octal_digit_char!(self.lookahead_char()?) { + let mut token = String::new(); + + while octal_digit_char!(self.lookahead_char()?) 
{ + token.push(self.skip_char()?); + } + + isize::from_str_radix(&token, 8) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + Integer::from_str_radix(&token, 8) + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } else { + self.return_char('o'); + Err(ParserError::ParseBigInt(self.line_num, self.col_num)) + } + } + + fn binary_constant(&mut self) -> Result { + self.skip_char()?; + + if binary_digit_char!(self.lookahead_char()?) { + let mut token = String::new(); + + while binary_digit_char!(self.lookahead_char()?) { + token.push(self.skip_char()?); + } + + isize::from_str_radix(&token, 2) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + Integer::from_str_radix(&token, 2) + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } else { + self.return_char('b'); + Err(ParserError::ParseBigInt(self.line_num, self.col_num)) + } + } + + fn variable_token(&mut self) -> Result { + let mut s = String::new(); + s.push(self.skip_char()?); + + while alpha_numeric_char!(self.lookahead_char()?) { + s.push(self.skip_char()?); + } + + Ok(Token::Var(rc_atom!(s))) + } + + fn name_token(&mut self, c: char) -> Result { + let mut token = String::new(); + + if small_letter_char!(c) { + token.push(self.skip_char()?); + + while alpha_numeric_char!(self.lookahead_char()?) { + token.push(self.skip_char()?); + } + } else if graphic_token_char!(c) { + token.push(self.skip_char()?); + + while graphic_token_char!(self.lookahead_char()?) { + token.push(self.skip_char()?); + } + } else if cut_char!(c) { + token.push(self.skip_char()?); + } else if semicolon_char!(c) { + token.push(self.skip_char()?); + } else if single_quote_char!(c) { + self.skip_char()?; + + consume_chars_with!(token, self.get_single_quoted_item()); + + if single_quote_char!(self.lookahead_char()?) 
{ + self.skip_char()?; + + if !token.is_empty() && token.chars().skip(1).next().is_none() { + if let Some(c) = token.chars().next() { + return Ok(Token::Constant(Constant::Char(c))); + } + } + } else { + return Err(ParserError::InvalidSingleQuotedCharacter(self.lookahead_char()?)) + } + } else { + match self.get_back_quoted_string() { + Ok(_) => return Err(ParserError::BackQuotedString(self.line_num, self.col_num)), + Err(e) => return Err(e) + } + } + + if token.as_str() == "[]" { + Ok(Token::Constant(Constant::EmptyList)) + } else { + Ok(Token::Constant(atom!(token, self.atom_tbl))) + } + } + + fn vacate_with_float(&mut self, mut token: String) -> Token { + self.return_char(token.pop().unwrap()); + + let result = OrderedFloat(parse_lossy::(token.as_bytes())); + Token::Constant(Constant::Float(result)) + } + + pub fn number_token(&mut self) -> Result { + let mut token = String::new(); + + token.push(self.skip_char()?); + let mut c = self.lookahead_char()?; + + while decimal_digit_char!(c) { + token.push(c); + self.skip_char()?; + c = self.lookahead_char()?; + } + + if decimal_point_char!(c) { + self.skip_char()?; + + if self.reader.peek().is_none() { + self.return_char('.'); + + isize::from_str_radix(&token, 10) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + token.parse::() + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } else if decimal_digit_char!(self.lookahead_char()?) { + token.push('.'); + token.push(self.skip_char()?); + + let mut c = self.lookahead_char()?; + + while decimal_digit_char!(c) { + token.push(c); + self.skip_char()?; + c = self.lookahead_char()?; + } + + if exponent_char!(self.lookahead_char()?) 
{ + token.push(self.skip_char()?); + + let c = match self.lookahead_char() { + Err(_) => return Ok(self.vacate_with_float(token)), + Ok(c) => c + }; + + if !sign_char!(c) && !decimal_digit_char!(c) { + return Ok(self.vacate_with_float(token)); + } + + if sign_char!(c) { + token.push(self.skip_char()?); + + let c = match self.lookahead_char() { + Err(_) => { + self.return_char(token.pop().unwrap()); + return Ok(self.vacate_with_float(token)); + }, + Ok(c) => c + }; + + if !decimal_digit_char!(c) { + self.return_char(token.pop().unwrap()); + return Ok(self.vacate_with_float(token)); + } + } + + if decimal_digit_char!(self.lookahead_char()?) { + token.push(self.skip_char()?); + + while decimal_digit_char!(self.lookahead_char()?) { + token.push(self.skip_char()?); + } + + let n = OrderedFloat(parse_lossy::(token.as_bytes())); + Ok(Token::Constant(Constant::Float(n))) + } else { + return Ok(self.vacate_with_float(token)); + } + } else { + let n = OrderedFloat(parse_lossy::(token.as_bytes())); + Ok(Token::Constant(Constant::Float(n))) + } + } else { + self.return_char('.'); + + isize::from_str_radix(&token, 10) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + token.parse::() + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } + } else { + if token.starts_with('0') && token.len() == 1 { + if c == 'x' { + self.hexadecimal_constant() + .or_else(|e| { + if let ParserError::ParseBigInt(..) = e { + isize::from_str_radix(&token, 10) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + token.parse::() + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } else { + Err(e) + } + }) + } else if c == 'o' { + self.octal_constant() + .or_else(|e| { + if let ParserError::ParseBigInt(..) 
= e { + isize::from_str_radix(&token, 10) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + token.parse::() + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } else { + Err(e) + } + }) + } else if c == 'b' { + self.binary_constant() + .or_else(|e| { + if let ParserError::ParseBigInt(..) = e { + isize::from_str_radix(&token, 10) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + token.parse::() + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } else { + Err(e) + } + }) + } else if single_quote_char!(c) { + self.skip_char()?; + + if backslash_char!(self.lookahead_char()?) { + self.skip_char()?; + + if new_line_char!(self.lookahead_char()?) { + self.return_char('\\'); + self.return_char('\''); + + return Ok(Token::Constant(Constant::Fixnum(0))); + } else { + self.return_char('\\'); + } + } + + self.get_single_quoted_char() + .and_then(|c| { + Ok(Token::Constant(Constant::Fixnum(c as isize))) + }) + .or_else(|_| { + self.return_char(c); + + isize::from_str_radix(&token, 10) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + token.parse::() + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + }) + } else { + isize::from_str_radix(&token, 10) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + token.parse::() + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } + } else { + isize::from_str_radix(&token, 10) + .map(|n| Token::Constant(Constant::Fixnum(n))) + .or_else(|_| { + token.parse::() + .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) + .map_err(|_| ParserError::ParseBigInt( + self.line_num, + self.col_num, + )) + }) + } 
+ } + } + + pub fn scan_for_layout(&mut self) -> Result { + let mut layout_inserted = false; + let mut more_layout = true; + + loop { + let cr = self.lookahead_char(); + + match cr { + Ok(c) if layout_char!(c) || new_line_char!(c) => { + self.skip_char()?; + layout_inserted = true; + }, + Ok(c) if end_line_comment_char!(c) => { + self.single_line_comment()?; + layout_inserted = true; + }, + Ok(c) if comment_1_char!(c) => + if self.bracketed_comment()? { + layout_inserted = true; + } else { + more_layout = false; + }, + _ => more_layout = false + }; + + if !more_layout { + break; + } + } + + Ok(layout_inserted) + } + + pub fn next_token(&mut self) -> Result { + let layout_inserted = self.scan_for_layout()?; + let cr = self.lookahead_char(); + + match cr { + Ok(c) => { + if capital_letter_char!(c) || variable_indicator_char!(c) { + return self.variable_token(); + } + + if c == ',' { + self.skip_char()?; + return Ok(Token::Comma); + } + + if c == ')' { + self.skip_char()?; + return Ok(Token::Close); + } + + if c == '(' { + self.skip_char()?; + return Ok(if layout_inserted { Token::Open } + else { Token::OpenCT }); + } + + if c == '.' 
{ + self.skip_char()?; + + match self.lookahead_char() { + Ok(c) if layout_char!(c) || c == '%' => { + if new_line_char!(c) { + self.skip_char()?; + } + + return Ok(Token::End); + }, + Err(ParserError::UnexpectedEOF) => { + return Ok(Token::End); + } + _ => { + self.return_char('.'); + } + }; + } + + if decimal_digit_char!(c) { + return self.number_token(); + } + + if c == ']' { + self.skip_char()?; + return Ok(Token::CloseList); + } + + if c == '[' { + self.skip_char()?; + return Ok(Token::OpenList); + } + + if c == '|' { + self.skip_char()?; + return Ok(Token::HeadTailSeparator); + } + + if c == '{' { + self.skip_char()?; + return Ok(Token::OpenCurly); + } + + if c == '}' { + self.skip_char()?; + return Ok(Token::CloseCurly); + } + + if c == '"' { + let s = self.char_code_list_token()?; + + if let DoubleQuotes::Atom = self.flags.double_quotes { + let s = clause_name!(s, self.atom_tbl); + return Ok(Token::Constant(Constant::Atom(s, None))); + } else { + let s = Rc::new(s); + return Ok(Token::Constant(Constant::String(s))); + } + } + + self.name_token(c) + }, + Err(e) => Err(e) + } + } +} diff --git a/prolog_parser/src/lib.rs b/prolog_parser/src/lib.rs new file mode 100644 index 00000000..f8ef7536 --- /dev/null +++ b/prolog_parser/src/lib.rs @@ -0,0 +1,15 @@ +extern crate lexical; +extern crate ordered_float; +#[cfg(feature = "rug")] +extern crate rug; +#[cfg(feature = "num-rug-adapter")] +extern crate num_rug_adapter as rug; +extern crate unicode_reader; + +#[macro_use] pub mod tabled_rc; +#[macro_use] pub mod ast; +#[macro_use] pub mod macros; +pub mod parser; +pub mod put_back_n; + +pub mod lexer; diff --git a/prolog_parser/src/macros.rs b/prolog_parser/src/macros.rs new file mode 100644 index 00000000..e4520aba --- /dev/null +++ b/prolog_parser/src/macros.rs @@ -0,0 +1,187 @@ +#[macro_export] +macro_rules! 
char_class { + ($c: expr, [$head:expr]) => ($c == $head); + ($c: expr, [$head:expr $(, $cs:expr)+]) => ($c == $head || char_class!($c, [$($cs),*])); +} + +#[macro_export] +macro_rules! symbolic_control_char { + ($c: expr) => (char_class!($c, ['a', 'b', 'f', 'n', 'r', 't', 'v', '0'])) +} + +#[macro_export] +macro_rules! space_char { + ($c: expr) => ($c == ' ') +} + +#[macro_export] +macro_rules! layout_char { + ($c: expr) => (char_class!($c, [' ', '\n', '\t', '\u{0B}', '\u{0C}'])) +} + +#[macro_export] +macro_rules! symbolic_hexadecimal_char { + ($c: expr) => ($c == 'x') +} + +#[macro_export] +macro_rules! octal_digit_char { + ($c: expr) => ($c >= '0' && $c <= '7') +} + +#[macro_export] +macro_rules! binary_digit_char { + ($c: expr) => ($c >= '0' && $c <= '1') +} + +#[macro_export] +macro_rules! hexadecimal_digit_char { + ($c: expr) => ($c >= '0' && $c <= '9' || + $c >= 'A' && $c <= 'F' || + $c >= 'a' && $c <= 'f') +} + +#[macro_export] +macro_rules! exponent_char { + ($c: expr) => ($c == 'e' || $c == 'E') +} + +#[macro_export] +macro_rules! sign_char { + ($c: expr) => ($c == '-' || $c == '+') +} + +#[macro_export] +macro_rules! new_line_char { + ($c: expr) => ($c == '\n') +} + +#[macro_export] +macro_rules! end_line_comment_char { + ($c: expr) => ($c == '%') +} + +#[macro_export] +macro_rules! comment_1_char { + ($c: expr) => ($c == '/') +} + +#[macro_export] +macro_rules! comment_2_char { + ($c: expr) => ($c == '*') +} + +#[macro_export] +macro_rules! capital_letter_char { + ($c: expr) => ($c >= 'A' && $c <= 'Z') +} + +#[macro_export] +macro_rules! small_letter_char { + ($c: expr) => ($c >= 'a' && $c <= 'z') +} + +#[macro_export] +macro_rules! variable_indicator_char { + ($c: expr) => ($c == '_') +} + +#[macro_export] +macro_rules! graphic_char { + ($c: expr) => (char_class!($c, ['#', '$', '&', '*', '+', '-', '.', '/', ':', + '<', '=', '>', '?', '@', '^', '~'])) +} + +#[macro_export] +macro_rules! 
graphic_token_char { + ($c: expr) => (graphic_char!($c) || backslash_char!($c)) +} + +#[macro_export] +macro_rules! alpha_char { + ($c: expr) => + (match $c { + 'a' ..= 'z' => true, + 'A' ..= 'Z' => true, + '_' => true, + '\u{00A0}' ..= '\u{00BF}' => true, + '\u{00C0}' ..= '\u{00D6}' => true, + '\u{00D8}' ..= '\u{00F6}' => true, + '\u{00F8}' ..= '\u{00FF}' => true, + '\u{0100}' ..= '\u{017F}' => true, // Latin Extended-A + '\u{0180}' ..= '\u{024F}' => true, // Latin Extended-B + '\u{0250}' ..= '\u{02AF}' => true, // IPA Extensions + '\u{02B0}' ..= '\u{02FF}' => true, // Spacing Modifier Letters + '\u{0300}' ..= '\u{036F}' => true, // Combining Diacritical Marks + '\u{0370}' ..= '\u{03FF}' => true, // Greek/Coptic + '\u{0400}' ..= '\u{04FF}' => true, // Cyrillic + '\u{0500}' ..= '\u{052F}' => true, // Cyrillic Supplement + '\u{0530}' ..= '\u{058F}' => true, // Armenian + '\u{0590}' ..= '\u{05FF}' => true, // Hebrew + '\u{0600}' ..= '\u{06FF}' => true, // Arabic + '\u{0700}' ..= '\u{074F}' => true, // Syriac + _ => false + }) +} + +#[macro_export] +macro_rules! decimal_digit_char { + ($c: expr) => ($c >= '0' && $c <= '9') +} + +#[macro_export] +macro_rules! decimal_point_char { + ($c: expr) => ($c == '.') +} + +#[macro_export] +macro_rules! alpha_numeric_char { + ($c: expr) => (alpha_char!($c) || decimal_digit_char!($c)) +} + +#[macro_export] +macro_rules! cut_char { + ($c: expr) => ($c == '!') +} + +#[macro_export] +macro_rules! semicolon_char { + ($c: expr) => ($c == ';') +} + +#[macro_export] +macro_rules! backslash_char { + ($c: expr) => ($c == '\\') +} + +#[macro_export] +macro_rules! single_quote_char { + ($c: expr) => ($c == '\'') +} + +#[macro_export] +macro_rules! double_quote_char { + ($c: expr) => ($c == '"') +} + +#[macro_export] +macro_rules! back_quote_char { + ($c: expr) => ($c == '`') +} + +#[macro_export] +macro_rules! meta_char { + ($c: expr) => ( char_class!($c, ['\\', '\'', '"', '`']) ) +} + +#[macro_export] +macro_rules! 
solo_char { + ($c: expr) => ( char_class!($c, ['!', '(', ')', ',', ';', '[', ']', + '{', '}', '|', '%']) ) +} + +#[macro_export] +macro_rules! prolog_char { + ($c: expr) => (graphic_char!($c) || alpha_numeric_char!($c) || solo_char!($c) || + layout_char!($c) || meta_char!($c)) +} diff --git a/prolog_parser/src/parser.rs b/prolog_parser/src/parser.rs new file mode 100644 index 00000000..dd665c26 --- /dev/null +++ b/prolog_parser/src/parser.rs @@ -0,0 +1,983 @@ +use ast::*; +use lexer::*; +use tabled_rc::*; + +use ordered_float::OrderedFloat; + +use rug::ops::NegAssign; + +use std::cell::Cell; +use std::io::Read; +use std::mem::swap; +use std::rc::Rc; + +#[derive(Debug, Clone, Copy, PartialEq)] +enum TokenType { + Term, + Open, + OpenCT, + OpenList, // '[' + OpenCurly, // '{' + HeadTailSeparator, // '|' + Comma, // ',' + Close, + CloseList, // ']' + CloseCurly, // '}' + End +} + +impl TokenType { + fn is_sep(self) -> bool { + match self { + TokenType::HeadTailSeparator | TokenType::OpenCT | TokenType::Open | + TokenType::Close | TokenType::OpenList | TokenType::CloseList | + TokenType::OpenCurly | TokenType::CloseCurly | TokenType::Comma + => true, + _ => false + } + } +} + +#[derive(Debug, Clone, Copy)] +struct TokenDesc { + tt: TokenType, + priority: usize, + spec: u32 +} + +pub +fn get_clause_spec(name: ClauseName, arity: usize, op_dir: CompositeOp) -> Option +{ + match arity { + 1 => { + /* This is a clause with an operator principal functor. Prefix operators + are supposed over post. 
+ */ + if let Some(OpDirValue(cell, _)) = op_dir.get(name.clone(), Fixity::Pre) { + return Some(cell); + } + + if let Some(OpDirValue(cell, _)) = op_dir.get(name, Fixity::Post) { + return Some(cell); + } + }, + 2 => + if let Some(OpDirValue(cell, _)) = op_dir.get(name, Fixity::In) { + return Some(cell); + }, + _ => {} + }; + + None +} + +pub fn get_desc(name: ClauseName, op_dir: CompositeOp) -> Option +{ + let mut op_desc = OpDesc { pre: 0, inf: 0, post: 0, spec: 0 }; + + if let Some(OpDirValue(cell, _)) = op_dir.get(name.clone(), Fixity::Pre) { + let (pri, spec) = cell.get(); + + if pri > 0 { + op_desc.pre = pri; + op_desc.spec |= spec; + } else if name.as_str() == "-" { + op_desc.spec |= NEGATIVE_SIGN; + } + } + + if let Some(OpDirValue(cell, _)) = op_dir.get(name.clone(), Fixity::Post) { + let (pri, spec) = cell.get(); + + if pri > 0 { + op_desc.post = pri; + op_desc.spec |= spec; + } + } + + if let Some(OpDirValue(cell, _)) = op_dir.get(name.clone(), Fixity::In) { + let (pri, spec) = cell.get(); + + if pri > 0 { + op_desc.inf = pri; + op_desc.spec |= spec; + } + } + + if op_desc.pre + op_desc.post + op_desc.inf == 0 && !is_negate!(op_desc.spec) { + None + } else { + Some(op_desc) + } +} + +fn affirm_xfx(priority: usize, d2: TokenDesc, d3: TokenDesc, d1: TokenDesc) -> bool +{ + d2.priority <= priority + && is_term!(d3.spec) + && is_term!(d1.spec) + && d3.priority < d2.priority + && d1.priority < d2.priority +} + +fn affirm_yfx(priority: usize, d2: TokenDesc, d3: TokenDesc, d1: TokenDesc) -> bool +{ + d2.priority <= priority + && ((is_term!(d3.spec) && d3.priority < d2.priority) + || (is_lterm!(d3.spec) && d3.priority == d2.priority)) + && is_term!(d1.spec) + && d1.priority < d2.priority +} + + +fn affirm_xfy(priority: usize, d2: TokenDesc, d3: TokenDesc, d1: TokenDesc) -> bool +{ + d2.priority < priority + && is_term!(d3.spec) + && d3.priority < d2.priority + && is_term!(d1.spec) + && d1.priority <= d2.priority +} + +fn affirm_yf(d1: TokenDesc, d2: TokenDesc) -> 
bool +{ + let is_valid_lterm = is_lterm!(d2.spec) && d2.priority == d1.priority; + (is_term!(d2.spec) && d2.priority < d1.priority) || is_valid_lterm +} + +fn affirm_xf(d1: TokenDesc, d2: TokenDesc) -> bool +{ + is_term!(d2.spec) && d2.priority < d1.priority +} + +fn affirm_fy(priority: usize, d1: TokenDesc, d2: TokenDesc) -> bool +{ + d2.priority < priority && is_term!(d1.spec) && d1.priority <= d2.priority +} + +fn affirm_fx(priority: usize, d1: TokenDesc, d2: TokenDesc) -> bool +{ + d2.priority <= priority && is_term!(d1.spec) && d1.priority < d2.priority +} + +fn sep_to_atom(tt: TokenType) -> Option +{ + match tt { + TokenType::Open | TokenType::OpenCT => + Some(clause_name!("(")), + TokenType::Close => + Some(clause_name!(")")), + TokenType::OpenList => + Some(clause_name!("[")), + TokenType::CloseList => + Some(clause_name!("]")), + TokenType::OpenCurly => + Some(clause_name!("{")), + TokenType::CloseCurly => + Some(clause_name!("}")), + TokenType::HeadTailSeparator => + Some(clause_name!("|")), + TokenType::Comma => + Some(clause_name!(",")), + TokenType::End => + Some(clause_name!(".")), + _ => None + } +} + +#[derive(Debug, Clone, Copy)] +pub struct OpDesc { + pub pre: usize, + pub inf: usize, + pub post: usize, + pub spec: Specifier +} + +#[derive(Debug)] +pub struct Parser<'a, R: Read> { + lexer: Lexer<'a, R>, + tokens: Vec, + stack: Vec, + terms: Vec, +} + +fn read_tokens<'a, R: Read>(lexer: &mut Lexer<'a, R>) -> Result, ParserError> +{ + let mut tokens = vec![]; + + loop { + let token = lexer.next_token()?; + let at_end = Token::End == token; + + tokens.push(token); + + if at_end { + break; + } + } + + tokens.reverse(); + + Ok(tokens) +} + +impl<'a, R: Read> Parser<'a, R> { + pub fn new( + stream: &'a mut ParsingStream, + atom_tbl: TabledData, + flags: MachineFlags, + ) -> Self { + Parser { lexer: Lexer::new(atom_tbl, flags, stream), + tokens: vec![], + stack: Vec::new(), + terms: Vec::new() } + } + + #[inline] + pub fn line_num(&self) -> usize { + 
self.lexer.line_num + } + + #[inline] + pub fn col_num(&self) -> usize { + self.lexer.col_num + } + + #[inline] + pub fn get_atom_tbl(&self) -> TabledData { + self.lexer.atom_tbl.clone() + } + + #[inline] + pub fn set_atom_tbl(&mut self, atom_tbl: TabledData) { + self.lexer.atom_tbl = atom_tbl; + } + + fn get_term_name(&mut self, td: TokenDesc) -> Option<(ClauseName, Option)> { + match td.tt { + TokenType::HeadTailSeparator => { + Some((clause_name!("|"), Some(SharedOpDesc::new(td.priority, td.spec)))) + } + TokenType::Comma => { + Some((clause_name!(","), Some(SharedOpDesc::new(1000, XFY)))) + } + TokenType::Term => { + match self.terms.pop() { + Some(Term::Constant(_, Constant::Atom(atom, spec))) => + Some((atom, spec)), + Some(term) => { + self.terms.push(term); + None + }, + _ => None + } + } + _ => { + None + } + } + } + + fn push_binary_op(&mut self, td: TokenDesc, spec: Specifier) + { + if let Some(arg2) = self.terms.pop() { + if let Some((name, shared_op_desc)) = self.get_term_name(td) { + if let Some(arg1) = self.terms.pop() { + let term = Term::Clause(Cell::default(), + name, + vec![Box::new(arg1), Box::new(arg2)], + shared_op_desc); + + self.terms.push(term); + self.stack.push(TokenDesc { tt: TokenType::Term, + priority: td.priority, + spec }); + } + } + } + } + + fn push_unary_op(&mut self, td: TokenDesc, spec: Specifier, assoc: u32) + { + if let Some(mut arg1) = self.terms.pop() { + if let Some(mut name) = self.terms.pop() { + if is_postfix!(assoc) { + swap(&mut arg1, &mut name); + } + + if let Term::Constant(_, Constant::Atom(name, shared_op_desc)) = name { + let term = Term::Clause(Cell::default(), name, vec![Box::new(arg1)], + shared_op_desc); + + self.terms.push(term); + self.stack.push(TokenDesc { tt: TokenType::Term, + priority: td.priority, + spec }); + } + } + } + } + + fn promote_atom_op(&mut self, atom: ClauseName, priority: usize, assoc: u32, + op_dir_val: Option) + { + let spec = op_dir_val.map(|op_dir_val| op_dir_val.shared_op_desc()); + + 
self.terms.push(Term::Constant(Cell::default(), Constant::Atom(atom, spec))); + self.stack.push(TokenDesc { tt: TokenType::Term, priority, spec: assoc }); + } + + fn shift(&mut self, token: Token, priority: usize, spec: Specifier) + { + let tt = match token { + Token::Constant(Constant::String(s)) + if self.lexer.flags.double_quotes.is_codes() => { + let mut list = Term::Constant(Cell::default(), Constant::EmptyList); + + for c in s.chars().rev() { + list = Term::Cons( + Cell::default(), + Box::new(Term::Constant( + Cell::default(), + Constant::Fixnum(c as isize), + )), + Box::new(list), + ); + } + + self.terms.push(list); + TokenType::Term + } + Token::Constant(c) => { + self.terms.push(Term::Constant(Cell::default(), c)); + TokenType::Term + }, + Token::Var(v) => { + if v.trim() == "_" { + self.terms.push(Term::AnonVar); + } else { + self.terms.push(Term::Var(Cell::default(), v)); + } + + TokenType::Term + }, + Token::Comma => TokenType::Comma, + Token::Open => TokenType::Open, + Token::Close => TokenType::Close, + Token::OpenCT => TokenType::OpenCT, + Token::HeadTailSeparator => TokenType::HeadTailSeparator, + Token::OpenList => TokenType::OpenList, + Token::CloseList => TokenType::CloseList, + Token::OpenCurly => TokenType::OpenCurly, + Token::CloseCurly => TokenType::CloseCurly, + Token::End => TokenType::End, + }; + + self.stack.push(TokenDesc { tt, priority, spec }); + } + + fn reduce_op(&mut self, priority: usize) { + loop { + if let Some(desc1) = self.stack.pop() { + if let Some(desc2) = self.stack.pop() { + if let Some(desc3) = self.stack.pop() { + if is_xfx!(desc2.spec) && affirm_xfx(priority, desc2, desc3, desc1) + { + self.push_binary_op(desc2, LTERM); + continue; + } + else if is_yfx!(desc2.spec) && affirm_yfx(priority, desc2, desc3, desc1) + { + self.push_binary_op(desc2, LTERM); + continue; + } + else if is_xfy!(desc2.spec) && affirm_xfy(priority, desc2, desc3, desc1) + { + self.push_binary_op(desc2, TERM); + continue; + } else { + 
self.stack.push(desc3); + } + } + + if is_yf!(desc1.spec) && affirm_yf(desc1, desc2) { + self.push_unary_op(desc1, LTERM, YF); + continue; + } else if is_xf!(desc1.spec) && affirm_xf(desc1, desc2) { + self.push_unary_op(desc1, LTERM, XF); + continue; + } else if is_fy!(desc2.spec) && affirm_fy(priority, desc1, desc2) { + self.push_unary_op(desc2, TERM, FY); + continue; + } else if is_fx!(desc2.spec) && affirm_fx(priority, desc1, desc2) { + self.push_unary_op(desc2, TERM, FX); + continue; + } else { + self.stack.push(desc2); + self.stack.push(desc1); + } + } else { + self.stack.push(desc1); + } + } + + break; + } + } + + fn compute_arity_in_brackets(&self) -> Option + { + let mut arity = 0; + + for (i, desc) in self.stack.iter().rev().enumerate() { + if i % 2 == 0 { // expect a term or non-comma operator. + if let TokenType::Comma = desc.tt { + return None; + } else if is_term!(desc.spec) || is_op!(desc.spec) || is_negate!(desc.spec) { + arity += 1; + } else { + return None; + } + } else { + if desc.tt == TokenType::OpenCT { + return Some(arity); + } + + if let TokenType::Comma = desc.tt { + continue; + } else { + return None; + } + } + } + + None + } + + fn reduce_term(&mut self, op_dir: CompositeOp) -> bool + { + if self.stack.is_empty() { + return false; + } + + self.reduce_op(999); + + let arity = match self.compute_arity_in_brackets() { + Some(arity) => arity, + None => return false + }; + + if self.stack.len() > 2 * arity { + let idx = self.stack.len() - 2 * arity - 1; + + if is_infix!(self.stack[idx].spec) && idx > 0 { + if !is_op!(self.stack[idx - 1].spec) && !self.stack[idx - 1].tt.is_sep() { + return false; + } + } + + if arity >= 2 && is_prefix!(self.stack[idx].spec) && self.stack[idx].priority > 0 { + return false; + } + } else { + return false; + } + + let stack_len = self.stack.len() - 2 * arity - 1; + let idx = self.terms.len() - arity; + + if TokenType::Term == self.stack[stack_len].tt { + if self.atomize_term(&self.terms[idx - 1]).is_some() { + 
self.stack.truncate(stack_len + 1); + + let mut subterms: Vec<_> = self.terms.drain(idx ..) + .map(|t| Box::new(t)) + .collect(); + + if let Some(name) = self.terms.pop().and_then(|t| self.atomize_term(&t)) { + // reduce the '.' functor to a cons cell if it applies. + if name.as_str() == "." && subterms.len() == 2 { + let tail = subterms.pop().unwrap(); + let head = subterms.pop().unwrap(); + + self.terms.push(Term::Cons(Cell::default(), head, tail)); + } else { + let spec = get_clause_spec(name.clone(), subterms.len(), op_dir); + self.terms.push(Term::Clause(Cell::default(), name, subterms, spec)); + } + + if let Some(&mut TokenDesc { ref mut priority, ref mut spec, + ref mut tt }) = self.stack.last_mut() + { + *tt = TokenType::Term; + *priority = 0; + *spec = TERM; + } + + return true; + } + } + } + + false + } + + pub fn devour_whitespace(&mut self) -> Result<(), ParserError> { + self.lexer.scan_for_layout()?; + Ok(()) + } + + pub fn reset(&mut self) { + self.stack.clear() + } + + fn expand_comma_compacted_terms(&mut self, index: usize) -> usize + { + if let Some(term) = self.terms.pop() { + let op_desc = self.stack[index - 1]; + + if 0 < op_desc.priority && op_desc.priority < self.stack[index].priority { + /* '|' is a head-tail separator here, not + * an operator, so expand the + * terms it compacted out again. */ + match (term.name(), term.arity()) { + (Some(name), 2) if name.as_str() == "," => { + let terms = unfold_by_str(term, ","); + let arity = terms.len() - 1; + + self.terms.extend(terms.into_iter()); + return arity; + } + _ => { + } + } + } + + self.terms.push(term); + } + + 0 + } + + fn compute_arity_in_list(&self) -> Option + { + let mut arity = 0; + + for (i, desc) in self.stack.iter().rev().enumerate() { + if i % 2 == 0 { // expect a term or non-comma operator. 
+ if let TokenType::Comma = desc.tt { + return None; + } else if is_term!(desc.spec) || is_op!(desc.spec) { + arity += 1; + } else { + return None; + } + } else { + if desc.tt == TokenType::HeadTailSeparator { + if arity == 1 { + continue; + } + + return None; + } else if desc.tt == TokenType::OpenList { + return Some(arity); + } else if desc.tt != TokenType::Comma { + return None; + } + } + } + + None + } + + fn reduce_list(&mut self) -> Result + { + if self.stack.is_empty() { + return Ok(false); + } + + if let Some(ref mut td) = self.stack.last_mut() { + if td.tt == TokenType::OpenList { + td.spec = TERM; + td.tt = TokenType::Term; + td.priority = 0; + + self.terms.push(Term::Constant(Cell::default(), Constant::EmptyList)); + return Ok(true); + } + } + + self.reduce_op(1000); + + let mut arity = match self.compute_arity_in_list() { + Some(arity) => arity, + None => return Ok(false) + }; + + // we know that self.stack.len() >= 2 by this point. + let idx = self.stack.len() - 2; + let list_len = self.stack.len() - 2 * arity; + + let end_term = if self.stack[idx].tt != TokenType::HeadTailSeparator { + Term::Constant(Cell::default(), Constant::EmptyList) + } else { + let term = + match self.terms.pop() { + Some(term) => term, + _ => return Err(ParserError::IncompleteReduction(self.lexer.line_num, + self.lexer.col_num)) + }; + + if self.stack[idx].priority > 1000 { + arity += self.expand_comma_compacted_terms(idx); + } + + arity -= 1; + + term + }; + + let idx = self.terms.len() - arity; + + let list = self.terms.drain(idx ..) 
+ .rev() + .fold(end_term, |acc, t| Term::Cons(Cell::default(), + Box::new(t), + Box::new(acc))); + + self.stack.truncate(list_len); + + self.stack.push(TokenDesc { tt: TokenType::Term, priority: 0, spec: TERM }); + self.terms.push(list); + + Ok(true) + } + + fn reduce_curly(&mut self) -> Result { + if self.stack.is_empty() { + return Ok(false); + } + + if let Some(ref mut td) = self.stack.last_mut() { + if td.tt == TokenType::OpenCurly { + td.tt = TokenType::Term; + td.priority = 0; + td.spec = TERM; + + let term = Term::Constant(Cell::default(), + atom!("{}", self.lexer.atom_tbl)); + self.terms.push(term); + return Ok(true); + } + } + + self.reduce_op(1201); + + if self.stack.len() > 1 { + if let Some(td) = self.stack.pop() { + if let Some(ref mut oc) = self.stack.last_mut() { + if td.tt != TokenType::Term { + return Ok(false); + } + + if oc.tt == TokenType::OpenCurly { + oc.tt = TokenType::Term; + oc.priority = 0; + oc.spec = TERM; + + let term = match self.terms.pop() { + Some(term) => term, + _ => return Err(ParserError::IncompleteReduction(self.lexer.line_num, + self.lexer.col_num)) + }; + + self.terms.push(Term::Clause(Cell::default(), clause_name!("{}"), + vec![Box::new(term)], None)); + + return Ok(true); + } + } + } + } + + Ok(false) + } + + fn reduce_brackets(&mut self) -> bool { + if self.stack.is_empty() { + return false; + } + + self.reduce_op(1400); + + if self.stack.len() == 1 { + return false; + } + + let idx = self.stack.len() - 2; + + match self.stack.remove(idx) { + td => + match td.tt { + TokenType::Open | TokenType::OpenCT => { + if self.stack[idx].tt == TokenType::Comma { + return false; + } + + if let Some(atom) = sep_to_atom(self.stack[idx].tt) { + self.terms.push(Term::Constant(Cell::default(), Constant::Atom(atom, None))); + } + + self.stack[idx].spec = TERM; + self.stack[idx].tt = TokenType::Term; + self.stack[idx].priority = 0; + true + }, + _ => false + } + } + } + + fn shift_op(&mut self, name: ClauseName, op_dir: CompositeOp) -> 
Result { + if let Some(OpDesc { pre, inf, post, spec }) = get_desc(name.clone(), op_dir) { + if (pre > 0 && inf + post > 0) || is_negate!(spec) { + match self.tokens.last().ok_or(ParserError::UnexpectedEOF)? { + // do this when layout hasn't been inserted, + // ie. why we don't match on Token::Open. + &Token::OpenCT => { + // can't be prefix, so either inf == 0 + // or post == 0. + self.reduce_op(inf + post); + + let fixity = if inf > 0 { Fixity::In } else { Fixity::Post }; + let op_dir_val = op_dir.get(name.clone(), fixity); + + self.promote_atom_op(name, inf + post, spec & (XFX | XFY | YFX | YF | XF), + op_dir_val); + }, + _ => { + self.reduce_op(inf + post); + + if let Some(TokenDesc { spec: pspec, .. }) = self.stack.last().cloned() { + // rterm.c: 412 + if is_term!(pspec) { + let fixity = if inf > 0 { Fixity::In } else { Fixity::Post }; + let op_dir_val = op_dir.get(name.clone(), fixity); + + self.promote_atom_op(name, inf + post, + spec & (XFX | XFY | YFX | XF | YF), + op_dir_val); + } else { + let op_dir_val = op_dir.get(name.clone(), Fixity::Pre); + self.promote_atom_op(name, pre, spec & (FX | FY | NEGATIVE_SIGN), op_dir_val); + } + } else { + let op_dir_val = op_dir.get(name.clone(), Fixity::Pre); + self.promote_atom_op(name, pre, spec & (FX | FY | NEGATIVE_SIGN), op_dir_val); + } + } + } + } else { + let op_dir_val = op_dir.get(name.clone(), + if pre + inf == 0 { + Fixity::Post + } else if post + pre == 0 { + Fixity::In + } else { + Fixity::Pre + }); + + self.reduce_op(pre + inf + post); // only one non-zero priority among these. + self.promote_atom_op(name, pre + inf + post, spec, op_dir_val); + } + + Ok(true) + } else { // not an operator. 
+ Ok(false) + } + } + + fn atomize_term(&self, term: &Term) -> Option { + match term { + &Term::Constant(_, ref c) => self.atomize_constant(c), + _ => None + } + } + + fn atomize_constant(&self, c: &Constant) -> Option { + match c { + &Constant::Atom(ref name, _) => Some(name.clone()), + &Constant::Char(c) => + Some(clause_name!(c.to_string(), self.lexer.atom_tbl)), + &Constant::EmptyList => + Some(clause_name!(c.to_string(), self.lexer.atom_tbl)), + _ => None + } + } + + fn negate_number( + &mut self, + n: N, + negator: Negator, + constr: ToConstant + ) + where Negator: Fn(N) -> N, + ToConstant: Fn(N) -> Constant + { + if let Some(desc) = self.stack.last().cloned() { + if let Some(term) = self.terms.last().cloned() { + match term { + Term::Constant(_, Constant::Atom(ref name, _)) + if name.as_str() == "-" && (is_prefix!(desc.spec) || is_negate!(desc.spec)) => { + self.stack.pop(); + self.terms.pop(); + + self.shift(Token::Constant(constr(negator(n))), 0, TERM); + return; + }, + _ => {} + } + } + } + + self.shift(Token::Constant(constr(n)), 0, TERM); + } + + fn shift_token(&mut self, token: Token, op_dir: CompositeOp) -> Result<(), ParserError> { + fn negate_rc(mut t: Rc) -> Rc { + match Rc::get_mut(&mut t) { + Some(t) => { + t.neg_assign(); + } + None => { + } + }; + + t + } + + match token { + Token::Constant(Constant::Fixnum(n)) => + self.negate_number(n, |n| -n, Constant::Fixnum), + Token::Constant(Constant::Integer(n)) => + self.negate_number(n, negate_rc, Constant::Integer), + Token::Constant(Constant::Rational(n)) => + self.negate_number(n, negate_rc, Constant::Rational), + Token::Constant(Constant::Float(n)) => + self.negate_number( + n, + |n| OrderedFloat(-n.into_inner()), + |n| Constant::Float(n) + ), + Token::Constant(c) => + if let Some(name) = self.atomize_constant(&c) { + if !self.shift_op(name, op_dir)? 
{ + self.shift(Token::Constant(c), 0, TERM); + } + } else { + self.shift(Token::Constant(c), 0, TERM); + }, + Token::Var(v) => self.shift(Token::Var(v), 0, TERM), + Token::Open => self.shift(Token::Open, 1300, DELIMITER), + Token::OpenCT => self.shift(Token::OpenCT, 1300, DELIMITER), + Token::Close => + if !self.reduce_term(op_dir) { + if !self.reduce_brackets() { + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); + } + }, + Token::OpenList => self.shift(Token::OpenList, 1300, DELIMITER), + Token::CloseList => + if !self.reduce_list()? { + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); + }, + Token::OpenCurly => self.shift(Token::OpenCurly, 1300, DELIMITER), + Token::CloseCurly => + if !self.reduce_curly()? { + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); + }, + Token::HeadTailSeparator => { + /* '|' as an operator must have priority > 1000 and can only be infix. + * See: http://www.complang.tuwien.ac.at/ulrich/iso-prolog/dtc2#Res_A78 + */ + let (priority, spec) = get_desc(clause_name!("|"), op_dir) + .map(|OpDesc { inf, spec, .. 
}| (inf, spec)) + .unwrap_or((1000, DELIMITER)); + + self.reduce_op(priority); + self.shift(Token::HeadTailSeparator, priority, spec); + }, + Token::Comma => { + self.reduce_op(1000); + self.shift(Token::Comma, 1000, XFY); + }, + Token::End => + match self.stack.last().map(|t| t.tt) { + Some(TokenType::Open) + | Some(TokenType::OpenCT) + | Some(TokenType::OpenList) + | Some(TokenType::OpenCurly) + | Some(TokenType::HeadTailSeparator) + | Some(TokenType::Comma) + => return Err(ParserError::IncompleteReduction(self.lexer.line_num, + self.lexer.col_num)), + _ => {} + } + } + + Ok(()) + } + + #[inline] + pub fn eof(&mut self) -> Result { + self.lexer.eof() + } + + pub fn read_term(&mut self, op_dir: CompositeOp) -> Result + { + self.tokens = read_tokens(&mut self.lexer)?; + + while let Some(token) = self.tokens.pop() { + self.shift_token(token, op_dir)?; + } + + self.reduce_op(1400); + + if self.terms.len() > 1 || self.stack.len() > 1 { + return Err(ParserError::IncompleteReduction(self.lexer.line_num, self.lexer.col_num)); + } + + match self.terms.pop() { + Some(term) => if self.terms.is_empty() { + Ok(term) + } else { + Err(ParserError::IncompleteReduction(self.lexer.line_num, self.lexer.col_num)) + }, + _ => Err(ParserError::IncompleteReduction(self.lexer.line_num, self.lexer.col_num)) + } + } + + pub fn read(&mut self, op_dir: CompositeOp) -> Result, ParserError> + { + let mut terms = Vec::new(); + + loop { + terms.push(self.read_term(op_dir)?); + + if self.lexer.eof()? 
/// An iterator adaptor that lets any number of items be pushed back in front
/// of the wrapped iterator.
///
/// Put-back items are kept in a stack (`top`): the item put back last is the
/// next one yielded. Only once the stack is empty are items drawn from the
/// wrapped iterator again.
#[derive(Debug, Clone)]
pub struct PutBackN<I: Iterator> {
    top: Vec<I::Item>,
    iter: Peekable<I>,
}

/// Wraps `iterable` in a [`PutBackN`] with an empty put-back stack.
pub fn put_back_n<I>(iterable: I) -> PutBackN<I::IntoIter>
    where I: IntoIterator
{
    PutBackN {
        top: Vec::new(),
        iter: iterable.into_iter().peekable(),
    }
}

impl<I: Iterator> PutBackN<I> {
    /// Pushes `item` back; it becomes the next item yielded.
    #[inline]
    pub(crate) fn put_back(&mut self, item: I::Item) {
        self.top.push(item);
    }

    /// Removes and returns the whole put-back stack, bottom first (the last
    /// element of the returned vector is the item that would have been yielded
    /// next). The wrapped iterator is left untouched.
    #[inline]
    pub fn take_buf(&mut self) -> Vec<I::Item> {
        std::mem::take(&mut self.top)
    }

    /// Returns a reference to the next item without consuming it.
    ///
    /// When the put-back stack is empty, the next item is pulled out of the
    /// wrapped iterator and parked on the stack.
    #[inline]
    pub(crate) fn peek(&mut self) -> Option<&I::Item> {
        if self.top.is_empty() {
            /* This is a kludge for Ctrl-D not being
             * handled properly if self.iter().peek() isn't called
             * first. */
            self.iter.peek()?;

            let item = self.iter.next()?;
            self.top.push(item);
        }

        self.top.last()
    }

    /// Pushes back every item of `iter` so that they are yielded again in
    /// `iter`'s own order (its first item comes out first).
    #[inline]
    pub(crate) fn put_back_all<DEI: DoubleEndedIterator<Item = I::Item>>(&mut self, iter: DEI) {
        self.top.extend(iter.rev());
    }
}

impl<I: Iterator> Iterator for PutBackN<I> {
    type Item = I::Item;

    #[inline]
    fn next(&mut self) -> Option<I::Item> {
        if self.top.is_empty() {
            self.iter.next()
        } else {
            self.top.pop()
        }
    }

    /// Buffered items plus whatever the wrapped iterator reports.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let buffered = self.top.len();
        let (lower, upper) = self.iter.size_hint();

        (
            lower.saturating_add(buffered),
            upper.and_then(|hi| hi.checked_add(buffered)),
        )
    }
}
/// A shared, interior-mutable set of interned values together with the name
/// of the module the table is associated with.
///
/// Cloning a `TabledData` yields another handle to the *same* table.
pub struct TabledData<T> {
    table: Rc<RefCell<HashSet<Rc<T>>>>,
    pub(crate) module_name: Rc<String>
}

impl<T: fmt::Debug> fmt::Debug for TabledData<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("TabledData")
            .field("table", &self.table)
            .field("module_name", &self.module_name)
            .finish()
    }
}

impl<T> Clone for TabledData<T> {
    fn clone(&self) -> Self {
        TabledData {
            table: Rc::clone(&self.table),
            module_name: Rc::clone(&self.module_name),
        }
    }
}

/// Two handles are equal when they point at the same table allocation and
/// carry equal module names; table contents are not compared.
impl<T> PartialEq for TabledData<T> {
    fn eq(&self, other: &TabledData<T>) -> bool {
        Rc::ptr_eq(&self.table, &other.table) && self.module_name == other.module_name
    }
}

impl<T: Hash + Eq> TabledData<T> {
    /// Creates an empty table for the module called `module_name`.
    #[inline]
    pub fn new(module_name: Rc<String>) -> Self {
        TabledData {
            table: Rc::new(RefCell::new(HashSet::new())),
            module_name
        }
    }

    /// Mutably borrows the underlying set. Panics, like any `RefCell`, if the
    /// set is already borrowed.
    #[inline]
    pub fn borrow_mut(&self) -> RefMut<'_, HashSet<Rc<T>>> {
        self.table.borrow_mut()
    }
}

/// A reference-counted value interned in a [`TabledData`] table.
///
/// Comparison, ordering and hashing look only at the value, never at the
/// table it is interned in.
pub struct TabledRc<T: Hash + Eq> {
    pub(crate) atom: Rc<T>,
    pub table: TabledData<T>
}

impl<T: Hash + Eq + fmt::Debug> fmt::Debug for TabledRc<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("TabledRc")
            .field("atom", &self.atom)
            .field("table", &self.table)
            .finish()
    }
}

// this Clone instance is manually defined to prevent the compiler
// from complaining when deriving Clone for StringList.
impl<T: Hash + Eq> Clone for TabledRc<T> {
    fn clone(&self) -> Self {
        TabledRc {
            atom: Rc::clone(&self.atom),
            table: self.table.clone(),
        }
    }
}

impl<T: Ord + Hash + Eq> PartialOrd for TabledRc<T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl<T: Ord + Hash + Eq> Ord for TabledRc<T> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.atom.cmp(&other.atom)
    }
}

impl<T: Hash + Eq> PartialEq for TabledRc<T> {
    fn eq(&self, other: &TabledRc<T>) -> bool {
        self.atom == other.atom
    }
}

impl<T: Hash + Eq> Eq for TabledRc<T> {}

impl<T: Hash + Eq> Hash for TabledRc<T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.atom.hash(state)
    }
}

impl<T: Hash + Eq> TabledRc<T> {
    /// Interns `atom` in `table` and returns a handle to it.
    ///
    /// If an equal value is already interned, its allocation is shared and
    /// `atom` is dropped; otherwise `atom` is moved into a fresh allocation
    /// that is added to the table.
    pub fn new(atom: T, table: TabledData<T>) -> Self {
        let atom = {
            // One borrow for lookup and insertion; released at the end of
            // this block, before `table` is moved into the result.
            let mut interned = table.borrow_mut();

            match interned.get(&atom) {
                Some(existing) => Rc::clone(existing),
                None => {
                    let fresh = Rc::new(atom);
                    interned.insert(Rc::clone(&fresh));
                    fresh
                }
            }
        };

        TabledRc { atom, table }
    }

    /// Returns another strong reference to the interned value.
    #[inline]
    pub fn inner(&self) -> Rc<T> {
        self.atom.clone()
    }

    /// Returns the module name stored in this handle's table.
    #[inline]
    pub(crate) fn owning_module(&self) -> Rc<String> {
        self.table.module_name.clone()
    }
}

impl<T: Hash + Eq> Drop for TabledRc<T> {
    fn drop(&mut self) {
        // A count of 2 means only this handle and the table's own entry are
        // left, so the entry is removed along with the last handle.
        if Rc::strong_count(&self.atom) == 2 {
            self.table.borrow_mut().remove(&self.atom);
        }
    }
}

impl<T: Hash + Eq> Deref for TabledRc<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &*self.atom
    }
}

impl<T: Hash + Eq + fmt::Display> fmt::Display for TabledRc<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", &*self.atom)
    }
}

/// Interns `String::from($e)` in a clone of the table handle `$tbl`.
#[macro_export]
macro_rules! tabled_rc {
    ($e:expr, $tbl:expr) => (
        TabledRc::new(String::from($e), $tbl.clone())
    )
}
tabled_rc { + ($e:expr, $tbl:expr) => ( + TabledRc::new(String::from($e), $tbl.clone()) + ) +} diff --git a/prolog_parser/tests/bom.rs b/prolog_parser/tests/bom.rs new file mode 100644 index 00000000..3be7278a --- /dev/null +++ b/prolog_parser/tests/bom.rs @@ -0,0 +1,43 @@ +extern crate prolog_parser; + +use prolog_parser::ast::*; +use prolog_parser::lexer::{Lexer, Token}; +use prolog_parser::tabled_rc::TabledData; + +use std::rc::Rc; + +#[test] +fn valid_token() { + let stream = parsing_stream("valid text".as_bytes()); + assert!(stream.is_ok()); +} + +#[test] +fn empty_stream() { + let bytes: &[u8] = &[]; + assert!(parsing_stream(bytes).is_ok()); +} + +#[test] +fn skip_utf8_bom() { + let atom_tbl = TabledData::new(Rc::new("my_module".to_string())); + let flags = MachineFlags::default(); + let bytes: &[u8] = &[0xEF, 0xBB, 0xBF, '4' as u8, '\n' as u8]; + let mut stream = parsing_stream(bytes).expect("valid stream"); + let mut lexer = Lexer::new(atom_tbl, flags, &mut stream); + match lexer.next_token() { + Ok(Token::Constant(Constant::Fixnum(4))) => (), + _ => assert!(false) + } +} + +#[test] +fn invalid_utf16_bom() { + let bytes: &[u8] = &[0xFF, 0xFE, 'a' as u8, '\n' as u8]; + let stream = parsing_stream(bytes); + match stream { + Err(ParserError::Utf8Error(0, 0)) => (), + _ => assert!(false) + } +} + diff --git a/prolog_parser/tests/parse_tokens.rs b/prolog_parser/tests/parse_tokens.rs new file mode 100644 index 00000000..263fcf8c --- /dev/null +++ b/prolog_parser/tests/parse_tokens.rs @@ -0,0 +1,107 @@ +extern crate prolog_parser; + +use prolog_parser::ast::*; +use prolog_parser::lexer::{Lexer, Token}; +use prolog_parser::tabled_rc::TabledData; + +use std::rc::Rc; + +fn read_all_tokens(text: &str) -> Result, ParserError> { + let atom_tbl = TabledData::new(Rc::new("my_module".to_string())); + let flags = MachineFlags::default(); + let mut stream = parsing_stream(text.as_bytes())?; + let mut lexer = Lexer::new(atom_tbl, flags, &mut stream); + + let mut tokens = 
Vec::new(); + while !lexer.eof()? { + let token = lexer.next_token()?; + tokens.push(token); + } + Ok(tokens) +} + +#[test] +fn empty_multiline_comment() -> Result<(), ParserError> { + let tokens = read_all_tokens("/**/ 4\n")?; + assert_eq!(tokens, [Token::Constant(Constant::Fixnum(4))]); + Ok(()) +} + +#[test] +fn any_char_multiline_comment() -> Result<(), ParserError> { + let tokens = read_all_tokens("/* █╗╚═══╝ © */ 4\n")?; + assert_eq!(tokens, [Token::Constant(Constant::Fixnum(4))]); + Ok(()) +} + +#[test] +fn simple_char() -> Result<(), ParserError> { + let tokens = read_all_tokens("'a'\n")?; + assert_eq!(tokens, [Token::Constant(Constant::Char('a'))]); + Ok(()) +} + +#[test] +fn char_with_meta_seq() -> Result<(), ParserError> { + let tokens = read_all_tokens(r#"'\\' '\'' '\"' '\`' "#)?; // use literal string so \ are escaped + assert_eq!(tokens, [Token::Constant(Constant::Char('\\')), + Token::Constant(Constant::Char('\'')), + Token::Constant(Constant::Char('"')), + Token::Constant(Constant::Char('`'))]); + Ok(()) +} + +#[test] +fn char_with_control_seq() -> Result<(), ParserError> { + let tokens = read_all_tokens(r"'\a' '\b' '\r' '\f' '\t' '\n' '\v' ")?; + assert_eq!(tokens, [ + Token::Constant(Constant::Char('\u{07}')), + Token::Constant(Constant::Char('\u{08}')), + Token::Constant(Constant::Char('\r')), + Token::Constant(Constant::Char('\u{0c}')), + Token::Constant(Constant::Char('\t')), + Token::Constant(Constant::Char('\n')), + Token::Constant(Constant::Char('\u{0b}')), + ]); + Ok(()) +} + +#[test] +fn char_with_octseq() -> Result<(), ParserError> { + let tokens = read_all_tokens(r"'\60433\' ")?; + assert_eq!(tokens, [Token::Constant(Constant::Char('愛'))]); // Japanese character + Ok(()) +} + +#[test] +fn char_with_octseq_0() -> Result<(), ParserError> { + let tokens = read_all_tokens(r"'\0\' ")?; + assert_eq!(tokens, [Token::Constant(Constant::Char('\u{0000}'))]); + Ok(()) +} + +#[test] +fn char_with_hexseq() -> Result<(), ParserError> { + let tokens = 
read_all_tokens(r"'\x2124\' ")?; + assert_eq!(tokens, [Token::Constant(Constant::Char('ℤ'))]); // Z math symbol + Ok(()) +} + +#[test] +fn char_with_hexseq_invalid() { + assert!(read_all_tokens(r"'\x\' ").is_err()); +} + +#[test] +fn empty() -> Result<(), ParserError> { + let tokens = read_all_tokens("")?; + assert!(tokens.is_empty()); + Ok(()) +} + +#[test] +fn comment_then_eof() -> Result<(), ParserError> { + let tokens = read_all_tokens("% only a comment")?; + assert_eq!(tokens, [Token::End]); + Ok(()) +} -- 2.54.0