Repositorios git - scryer-prolog.git/commitdiff
replace select by scraper
author Bennet Bleßmann <[email protected]>
Sun, 4 Aug 2024 00:35:36 +0000 (02:35 +0200)
committer Bennet Bleßmann <[email protected]>
Sun, 4 Aug 2024 00:35:36 +0000 (02:35 +0200)
- the latter appears better maintained and more up to date
- removes some duplicate/outdated transitive deps

Cargo.lock
Cargo.toml
src/machine/system_calls.rs

index 7366ffac02f9380029dc37a5c811c006be87c016..f2d3832232dabaaadd2880fd8b2af0a4d7e122fc 100644 (file)
@@ -189,30 +189,15 @@ dependencies = [
  "serde",
 ]
 
-[[package]]
-name = "bit-set"
-version = "0.5.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
-dependencies = [
- "bit-vec 0.6.3",
-]
-
 [[package]]
 name = "bit-set"
 version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
 dependencies = [
- "bit-vec 0.8.0",
+ "bit-vec",
 ]
 
-[[package]]
-name = "bit-vec"
-version = "0.6.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
-
 [[package]]
 name = "bit-vec"
 version = "0.8.0"
@@ -577,6 +562,29 @@ dependencies = [
  "typenum",
 ]
 
+[[package]]
+name = "cssparser"
+version = "0.31.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
+dependencies = [
+ "cssparser-macros",
+ "dtoa-short",
+ "itoa",
+ "phf 0.11.2",
+ "smallvec",
+]
+
+[[package]]
+name = "cssparser-macros"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
+dependencies = [
+ "quote",
+ "syn 2.0.72",
+]
+
 [[package]]
 name = "ctrlc"
 version = "3.4.4"
@@ -691,6 +699,17 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "derive_more"
+version = "0.99.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.72",
+]
+
 [[package]]
 name = "difflib"
 version = "0.4.0"
@@ -741,12 +760,33 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
 
+[[package]]
+name = "dtoa"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653"
+
+[[package]]
+name = "dtoa-short"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
+dependencies = [
+ "dtoa",
+]
+
 [[package]]
 name = "dunce"
 version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b"
 
+[[package]]
+name = "ego-tree"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
+
 [[package]]
 name = "either"
 version = "1.13.0"
@@ -1123,16 +1163,16 @@ dependencies = [
 
 [[package]]
 name = "html5ever"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
+checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
 dependencies = [
  "log",
  "mac",
  "markup5ever",
  "proc-macro2",
  "quote",
- "syn 1.0.109",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -1542,30 +1582,18 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
 
 [[package]]
 name = "markup5ever"
-version = "0.11.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
+checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
 dependencies = [
  "log",
- "phf 0.10.1",
- "phf_codegen",
+ "phf 0.11.2",
+ "phf_codegen 0.11.2",
  "string_cache",
  "string_cache_codegen",
  "tendril",
 ]
 
-[[package]]
-name = "markup5ever_rcdom"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2"
-dependencies = [
- "html5ever",
- "markup5ever",
- "tendril",
- "xml5ever",
-]
-
 [[package]]
 name = "memchr"
 version = "2.7.4"
@@ -1880,6 +1908,16 @@ dependencies = [
  "phf_shared 0.10.0",
 ]
 
+[[package]]
+name = "phf_codegen"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
+dependencies = [
+ "phf_generator 0.11.2",
+ "phf_shared 0.11.2",
+]
+
 [[package]]
 name = "phf_generator"
 version = "0.10.0"
@@ -2457,6 +2495,21 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
+[[package]]
+name = "scraper"
+version = "0.19.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "761fb705fdf625482d2ed91d3f0559dcfeab2798fe2771c69560a774865d0802"
+dependencies = [
+ "ahash",
+ "cssparser",
+ "ego-tree",
+ "html5ever",
+ "once_cell",
+ "selectors",
+ "tendril",
+]
+
 [[package]]
 name = "scryer-modular-bitfield"
 version = "0.11.4"
@@ -2485,7 +2538,7 @@ dependencies = [
  "arcu",
  "assert_cmd",
  "base64 0.22.1",
- "bit-set 0.8.0",
+ "bit-set",
  "bitvec",
  "blake2",
  "bytes",
@@ -2500,6 +2553,7 @@ dependencies = [
  "derive_deref",
  "dirs-next",
  "divrem",
+ "ego-tree",
  "futures",
  "fxhash",
  "getrandom",
@@ -2530,8 +2584,8 @@ dependencies = [
  "roxmltree",
  "rustyline",
  "ryu",
+ "scraper",
  "scryer-modular-bitfield",
- "select",
  "serde",
  "serde-wasm-bindgen",
  "serde_json",
@@ -2583,14 +2637,22 @@ dependencies = [
 ]
 
 [[package]]
-name = "select"
-version = "0.6.0"
+name = "selectors"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f9da09dc3f4dfdb6374cbffff7a2cffcec316874d4429899eefdc97b3b94dcd"
+checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
 dependencies = [
- "bit-set 0.5.3",
- "html5ever",
- "markup5ever_rcdom",
+ "bitflags 2.6.0",
+ "cssparser",
+ "derive_more",
+ "fxhash",
+ "log",
+ "new_debug_unreachable",
+ "phf 0.10.1",
+ "phf_codegen 0.10.0",
+ "precomputed-hash",
+ "servo_arc",
+ "smallvec",
 ]
 
 [[package]]
@@ -2682,6 +2744,15 @@ dependencies = [
  "syn 2.0.72",
 ]
 
+[[package]]
+name = "servo_arc"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
+dependencies = [
+ "stable_deref_trait",
+]
+
 [[package]]
 name = "sha1"
 version = "0.10.6"
@@ -3694,17 +3765,6 @@ dependencies = [
  "tap",
 ]
 
-[[package]]
-name = "xml5ever"
-version = "0.17.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650"
-dependencies = [
- "log",
- "mac",
- "markup5ever",
-]
-
 [[package]]
 name = "zerocopy"
 version = "0.7.35"
index 219d454b36de9e8703826fd6e22f013fadc1253d..131304ef4396c68703ffe201f2bb09e5c36c3c17 100644 (file)
@@ -37,6 +37,7 @@ to-syn-value_derive = "0.1.1"
 walkdir = "2"
 
 [dependencies]
+arcu = { version = "0.1.1", features = ["thread_local_counter"] }
 base64 = "0.22.1"
 bit-set = "0.8.0"
 bitvec = "1"
@@ -67,14 +68,18 @@ ring = { version = "0.17.8", features = ["wasm32_unknown_unknown_js"] }
 ripemd = "0.1.3"
 roxmltree = "0.20.0"
 ryu = "1.0.18"
-select = "0.6.0"
 sha3 = "0.10.8"
 smallvec = "1.13.2"
 static_assertions = "1.1.0"
 
+scraper = { version = "0.19.1", default-features = false, features = [
+    "errors",
+] }
+ego-tree = "0.6.2"
+
+
 serde_json = "1.0.122"
 serde = "1.0.204"
-arcu = { version = "0.1.1", features = ["thread_local_counter"] }
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 crossterm = { version = "0.28.1", optional = true }
index f2b648961824674ed317bf5a43ba58c059f3f519..9cbdca3f145f49dce7b9c7b32b775b9293917d0b 100644 (file)
@@ -89,7 +89,6 @@ use native_tls::{Identity, TlsAcceptor, TlsConnector};
 
 use base64;
 use roxmltree;
-use select;
 
 #[cfg(feature = "http")]
 use futures::future;
@@ -7772,8 +7771,8 @@ impl Machine {
             .machine_st
             .value_to_str_like(self.machine_st.registers[1])
         {
-            let doc = select::document::Document::from_read(string.as_str().as_bytes()).unwrap();
-            let result = self.html_node_to_term(doc.nth(0).unwrap());
+            let document = scraper::Html::parse_document(&string.as_str());
+            let result = self.html_node_to_term(document.tree.root());
 
             unify!(self.machine_st, self.machine_st.registers[2], result);
         } else {
@@ -8218,17 +8217,20 @@ impl Machine {
         }
     }
 
-    pub(super) fn html_node_to_term(&mut self, node: select::node::Node) -> HeapCellValue {
-        match node.name() {
+    pub(super) fn html_node_to_term(
+        &mut self,
+        node: ego_tree::NodeRef<'_, scraper::Node>,
+    ) -> HeapCellValue {
+        match node.value().as_element() {
             None => put_complete_string(
                 &mut self.machine_st.heap,
-                &node.text(),
+                &node.value().as_text().unwrap().text,
                 &self.machine_st.atom_tbl,
             ),
-            Some(name) => {
+            Some(element) => {
                 let mut avec = Vec::new();
 
-                for attr in node.attrs() {
+                for attr in element.attrs() {
                     let name = AtomTable::build_with(&self.machine_st.atom_tbl, attr.0);
                     let value = put_complete_string(
                         &mut self.machine_st.heap,
@@ -8259,7 +8261,7 @@ impl Machine {
                     cvec.into_iter()
                 ));
 
-                let tag = AtomTable::build_with(&self.machine_st.atom_tbl, name);
+                let tag = AtomTable::build_with(&self.machine_st.atom_tbl, element.name());
                 let result = str_loc_as_cell!(self.machine_st.heap.len());
 
                 self.machine_st