From 260c52adec6e942d529727e683b185cac0e1bf1d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bennet=20Ble=C3=9Fmann?= Date: Sun, 4 Aug 2024 02:35:36 +0200 Subject: [PATCH] replace select with scraper - the latter appears better maintained and more up to date - removes some duplicate/outdated transitive deps --- Cargo.lock | 168 ++++++++++++++++++++++++------------ Cargo.toml | 9 +- src/machine/system_calls.rs | 20 +++-- 3 files changed, 132 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7366ffac..f2d38322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -189,30 +189,15 @@ dependencies = [ "serde", ] -[[package]] -name = "bit-set" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" -dependencies = [ - "bit-vec 0.6.3", -] - [[package]] name = "bit-set" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ - "bit-vec 0.8.0", + "bit-vec", ] -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - [[package]] name = "bit-vec" version = "0.8.0" @@ -577,6 +562,29 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf 0.11.2", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.72", +] + [[package]] name = "ctrlc" version = "3.4.4" @@ 
-691,6 +699,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_more" +version = "0.99.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + [[package]] name = "difflib" version = "0.4.0" @@ -741,12 +760,33 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + [[package]] name = "dunce" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" +[[package]] +name = "ego-tree" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" + [[package]] name = "either" version = "1.13.0" @@ -1123,16 +1163,16 @@ dependencies = [ [[package]] name = "html5ever" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" dependencies = [ "log", "mac", "markup5ever", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.72", ] [[package]] @@ -1542,30 +1582,18 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "markup5ever" 
-version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" dependencies = [ "log", - "phf 0.10.1", - "phf_codegen", + "phf 0.11.2", + "phf_codegen 0.11.2", "string_cache", "string_cache_codegen", "tendril", ] -[[package]] -name = "markup5ever_rcdom" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2" -dependencies = [ - "html5ever", - "markup5ever", - "tendril", - "xml5ever", -] - [[package]] name = "memchr" version = "2.7.4" @@ -1880,6 +1908,16 @@ dependencies = [ "phf_shared 0.10.0", ] +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + [[package]] name = "phf_generator" version = "0.10.0" @@ -2457,6 +2495,21 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "761fb705fdf625482d2ed91d3f0559dcfeab2798fe2771c69560a774865d0802" +dependencies = [ + "ahash", + "cssparser", + "ego-tree", + "html5ever", + "once_cell", + "selectors", + "tendril", +] + [[package]] name = "scryer-modular-bitfield" version = "0.11.4" @@ -2485,7 +2538,7 @@ dependencies = [ "arcu", "assert_cmd", "base64 0.22.1", - "bit-set 0.8.0", + "bit-set", "bitvec", "blake2", "bytes", @@ -2500,6 +2553,7 @@ dependencies = [ "derive_deref", "dirs-next", "divrem", + "ego-tree", "futures", "fxhash", "getrandom", @@ -2530,8 +2584,8 @@ dependencies 
= [ "roxmltree", "rustyline", "ryu", + "scraper", "scryer-modular-bitfield", - "select", "serde", "serde-wasm-bindgen", "serde_json", @@ -2583,14 +2637,22 @@ dependencies = [ ] [[package]] -name = "select" -version = "0.6.0" +name = "selectors" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f9da09dc3f4dfdb6374cbffff7a2cffcec316874d4429899eefdc97b3b94dcd" +checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" dependencies = [ - "bit-set 0.5.3", - "html5ever", - "markup5ever_rcdom", + "bitflags 2.6.0", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.10.1", + "phf_codegen 0.10.0", + "precomputed-hash", + "servo_arc", + "smallvec", ] [[package]] @@ -2682,6 +2744,15 @@ dependencies = [ "syn 2.0.72", ] +[[package]] +name = "servo_arc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.6" @@ -3694,17 +3765,6 @@ dependencies = [ "tap", ] -[[package]] -name = "xml5ever" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" -dependencies = [ - "log", - "mac", - "markup5ever", -] - [[package]] name = "zerocopy" version = "0.7.35" diff --git a/Cargo.toml b/Cargo.toml index 219d454b..131304ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ to-syn-value_derive = "0.1.1" walkdir = "2" [dependencies] +arcu = { version = "0.1.1", features = ["thread_local_counter"] } base64 = "0.22.1" bit-set = "0.8.0" bitvec = "1" @@ -67,14 +68,18 @@ ring = { version = "0.17.8", features = ["wasm32_unknown_unknown_js"] } ripemd = "0.1.3" roxmltree = "0.20.0" ryu = "1.0.18" -select = "0.6.0" sha3 = "0.10.8" smallvec = "1.13.2" static_assertions = "1.1.0" +scraper = 
{ version = "0.19.1", default-features = false, features = [ + "errors", +] } +ego-tree = "0.6.2" + + serde_json = "1.0.122" serde = "1.0.204" -arcu = { version = "0.1.1", features = ["thread_local_counter"] } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] crossterm = { version = "0.28.1", optional = true } diff --git a/src/machine/system_calls.rs b/src/machine/system_calls.rs index f2b64896..9cbdca3f 100644 --- a/src/machine/system_calls.rs +++ b/src/machine/system_calls.rs @@ -89,7 +89,6 @@ use native_tls::{Identity, TlsAcceptor, TlsConnector}; use base64; use roxmltree; -use select; #[cfg(feature = "http")] use futures::future; @@ -7772,8 +7771,8 @@ impl Machine { .machine_st .value_to_str_like(self.machine_st.registers[1]) { - let doc = select::document::Document::from_read(string.as_str().as_bytes()).unwrap(); - let result = self.html_node_to_term(doc.nth(0).unwrap()); + let document = scraper::Html::parse_document(&string.as_str()); + let result = self.html_node_to_term(document.tree.root()); unify!(self.machine_st, self.machine_st.registers[2], result); } else { @@ -8218,17 +8217,20 @@ impl Machine { } } - pub(super) fn html_node_to_term(&mut self, node: select::node::Node) -> HeapCellValue { - match node.name() { + pub(super) fn html_node_to_term( + &mut self, + node: ego_tree::NodeRef<'_, scraper::Node>, + ) -> HeapCellValue { + match node.value().as_element() { None => put_complete_string( &mut self.machine_st.heap, - &node.text(), + &node.value().as_text().unwrap().text, &self.machine_st.atom_tbl, ), - Some(name) => { + Some(element) => { let mut avec = Vec::new(); - for attr in node.attrs() { + for attr in element.attrs() { let name = AtomTable::build_with(&self.machine_st.atom_tbl, attr.0); let value = put_complete_string( &mut self.machine_st.heap, @@ -8259,7 +8261,7 @@ impl Machine { cvec.into_iter() )); - let tag = AtomTable::build_with(&self.machine_st.atom_tbl, name); + let tag = AtomTable::build_with(&self.machine_st.atom_tbl, 
element.name()); let result = str_loc_as_cell!(self.machine_st.heap.len()); self.machine_st -- 2.54.0