is_sgml_source([C|Cs]) :- must_be(chars, [C|Cs]).
load_structure_([], [], _, _).
-load_structure_([C|Cs], [E], Options, What) :-
- load_(What, [C|Cs], E, Options).
-load_structure_(file(Fs), [E], Options, What) :-
+load_structure_([C|Cs], [E|Es], Options, What) :- % non-empty char list: the result is now a non-empty list of terms rather than exactly one
+ load_(What, [C|Cs], [E|Es], Options). % NOTE(review): the whole list is also passed for What = xml; confirm '$load_xml' yields a list
+load_structure_(file(Fs), [E|Es], Options, What) :- % file(Fs) source: the file contents are read as chars before parsing
once(phrase_from_file(seq(Cs), Fs)),
- load_(What, Cs, E, Options).
-load_structure_(stream(Stream), [E], Options, What) :-
+ load_(What, Cs, [E|Es], Options). % unify the complete result list instead of a single element
+load_structure_(stream(Stream), [E|Es], Options, What) :- % stream(Stream) source: chars come from get_n_chars/3 with the count left unbound
get_n_chars(Stream, _, Cs),
- load_(What, Cs, E, Options).
+ load_(What, Cs, [E|Es], Options). % unify the complete result list instead of a single element
load_(html, Cs, E, Options) :- '$load_html'(Cs, E, Options).
load_(xml, Cs, E, Options) :- '$load_xml'(Cs, E, Options).
.value_to_str_like(self.machine_st.registers[1])
{
let document = scraper::Html::parse_document(&string.as_str());
- let result = self.html_node_to_term(document.tree.root().first_child().unwrap())?;
- unify!(self.machine_st, self.machine_st.registers[2], result);
+ let root_nodes = document
+ .tree
+ .root()
+ .children()
+ .map(|child| self.html_node_to_term(child))
+ .collect::<Result<Vec<_>, _>>()?;
+
+ let nodes = sized_iter_to_heap_list(
+ &mut self.machine_st.heap,
+ root_nodes.len(),
+ root_nodes.into_iter(),
+ )?;
+
+ unify!(self.machine_st, self.machine_st.registers[2], nodes);
} else {
self.machine_st.fail = true;
}
&mut self,
node: ego_tree::NodeRef<'_, scraper::Node>,
) -> Result<HeapCellValue, usize> {
- match node.value().as_element() {
- None => self
- .machine_st
- .heap
- .allocate_cstr(&node.value().as_text().unwrap().text),
- Some(element) => {
+ match node.value() {
+ scraper::Node::Document | scraper::Node::Fragment => {
+ unreachable!("we never iterate the root itself only its children")
+ }
+ scraper::Node::Doctype(doctype) => {
+ // TODO: only the doctype name is exposed in doctype/1; the public and system identifiers are not included yet.
+ let name = self.machine_st.heap.allocate_cstr(&doctype.name)?;
+
+ let result = str_loc_as_cell!(self.machine_st.heap.cell_len());
+ let mut writer = self.machine_st.heap.reserve(2)?;
+
+ writer.write_with(|section| {
+ section.push_cell(atom_as_cell!(atom!("doctype"), 1));
+ section.push_cell(name);
+ });
+
+ Ok(result)
+ }
+ scraper::Node::Comment(comment) => {
+ let comment = self.machine_st.heap.allocate_cstr(&comment)?;
+
+ let result = str_loc_as_cell!(self.machine_st.heap.cell_len());
+ let mut writer = self.machine_st.heap.reserve(2)?;
+
+ writer.write_with(|section| {
+ section.push_cell(atom_as_cell!(atom!("comment"), 1));
+ section.push_cell(comment);
+ });
+
+ Ok(result)
+ }
+ scraper::Node::Text(text) => self.machine_st.heap.allocate_cstr(&text.text),
+ scraper::Node::Element(element) => {
let mut avec = Vec::new();
for attr in element.attrs() {
avec.into_iter(),
)?;
- let mut cvec = Vec::new();
-
- for child in node.children() {
- cvec.push(self.html_node_to_term(child)?);
- }
+ let cvec = node
+ .children()
+ .map(|child| self.html_node_to_term(child))
+ .collect::<Result<Vec<_>, _>>()?;
let children = sized_iter_to_heap_list(
&mut self.machine_st.heap,
section.push_cell(children);
});
+ Ok(result)
+ }
+ scraper::Node::ProcessingInstruction(processing_instruction) => {
+ let target = self
+ .machine_st
+ .heap
+ .allocate_cstr(&processing_instruction.target)?;
+ let data = self
+ .machine_st
+ .heap
+ .allocate_cstr(&processing_instruction.data)?;
+
+ let result = str_loc_as_cell!(self.machine_st.heap.cell_len());
+ let mut writer = self.machine_st.heap.reserve(3)?;
+
+ writer.write_with(|section| {
+ section.push_cell(atom_as_cell!(atom!("processing_instruction"), 2));
+ section.push_cell(target);
+ section.push_cell(data);
+ });
+
Ok(result)
}
}
--- /dev/null
+:- use_module(library(sgml)). % load_html/3 used below is presumably exported by this library — confirm
+
+test :- % parses three HTML inputs and writes each resulting term list, one after another
+ load_html("<!DOCTYPE html><html><head><title>Hello!</title></head></html>", Es, []), % document with a doctype before the root element
+ write(Es),
+ load_html("<!DOCTYPE html><html><head><title>Hello!</title><!-- comment --></head></html>", Es2, []), % same document with a comment inside <head>
+ write(Es2),
+ load_html("<!", Es3, []), % truncated markup declaration as the whole input
+ write(Es3).
+
+:- initialization(test). % run test/0 when this file is loaded
load_module_test("tests-pl/issue2588.pl", "[element(html,[],[element(head,[],[element(title,[],[[H,e,l,l,o,!]])]),element(body,[],[])])]");
}
+#[test] // regression test for issue #2949
+#[cfg_attr(miri, ignore = "unsupported operation when isolation is enabled")] // ignored when running under Miri
+fn issue2949_load_html() { // loads tests-pl/issue2949.pl; the second argument is presumably the expected printed output — confirm against load_module_test
+ load_module_test("tests-pl/issue2949.pl", "[doctype([h,t,m,l]),element(html,[],[element(head,[],[element(title,[],[[H,e,l,l,o,!]])]),element(body,[],[])])][doctype([h,t,m,l]),element(html,[],[element(head,[],[element(title,[],[[H,e,l,l,o,!]]),comment([ ,c,o,m,m,e,n,t, ])]),element(body,[],[])])][comment([]),element(html,[],[element(head,[],[]),element(body,[],[])])]");
+}
+
// issue #2361
#[serial]
#[test]