From: Markus Triska <triska@metalevel.at>
Date: Sat, 20 Jun 2020 10:25:13 +0000 (+0200)
Subject: ADDED: library(xpath) for convenient analysis of HTML and XML documents.
X-Git-Tag: v0.8.127~23^2~1
X-Git-Url: https://git.sagredo.dev/?a=commitdiff_plain;h=d66ea07ba4c1e856c60804e967b720b058f80681;p=scryer-prolog.git

ADDED: library(xpath) for convenient analysis of HTML and XML documents.
---

diff --git a/README.md b/README.md
index c870cb85..9d5faaaf 100644
--- a/README.md
+++ b/README.md
@@ -426,7 +426,11 @@ The modules that ship with Scryer&nbsp;Prolog are also called
   read answers from web&nbsp;servers. HTTPS is also supported.
 * [`sgml`](src/lib/sgml.pl)
   `load_html/3` represents HTML&nbsp;documents as Prolog&nbsp;terms
-  for convenient and efficient reasoning.
+  for convenient and efficient reasoning. See also `library(xpath)`.
+* [`xpath`](src/lib/xpath.pl)
+  The predicate `xpath/3` is used for convenient reasoning about
+  HTML and XML&nbsp;documents, inspired by the XPath language. This
+  library is often used together with `library(sgml)`.
 * [`sockets`](src/lib/sockets.pl)
   Predicates for opening and accepting TCP connections as streams.
   TLS negotiation is performed via the option `tls(true)` in
diff --git a/src/lib/sgml.pl b/src/lib/sgml.pl
index 2a15e425..56825920 100644
--- a/src/lib/sgml.pl
+++ b/src/lib/sgml.pl
@@ -21,6 +21,11 @@
 
    Currently, Options are ignored. In the future, more options may be
    provided to control parsing.
+
+   Use http_open/3 from library(http/http_open) to read answers from
+   web servers via streams.
+
+   library(xpath) provides convenient reasoning about parsed documents.
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 
 :- module(sgml, [load_html/3]).
diff --git a/src/lib/xpath.pl b/src/lib/xpath.pl
new file mode 100644
index 00000000..96c4bead
--- /dev/null
+++ b/src/lib/xpath.pl
@@ -0,0 +1,643 @@
+/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+   The original copyright and licence information is shown below.
+
+   Adapted in June 2020 by Markus Triska (triska@metalevel.at)
+   Part of Scryer Prolog.
+
+   In order to improve portability and reliability of this library, I
+   have changed various parts of the original code to make it
+   conforming to the Prolog ISO standard. I have removed all
+   occurrences of proprietary non-standard ad hoc data types.
+
+   In Scryer Prolog, we represent strings as lists of characters.
+   This makes them amenable to processing with DCGs and other
+   built-in mechanisms such as existing predicates on lists.
+   Scryer Prolog represents lists of characters very compactly,
+   making it an ideal representation for large amounts of text. It
+   fully conforms to the ISO standard and is very portable.
+
+   This library is intended to be used in tandem with load_html/3
+   from library(sgml), which creates DOM trees from markup documents.
+   It can be combined with http_open/3 from library(http/http_open)
+   to read such documents directly from network connections.
+
+   For instance, to inspect all links to Prolog files (*.pl) on
+   Scryer Prolog's project page, we can use:
+
+      :- use_module(library(http/http_open)).
+      :- use_module(library(sgml)).
+      :- use_module(library(format)).
+      :- use_module(library(lists)).
+      :- use_module(library(xpath)).
+
+      link_to_pl_file(File) :-
+          http_open("https://github.com/mthom/scryer-prolog", S, []),
+          load_html(stream(S), DOM, []),
+          xpath(DOM, //a(@href), File),
+          append(_, ".pl", File).
+
+   Yielding:
+
+      ?- link_to_pl_file(File).
+      %@    File = "/mthom/scryer-prolog/blob/master/src/lib/tabling.pl"
+      %@ ;  File = "/mthom/scryer-prolog/blob/master/src/lib/dif.pl"
+      %@ ;  File = "/mthom/scryer-prolog/blob/master/src/lib/freeze.pl"
+      %@ ;  ...
+
+   Parts of the original functionality may not yet work. Please
+   consider such parts opportunities for improvements, and file
+   issues in case you need additional features.
+
+   The original copyright information follows.
+
+   ======================================================================
+
+
+    Author:        Jan Wielemaker
+    E-mail:        J.Wielemaker@vu.nl
+    WWW:           http://www.swi-prolog.org
+    Copyright (c)  2009-2019, University of Amsterdam
+                              VU University Amsterdam
+                              CWI, Amsterdam
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in
+       the documentation and/or other materials provided with the
+       distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+:- module(xpath,
+          [ xpath/3,                    % +DOM, +Spec, -Value
+            xpath_chk/3,                % +DOM, +Spec, -Value
+
+            op(400, fx, //),
+            op(400, fx, /),
+            op(200, fy, @)
+          ]).
+
+:- use_module(library(lists),[member/2,memberchk/2]).
+:- use_module(library(error)).
+:- use_module(library(dcgs)).
+:- use_module(library(si)).
+
+/** <module> Select nodes in an XML DOM
+
+The library xpath.pl provides predicates to select nodes from an XML DOM
+tree as produced by library(sgml) based  on descriptions inspired by the
+XPath language.
+
+The   predicate   xpath/3   selects   a   sub-structure   of   the   DOM
+non-deterministically based on an  XPath-like   specification.  Not  all
+selectors of XPath are implemented, but the ability to mix xpath/3 calls
+with arbitrary Prolog code  provides  a   powerful  tool  for extracting
+information from XML parse-trees.
+
+@see http://www.w3.org/TR/xpath
+*/
+
+element_name(element(Name,_,_), Name).
+element_attributes(element(_,Attributes,_), Attributes).
+element_content(element(_,_,Content), Content).
+
+%!  xpath_chk(+DOM, +Spec, ?Content) is semidet.
+%
+%   Semi-deterministic version of xpath/3.
+
+xpath_chk(DOM, Spec, Content) :-
+    xpath(DOM, Spec, Content),
+    !.
+
+%!  xpath(+DOM, +Spec, ?Content) is nondet.
+%
+%   Match an element in a DOM structure.   The syntax is inspired by
+%   XPath, using () rather than  []   to  select  inside an element.
+%   First we can construct paths using / and //:
+%
+%       $ =|//|=Term :
+%       Select any node in the DOM matching term.
+%       $ =|/|=Term :
+%       Match the root against Term.
+%       $ Term :
+%       Select the immediate children of the root matching Term.
+%
+%   The Terms above are of type   _callable_.  The functor specifies
+%   the element name. The element name   '*'  refers to any element.
+%   The name =self= refers to the   top-element  itself and is often
+%   used for processing matches of an  earlier xpath/3 query. A term
+%   NS:Term refers to an XML  name   in  the  namespace NS. Optional
+%   arguments specify additional  constraints   and  functions.  The
+%   arguments are processed from left  to right. Defined conditional
+%   argument values are:
+%
+%       $ index(?Index) :
+%       True if the element is the Index-th child of its parent,
+%       where 1 denotes the first child. Index can be one of:
+%         $ `Var` :
+%         `Var` is unified with the index of the matched element.
+%         $ =last= :
+%         True for the last element.
+%         $ =last= - `IntExpr` :
+%         True for the last-minus-nth element. For example,
+%         `last-1` is the element directly preceding the last one.
+%         $ `IntExpr` :
+%         True for the element whose index equals `IntExpr`.
+%       $ Integer :
+%       The N-th element with the given name, with 1 denoting the
+%       first element. Same as index(Integer).
+%       $ =last= :
+%       The last element with the given name. Same as
+%       index(last).
+%       $ =last= - IntExpr :
+%       The IntExpr-th element before the last.
+%       Same as index(last-IntExpr).
+%
+%   Defined function argument values are:
+%
+%       $ =self= :
+%       Evaluate to the entire element
+%       $ =content= :
+%       Evaluate to the content of the element (a list)
+%       $ =text= :
+%       Evaluates to all text from the sub-tree, represented
+%       as a list of characters.
+%       $ `text(atom)` :
+%       Evaluates to all text from the sub-tree as an atom.
+%       $ =normalize_space= :
+%       As =text=, but uses normalize_space/2 to normalise
+%       white-space in the output
+%       $ =number= :
+%       Extract an integer or float from the value.  Ignores
+%       leading and trailing white-space
+%       $ =|@|=Attribute :
+%       Evaluates to the value of the given attribute.  Attribute
+%       can be a compound term. In this case the functor name
+%       denotes the element and arguments perform transformations
+%       on the attribute value.  Defined transformations are:
+%
+%         - number
+%         Translate the value into a number using
+%         xsd_number_chars/2.
+%         - integer
+%         As `number`, but subsequently transform the value
+%         into an integer using the round/1 function.
+%         - float
+%         As `number`, but subsequently transform the value
+%         into a float using the float/1 function.
+%         - lower
+%         Translate the value to lower case, preserving
+%         the type.
+%         - upper
+%         Translate the value to upper case, preserving
+%         the type.
+%
+%   In addition, the argument-list can be _conditions_:
+%
+%       $ Left = Right :
+%       Succeeds if the left-hand unifies with the right-hand.
+%       If the left-hand side is a function, this is evaluated.
+%       The right-hand side is _never_ evaluated, and thus the
+%       condition `content = content` defines that the content
+%       of the element is the atom `content`.
+%       The functions `lower_case` and `upper_case` can be applied
+%       to Right (see example below).
+%       $ contains(Haystack, Needle) :
+%       Succeeds if Needle is a sub-list of Haystack.
+%       $ XPath :
+%       Succeeds if XPath matches in the currently selected
+%       sub-DOM.  For example, the following expression finds
+%       an =h3= element inside a =div= element, where the =div=
+%       element itself contains an =h2= child with a =strong=
+%       child.
+%
+%         ==
+%         //div(h2/strong)/h3
+%         ==
+%
+%       This is equivalent to the conjunction of XPath goals below.
+%
+%         ==
+%            ...,
+%            xpath(DOM, //(div), Div),
+%            xpath(Div, h2/strong, _),
+%            xpath(Div, h3, Result)
+%         ==
+%
+%   **Examples**:
+%
+%   Match each table-row in DOM:
+%
+%       ==
+%       xpath(DOM, //tr, TR)
+%       ==
+%
+%   Match the last cell  of  each   tablerow  in  DOM.  This example
+%   illustrates that a result can be the input of subsequent xpath/3
+%   queries. Using multiple queries  on   the  intermediate  TR term
+%   guarantee that all results come from the same table-row:
+%
+%       ==
+%       xpath(DOM, //tr, TR),
+%       xpath(TR,  /td(last), TD)
+%       ==
+%
+%   Match each =href= attribute in an <a> element
+%
+%       ==
+%       xpath(DOM, //a(@href), HREF)
+%       ==
+%
+%   Suppose we have a table containing  rows where each first column
+%   is the name of a product with a   link to details and the second
+%   is the price (a number).  The   following  predicate matches the
+%   name, URL and price:
+%
+%       ==
+%       product(DOM, Name, URL, Price) :-
+%           xpath(DOM, //tr, TR),
+%           xpath(TR, td(1), C1),
+%           xpath(C1, /self(normalize_space), Name),
+%           xpath(C1, a(@href), URL),
+%           xpath(TR, td(2, number), Price).
+%       ==
+%
+%   Suppose we want to select  books   with  genre="thriller" from a
+%   tree containing elements =|<book genre=...>|=
+%
+%       ==
+%       thriller(DOM, Book) :-
+%           xpath(DOM, //book(@genre=thiller), Book).
+%       ==
+%
+%   Match the elements =|<table align="center">|= _and_ =|<table
+%   align="CENTER">|=:
+%
+%       ```prolog
+%           //table(@align(lower) = center)
+%       ```
+%
+%   Get the `width` and `height` of a `div` element as a number,
+%   and the `div` node itself:
+%
+%       ==
+%           xpath(DOM, //div(@width(number)=W, @height(number)=H), Div)
+%       ==
+%
+%   Note that `div` is an infix operator, so parentheses must be
+%   used in cases like the following:
+%
+%       ==
+%           xpath(DOM, //(div), Div)
+%       ==
+
+xpath(DOM, Spec, Content) :-
+    in_dom(Spec, DOM, Content).
+
+in_dom(//Spec, DOM, Value) :-
+    !,
+    element_spec(Spec, Name, Modifiers),
+    sub_dom(I, Len, Name, E, DOM),
+    modifiers(Modifiers, I, Len, E, Value).
+in_dom(/Spec, E, Value) :-
+    !,
+    element_spec(Spec, Name, Modifiers),
+    (   Name == self
+    ->  true
+    ;   element_name(E, Name)
+    ),
+    modifiers(Modifiers, 1, 1, E, Value).
+in_dom(A/B, DOM, Value) :-
+    !,
+    in_dom(A, DOM, Value0),
+    in_dom(B, Value0, Value).
+in_dom(A//B, DOM, Value) :-
+    !,
+    in_dom(A, DOM, Value0),
+    in_dom(//B, Value0, Value).
+in_dom(Spec, element(_, _, Content), Value) :-
+    element_spec(Spec, Name, Modifiers),
+    count_named_elements(Content, Name, CLen),
+    CLen > 0,
+    nth_element(N, Name, E, Content),
+    modifiers(Modifiers, N, CLen, E, Value).
+
+element_spec(Var, _, _) :-
+    var(Var),
+    !,
+    instantiation_error(Var).
+element_spec(NS:Term, NS:Name, Modifiers) :-
+    !,
+    callable_name_arguments(Term, Name0, Modifiers),
+    star(Name0, Name).
+element_spec(Term, Name, Modifiers) :-
+    !,
+    callable_name_arguments(Term, Name0, Modifiers),
+    star(Name0, Name).
+
+callable_name_arguments(Atom, Name, Arguments) :-
+    atom(Atom),
+    !,
+    Name = Atom, Arguments = [].
+callable_name_arguments(Compound, Name, Arguments) :-
+    Compound =.. [Name|Arguments].
+
+
+star(*, _) :- !.
+star(Name, Name).
+
+
+%!  sub_dom(-Index, -Count, +Name, -Sub, +DOM) is nondet.
+%
+%   Sub is a node in DOM with Name.
+%
+%   @param Count    is the total number of nodes in the content
+%                   list Sub appears that have the same name.
+%   @param Index    is the 1-based index of Sub of nodes with
+%                   Name.
+
+sub_dom(1, 1, Name, DOM, DOM) :-
+    element_name(DOM, Name0),
+    \+ Name \= Name0.
+sub_dom(N, Len, Name, E, element(_,_,Content)) :-
+    !,
+    sub_dom_2(N, Len, Name, E, Content).
+sub_dom(N, Len, Name, E, Content) :-
+    list_si(Content),
+    sub_dom_2(N, Len, Name, E, Content).
+
+sub_dom_2(N, Len, Name, Element, Content) :-
+    (   count_named_elements(Content, Name, Len),
+        nth_element(N, Name, Element, Content)
+    ;   member(element(_,_,C2), Content),
+        sub_dom_2(N, Len, Name, Element, C2)
+    ).
+
+
+%!  count_named_elements(+Content, +Name, -Count) is det.
+%
+%   Count is the number of nodes with Name in Content.
+
+count_named_elements(Content, Name, Count) :-
+    count_named_elements(Content, Name, 0, Count).
+
+count_named_elements([], _, Count, Count).
+count_named_elements([element(Name,_,_)|T], Name0, C0, C) :-
+    \+ Name \= Name0,
+    !,
+    C1 is C0+1,
+    count_named_elements(T, Name0, C1, C).
+count_named_elements([_|T], Name, C0, C) :-
+    count_named_elements(T, Name, C0, C).
+
+
+%!  nth_element(?N, +Name, -Element, +Content:list) is nondet.
+%
+%   True if Element is the N-th element with name in Content.
+
+nth_element(N, Name, Element, Content) :-
+    nth_element_(1, N, Name, Element, Content).
+
+nth_element_(I, N, Name, E, [H|T]) :-
+    element_name(H, Name0),
+    \+ Name \= Name0,
+    !,
+    (   N = I,
+        E = H
+    ;   I2 is I + 1,
+        (   nonvar(N), I2 > N
+        ->  !, fail
+        ;   true
+        ),
+        nth_element_(I2, N, Name, E, T)
+    ).
+nth_element_(I, N, Name, E, [_|T]) :-
+    nth_element_(I, N, Name, E, T).
+
+
+%!  modifiers(+Modifiers, +I, +Clen, +DOM, -Value)
+%
+%
+
+modifiers([], _, _, Value, Value).
+modifiers([H|T], I, L, Value0, Value) :-
+    modifier(H, I, L, Value0, Value1),
+    modifiers(T, I, L, Value1, Value).
+
+modifier(M, _, _, _, _) :-
+    var(M),
+    !,
+    instantiation_error(M).
+modifier(Index, I, L, Value0, Value) :-
+    implicit_index_modifier(Index),
+    !,
+    Value = Value0,
+    index_modifier(Index, I, L).
+modifier(index(Index), I, L, Value, Value) :-
+    !,
+    index_modifier(Index, I, L).
+modifier(Function, _, _, In, Out) :-
+    xpath_function(Function),
+    !,
+    xpath_function(Function, In, Out).
+modifier(Function, _, _, In, Out) :-
+    xpath_condition(Function, In),
+    Out = In.
+
+implicit_index_modifier(I) :-
+    integer(I),
+    !.
+implicit_index_modifier(last).
+implicit_index_modifier(last-_Expr).
+
+index_modifier(Var, I, _L) :-
+    var(Var),
+    !,
+    Var = I.
+index_modifier(last, I, L) :-
+    !,
+    I =:= L.
+index_modifier(last-Expr, I, L) :-
+    !,
+    I =:= L-Expr.
+index_modifier(N, I, _) :-
+    N =:= I.
+
+xpath_function(self, DOM, DOM).                                % self
+xpath_function(content, Element, Value) :-                     % content
+    element_content(Element, Value).
+xpath_function(text, DOM, Text) :-                             % text
+    text_of_dom(DOM, chars, Text).
+xpath_function(text(As), DOM, Text) :-                         % text(atom)
+    text_of_dom(DOM, As, Text).
+xpath_function(normalize_space, DOM, Text) :-                  % normalize_space
+    text_of_dom(DOM, chars, Text0),
+    normalize_space(Text0, Text).
+xpath_function(number, DOM, Number) :-                         % number
+    text_of_dom(DOM, chars, Text0),
+    normalize_space(Text0, Text),
+    catch(xsd_number_chars(Number, Text), _, fail).
+xpath_function(@Name, element(_, Attrs, _), Value) :-          % @Name
+    (   atom(Name)
+    ->  memberchk(Name=Value, Attrs)
+    ;   compound(Name)
+    ->  Name =.. [AName|AOps],
+        memberchk(AName=Value0, Attrs),
+        translate_attribute(AOps, Value0, Value)
+    ;   member(Name=Value, Attrs)
+    ).
+xpath_function(quote(Value), _, Value).                         % quote(Value)
+
+xpath_function(self).
+xpath_function(content).
+xpath_function(text).
+xpath_function(text(_)).
+xpath_function(normalize_space).
+xpath_function(number).
+xpath_function(@_).
+xpath_function(quote(_)).
+
+translate_attribute([], Value, Value).
+translate_attribute([H|T], Value0, Value) :-
+    translate_attr(H, Value0, Value1),
+    translate_attribute(T, Value1, Value).
+
+translate_attr(number, Value0, Value) :-
+    xsd_number_chars(Value, Value0).
+translate_attr(integer, Value0, Value) :-
+    xsd_number_chars(Value1, Value0),
+    Value is round(Value1).
+translate_attr(float, Value0, Value) :-
+    xsd_number_chars(Value1, Value0),
+    Value is float(Value1).
+% The implementation of these translations is left for later.
+% translate_attr(lower, Value0, Value) :- ...
+% translate_attr(upper, Value0, Value) :- ...
+
+xpath_condition(Left = Right, Value) :-                        % =
+    !,
+    var_or_function(Left, Value, LeftValue),
+    process_equality(LeftValue, Right).
+xpath_condition(contains(Haystack, Needle), Value) :-          % contains(Haystack, Needle)
+    !,
+    val_or_function(Haystack, Value, HaystackValue),
+    val_or_function(Needle, Value, NeedleValue),
+    (   phrase((list(_),list(NeedleValue),list(_)), HaystackValue)
+    ->  true
+    ).
+xpath_condition(Spec, Dom) :-
+    in_dom(Spec, Dom, _).
+
+
+%!  process_equality(+Left, +Right) is semidet.
+%
+%   Provides (very) partial support for XSLT   functions that can be
+%   applied according to the XPath 2 specification.
+%
+%   For example the XPath expression  in   [1],  and  the equivalent
+%   Prolog expression in [2], would both   match the HTML element in
+%   [3].
+%
+%     ==
+%     [1] //table[align=lower-case(center)]
+%     [2] //table(@align=lower_case(center))
+%     [3] <table align="CENTER">
+%     ==
+
+process_equality(Left, Right) :-
+    var(Right),
+    !,
+    Left = Right.
+process_equality(Left, lower_case(Right)) :-
+    !,
+    downcase_atom(Left, Right).
+process_equality(Left, upper_case(Right)) :-
+    !,
+    upcase_atom(Left, Right).
+process_equality(Left, Right) :-
+    Left = Right.
+
+
+var_or_function(Arg, _, Arg) :-
+    var(Arg),
+    !.
+var_or_function(Func, Value0, Value) :-
+    xpath_function(Func),
+    !,
+    xpath_function(Func, Value0, Value).
+var_or_function(Value, _, Value).
+
+val_or_function(Arg, _, Arg) :-
+    var(Arg),
+    !,
+    instantiation_error(Arg).
+val_or_function(Func, Value0, Value) :-                         % TBD
+    xpath_function(Func, Value0, Value),
+    !.
+val_or_function(Value, _, Value).
+
+
+%!  text_of_dom(+DOM, +As, -Text:(chars | atom)) is det.
+%
+%   Text is the joined textual content of DOM.
+
+text_of_dom(DOM, As, Text) :-
+    phrase(text_of(DOM), Text0),
+    (   As == chars ->
+        Text = Text0
+    ;   As == atom ->
+        atom_chars(Text, Text0)
+    ;   domain_error(As, atom_or_chars, xpath)
+    ).
+
+text_of(element(_,_,Content)) -->
+    text_of_list(Content).
+text_of([]) -->
+    [].
+text_of([H|T]) -->
+    text_of(H),
+    text_of(T).
+
+
+text_of_list([]) -->
+    [].
+text_of_list([H|T]) -->
+    text_of_1(H),
+    text_of_list(T).
+
+
+text_of_1(element(_,_,Content)) -->
+        text_of_list(Content).
+text_of_1([C|Cs]) --> list([C|Cs]).
+
+list([]) --> [].
+list([L|Ls]) --> [L], list(Ls).
+
+% For now, we use number_chars/2 to parse XML numbers.
+% If the need arises, we can extend this to additional
+% float constants and syntax that may occur in XML files.
+
+xsd_number_chars(Number, Chars) :-
+        number_chars(Number, Chars).
+
+normalize_space(Text0, Text) :-
+        Text0 = Text.           % no conversion for the moment.