From e2c61485a1b8d4c09d4993a61a8bb014d2543c99 Mon Sep 17 00:00:00 2001 From: Cyrille Duret Date: Thu, 25 Jun 2020 21:13:29 +0200 Subject: [PATCH] add csv library --- README.md | 3 ++ src/lib/csv.pl | 140 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 src/lib/csv.pl diff --git a/README.md b/README.md index d8c4acd8..73c0747e 100644 --- a/README.md +++ b/README.md @@ -428,6 +428,9 @@ The modules that ship with Scryer Prolog are also called `load_html/3` and `load_xml/3` represent HTML and XML documents as Prolog terms for convenient and efficient reasoning. Use `library(xpath)` to extract information from parsed documents. +* [`csv`](src/lib/csv.pl) + `parse_csv//1` and `parse_csv//2` can be used with [`phrase_from_file/2`](src/lib/pio.pl) + or [`phrase/2`](src/lib/dcgs.pl) to parse CSV data. * [`xpath`](src/lib/xpath.pl) The predicate `xpath/3` is used for convenient reasoning about HTML and XML documents, inspired by the XPath language. This diff --git a/src/lib/csv.pl b/src/lib/csv.pl new file mode 100644 index 00000000..16216166 --- /dev/null +++ b/src/lib/csv.pl @@ -0,0 +1,140 @@ +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Predicates for parsing CSV data + + Two options are supported, with the following default values: + - token_separator(',') + - skip_header(false) + + Examples + + * parsing a csv string: + + ?- use_module(library(csv)). + ?- use_module(library(dcgs)). + ?- phrase(parse_csv(Data), "col1,col2,col3,col4\none,2,,three"). + Data = frame(["col1","col2","col3","col4"],[["one",2,[],"three"]]). + + * with some options: + + ?- phrase(parse_csv(Data, [skip_header(true),token_separator(';')]), "col1;col2;col3;col4\none;2;;three"). + Data = frame([],[["one",2,[],"three"]]). + + * parsing a csv file: + + ?- use_module(library(csv)). + ?- use_module(library(pio)). + ?- phrase_from_file(parse_csv(frame(Header, Rows)), './test.csv'). 
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+:- module(csv, [
+    parse_csv//1,
+    parse_csv//2
+]).
+
+:- use_module(library(dcgs)).
+:- use_module(library(lists)).
+
+
+% option(?Wanted, +Options)
+%   True if Wanted unifies with an element of Options; only the first
+%   matching element is used. Throws not_found_error(Wanted, Options)
+%   when no element matches.
+option(W, O) :-
+    (   member(W, O) -> true
+    ;   throw(not_found_error(W, O))).
+
+
+% option_extends(+UserOptions, +Defaults, -Merged)
+%   Merges UserOptions into Defaults. Every user option has to be a term
+%   of arity 1: functor/3 fails for anything else, and the merge fails
+%   with it. An entry with the same name that is already present is
+%   replaced, otherwise the user option is added in front.
+option_extends([], Opt, Opt).
+option_extends([X | Y], Opt0, Opt) :-
+    functor(X, Name, 1),
+    F0 =.. [Name, _],
+    (   select(F0, Opt0, R) ->
+        option_extends(Y, [X | R], Opt)
+    ;   option_extends(Y, [X | Opt0], Opt) ).
+
+
+% tokens(-Chars, +Opt)//
+%   Reads the characters of an unquoted field. Reading stops in front of
+%   the configured separator, in front of "\r\n", "\n" or "\r", or at end
+%   of input. A separator or line terminator that stopped the scan is
+%   pushed back, so the caller still finds it in the input.
+tokens([], Opt), [Tk_Sep] -->
+    { option(token_separator(Tk_Sep), Opt) },
+    [Tk_Sep],
+    !.
+tokens([], _), "\r\n" -->
+    "\r\n",
+    !.
+tokens([], _), "\n" -->
+    "\n",
+    !.
+tokens([], _), "\r" -->
+    "\r",
+    !.
+tokens([X | Y], Opt) -->
+    [X],
+    !,
+    tokens(Y, Opt).
+tokens([], _) --> [].
+
+
+% field(-Value, +Opt)//
+%   Reads one field:
+%     - a field opening with a double quote is read by string_tokens//2
+%       and is never converted to a number;
+%     - a non-empty unquoted field becomes a number when number_chars/2
+%       can read it, and stays a list of characters otherwise;
+%     - an empty field is [].
+%   Any exception raised by number_chars/2 is taken to mean "not a number".
+%   NOTE(review): number_chars/2 reads Prolog number syntax, which is
+%   presumably broader than plain decimal CSV numbers -- confirm that
+%   this is intended.
+field(R, Opt) -->
+    "\"",
+    !,
+    string_tokens(R, Opt).
+field(R, Opt) -->
+    tokens(R0, Opt),
+    {   R0 \== [],
+        catch(number_chars(R, R0), _, R = R0)
+    }.
+field([], _) --> [].
+
+
+% string_tokens(-Chars, +Opt)//
+%   Reads the remainder of a quoted field, the opening quote having been
+%   consumed already. Two consecutive double quotes stand for one double
+%   quote in the result; a single double quote closes the field. Fails
+%   when the input ends before the closing quote. Opt is only passed on
+%   to the recursive calls.
+string_tokens(R, Opt) -->
+    [X],
+    (   { X == '"' } ->
+        (   "\"" ->
+            { R = [X | Y] },
+            string_tokens(Y, Opt)
+        ;   { R = [] })
+    ;   { R = [X | Y] },
+        string_tokens(Y, Opt)).
+
+
+% end_token//
+%   Optional line terminator: "\r\n", "\n", "\r", or nothing at all.
+end_token --> "\r\n".
+end_token --> "\n".
+end_token --> "\r".
+end_token --> [].
+
+
+% end_of_input(?Rest0, ?Rest)
+%   Used as a non-terminal without arguments: the two arguments are the
+%   list pair threaded through the grammar, so it holds only when there
+%   is nothing left to read.
+end_of_input([], []).
+
+
+% record_end//
+%   Mandatory end of a record: a line terminator or the end of the input.
+%   Unlike end_token//0 it does not succeed in the middle of a line.
+record_end --> "\r\n".
+record_end --> "\n".
+record_end --> "\r".
+record_end --> end_of_input.
+
+
+% separator(+Opt)//
+%   Consumes one occurrence of the configured token separator.
+separator(Opt) -->
+    { option(token_separator(Tk_Sep), Opt) },
+    [Tk_Sep].
+
+
+% row(-Fields, +Opt)//
+%   Reads one record, including its line terminator, as a list of fields.
+%   At end of input, or on an empty line, the result is [[]].
+%
+%   Each field must be followed by a separator, a line terminator or the
+%   end of the input. Unquoted and empty fields always are, because
+%   tokens//2 only stops there. A quoted field followed by stray
+%   characters (e.g. "a"b) used to end the record silently and turn the
+%   rest of the line into a record of its own; it now makes the parse fail.
+row([X | Y], Opt) -->
+    field(X, Opt),
+    !,
+    (   separator(Opt) ->
+        row(Y, Opt)
+    ;   record_end ->
+        { Y = [] }).
+
+
+% rows(-Rows, +Opt)//
+%   Reads records until row//2 yields [[]], which is what it returns at
+%   end of input and for an empty line. That last record is not included
+%   in Rows.
+rows(R, Opt) -->
+    row(X, Opt),
+    !,
+    (   { X \== [[]] } ->
+        rows(Y, Opt),
+        { R = [X | Y] }
+    ;   { R = [] }).
+
+
+% parse_csv(-Frame)//
+% parse_csv(-Frame, +Options)//
+%   Frame is frame(Header, Rows). Options is a list that may contain
+%   skip_header(Bool) and token_separator(Char); the defaults are
+%   skip_header(false) and token_separator(','), and parse_csv//1 uses
+%   just the defaults.
+%
+%   With skip_header(false) the first record becomes Header and must not
+%   be empty. With any other skip_header value the first record is read
+%   and dropped, and Header is []. One further line terminator directly
+%   after the first record is skipped if present.
+parse_csv(frame(Header, Rows), Opt) -->
+    {   option_extends(Opt, [
+            skip_header(false),
+            token_separator(',')
+        ], Opt0)
+    },
+    (   { member(skip_header(false), Opt0) } ->
+        row(Header, Opt0),
+        { Header \== [[]] },
+        end_token
+    ;   row(_, Opt0),
+        end_token,
+        { Header = [] }),
+    rows(Rows, Opt0).
+parse_csv(R) -->
+    parse_csv(R, []).
-- 
2.54.0