Qore CsvUtil Module Reference  1.2
 All Classes Namespaces Functions Variables Groups Pages
CsvUtil.qm.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // @file CsvUtil.qm Qore user module for working with CSV files
3 
4 /* CsvUtil.qm Copyright 2012 - 2014 Qore Technologies, sro
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // this module requires Qore 0.8.8 or better
26 
27 
28 /* Version History
29  * 2014-01-31 v1.2: David Nichols <david@qore.org>
30  + fixed CsvDataIterator::next() when header_lines > 0 and working with empty input data
31 
32  * 2013-10-21 v1.2: David Nichols <david@qore.org>
33  + implemented support for the "*int", "*float", "*number", and "*date" types
34  + implemented support for allowing subclasses of CsvFileIterator to implement support for other custom types
35  + fixed "date" type handling with empty input; now returns 1970-01-01 (use "*date" to map empty input to NOTHING)
36 
37  * 2013-10-09 v1.2: Petr Vanek <petr.vanek@qoretechnologies.com>
38  + CsvStringWriter addon
39  + AbstractCsvWriter addon
40 
41  * 2013-09-29 v1.2: David Nichols <david@qore.org>:
42  + if "headers" are not given in the CsvUtil::CsvFileIterator::constructor() but "fields" are, then set the headers from the field descriptions automatically
43 
44  * 2013-06-28 v1.2: Petr Vanek <petr.vanek@qoretechnologies.com>
45  + CsvFileWriter addon
46 
47  * 2013-05-15 v1.1: David Nichols <david@qore.org>:
48  + bug fixes to header and fields option processing
49  + fixed CsvUtil::CsvFileIterator::index() to return the line index
50  + added CsvUtil::CsvFileIterator::lineNumber() to return the current line number in the file
51 
52  * 2012-10-13 v1.0: David Nichols <david@qore.org>:
53  + initial version of module
54 */
55 
133 // private class used to iterate a list and skip elements without any value
    // Extends ListIterator; only the constructor and next() are redeclared in this listing.
134 class ListValueIterator : public ListIterator {
135 
136 public:
     // Creates the iterator over the given list.
     // NOTE(review): "*list" presumably means the argument is optional (may be NOTHING) - confirm.
137  constructor(*list l);
138 
139 
     // NOTE(review): presumably advances to the next element that has a value, as the class
     // comment describes; the body is not part of this listing - confirm against CsvUtil.qm.
140  bool next();
141 
142 };
143 
144 class CsvHelper {
    // Helper class that is privately inherited by the CSV iterator and writer classes
    // declared later in this file.
145 
146 public:
147  private :
     // Hash keyed by field type name (plain and "*"-prefixed variants), each mapped to True.
     // NOTE(review): presumably the set of type names accepted by checkType() - confirm.
149  const Types = (
150  "int": True,
151  "*int": True,
152  "float": True,
153  "*float": True,
154  "number": True,
155  "*number": True,
156  "string": True,
157  "*string": True,
158  "date": True,
159  "*date": True,
160  );
161 
     // List of attribute names: "type", "format", "timezone", "code".
     // NOTE(review): presumably the keys allowed in a single field description - confirm.
163  const FieldAttrs = ("type", "format", "timezone", "code");
164 
165 public:
166 
     // NOTE(review): no description is available in this listing; presumably processes the
     // "fields" option into ordered field descriptions - confirm against CsvUtil.qm.
168  private setFields();
169 
170 
     // NOTE(review): no description is available in this listing; presumably validates the
     // type name "value" given for field "key" against Types - confirm against CsvUtil.qm.
171  checkType(string key, string value);
172 
173 }; // class CsvHelper
174 
176 namespace CsvUtil {
178  const EOL_UNIX = "\n"; // Unix end of line character sequence (for new OS X too)
180  const EOL_WIN = "\r\n"; // MS DOS/Windows end of line character sequence
182  const EOL_MACINTOSH = "\r"; // old (pre-OSX) Macintosh end of line character sequence
183 
184  // helper list of end of line values (the three EOL_* constants declared above)
185  const EOLS = (EOL_UNIX, EOL_WIN, EOL_MACINTOSH, );
186 
188 
340 class CsvAbstractIterator : public Qore::AbstractIterator, private CsvHelper {
    // Abstract base class that allows CSV data to be iterated. Subclasses provide the line
    // source by implementing lineNumberImpl(), getLineValueImpl(), and nextLineImpl().
341 
342 public:
343  private :
     // valid options for the object (a hash for quick lookups of valid keys)
345  const Options = (
346  "encoding": True,
347  "separator": True,
348  "quote": True,
349  "eol": True,
350  "ignore-empty": True,
351  "ignore-whitespace": True,
352  "header-lines": True,
353  "header-names": True,
354  "headers": True,
355  "verify-columns": True,
356  "fields": True,
357  "timezone": True,
358  );
359 
360 public:
361 
362  private :
363  // field separator
364  string separator = ",";
365 
366  // field content delimiter
367  string quote = "\"";
368 
369  // number of header lines
370  softint headerLines = 0;
371 
372  // flag to use string names from the first header row if possible
373  bool headerNames = False;
374 
375  // True if empty lines should be ignored
376  bool ignoreEmptyLines = True;
377 
378  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
379  bool ignoreWhitespace = True;
380 
381  // headers / column names for lines iterated
382  *softlist headers;
383 
384  // hash of field information (types, formats, and possible code), hash key = column name or number (starting with 0)
385  *hash fields;
386 
387  // list of field descriptions (from fields, ordered when headers are set)
388  *list fdesc;
389 
390  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
391  *TimeZone tz;
392 
393  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
394  bool checkElementCounts = False;
395 
396  // column count for verifying column counts
397  int cc;
398 
399  // current record count for the index() method
400  int rc = 0;
401 
402 public:
403 
405 
     // creates the CsvAbstractIterator with an option hash
409  constructor(*hash opts);
410 
411 
     // returns the name of the input data
413  private *string getDataName();
414 
415 
     // returns the current line number (implemented by subclasses)
417  private abstract int lineNumberImpl();
418 
     // returns the current line (implemented by subclasses)
420  private abstract string getLineValueImpl();
421 
     // moves the current line / record position to the next line / record; returns False
     // if there are no more (implemented by subclasses)
423  private abstract bool nextLineImpl();
424 
426 
     // moves the current line / record position to the next line / record; returns False
     // if there are no more
431  bool next();
432 
433 
435 
     // returns the given column value for the current row
442  any memberGate(string name);
443 
444 
446 
     // returns the current record as a hash
455  hash getValue();
456 
457 
459 
     // returns the current record as a hash
468  hash getRecord();
469 
470 
472 
     // NOTE(review): the generated descriptions for this file also mention
     // "list getRecordList()" (returns the current record as a list); its declaration does
     // not appear in this listing - presumably it belongs in this gap; confirm.
482 
483 
485 
     // returns the current separator string
492  string getSeparator();
493 
494 
496 
     // returns the current quote string
503  string getQuote();
504 
505 
507 
     // returns the current column headers or NOTHING if no headers have been detected or saved yet
513  *list getHeaders();
514 
515 
517 
     // returns the row index being iterated, which does not necessarily correspond to the line number
528  int index();
529 
530 
532 
     // returns the current iterator line number in the file (the first line is line 1),
     // or 0 if not pointing at data
547  int lineNumber();
548 
549 
     // NOTE(review): no description is available in this listing; presumably converts the raw
     // field text "val" according to the field description "fh" - confirm against CsvUtil.qm.
550  private any handleType(hash fh, *string val);
551 
552 
     // parses a line in the file and returns a processed list of the fields
554  private list parseLine();
555 
556  };
557 
559 
564 class CsvFileIterator : public CsvUtil::CsvAbstractIterator,public Qore::FileLineIterator, private CsvHelper {
    // The CsvFileIterator class allows CSV files to be iterated on a record basis.
    // NOTE(review): line reading is presumably delegated to the Qore::FileLineIterator base - confirm.
565 
566 public:
568 
     // creates the CsvFileIterator with the path of the file to read and optionally an option hash
573  constructor(string path, *hash opts);
574 
575 
     // returns the name of the input data
577  private *string getDataName();
578 
579 
     // returns the current line number
581  private int lineNumberImpl();
582 
583 
     // returns the current line trimmed of the EOL character(s)
585  private string getLineValueImpl();
586 
587 
     // moves the current line / record position to the next line / record; returns False
     // if there are no more
589  private bool nextLineImpl();
590 
591  }; // CsvFileIterator class
592 
594 
599 class CsvDataIterator : public CsvUtil::CsvAbstractIterator, private CsvHelper {
    // The CsvDataIterator class allows arbitrary CSV string data to be iterated on a record basis.
600 
601 public:
602  private :
604  string data; // input data
606  *string eol; // EOL marker
608  *string line; // current line
610  int pos = 0; // current byte pos
612  int lineno = 0; // current line number
     // NOTE(review): presumably backs the valid() method below - confirm
614  bool valid = False;
615 
616 public:
617 
619 
     // creates the CsvDataIterator with the input data and optionally an option hash
624  constructor(string data, *hash opts);
625 
626 
628 
     // returns True if the iterator is currently pointing at a valid element, False if not
636  bool valid();
637 
638 
     // returns the current line number
640  private int lineNumberImpl();
641 
642 
     // returns the current line trimmed of the EOL character(s)
644  private string getLineValueImpl();
645 
646 
     // moves the current line / record position to the next line / record; returns False
     // if there are no more
648  private bool nextLineImpl();
649 
650  };
651 
653 
710 class AbstractCsvWriter : private CsvHelper {
    // The AbstractCsvWriter class provides a parent for all CSV writers. Concrete writers
    // implement the abstract writeRawLine() method declared below.
711 
712 public:
713  private :
     // valid options for the object (a hash for quick lookups of valid keys)
715  const Options = (
716  "encoding": True,
717  "separator": True,
718  "quote": True,
719  "eol": True,
720  "verify-columns": True,
721  "fields": True,
722  "headers": True,
723  "date-format": True,
724  );
725 
726 public:
727 
728  private :
729  // TODO/FIXME
     // output encoding; initialized from get_default_encoding()
730  string encoding = get_default_encoding();
731 
732  // field separator
733  string separator = ",";
734 
735  // field content delimiter
736  string quote = "\"";
737 
738  // end of line sequence
739  string eol = EOL_UNIX;
740 
741  // default date->string format
742  string dateFormat = 'DD/MM/YYYY hh:mm:SS';
743 
744  // headers / column names for lines iterated
745  *softlist headers;
746 
747  // hash of field information (types, formats, and possible code), hash key = column name or number (starting with 0)
748  *hash fields;
749 
750  // list of field descriptions (from fields, ordered when headers are set)
751  *list fdesc;
752 
753  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
     // NOTE(review): this exception name matches the iterator classes; the writer receives its
     // own "errname" in the constructor, so the name actually thrown here should be confirmed.
754  bool checkElementCounts = False;
755 
756  // the latest line number
757  int lineNo = 0;
758 
759  // base template for value format
760  string baseTemplate;
761 
     // error name passed to the constructor
     // NOTE(review): presumably used as the exception code for errors raised by the writer - confirm
762  string errname;
763 
764 public:
765 
767 
     // creates the AbstractCsvWriter
773  constructor(string errname, *hash opts);
774 
775 
777 
     // writes a line from a list of values; data are checked against column rules
782  writeLine(list values);
783 
784 
786 
     // NOTE(review): no description is available in this listing; presumably the hash-based
     // counterpart of writeLine(list) - confirm against CsvUtil.qm.
791  writeLine(hash values);
792 
793 
795 
     // streams the given iterator into the output
802  write(AbstractIterator iterator);
803 
804 
     // real write implementation, without any checking; must be implemented by subclasses
806  abstract private writeRawLine(list values);
807 
809 
     // prepares a line string with formatting and escaping
     // NOTE(review): the generated description says "line with EOF"; presumably the EOL
     // sequence is meant - confirm.
813  private string prepareRawLine(list values);
814 
815 
     // NOTE(review): no description is available in this listing; presumably returns the date
     // format to use for the column at index "ix" - confirm against CsvUtil.qm.
817  private string dateFormat(int ix);
818 
819 
820  }; // AbstractCsvWriter class
821 
824 
825 public:
    // NOTE(review): the class header (listing line 823) is missing from this listing; per the
    // closing comment below this is the body of CsvFileWriter, described in the generated docs
    // as "the CsvFileWriter class for easy and safe CSV file creation".
826 
827  private :
828  // a file to write
829  File file;
830 
831 public:
832 
834 
     // creates the CsvFileWriter with the path of the file and optionally an option hash
842  constructor(string path, *hash opts);
843 
844 
     // real write implementation, without any checking
845  private writeRawLine(list values);
846 
847 
848  }; // CsvFileWriter
849 
852 
853 public:
    // NOTE(review): the class header (listing line 851) is missing from this listing; per the
    // closing comment below this is the body of CsvStringWriter, described in the generated docs
    // as "the CsvStringWriter class for in-memory string CSV creation".
854 
855  private :
856  // a csv content
857  string content;
858 
859 public:
860 
862 
     // creates the CsvStringWriter with content in the memory
867  constructor(*hash opts);
868 
869 
     // real write implementation, without any checking
870  private writeRawLine(list values);
871 
872 
     // gets the current in-memory content as a string
874  string getContent();
875 
876 
877  }; // CsvStringWriter
878 
879 }; // CsvUtil namespace
880 
private writeRawLine(list values)
Real write implementation, without any checking.
string getQuote()
returns the current quote string
string get_default_encoding()
list getRecordList()
returns the current record as a list
constructor(string data, *hash opts)
creates the CsvDataIterator with the input data and optionally an option hash
constructor(*hash opts)
creates the CsvStringWriter with content in the memory
private string prepareRawLine(list values)
Prepares a string (a line terminated with the EOL sequence) with formatting and escaping.
private list parseLine()
parses a line in the file and returns a processed list of the fields
the CsvAbstractIterator class is an abstract base class that allows abstract CSV data to be iterated ...
Definition: CsvUtil.qm.dox.h:340
*list getHeaders()
returns the current column headers or NOTHING if no headers have been detected or saved yet ...
write(AbstractIterator iterator)
Streams the given iterator into the file.
private *string getDataName()
Returns the name of the input data.
*string eol
EOL marker.
Definition: CsvUtil.qm.dox.h:606
int lineNumber()
returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
hash getValue()
returns the current record as a hash
constructor(string errname, *hash opts)
creates the AbstractCsvWriter
hash getRecord()
returns the current record as a hash
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
const True
private writeRawLine(list values)
Real write implementation, without any checking.
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:345
int lineno
current line number
Definition: CsvUtil.qm.dox.h:612
int pos
current byte pos
Definition: CsvUtil.qm.dox.h:610
private int lineNumberImpl()
Returns the current line number.
constructor(string path, *hash opts)
creates the CsvFileIterator with the path of the file to read and optionally an option hash ...
const False
private *string getDataName()
Returns the name of the input data.
list list(...)
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:715
abstract private string getLineValueImpl()
Returns the current line.
the CsvFileIterator class allows CSV files to be iterated on a record basis
Definition: CsvUtil.qm.dox.h:564
string getContent()
Get the current in-memory content as a string.
constructor(*hash opts)
creates the CsvAbstractIterator with an option hash
the CsvStringWriter class for in-memory string CSV creation
Definition: CsvUtil.qm.dox.h:851
the AbstractCsvWriter class provides a parent for all CSV writers
Definition: CsvUtil.qm.dox.h:710
const EOL_MACINTOSH
Old (pre-OSX) Macintosh end of line character sequence.
Definition: CsvUtil.qm.dox.h:182
any memberGate(string name)
returns the given column value for the current row
private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
const EOL_UNIX
Unix end of line character sequence (for new OS X too)
Definition: CsvUtil.qm.dox.h:178
*string line
current line
Definition: CsvUtil.qm.dox.h:608
private int lineNumberImpl()
Returns the current line number; returns 0 if not pointing at any data.
writeLine(list values)
write a line with list of values. Data are checked against column rules.
int index()
returns the row index being iterated, which does not necessarily correspond to the line number when t...
string getSeparator()
returns the current separator string
private string getLineValueImpl()
Returns the current line trimmed of the EOL character(s)
const EOL_WIN
MS DOS/Windows end of line character sequence.
Definition: CsvUtil.qm.dox.h:180
the CsvDataIterator class allows arbitrary CSV string data to be iterated on a record basis ...
Definition: CsvUtil.qm.dox.h:599
bool valid()
returns True if the iterator is currently pointing at a valid element, False if not ...
constructor(string path, *hash opts)
creates the CsvFileWriter with the path of the file to write and optionally an option hash
abstract private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
abstract private int lineNumberImpl()
Returns the current line number.
string data
input data
Definition: CsvUtil.qm.dox.h:604
the CsvFileWriter class for easy and safe CSV file creation
Definition: CsvUtil.qm.dox.h:823
hash hash(object obj)
private string getLineValueImpl()
Returns the current line trimmed of the EOL character(s)
abstract private writeRawLine(list values)
Real write implementation, without any checking.