Qore CsvUtil Module Reference  1.6.1
CsvUtil.qm.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // @file CsvUtil.qm Qore user module for working with CSV files
3 
4 /* CsvUtil.qm Copyright 2012 - 2018 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // minimum required Qore version
26 
27 
28 // assume local var scope, do not use "$" for vars, members, and method calls
29 
30 
31 /* see release notes below for version history
32 */
33 
301 class CsvHelper {
302 // Helper class: AbstractCsvIterator and AbstractCsvWriter (further down this listing) both inherit from it privately.
303 public:
304  private :
305  const C_OPT1 = 0x1; // option flag bit; NOTE(review): "fields"/"headers" options carry only this bit, so presumably it marks the older options-only API - confirm
306  const C_OPT2 = 0x2; // option flag bit; NOTE(review): "extended_record" carries only this bit, so presumably it marks the newer spec-based API - confirm
308  const Types = (
309  "int": True,
310  "*int": True,
311  "float": True,
312  "*float": True,
313  "number": True,
314  "*number": True,
315  "string": True,
316  "*string": True,
317  "date": True,
318  "*date": True,
319  );
320 // Types (above): hash keyed by type name, each with its "*"-prefixed variant; NOTE(review): presumably the set of accepted field types - confirm
322  const FieldAttrs = ("type", "format", "timezone", "code", "header");
323 // FieldAttrs (above): list of attribute names; NOTE(review): presumably the keys allowed in a field description - confirm
325  bool tolwr = False;
326 
328  string date_format;
329 
331  hash m_specs;
332 
334  string errname;
335 
336  // reorder data according to headers set by options.headers or read from CsvHeader
337  bool headerReorder = True;
338 
339 public:
340 // constructor: takes a single string; NOTE(review): presumably stored in the errname member - confirm
342  constructor (string n_errname);
343 
344 
346 // From here on each private method is wrapped in its own "private:" ... "public:" pair, so the access level is toggled per member.
347 private:
348  bool isMultiType();
349 public:
350 
351 
353 
354 private:
355  checkType(string fld_errs, string key, string value);
356 public:
357 
358 
359  // get spec from options.fields for old Csv. Check spec param for new Csv
360 // C_OPTx below: NOTE(review): judging by the name, one of the C_OPT1/C_OPT2 flags declared above - confirm
361 private:
362  hash getSpec(*hash fields, string fld_errs, int C_OPTx);
363 public:
364 
365 
366 // NOTE(review): getSpec1 takes the optional "fields" hash and getSpec2 a "spec" hash; presumably the old-style and new-style halves of getSpec - confirm
367 private:
368  hash getSpec1(*hash fields);
369 public:
370 
371 
372 
373 private:
374  hash getSpec2(hash spec);
375 public:
376 
377 
383 private:
384  list adjustFieldsFromHeaders(string type, *list headers, bool check = False);
385 public:
386 
387 
388 }; // class CsvHelper
389 
391 namespace CsvUtil {
393  const EOL_UNIX = "\n"; // Unix end of line character sequence (for new OS X too)
395  const EOL_WIN = "\r\n"; // MS DOS/Windows end of line character sequence
397  const EOL_MACINTOSH = "\r"; // old (pre-OSX) Macintosh end of line character sequence
398 
399  // helper list of end of line values
400  const EOLS = (EOL_UNIX, EOL_WIN, EOL_MACINTOSH, );
401 
403  const CSV_TYPE_UNKNOWN = "<unknown>"; // record type when not matching any type
405  const CSV_TYPE_SINGLE = "<single>"; // record type when multi-type is disabled
406 
409 
412 
413 
415 
653 class AbstractCsvIterator : public Qore::AbstractIterator, private CsvHelper {
654 // Per this page's index: an abstract base class that allows abstract CSV data to be iterated.
655 public:
656  private :
658  const Options = (
659  "compat_force_empty_string": C_OPT1|C_OPT2,
660  "date_format": C_OPT1|C_OPT2,
661  "date-format": C_OPT1|C_OPT2,
662  "encoding": C_OPT1|C_OPT2,
663  "eol": C_OPT1|C_OPT2,
664  "extended_record": C_OPT2,
665  "fields": C_OPT1,
666  "header-lines": C_OPT1|C_OPT2,
667  "header_lines": C_OPT1|C_OPT2,
668  "header-names": C_OPT1|C_OPT2,
669  "header_names": C_OPT1|C_OPT2,
670  "header_reorder": C_OPT1|C_OPT2,
671  "headers": C_OPT1,
672  "ignore-empty": C_OPT1|C_OPT2,
673  "ignore_empty": C_OPT1|C_OPT2,
674  "ignore-whitespace": C_OPT1|C_OPT2,
675  "ignore_whitespace": C_OPT1|C_OPT2,
676  "quote": C_OPT1|C_OPT2,
677  "separator": C_OPT1|C_OPT2,
678  "timezone": C_OPT1|C_OPT2,
679  "tolwr": C_OPT1|C_OPT2,
680  "verify-columns": C_OPT1|C_OPT2,
681  "verify_columns": C_OPT1|C_OPT2,
682  );
683 // Options (above): option name -> bitmask of the CsvHelper C_OPT1/C_OPT2 flags; most names are listed in both a dash and an underscore spelling
684  // field separator
685  string separator = ",";
686 
687  // field content delimiter
688  string quote = "\"";
689 
690  // number of header lines
691  softint headerLines = 0;
692 
693  // flag to use string names from the first header row if possible
694  bool headerNames = False;
695 
696  // True if empty lines should be ignored
697  bool ignoreEmptyLines = True;
698 
699  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
700  bool ignoreWhitespace = True;
701 
702  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
703  *TimeZone timezone;
704 
705  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
706  bool checkElementCounts = False;
707 
708  // getRecord/getValue returns extended hash
709  bool extendedRecord = False;
710 
711  // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
712  bool compat_force_empty_string = False;
713 
714  // column count for verifying column counts
715  int cc;
716 
717  // current record count for the index() method
718  int rc = 0;
719 
720  // to resolve record type by rules
721  hash m_resolve_by_rule;
722 
723  // to resolve record type by number of fields
724  hash m_resolve_by_count;
725 
726  // list of idx to field transformations, in order of spec
727  hash m_resolve_by_idx;
728 
729  // fake specs based on the first non-header row
730  bool fakeHeaderNames;
731 
732  // data source iterator
733  AbstractLineIterator lineIterator;
734 
735 public:
736 
738 // constructor overload: line source plus an optional options hash
744  constructor(AbstractLineIterator li, *hash opts);
745 
746 
748 // constructor overload: line source plus a spec hash and an options hash, both non-optional (see the NOTE below)
753  // NOTE: when declared as *hash then always calls this constructor
754  constructor(AbstractLineIterator li, hash spec, hash opts);
755 
756 
758 // From here on each private method is wrapped in its own "private:" ... "public:" pair, so the access level is toggled per member.
759 private:
760  processCommonOptions(*hash opts, int C_OPTx);
761 public:
762 
763 
765 
766 private:
767  processSpec(hash spec);
768 public:
769 
770 
772 
773 private:
774  prepareFieldsFromHeaders(*list headers);
775 public:
776 
777 
778  bool valid();
779 
780 
782 
787  bool next();
788 
789 
791 
798  auto memberGate(string name);
799 
800 
802 
813  hash getValue();
814 
815 
817 // NOTE(review): "extended" presumably selects the extended hash mentioned on the extendedRecord member above - confirm
830  hash getRecord(bool extended);
831 
832 
834 
845  hash getRecord();
846 
847 
849 
861  auto getRecordList();
862 
863 
865 // NOTE(review): presumably returns the "separator" member (field separator) - confirm
872  string getSeparator();
873 
874 
876 // NOTE(review): presumably returns the "quote" member (field content delimiter) - confirm
883  string getQuote();
884 
885 
887 
894  *list getHeaders();
895 
896 
898 
903  *list getHeaders(string type);
904 
905 
907 // index(): the "rc" member above is documented as the current record count for this method
918  int index();
919 
920 
922 
935  int lineNumber();
936 
937 
938 
939 private:
940  auto handleType(hash fh, *string val);
941 public:
942 
943 
945 
946 private:
947  list getLineAndSplit();
948 public:
949 
950 
952 // identifyType is public and returns string; identifyTypeImpl below is private and may return no value (*string)
959  string identifyType(list rec);
960 
961 
963 
971 private:
972  *string identifyTypeImpl(list rec);
973 public:
974 
975 
977 
978 private:
979  hash parseLine();
980 public:
981 
982  };
983 
985 // Per this page's index (Definition ...:990): the CsvIterator class allows CSV sources to be iterated on a record basis.
991 // NOTE(review): the class header at listing line 990 is not reproduced in this listing; the members below belong to that class.
992 public:
994 
999  constructor(Qore::AbstractLineIterator li, *hash opts) ;
1000 
1001 
1003 
1009  constructor(Qore::AbstractLineIterator li, hash spec, hash opts) ;
1010 
1011 
1013 // stream-based overloads: an input stream plus its character encoding (default "UTF-8")
1019  constructor(Qore::InputStream input, string encoding = "UTF-8", *hash opts) ;
1020 
1021 
1023 
1030  constructor(Qore::InputStream input, string encoding = "UTF-8", hash spec, hash opts) ;
1031 
1032 
1033  auto memberGate(string name);
1034 
1035  };
1036 
1038 // Per this page's index (Definition ...:1045): the CsvFileIterator class allows CSV files to be iterated on a record basis.
1046 // NOTE(review): the class header at listing line 1045 is not reproduced in this listing; the members below belong to that class.
1047 public:
1048  private :
1050  string m_file_path; // the path of the file being iterated
1051 
1052 public:
1053 
1055 // constructor overload: file path plus an optional options hash
1060  constructor(string path, *hash opts) ;
1061 
1062 
1064 // constructor overload: file path plus a spec hash and an options hash, both non-optional
1068  constructor(string path, hash spec, hash opts) ;
1069 
1070 
1072  auto memberGate(string name);
1073 
1074 
1076  string getEncoding();
1077 
1078 
1080  string getFileName();
1081 
1082 
1084  hash<Qore::StatInfo> hstat();
1085 
1086 
1088  list stat();
1089 
1090  }; // CsvFileIterator class
1091 
1093 // Per this page's index (Definition ...:1100): the CsvDataIterator class allows arbitrary CSV string data to be iterated on a record basis.
1101 // NOTE(review): the class header at listing line 1100 is not reproduced in this listing; the members below belong to that class.
1102 public:
1103 
1105 // constructor overload: CSV data as a string plus an optional options hash
1110  constructor(string data, *hash opts) ;
1111 
1112 
1114 // constructor overload: CSV data as a string plus a spec hash and an options hash, both non-optional
1118  constructor(string data, hash spec, hash opts) ;
1119 
1120 
1121  auto memberGate(string name);
1122 
1123 
1124  };
1125 
1127 
1236 class AbstractCsvWriter : private CsvHelper {
1237 // Per this page's index: the AbstractCsvWriter class provides a parent for all CSV writers.
1238 public:
1239  private :
1241  const Options = (
1242  "block": C_OPT1|C_OPT2,
1243  "datamap": C_OPT1,
1244  "date_format": C_OPT1|C_OPT2,
1245  "date-format": C_OPT1|C_OPT2,
1246  "encoding": C_OPT1|C_OPT2,
1247  "eol": C_OPT1|C_OPT2,
1248  "fields": C_OPT1,
1249  "headers": C_OPT1,
1250  "header_reorder": C_OPT1,
1251  "info_log": C_OPT1|C_OPT2,
1252  "optimal_quotes": C_OPT1|C_OPT2,
1253  "optimal-quotes": C_OPT1|C_OPT2,
1254  "quote": C_OPT1|C_OPT2,
1255  "quote_escape": C_OPT1|C_OPT2,
1256  "separator": C_OPT1|C_OPT2,
1257  "verify_columns": C_OPT1|C_OPT2,
1258  "verify-columns": C_OPT1|C_OPT2,
1259  "write_headers": C_OPT1|C_OPT2,
1260  "write-headers": C_OPT1|C_OPT2,
1261  );
1262 // Options (above): option name -> bitmask of the CsvHelper C_OPT1/C_OPT2 flags; several names are listed in both a dash and an underscore spelling
1264  string encoding; // output file character encoding
1265 
1267  string separator = ",";
1268 
1270  string quote = "\"";
1271 
1273  string m_quoteEscapeChar = "\\";
1274 
1276  string eol = EOL_UNIX;
1277 
1279  bool checkElementCounts = False;
1280 
1282  int lineNo = 0;
1283 
1285  int block = 1000;
1286 
1289 // NOTE(review): listing line 1288 is missing here; this page's index records "string baseTemplate" (base template for value format) at that line.
1291  bool write_headers = True;
1292 
1294  bool optimal_quotes = True;
1295 
1297  *code info_log; // a closure/call reference for informational logging when using write(SQLStatement)
1298 
1301 // NOTE(review): listing line 1300 is missing here; this page's index records "hash m_out_by_name" (mapping output field by name) at that line.
1304 // NOTE(review): listing line 1303 is missing here; this page's index records "hash m_out_by_idx" (mapping output field by index) at that line.
1305 public:
1306 
1308 // constructor overload: error name plus an optional options hash
1314  constructor(string n_errname, *hash n_opts);
1315 
1316 
1318 // constructor overload: error name plus a spec hash and an options hash, both non-optional
1326  constructor(string n_errname, hash spec, hash n_opts);
1327 
1328 
1330 // From here on each private method is wrapped in its own "private:" ... "public:" pair, so the access level is toggled per member.
1331 private:
1332  processCommonOptions(*hash n_opts, int C_OPTx);
1333 public:
1334 
1335 
1337 
1338 private:
1339  processSpec();
1340 public:
1341 
1342 
1344 
1345 private:
1346  writeHeaders();
1347 public:
1348 
1349 
1351 // writeLine overloads: values as a list or a hash, optionally preceded by a record type name
1356  writeLine(list values);
1357 
1358 
1360 
1365  writeLine(hash values);
1366 
1367 
1369 
1375  writeLine(string type, list values);
1376 
1377 
1379 
1385  writeLine(string type, hash values);
1386 
1387 
1389 // write overloads: input given as an iterator, an SQL statement, or a list
1396  write(Qore::AbstractIterator iterator);
1397 
1398 
1400 
1407  write(Qore::SQL::SQLStatement iterator);
1408 
1409 
1411 
1418  write(list l);
1419 
1420 
1422 // abstract here; the three writer class bodies later in this listing each declare a non-abstract writeRawLine(list values)
1423 private:
1424  abstract writeRawLine(list values);
1425 public:
1426 
1428 
1434 private:
1435  string prepareRawLine(list values);
1436 public:
1437 
1438 
1439 
1440 private:
1441  string prepareRawLineIntern(list values);
1442 public:
1443 
1444 
1445  }; // AbstractCsvWriter class
1446 
1449 // Per this page's index (Definition ...:1448): the CsvWriter class for safe CSV data creation. NOTE(review): the class header at listing line 1448 is not reproduced in this listing.
1450 public:
1451  private :
1453  StreamWriter output; // the output stream for the CSV data
1454 
1455 public:
1456 
1458 // constructor overload: output stream plus an optional options hash
1464  constructor(Qore::OutputStream output, *hash opts) ;
1465 
1466 
1468 // constructor overload: output stream plus a spec hash and an options hash, both non-optional
1475  constructor(Qore::OutputStream output, hash spec, hash opts) ;
1476 
1477 
1479 // non-abstract declaration of the writeRawLine(list values) method that AbstractCsvWriter declares abstract
1480 private:
1481  writeRawLine(list values);
1482 public:
1483 
1484  };
1485 
1487 // Per this page's index (Definition ...:1491): the CsvFileWriter class for safe CSV file creation.
1492 // NOTE(review): the class header at listing line 1491 is not reproduced in this listing; the members below belong to that class.
1493 public:
1494  private :
1497 // NOTE(review): listing line 1496 is missing here; this page's index records "Qore::File file" (the file to write) at that line.
1498 public:
1499 
1501 // constructor overload: file path plus an optional options hash
1509  constructor(string path, *hash opts) ;
1510 
1511 
1513 // constructor overload: file path plus a spec hash and an options hash, both non-optional
1522  constructor(string path, hash spec, hash opts) ;
1523 
1524 
1525 
1526 private:
1527  openFile(string path);
1528 public:
1529 
1530 
1531 // non-abstract declaration of the writeRawLine(list values) method that AbstractCsvWriter declares abstract
1532 private:
1533  writeRawLine(list values);
1534 public:
1535 
1536  }; // CsvFileWriter
1537 
1539 // Per this page's index (Definition ...:1543): the CsvStringWriter class for in-memory string CSV creation.
1544 // NOTE(review): the class header at listing line 1543 is not reproduced in this listing; the members below belong to that class.
1545 public:
1546  private :
1547  // a csv content
1548  string content;
1549 
1550 public:
1551 
1553 // constructor overload: optional options hash only
1558  constructor(*hash opts) ;
1559 
1560 
1562 // constructor overload: a spec hash and an options hash, both non-optional
1568  constructor(hash spec, hash opts) ;
1569 
1570 
1571 
1572 private:
1573  initContent();
1574 public:
1575 
1576 
1577 // non-abstract declaration of the writeRawLine(list values) method that AbstractCsvWriter declares abstract
1578 private:
1579  writeRawLine(list values);
1580 public:
1581 
1582 
1584 // unlike the AbstractCsvWriter write() declarations, the two overloads here declare a string return type
1593  string write(Qore::AbstractIterator iterator);
1594 
1595 
1597 
1606  string write(list l);
1607 
1608 
1610  string getContent();
1611 
1612  }; // CsvStringWriter
1613 }; // CsvUtil namespace
hash m_out_by_name
mapping output field by name
Definition: CsvUtil.qm.dox.h:1300
*list stat(string path)
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated ...
Definition: CsvUtil.qm.dox.h:653
Qore::File file
the file to write
Definition: CsvUtil.qm.dox.h:1496
const True
hash m_out_by_idx
mapping output field by index
Definition: CsvUtil.qm.dox.h:1303
const False
*code info_log
a closure/call reference for informational logging when using write(SQLStatement) ...
Definition: CsvUtil.qm.dox.h:1297
list list(...)
*hash< StatInfo > hstat(string path)
The CsvFileIterator class allows CSV files to be iterated on a record basis.
Definition: CsvUtil.qm.dox.h:1045
int index(softstring str, softstring substr, softint pos=0)
The CsvStringWriter class for in-memory string CSV creation.
Definition: CsvUtil.qm.dox.h:1543
csvutil_set_global_compat_force_empty_string(softbool val)
sets the global_compat_force_empty_string variable to force "*string" fields with no value to ret...
The AbstractCsvWriter class provides a parent for all CSV writers.
Definition: CsvUtil.qm.dox.h:1236
string baseTemplate
base template for value format
Definition: CsvUtil.qm.dox.h:1288
const EOL_MACINTOSH
Old (pre-OSX) Macintosh end of line character sequence.
Definition: CsvUtil.qm.dox.h:397
const CSV_TYPE_UNKNOWN
Record type when not matching any type.
Definition: CsvUtil.qm.dox.h:403
const EOL_UNIX
Unix end of line character sequence (for new OS X too)
Definition: CsvUtil.qm.dox.h:393
StreamWriter output
the output stream for the CSV data
Definition: CsvUtil.qm.dox.h:1453
The CsvWriter class for safe CSV data creation.
Definition: CsvUtil.qm.dox.h:1448
The CsvIterator class allows CSV sources to be iterated on a record basis. The source of the input da...
Definition: CsvUtil.qm.dox.h:990
string type(auto arg)
const EOL_WIN
MS DOS/Windows end of line character sequence.
Definition: CsvUtil.qm.dox.h:395
The CsvDataIterator class allows arbitrary CSV string data to be iterated on a record basis...
Definition: CsvUtil.qm.dox.h:1100
string m_file_path
the path of the file being iterated
Definition: CsvUtil.qm.dox.h:1050
string encoding
output file character encoding
Definition: CsvUtil.qm.dox.h:1264
the CsvUtil namespace contains all the objects in the CsvUtil module
Definition: CsvUtil.qm.dox.h:391
bool global_compat_force_empty_string
global option to force "*string" fields with no value to return an empty string when parsing rath...
The CsvFileWriter class for safe CSV file creation.
Definition: CsvUtil.qm.dox.h:1491
hash hash(object obj)
const CSV_TYPE_SINGLE
Record type when multi-type is disabled.
Definition: CsvUtil.qm.dox.h:405