Qore CsvUtil Module Reference  1.6.2
AbstractCsvIterator.qc.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // Qore AbstractCsvIterator class definition
3 
4 /* AbstractCsvIterator.qc Copyright 2012 - 2018 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // assume local var scope, do not use "$" for vars, members, and method calls
26 
// the CsvUtil namespace; all classes used in the CsvUtil module should be inside this namespace
28 namespace CsvUtil {
30 
     // AbstractCsvIterator is an abstract base class that allows abstract CSV data to be iterated.
     // It publicly inherits Qore::AbstractIterator and protectedly inherits CsvHelper.
273 class AbstractCsvIterator : public Qore::AbstractIterator, protected CsvHelper {
274 
275 public:
276 protected:
     // valid options for the object (a hash for quick lookups of valid keys).
     // Each value is a bitmask built from C_OPT1 and/or C_OPT2; presumably the bits select
     // which construction mode accepts the key (see the C_OPTx argument of
     // processCommonOptions()) -- TODO confirm against the CsvHelper definitions.
     // Several keys are listed in both a hyphenated and an underscore spelling.
278  const Options = (
279  "compat_force_empty_string": C_OPT1|C_OPT2,
280  "date_format": C_OPT1|C_OPT2,
281  "date-format": C_OPT1|C_OPT2,
282  "encoding": C_OPT1|C_OPT2,
283  "eol": C_OPT1|C_OPT2,
284  "extended_record": C_OPT2,
285  "fields": C_OPT1,
286  "header-lines": C_OPT1|C_OPT2,
287  "header_lines": C_OPT1|C_OPT2,
288  "header-names": C_OPT1|C_OPT2,
289  "header_names": C_OPT1|C_OPT2,
290  "header_reorder": C_OPT1|C_OPT2,
291  "headers": C_OPT1,
292  "ignore-empty": C_OPT1|C_OPT2,
293  "ignore_empty": C_OPT1|C_OPT2,
294  "ignore-whitespace": C_OPT1|C_OPT2,
295  "ignore_whitespace": C_OPT1|C_OPT2,
296  "number_format": C_OPT1|C_OPT2,
297  "quote": C_OPT1|C_OPT2,
298  "separator": C_OPT1|C_OPT2,
299  "timezone": C_OPT1|C_OPT2,
300  "tolwr": C_OPT1|C_OPT2,
301  "verify-columns": C_OPT1|C_OPT2,
302  "verify_columns": C_OPT1|C_OPT2,
303  );
304 
305  // field separator
306  string separator = ",";
307 
308  // field content delimiter
309  string quote = "\"";
310 
311  // number of header lines
312  softint headerLines = 0;
313 
314  // flag to use string names from the first header row if possible
315  bool headerNames = False;
316 
317  // True if empty lines should be ignored
318  bool ignoreEmptyLines = True;
319 
320  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
321  bool ignoreWhitespace = True;
322 
323  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
324  *TimeZone timezone;
325 
326  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
327  bool checkElementCounts = False;
328 
329  // getRecord/getValue returns extended hash
330  bool extendedRecord = False;
331 
332  // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
333  bool compat_force_empty_string = False;
334 
335  // column count for verifying column counts
336  int cc;
337 
338  // current record count for the index() method
339  int rc = 0;
340 
341  // to resolve record type by rules
342  hash m_resolve_by_rule;
343 
344  // to resolve record type by number of fields
345  hash m_resolve_by_count;
346 
347  // list of idx to field transformations, in order of spec
348  hash m_resolve_by_idx;
349 
350  // fake specs based on the first non-header row
351  bool fakeHeaderNames;
352 
353  // data source iterator
354  AbstractLineIterator lineIterator;
355 
356 public:
357 
359 
     // creates the AbstractCsvIterator with an option hash in single-type mode
365  constructor(AbstractLineIterator li, *hash opts);
366 
367 
369 
     // constructor variant taking a record specification hash in addition to the options;
     // presumably the multi-type counterpart of the constructor above -- TODO confirm
374  // NOTE: when declared as *hash then always calls this constructor
375  constructor(AbstractLineIterator li, hash spec, hash opts);
376 
377 
379 protected:
     // process common options and assign internal fields
380  processCommonOptions(*hash opts, int C_OPTx);
381 public:
382 
383 
385 protected:
     // process specification and assign internal data for resolving
386  processSpec(hash spec);
387 public:
388 
389 
391 protected:
     // match headers provided at csv header or in options; never called for multi-type
392  prepareFieldsFromHeaders(*list headers);
393 public:
394 
395 
     // presumably reports whether the iterator currently points at a valid record
     // (Qore::AbstractIterator interface) -- TODO confirm
396  bool valid();
397 
398 
400 
     // Moves the current line / record position to the next line / record;
     // returns False when there is nothing further to iterate
405  bool next();
406 
407 
409 
     // Returns the given column value for the current row
416  auto memberGate(string name);
417 
418 
420 
     // Returns the current record as a hash
431  hash getValue();
432 
433 
435 
     // variant of getRecord() taking an explicit flag; presumably selects the extended
     // record format for this call instead of the extendedRecord member -- TODO confirm
448  hash getRecord(bool extended);
449 
450 
452 
     // Returns the current record as a hash
463  hash getRecord();
464 
465 
467 
     // Returns the current record as a list (declared return type is auto)
479  auto getRecordList();
480 
481 
483 
     // Returns the current separator string
490  string getSeparator();
491 
492 
494 
     // Returns the current quote string
501  string getQuote();
502 
503 
505 
     // Returns the current record headers or NOTHING if no headers have been detected or saved yet
512  *list getHeaders();
513 
514 
516 
     // variant of getHeaders() taking a type name; presumably returns the headers of the
     // given record type -- TODO confirm
521  *list getHeaders(string type);
522 
523 
525 
     // Returns the row index being iterated, which does not necessarily correspond to the line number
536  int index();
537 
538 
540 
     // Returns the current iterator line number in the file (the first line is line 1),
     // or 0 when the iterator is not pointing at data -- TODO confirm the exact condition
553  int lineNumber();
554 
555 
557 
     // Returns the current line 'as it is', i.e. the original string
566  string getRawLine();
567 
568 
570 
581 
582 
583 protected:
     // presumably converts the raw field value val according to the field description fh
     // -- TODO confirm; the return type is auto
584  auto handleType(hash fh, *string val);
585 public:
586 
587 
589 protected:
     // NOTE(review): this protected section is empty in the listing; the page's member index
     // also names "list getLineAndSplit()" and "list getRawLineValues()", neither of which is
     // declared anywhere in this listing -- verify against the original .qc source
591 public:
592 
593 
595 
     // Identify a line type using identifyTypeImpl(); may be overridden if necessary
602  string identifyType(list rec);
603 
604 
606 
613 protected:
     // Identify an input record; may return NOTHING (return type is *string)
614  *string identifyTypeImpl(list rec);
615 public:
616 
617 
619 protected:
     // Parses a line in the file and returns the processed fields (declared return type: hash)
620  hash parseLine();
621 public:
622 
623  }; // AbstractCsvIterator class
624 }; // CsvUtil namespace
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
hash parseLine()
Parses a line in the file and returns a processed list of the fields.
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated ...
Definition: AbstractCsvIterator.qc.dox.h:273
list getLineAndSplit()
Read line split by separator/quote into list.
const True
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: AbstractCsvIterator.qc.dox.h:278
string getQuote()
Returns the current quote string.
const False
list list(...)
string getSeparator()
Returns the current separator string.
*string identifyTypeImpl(list rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
string getRawLine()
Returns the current line 'as it is', i.e. the original string.
*list getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet...
string type(auto arg)
hash getValue()
Returns the current record as a hash.
list getRawLineValues()
Returns the list of raw string values of the current line.
string identifyType(list rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
auto memberGate(string name)
Returns the given column value for the current row.
the CsvUtil namespace. All classes used in the CsvUtil module should be inside this namespace ...
Definition: AbstractCsvIterator.qc.dox.h:28
processCommonOptions(*hash opts, int C_OPTx)
process common options and assign internal fields
hash hash(object obj)
auto getRecordList()
Returns the current record as a list.
prepareFieldsFromHeaders(*list headers)
match headers provided at csv header or in options, never called for multi-type because header_names ...
processSpec(hash spec)
process specification and assign internal data for resolving
constructor(AbstractLineIterator li, *hash opts)
creates the AbstractCsvIterator with an option hash in single-type mode
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
hash getRecord()
Returns the current record as a hash.
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...