Qore CsvUtil Module Reference  1.7.1
AbstractCsvIterator.qc.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // Qore AbstractCsvIterator class definition
3 
4 /* AbstractCsvIterator.qc Copyright 2012 - 2020 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // assume local var scope, do not use "$" for vars, members, and method calls
26 
// the CsvUtil namespace; all classes used in the CsvUtil module should be inside this namespace
28 namespace CsvUtil {
30 
     // the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated;
     // it publicly inherits Qore::AbstractIterator and inherits CsvHelper with protected access
273 class AbstractCsvIterator : public Qore::AbstractIterator, protected CsvHelper {
274 
275 public:
276 protected:
     // valid options for the object (a hash for quick lookups of valid keys)
278  const Options = ...;
279 
280 
281  // field separator
282  string separator = ",";
283 
284  // field content delimiter
285  string quote = "\"";
286 
287  // number of header lines
288  softint headerLines = 0;
289 
290  // flag to use string names from the first header row if possible
291  bool headerNames = False;
292 
293  // True if empty lines should be ignored
294  bool ignoreEmptyLines = True;
295 
296  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
297  bool ignoreWhitespace = True;
298 
299  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
300  *TimeZone timezone;
301 
302  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
303  bool checkElementCounts = False;
304 
305  // getRecord/getValue returns extended hash
306  bool extendedRecord = False;
307 
308  // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
309  bool compat_force_empty_string = False;
310 
311  // read ahead flag
312  bool read_ahead;
313 
314  // column count for verifying column counts
315  int cc;
316 
317  // current record count for the index() method
318  int rc = 0;
319 
320  // to resolve record type by rules
321  hash<string, hash<string, list<hash<auto>>>> m_resolve_by_rule;
322 
323  // to resolve record type by number of fields
324  hash<string, list<string>> m_resolve_by_count;
325 
326  // list of idx to field transformations, in order of spec
327  hash<string, list<string>> m_resolve_by_idx;
328 
329  // fake specs based on the first non-header row
330  bool fakeHeaderNames;
331 
     // the eol marker, if any
333  *string eol;
334 
335  // data source iterator
336  AbstractLineIterator lineIterator;
337 
338 public:
339 
341 
     // creates the AbstractCsvIterator with an option hash in single-type mode
348  constructor(AbstractLineIterator li, *hash<auto> opts);
349 
350 
352 
     // creates the AbstractCsvIterator with an option hash in multi-type mode
357  // NOTE: when declared as *hash then always calls this constructor
358  constructor(AbstractLineIterator li, hash<auto> spec, hash<auto> opts);
359 
360 
     // process common options and assign internal fields
362 protected:
363  processCommonOptions(*hash<auto> opts, int C_OPTx);
364 public:
365 
366 
     // process the specification and assign internal data for resolving
368 protected:
369  processSpec(hash<auto> spec);
370 public:
371 
372 
     // match headers provided in the CSV header or in options; never called for multi-type
     // (the remainder of the original description is truncated in the generated docs)
374 protected:
375  prepareFieldsFromHeaders(*list<auto> headers);
376 public:
377 
378 
     // NOTE(review): no description is visible for valid(); presumably reports whether the
     // iterator currently points at a valid record - confirm against Qore::AbstractIterator
379  bool valid();
380 
381 
383 
     // moves the current line / record position to the next line / record; returns False
     // when there are no more (the original description is truncated in the generated docs)
388  bool next();
389 
390 
392 
     // reads a single row without moving the index position
394  peek();
395 
396 
398 
     // returns the given column value for the current row
405  auto memberGate(string name);
406 
407 
409 
     // returns the current record as a hash
420  hash<auto> getValue();
421 
422 
424 
     // returns the current record as a hash
     // NOTE(review): "extended" presumably selects the extended hash format (see extendedRecord) - confirm
437  hash<auto> getRecord(bool extended);
438 
439 
441 
     // returns the current record as a hash
452  hash<auto> getRecord();
453 
454 
456 
     // NOTE(review): the generated docs describe "auto getRecordList()" (returns the current
     // record as a list), but its declaration is absent from this listing at this position
469 
470 
472 
     // returns the current separator string
479  string getSeparator();
480 
481 
483 
     // returns the current quote string
490  string getQuote();
491 
492 
     // returns the description of the record type, if any
494  *hash<string, AbstractDataField> getRecordType();
495 
496 
498 
     // returns the current record headers or NOTHING if no headers have been detected or saved yet
505  *list<string> getHeaders();
506 
507 
509 
     // returns a list of headers for the given record or NOTHING if the record is not recognized
514  *list<string> getHeaders(string type);
515 
516 
518 
     // returns the row index being iterated, which does not necessarily correspond to the line number
     // (the remainder of the original description is truncated in the generated docs)
529  int index();
530 
531 
533 
     // returns the current iterator line number in the file (the first line is line 1); 0 is
     // returned in a "not pointing ..." case whose description is truncated in the generated docs
546  int lineNumber();
547 
548 
550 
     // returns the current line 'as it is', i.e. the original string
559  string getRawLine();
560 
561 
563 
     // returns the list of raw string values of the current line
573  list<*string> getRawLineValues();
574 
575 
     // NOTE(review): no description is visible for handleType(); presumably converts the raw
     // value "val" according to the field description "fh" - confirm against the implementation
576 protected:
577  auto handleType(hash<auto> fh, *string val);
578 public:
579 
580 
     // read line split by separator/quote into list
582 protected:
583  list<*string> getLineAndSplit();
584 public:
585 
586 
588 
     // identifies the record type of a line using identifyTypeImpl(); may be overridden if necessary
595  string identifyType(list<auto> rec);
596 
597 
599 
     // identifies an input record; performs a lookup to a precalculated structure
     // (the remainder of the original description is truncated in the generated docs)
606 protected:
607  *string identifyTypeImpl(list<auto> rec);
608 public:
609 
610 
     // parses a line in the file and returns the processed fields (declared return type: hash<auto>)
612 protected:
613  hash<auto> parseLine();
614 public:
615 
616  }; // AbstractCsvIterator class
617 }; // CsvUtil namespace
CsvUtil::AbstractCsvIterator::processSpec
processSpec(hash< auto > spec)
process the specification and assign internal data for resolving
CsvUtil::AbstractCsvIterator::getRecordList
auto getRecordList()
Returns the current record as a list.
CsvUtil::AbstractCsvIterator::lineNumber
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
CsvUtil::AbstractCsvIterator::next
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
CsvUtil::AbstractCsvIterator::Options
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: AbstractCsvIterator.qc.dox.h:278
type
string type(auto arg)
CsvUtil::AbstractCsvIterator::getQuote
string getQuote()
Returns the current quote string.
CsvUtil::AbstractCsvIterator
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated
Definition: AbstractCsvIterator.qc.dox.h:273
CsvUtil::AbstractCsvIterator::index
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
CsvUtil::AbstractCsvIterator::getHeaders
*list< string > getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet.
CsvUtil::AbstractCsvIterator::getRawLineValues
list< *string > getRawLineValues()
Returns the list of raw string values of the current line.
CsvUtil::AbstractCsvIterator::getRecord
hash< auto > getRecord()
Returns the current record as a hash.
CsvUtil::AbstractCsvIterator::prepareFieldsFromHeaders
prepareFieldsFromHeaders(*list< auto > headers)
match headers provided at csv header or in options, never called for multi-type because header_names ...
True
const True
CsvUtil
the CsvUtil namespace. All classes used in the CsvUtil module should be inside this namespace
Definition: AbstractCsvIterator.qc.dox.h:28
CsvUtil::AbstractCsvIterator::processCommonOptions
processCommonOptions(*hash< auto > opts, int C_OPTx)
process common options and assign internal fields
CsvUtil::AbstractCsvIterator::identifyTypeImpl
*string identifyTypeImpl(list< auto > rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
CsvUtil::AbstractCsvIterator::constructor
constructor(AbstractLineIterator li, *hash< auto > opts)
creates the AbstractCsvIterator with an option hash in single-type mode
CsvUtil::AbstractCsvIterator::getValue
hash< auto > getValue()
Returns the current record as a hash.
CsvUtil::AbstractCsvIterator::getHeaders
*list< string > getHeaders(string type)
Returns a list of headers for the given record or NOTHING if the record is not recognized.
CsvUtil::AbstractCsvIterator::getSeparator
string getSeparator()
Returns the current separator string.
CsvUtil::AbstractCsvIterator::identifyType
string identifyType(list< auto > rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
CsvUtil::AbstractCsvIterator::eol
*string eol
the eol marker, if any
Definition: AbstractCsvIterator.qc.dox.h:333
CsvUtil::AbstractCsvIterator::getRecord
hash< auto > getRecord(bool extended)
Returns the current record as a hash.
CsvUtil::AbstractCsvIterator::getRawLine
string getRawLine()
Returns the current line 'as it is', i.e. the original string.
CsvUtil::AbstractCsvIterator::getRecordType
*hash< string, AbstractDataField > getRecordType()
Returns the description of the record type, if any.
False
const False
CsvUtil::AbstractCsvIterator::peek
peek()
Reads a single row without moving the index position.
CsvUtil::AbstractCsvIterator::constructor
constructor(AbstractLineIterator li, hash< auto > spec, hash< auto > opts)
creates the AbstractCsvIterator with an option hash in multi-type mode
CsvUtil::AbstractCsvIterator::parseLine
hash< auto > parseLine()
Parses a line in the file and returns the processed fields as a hash.
Qore::AbstractIterator
CsvUtil::AbstractCsvIterator::memberGate
auto memberGate(string name)
Returns the given column value for the current row.
CsvUtil::AbstractCsvIterator::getLineAndSplit
list< *string > getLineAndSplit()
Read line split by separator/quote into list.