#!/usr/bin/env python
"""Support for ISTP-compliant CDFs

The `ISTP metadata standard <https://spdf.gsfc.nasa.gov/sp_use_of_cdf.html>`_
specifies the interpretation of the attributes in a CDF to describe
relationships between the variables and their physical interpretation.

This module supports that subset of CDFs.

Authors: Jon Niehof

Additional Contributors: Lorna Ellis, Asher Merrill

Institution: University of New Hampshire

Contact: Jonathan.Niehof@unh.edu

.. rubric:: Classes

.. autosummary::
    :toctree: autosummary
    :template: clean_class.rst

    FileChecks
    VariableChecks

.. rubric:: Functions

.. autosummary::
    :toctree: autosummary

    fillval
    format
"""
import datetime
import math
import os.path
import re
import numpy
import spacepy.datamodel
import spacepy.pycdf
import spacepy.pycdf.const
class VariableChecks(object):
    """ISTP compliance checks for a single variable.

    Checks a variable's compliance with ISTP standards. This mostly
    performs checks that are not currently performed by the `ISTP
    skeleton editor <https://spdf.gsfc.nasa.gov/skteditor/>`_. All
    tests return a list, one error string for every noncompliance
    found (empty list if compliant). :meth:`all` will perform all
    tests and concatenate all errors.

    .. autosummary::

        all
        depends
        depsize
        fieldnam
        recordcount
        validdisplaytype
        validrange
        validscale

    .. automethod:: all
    .. automethod:: depends
    .. automethod:: depsize
    .. automethod:: fieldnam
    .. automethod:: recordcount
    .. automethod:: validdisplaytype
    .. automethod:: validrange
    .. automethod:: validscale
    """
    #When adding new tests, add to list above, and the list in all()
    #Validation failures should be formatted as a sentence (initial cap,
    #closing period) and NOT include the variable name.

    @classmethod
    def all(cls, v, catch=False):
        """Perform all variable tests

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check
        catch : bool
            Catch exceptions in tests (default False). If True, any
            exceptions in subtests will result in an addition to the
            validation failures of the form "Test x did not complete."
            Calling the individual test will reveal the full traceback.
            Only :class:`Exception` subclasses are caught, so
            ``KeyboardInterrupt`` and ``SystemExit`` always propagate.

        Returns
        -------
        list of str
            Description of each validation failure.

        Examples
        --------
        >>> import spacepy.pycdf
        >>> import spacepy.pycdf.istp
        >>> f = spacepy.pycdf.CDF('foo.cdf', create=True)
        >>> v = f.new('Var', data=[1, 2, 3])
        >>> spacepy.pycdf.istp.VariableChecks.all(v)
        ['No FIELDNAM attribute.']
        """
        #Update this list when adding new test functions
        callme = (cls.depends, cls.depsize, cls.fieldnam, cls.recordcount,
                  cls.validrange, cls.validscale, cls.validdisplaytype)
        errors = []
        for f in callme:
            try:
                errors.extend(f(v))
            except Exception:
                if not catch:
                    raise
                errors.append('Test {} did not complete.'.format(
                    f.__name__))
        return errors

    @classmethod
    def depends(cls, v):
        """Checks that DEPEND and LABL_PTR variables actually exist

        Check that variables specified in the variable attributes for
        `DEPEND
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#DEPEND_0>`_
        and `LABL_PTR
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#LABL_PTR_1>`_
        exist in the CDF.

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        return ['{} variable {} missing.'.format(a, v.attrs[a])
                for a in v.attrs
                if a.startswith(('DEPEND_', 'LABL_PTR_'))
                and v.attrs[a] not in v.cdf_file]

    @classmethod
    def depsize(cls, v):
        """Checks that DEPEND has same shape as that dim

        Compares the size of variables specified in the variable
        attributes for `DEPEND
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#DEPEND_0>`_
        and compares to the size of the corresponding dimension in
        this variable.

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        rv = int(v.rv()) #RV is a leading dimension
        errs = []
        # Check that don't have invalid DEPEND_1
        # NOTE(review): this only matches a shape of exactly (0,), i.e. no
        # records; the message suggests any 1-D variable was meant -- confirm.
        if v.shape == (0,):
            if 'DEPEND_1' in v.attrs or 'DEPEND_2' in v.attrs:
                errs.append('Do not expect DEPEND_1 or DEPEND_2 in 1 dimensional variable.')
        for i in range(rv, len(v.shape)): #This is index on shape (of var)
            depidx = i + 1 - rv #This is x in DEPEND_x
            target = v.shape[i]
            if not 'DEPEND_{}'.format(depidx) in v.attrs:
                continue
            d = v.attrs['DEPEND_{}'.format(depidx)]
            if d in v.cdf_file:
                dv = v.cdf_file[d]
            else:
                continue #this is a different error
            # NOTE(review): this also fires when the dependency is NRV but
            # *does* have a DEPEND_0, although the message only describes
            # the "RV without DEPEND_0" case.
            if dv.rv() != ('DEPEND_0' in dv.attrs):
                errs.append('DEPEND_{} {} is RV but has no DEPEND_0.'
                            .format(depidx, d))
                continue
            #We hope the only weirdness is whether the dependency
            #is constant, or dependent on record. If it's dependent
            #on another dependency, this gets really weird really fast
            # If the dependency is dependent, remove the lower level
            # dependency size from consideration
            # eg. if counts [80,48], depends on energy [80,48],
            # depends on look [80], remove 80 from the view of energy
            # so that we accurately check 48==48.
            # NB: This assumes max of two layers of dependency
            if 'DEPEND_2' in dv.attrs:
                errs.append('Do not expect three layers of dependency.')
                continue
            elif 'DEPEND_1' in dv.attrs:
                dd = dv.attrs['DEPEND_1']
                if dd in v.cdf_file:
                    ddv = v.cdf_file[dd]
                else:
                    continue #this is a different error
                actual = list(dv.shape)
                # NOTE(review): both loops below remove from ``actual``
                # while iterating over it, so the element following each
                # removal is skipped. Left untouched on purpose: changing
                # which sizes get dropped changes validation results.
                for ii in actual:
                    if ii in ddv.shape:
                        actual.remove(ii)
                if 'DEPEND_0' in dv.attrs:
                    # record varying
                    dd = dv.attrs['DEPEND_0']
                    if dd[:5] != 'Epoch':
                        errs.append('Expect DEPEND_0 to be Epoch.')
                        continue
                    if dd in v.cdf_file:
                        ddv = v.cdf_file[dd]
                    else:
                        continue #this is a different error
                    for ii in actual:
                        if ii in ddv.shape:
                            actual.remove(ii)
                if len(actual) != 1:
                    errs.append('More complicated double dependency than taken into account.')
                    continue
                else:
                    actual = actual[0]
            else:
                #Simple dependency: first non-record dimension of the DEPEND
                actual = dv.shape[int(dv.rv())]
            if target != actual:
                errs.append('Dim {} sized {} but DEPEND_{} {} sized {}.'.format(
                    i, target, depidx, d, actual))
        return errs

    @classmethod
    def recordcount(cls, v):
        """Check that the DEPEND_0 has same record count as variable

        Checks the record count of the variable specified in the
        variable attribute for `DEPEND_0
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#DEPEND_0>`_
        and compares to the record count for this variable.

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        if not v.rv() or 'DEPEND_0' not in v.attrs:
            return []
        dep0 = v.attrs['DEPEND_0']
        if dep0 not in v.cdf_file: #This is a DIFFERENT error
            return []
        nrec, ndep = len(v), len(v.cdf_file[dep0])
        if nrec != ndep:
            return ['{} records; DEPEND_0 {} has {}.'.format(
                nrec, dep0, ndep)]
        return []

    @classmethod
    def _validhelper(cls, v, rng=True):
        """Helper function for checking SCALEMIN/MAX, VALIDMIN/MAX

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check
        rng : bool
            Do range check (True, default) or scale check (False)

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        whichmin, whichmax = ('VALIDMIN', 'VALIDMAX') if rng \
                             else ('SCALEMIN', 'SCALEMAX')
        errs = []
        vshape = v.shape
        minval, maxval = spacepy.pycdf.lib.get_minmax(v.type())
        if rng:
            #Data (and the fill mask) are only needed for the range check
            data = v[...]
            if 'FILLVAL' in v.attrs:
                #numpy.floating, not the removed numpy.float alias
                if numpy.issubdtype(v.dtype, numpy.floating):
                    is_fill = numpy.isclose(data, v.attrs['FILLVAL'])
                else:
                    is_fill = data == v.attrs['FILLVAL']
            else:
                is_fill = numpy.zeros(shape=vshape, dtype=bool)
        for which in (whichmin, whichmax):
            if not which in v.attrs:
                continue
            attrval = v.attrs[which]
            shown = attrval #what gets printed in messages
            multidim = bool(numpy.shape(attrval)) #multi-dimensional
            if multidim: #Compare shapes, require only 1D var
                #Match attribute dim to first non-record var dim
                firstdim = int(v.rv())
                if len(vshape) <= firstdim:
                    #Variable has no non-record dimension, so indexing
                    #vshape[firstdim] below would raise IndexError
                    errs.append('Multi-element {} only valid with 1D variable.'
                                .format(which))
                    continue
                if vshape[firstdim] != numpy.shape(attrval)[0]:
                    errs.append(('{} element count {} does not match first data'
                                 ' dimension size {}.').format(
                                     which, numpy.shape(attrval)[0],
                                     vshape[firstdim]))
                    continue
                if len(vshape) != firstdim + 1: #only one non-record dim
                    errs.append('Multi-element {} only valid with 1D variable.'
                                .format(which))
                    continue
                #Keep a flat copy for messages: the comparison array is
                #only 2-D when a pseudo-record dim is added, so indexing it
                #with [0, :] for display fails on NRV variables.
                attrval = numpy.ravel(attrval)
                shown = attrval
                if firstdim: #Add pseudo-record dim
                    attrval = numpy.reshape(attrval, (1, -1))
            if numpy.any((attrval < minval)) or numpy.any((attrval > maxval)):
                errs.append('{} ({}) outside data range ({},{}).'.format(
                    which, shown, minval, maxval))
            if not rng or not len(v): #nothing to compare
                continue
            #Always put numpy array on the left so knows to do element compare
            idx = (data < attrval) if which == whichmin \
                  else (data > attrval)
            idx = numpy.logical_and(idx, numpy.logical_not(is_fill))
            if idx.any():
                direction = 'under' if which == whichmin else 'over'
                if len(vshape) == 0: #Scalar
                    errs.append('Value {} {} {} {}.'.format(
                        data, direction, which, shown))
                    continue
                badidx = numpy.nonzero(idx)
                badvals = data[badidx]
                if len(badidx) > 1: #Multi-dimensional data
                    badidx = numpy.transpose(badidx) #Group by value not axis
                else:
                    badidx = badidx[0] #Just recover the index value
                if len(badvals) < 10:
                    badvalstr = ', '.join(str(d) for d in badvals)
                    badidxstr = ', '.join(str(d) for d in badidx)
                    errs.append('Value {} at index {} {} {} {}.'.format(
                        badvalstr, badidxstr,
                        direction, which, shown))
                else:
                    #Too many to list individually; just give the count
                    errs.append('{} values {} {} {}.'.format(
                        len(badvals), direction, which, shown))
        if (whichmin in v.attrs) and (whichmax in v.attrs):
            if numpy.any(v.attrs[whichmin] > v.attrs[whichmax]):
                errs.append('{} > {}.'.format(whichmin, whichmax))
        return errs

    @classmethod
    def validrange(cls, v):
        """Check that all values are within VALIDMIN/VALIDMAX, or FILLVAL

        Compare all values of this variable to `VALIDMIN
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#VALIDMIN>`_
        and ``VALIDMAX``; fails validation if any values are below
        VALIDMIN or above ``VALIDMAX`` unless equal to `FILLVAL
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#FILLVAL>`_.

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        return cls._validhelper(v)

    @classmethod
    def validscale(cls, v):
        """Check SCALEMIN<=SCALEMAX, and both in range for CDF datatype.

        Compares `SCALEMIN
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#SCALEMIN>`_
        to ``SCALEMAX`` to make sure it isn't larger and both are
        within range of the variable CDF datatype.

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        return cls._validhelper(v, False)

    @classmethod
    def validdisplaytype(cls, v):
        """Check that plottype matches dimensions.

        Check `DISPLAY_TYPE
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#DISPLAY_TYPE>`_
        of this variable and makes sure it is reasonable for the
        variable dimensions.

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        time_st = 'time_series'
        spec_st = 'spectrogram'
        errs = []
        if 'DISPLAY_TYPE' in v.attrs:
            displaytype = v.attrs['DISPLAY_TYPE']
            ndims = len(v.shape)
            if (ndims == 1) and (displaytype != time_st):
                errs.append('1 dim variable with {} display type.'.format(
                    displaytype))
            elif (ndims > 1) and (displaytype != spec_st):
                errs.append('Multi dim variable with {} display type.'.format(
                    displaytype))
        return errs

    @classmethod
    def fieldnam(cls, v):
        """Check that FIELDNAM attribute matches variable name.

        Compare `FIELDNAM
        <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#FIELDNAM>`_
        attribute to the variable name; fail validation if they don't
        match.

        Parameters
        ----------
        v : :class:`~spacepy.pycdf.Var`
            Variable to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        errs = []
        vname = v.name()
        if 'FIELDNAM' not in v.attrs:
            errs.append('No FIELDNAM attribute.')
        elif v.attrs['FIELDNAM'] != vname:
            errs.append('FIELDNAM attribute {} does not match var name.'
                        .format(v.attrs['FIELDNAM']))
        return errs
class FileChecks(object):
    """ISTP compliance checks for a CDF file.

    Checks a file's compliance with ISTP standards. This mostly
    performs checks that are not currently performed by the `ISTP
    skeleton editor <https://spdf.gsfc.nasa.gov/skteditor/>`_. All
    tests return a list, one error string for every noncompliance
    found (empty list if compliant). :meth:`all` will perform all
    tests and concatenate all errors.

    .. autosummary::

        all
        filename
        time_monoton
        times

    .. automethod:: all
    .. automethod:: filename
    .. automethod:: time_monoton
    .. automethod:: times
    """
    #When adding new tests, add to list above, and the list in all()
    #Validation failures should be formatted as a sentence (initial cap,
    #closing period).

    @staticmethod
    def _basename(f):
        """Return the file name (no directory) of CDF ``f`` as text."""
        fname = os.path.basename(f.pathname)
        #Python 3 only: decode if the pathname came back as bytes
        if bytes is not str and isinstance(fname, bytes):
            fname = fname.decode('ascii')
        return fname

    @staticmethod
    def _is_time(var):
        """Return True if ``var`` has one of the CDF time types."""
        return var.type() in (spacepy.pycdf.const.CDF_EPOCH.value,
                              spacepy.pycdf.const.CDF_EPOCH16.value,
                              spacepy.pycdf.const.CDF_TIME_TT2000.value)

    @classmethod
    def all(cls, f, catch=False):
        """Perform all variable and file-level tests

        In addition to calling every test in this class, will also call
        :meth:`VariableChecks.all` for every variable in the file.

        Parameters
        ----------
        f : :class:`~spacepy.pycdf.CDF`
            Open CDF file to check
        catch : bool
            Catch exceptions in tests (default False). If True, any
            exceptions in subtests will result in an addition to the
            validation failures of the form "Test x did not complete."
            Calling the individual test will reveal the full traceback.
            Only :class:`Exception` subclasses are caught, so
            ``KeyboardInterrupt`` and ``SystemExit`` always propagate.

        Returns
        -------
        list of str
            Description of each validation failure.

        Examples
        --------
        >>> import spacepy.pycdf
        >>> import spacepy.pycdf.istp
        >>> f = spacepy.pycdf.CDF('foo.cdf', create=True)
        >>> v = f.new('Var', data=[1, 2, 3])
        >>> spacepy.pycdf.istp.FileChecks.all(f)
        ['No Logical_source in global attrs.',
        'No Logical_file_id in global attrs.',
        'Cannot parse date from filename foo.cdf.',
        'Var: No FIELDNAM attribute.']
        """
        #Update this list when adding new test functions
        callme = (cls.filename, cls.time_monoton, cls.times,)
        errors = []
        for func in callme:
            try:
                errors.extend(func(f))
            except Exception:
                if not catch:
                    raise
                #Name of the *test*; f is the CDF and has no __name__
                errors.append('Test {} did not complete.'.format(
                    func.__name__))
        for v in f:
            errors.extend(('{}: {}'.format(v, e)
                           for e in VariableChecks.all(f[v], catch=catch)))
        return errors

    @classmethod
    def filename(cls, f):
        """Compare filename to global attributes

        Check global attribute `Logical_file_id
        <https://spdf.gsfc.nasa.gov/istp_guide/gattributes.html#Logical_file_id>`_
        and `Logical_source
        <https://spdf.gsfc.nasa.gov/istp_guide/gattributes.html#Logical_source>`_
        for consistency with CDF filename.

        Parameters
        ----------
        f : :class:`~spacepy.pycdf.CDF`
            Open CDF file to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        errs = []
        for a in ('Logical_source', 'Logical_file_id'):
            if a not in f.attrs or len(f.attrs[a]) == 0:
                errs.append('No {} in global attrs.'.format(a))
        if errs:
            #Nothing to compare the filename against
            return errs
        fname = cls._basename(f)
        source = f.attrs['Logical_source'][0]
        file_id = f.attrs['Logical_file_id'][0]
        if not fname.startswith(source):
            errs.append("Logical_source {} doesn't match filename {}.".format(
                source, fname))
        #Drop the last four characters (the extension) before comparing
        if fname[:-4] != file_id:
            errs.append("Logical_file_id {} doesn't match filename {}.".format(
                file_id, fname))
        return errs

    @classmethod
    def time_monoton(cls, f):
        """Checks that times are monotonic

        Check that all `Epoch
        <https://spdf.gsfc.nasa.gov/istp_guide/variables.html#support_data_eg1>`_
        variables are monotonically increasing.

        Parameters
        ----------
        f : :class:`~spacepy.pycdf.CDF`
            Open CDF file to check

        Returns
        -------
        list of str
            Description of each validation failure.
        """
        errs = []
        for v in f:
            if not cls._is_time(f[v]):
                continue
            data = f[v][...]
            idx = numpy.where(numpy.diff(data) < datetime.timedelta(0))[0]
            #Test the *count* of bad indices: any(idx) is False when the
            #only backwards step is at index 0 (record 1 before record 0)
            if len(idx) == 0:
                continue
            errs.append('{}: Nonmonotonic time at record {}.'.format(
                v, ', '.join((str(i) for i in (idx + 1)))))
        return errs

    @classmethod
    def times(cls, f):
        """Compare filename to times

        Check that all `Epoch
        <https://spdf.gsfc.nasa.gov/istp_guide/variables.html#support_data_eg1>`_
        variables only contain times matching filename.

        Parameters
        ----------
        f : :class:`~spacepy.pycdf.CDF`
            Open CDF file to check

        Returns
        -------
        list of str
            Description of each validation failure.

        Notes
        -----
        This function assumes daily files and should be extended based on the
        File_naming_convention global attribute (which itself is another good
        check to have.)
        """
        errs = []
        fname = cls._basename(f)
        #First run of eight digits in the name is taken as YYYYMMDD
        m = re.search(r'\d{8}', fname)
        if not m:
            return ['Cannot parse date from filename {}.'.format(fname)]
        datestr = m.group(0)
        for v in f:
            if not cls._is_time(f[v]):
                continue
            #Distinct days present in this variable, in sorted order
            datestrs = sorted(set(d.strftime('%Y%m%d') for d in f[v][...]))
            if len(datestrs) == 0:
                continue
            elif len(datestrs) > 1:
                errs.append('{}: multiple days {}.'.format(
                    v, ', '.join(datestrs)))
            elif datestrs[0] != datestr:
                errs.append("{}: date {} doesn't match file {}.".format(
                    v, datestrs[0], fname))
        return errs
def fillval(v):
    """Set ISTP-compliant FILLVAL on a variable

    Sets a CDF variable's `FILLVAL
    <https://spdf.gsfc.nasa.gov/istp_guide/vattributes.html#FILLVAL>`_
    attribute to the value required by ISTP (based on variable type).
    Any existing ``FILLVAL`` is replaced.

    Parameters
    ----------
    v : :class:`~spacepy.pycdf.Var`
        CDF variable to update

    Raises
    ------
    KeyError
        If the variable's CDF type has no entry in the fill-value table
        built here. An existing ``FILLVAL`` is left untouched in that case.

    Examples
    --------
    >>> import spacepy.pycdf
    >>> import spacepy.pycdf.istp
    >>> f = spacepy.pycdf.CDF('foo.cdf', create=True)
    >>> v = f.new('Var', data=[1, 2, 3])
    >>> spacepy.pycdf.istp.fillval(v)
    >>> v.attrs['FILLVAL']
    -128
    """
    const = spacepy.pycdf.const
    #Fill value, indexed by the CDF type (numeric)
    fillvals = {}
    #Integers
    for i in (1, 2, 4, 8):
        #Signed: most negative value representable in i bytes
        fillvals[getattr(const, 'CDF_INT{}'.format(i)).value] = \
            - 2 ** (8*i - 1)
        if i == 8:
            #No CDF_UINT8 entry is added
            continue
        #Unsigned: largest value representable in i bytes
        fillvals[getattr(const, 'CDF_UINT{}'.format(i)).value] = \
            2 ** (8*i) - 1
    fillvals[const.CDF_EPOCH16.value] = (-1e31, -1e31)
    fillvals[const.CDF_REAL8.value] = -1e31
    fillvals[const.CDF_REAL4.value] = -1e31
    fillvals[const.CDF_CHAR.value] = ' '
    fillvals[const.CDF_UCHAR.value] = ' '
    #Equivalent pairs
    for cdf_t, equiv in (
            (const.CDF_TIME_TT2000, const.CDF_INT8),
            (const.CDF_EPOCH, const.CDF_REAL8),
            (const.CDF_BYTE, const.CDF_INT1),
            (const.CDF_FLOAT, const.CDF_REAL4),
            (const.CDF_DOUBLE, const.CDF_REAL8),
    ):
        fillvals[cdf_t.value] = fillvals[equiv.value]
    vtype = v.type()
    #Look the value up BEFORE deleting anything, so an unsupported type
    #raises without destroying the existing attribute
    value = fillvals[vtype]
    if 'FILLVAL' in v.attrs:
        del v.attrs['FILLVAL']
    v.attrs.new('FILLVAL', data=value, type=vtype)