# Source code for baseband.vlbi_base.file_info

# Licensed under the GPLv3 - see LICENSE
"""Provide a base class for "info" properties.

Loosely based on `~astropy.utils.data_info.DataInfo`.
"""
import copy
import warnings
from collections import OrderedDict

import numpy as np
import astropy.units as u


__all__ = ['VLBIInfoMeta', 'VLBIInfoBase',
           'VLBIFileReaderInfo', 'VLBIStreamReaderInfo']


class VLBIInfoMeta(type):
    """Metaclass that initializes all names in ``attr_names`` to `None`.

    When a class is created with this metaclass, every name listed in the
    ``attr_names`` entry of that class's own body is set as a class
    attribute with value `None`.  A class that does not define
    ``attr_names`` itself is left untouched and simply inherits whatever
    its bases provide.
    """
    # Ensure all attributes are initialized to None, so that they are
    # always available (do this rather than overwrite __getattr__ so that
    # we can generate docstrings in sphinx for them).
    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        # Only look in the class body (``dct``), not in inherited names.
        # Note that this runs after class creation, so it also replaces any
        # same-named entry that the class body itself defined.
        for attr in dct.get('attr_names', ()):
            setattr(cls, attr, None)


class VLBIInfoBase(metaclass=VLBIInfoMeta):
    """Container providing a standardized interface to file information.

    In order to ensure that information is always returned, all access
    to the parent should be within ``try/except`` with a possible error
    stored in ``self.errors``.  See ``self._getattr`` for an example.

    The class is a data descriptor: when accessed on an instance of the
    owning class, it returns a filled copy of itself that is cached under
    ``'info'`` in the instance's ``__dict__`` and that is recreated whenever
    ``_up_to_date`` reports that the stored values no longer match.
    """

    attr_names = ('format',)
    """Attributes that the container provides."""

    # Names of attributes that are copied directly from the parent.
    _parent_attrs = ()
    # The instance the information is about; None for the class-level
    # (unbound) descriptor.
    _parent = None

    def _getattr(self, object_, attr, error=True):
        """Guarded getattr, returning None on error (and storing the error).

        Parameters
        ----------
        object_ : object
            Object to get the attribute from.
        attr : str
            Name of the attribute.
        error : bool, optional
            Whether to store a possible exception in ``self.errors``, keyed
            by ``attr``.  Default: `True`.
        """
        try:
            return getattr(object_, attr)
        except Exception as exc:
            if error:
                self.errors[attr] = exc
            return None

    def _collect_info(self):
        """(Re)initialize ``missing`` and ``errors`` and copy parent attrs."""
        # We link to attributes from the parent rather than just overriding
        # __getattr__ to allow us to look for changes.
        self.missing = {}
        self.errors = OrderedDict()
        for attr in self._parent_attrs:
            setattr(self, attr, self._getattr(self._parent, attr))

    def _up_to_date(self):
        """Determine whether the information we have stored is up to date."""
        # Errors are not stored here: this is only a comparison, and we do
        # not want it to alter the information collected earlier.
        return all(getattr(self, attr) == self._getattr(self._parent, attr,
                                                        error=False)
                   for attr in self._parent_attrs)

    def __get__(self, instance, owner_cls):
        if instance is None:
            # Unbound descriptor, nothing to do.
            return self

        # Check if we have a stored and up to date copy.
        info = instance.__dict__.get('info')
        if info is None or not info._up_to_date():
            # If not, create a new instance and fill it.  Notes:
            # - We cannot change "self", as this was created on the class.
            # - We start from scratch rather than determine what is no longer
            #   up to date, since we cannot know what an update may influence
            #   (e.g., for Mark 4, a change in ref_time affect start_time).
            info = instance.__dict__['info'] = self.__class__()
            info._parent = instance
            info._collect_info()

        return info

    def __set__(self, instance, value):
        # We do need to define __set__ since this ensures we are treated as
        # a "data descriptor", i.e., that our __get__ will get called even
        # if "info" is present in instance.__dict__; see
        # https://docs.python.org/3/howto/descriptor.html
        # The signature has to be (self, instance, value): python calls it
        # with those arguments, so with fewer parameters an attempt to set
        # the attribute would fail with TypeError instead of the
        # AttributeError intended here.
        raise AttributeError("can't set info attribute.")

    def __bool__(self):
        # The information counts as available only if a format was found.
        return self.format is not None

    # PY2
    __nonzero__ = __bool__

    def __call__(self):
        """Create a dict with file information, including missing pieces.

        Returns
        -------
        info : dict
            Entries for all attributes in ``attr_names`` that are not `None`
            (only if a format was determined), plus ``'missing'`` and
            ``'errors'`` entries if those are not empty.
        """
        info = {}
        if self:
            for attr in self.attr_names:
                value = getattr(self, attr)
                if value is not None:
                    info[attr] = value
            if self.missing:
                info['missing'] = self.missing

        # ``errors`` is only created when information is collected, so it
        # does not exist on the unbound, class-level descriptor.
        errors = getattr(self, 'errors', None)
        if errors:
            info['errors'] = errors

        return info

    def __repr__(self):
        # Use the repr for quick display of file information.
        if self._parent is None:
            return super().__repr__()

        if not self:
            if self._parent.closed:
                return 'File closed. Not parsable.'
            else:
                return 'Not parsable. Wrong format?'

        result = ''
        for attr in self.attr_names:
            value = getattr(self, attr)
            if value is None:
                continue

            if hasattr(value, 'isot'):
                # Set the precision on a copy, so that just displaying the
                # information does not alter the object we hold, which may
                # be shared with the parent.
                value = copy.copy(value)
                value.precision = 9
                value = value.isot
            elif attr == 'sample_rate':
                value = value.to(u.MHz)
            result += '{} = {}\n'.format(attr, value)

        if self.missing:
            result += '\n'
            prefix = 'missing: '
            # Group the arguments that are missing for the same reason.
            for msg in sorted(set(self.missing.values())):
                keys = sorted(key for key, reason in self.missing.items()
                              if reason == msg)
                result += "{} {}: {}\n".format(prefix, ', '.join(keys), msg)
                # Only the first line carries the label; align the rest.
                prefix = ' ' * len(prefix)

        if self.errors:
            result += '\n'
            prefix = 'errors: '
            for item, error in self.errors.items():
                result += "{} {}: {}\n".format(prefix, item, str(error))
                prefix = ' ' * len(prefix)

        return result


class VLBIFileReaderInfo(VLBIInfoBase):
    """Standardized information on file readers.

    The ``info`` descriptor has a number of standard attributes, which are
    determined from arguments passed in opening the file, from the first header
    (``info.header0``) and from possibly scanning the file to determine the
    duration of frames.

    Attributes
    ----------
    format : str or `None`
        File format, or `None` if the underlying file cannot be parsed.
    frame_rate : `~astropy.units.Quantity`
        Number of data frames per unit of time.
    sample_rate : `~astropy.units.Quantity`
        Complete samples per unit of time.
    samples_per_frame : int
        Number of complete samples in each frame.
    sample_shape : tuple
        Dimensions of each complete sample (e.g., ``(nchan,)``).
    bps : int
        Number of bits used to encode each elementary sample.
    complex_data : bool
        Whether the data are complex.
    start_time : `~astropy.time.Time`
        Time of the first complete sample.
    readable : bool
        Whether the first sample could be read and decoded.
    missing : dict
        Entries are keyed by names of arguments that should be passed to
        the file reader to obtain full information. The associated entries
        explain why these arguments are needed.
    errors : dict
        Any exceptions raised while trying to determine attributes.  Keyed
        by the attributes.

    Examples
    --------
    The most common use is simply to print information::

        >>> from baseband.data import SAMPLE_MARK5B
        >>> from baseband import mark5b
        >>> fh = mark5b.open(SAMPLE_MARK5B, 'rb')
        >>> fh.info
        File information:
        format = mark5b
        frame_rate = 6400.0 Hz
        bps = 2
        complex_data = False
        readable = False
        <BLANKLINE>
        missing:  nchan: needed to determine sample shape and rate.
                  kday, ref_time: needed to infer full times.
        <BLANKLINE>
        errors:  start_time: unsupported operand type(s) for +: 'NoneType' and 'int'
                 frame0: In order to read frames, the file handle should be initialized with nchan set.

        >>> fh.close()

        >>> fh = mark5b.open(SAMPLE_MARK5B, 'rb', kday=56000, nchan=8)
        >>> fh.info
        File information:
        format = mark5b
        frame_rate = 6400.0 Hz
        sample_rate = 32.0 MHz
        samples_per_frame = 5000
        sample_shape = (8,)
        bps = 2
        complex_data = False
        start_time = 2014-06-13T05:30:01.000000000
        readable = True
        >>> fh.close()
    """
    attr_names = ('format', 'frame_rate', 'sample_rate', 'samples_per_frame',
                  'sample_shape', 'bps', 'complex_data', 'start_time', 'readable')
    # Attributes that are copied directly from the first header.
    _header0_attrs = ('bps', 'complex_data', 'samples_per_frame',
                      'sample_shape')

    def _get_header0(self):
        """Read the header at the start of the file.

        Returns `None` on any failure, storing the exception in
        ``self.errors['header0']``.
        """
        # Here, we do not even know whether the file is open or whether we
        # have the right format. We thus use a try/except and filter out all
        # warnings.
        try:
            with self._parent.temporary_offset() as fh:
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore')
                    fh.seek(0)
                    return fh.read_header()
        except Exception as exc:
            self.errors['header0'] = exc
            return None

    def _get_frame0(self):
        """Read the frame at the start of the file.

        Returns `None` on any failure, storing the exception in
        ``self.errors['frame0']``.
        """
        # Try reading a frame.  This has no business failing if a
        # frame rate could be determined, but try anyway; maybe file is closed.
        try:
            with self._parent.temporary_offset() as fh:
                fh.seek(0)
                return fh.read_frame()
        except Exception as exc:
            self.errors['frame0'] = exc
            return None

    def _readable(self):
        """Check whether the first sample of the first frame can be decoded."""
        frame0 = self._get_frame0()
        if frame0 is None:
            return False

        # Getting the first sample can fail if we don't have the right decoder.
        try:
            first_sample = frame0[0]
        except Exception as exc:
            self.errors['readable'] = exc
            return False

        if not isinstance(first_sample, np.ndarray):
            # Nothing was raised in this case, so a plain message is stored.
            self.errors['readable'] = 'first sample is not an ndarray'
            return False

        return True

    def _get_format(self):
        """Infer the format from the class name of the parent.

        This is the part of the name before ``'File'``, in lower case.
        """
        return self._parent.__class__.__name__.split('File')[0].lower()

    def _get_frame_rate(self):
        """Get the frame rate from the parent, or `None` (storing the error)."""
        try:
            return self._parent.get_frame_rate()
        except Exception as exc:
            self.errors['frame_rate'] = exc
            return None

    def _get_start_time(self):
        """Get the time of the first header, or `None` (storing the error)."""
        try:
            return self.header0.time
        except Exception as exc:
            self.errors['start_time'] = exc
            return None

    def _collect_info(self):
        """Fill all attributes, starting from the first header."""
        super()._collect_info()
        self.header0 = self._get_header0()
        if self.header0 is None:
            # Without a header, nothing else can be determined; ``format``
            # is left at `None`, so the information tests as `False`.
            return

        for attr in self._header0_attrs:
            # Use the guarded getattr, so that a header which cannot provide
            # an attribute leads to an entry in ``errors`` rather than to an
            # exception on accessing the information.
            setattr(self, attr, self._getattr(self.header0, attr))
        self.format = self._get_format()
        self.frame_rate = self._get_frame_rate()
        # Derive the sample rate unless it is taken from the header.
        if ('sample_rate' not in self._header0_attrs and
                self.frame_rate is not None and
                self.samples_per_frame is not None):
            self.sample_rate = self.frame_rate * self.samples_per_frame
        self.start_time = self._get_start_time()
        self.readable = self._readable()

    def __repr__(self):
        result = 'File information:\n'
        result += super().__repr__()
        return result


class VLBIStreamReaderInfo(VLBIInfoBase):
    """Standardized information on stream readers.

    The ``info`` descriptor provides a few standard attributes, all of which
    can also be accessed directly on the stream filehandle. More detailed
    information on the underlying file is stored in its info, accessible via
    ``info.file_info``.

    Attributes
    ----------
    start_time : `~astropy.time.Time`
        Time of the first complete sample.
    stop_time : `~astropy.time.Time`
        Time of the complete sample just beyond the end of the file.
    sample_rate : `~astropy.units.Quantity`
        Complete samples per unit of time.
    shape : tuple
        Equivalent shape of the whole file, i.e., combining the number of
        complete samples and the shape of those samples.
    format : str or `None`
        Format as given by the information on the underlying file.
    bps : int
        Number of bits used to encode each elementary sample.
    complex_data : bool
        Whether the data are complex.
    readable : bool
        Whether the first sample could be read and decoded.
    """
    attr_names = ('start_time', 'stop_time', 'sample_rate', 'shape',
                  'format', 'bps', 'complex_data', 'readable')
    # Everything is taken directly from the stream reader, except for the
    # format and readability, which are filled in by ``_collect_info``.
    _parent_attrs = tuple(attr for attr in attr_names
                          if attr not in ('format', 'readable'))

    def _raw_file_info(self):
        """Get the information on the underlying raw file."""
        # Mostly here so GSB can override.
        return self._parent.fh_raw.info

    def _readable(self):
        """Ask the stream reader whether it is readable."""
        # Again mostly here so GSB can override.
        return self._parent.readable()

    def _collect_info(self):
        """Copy attributes from the stream reader and add raw file info."""
        super()._collect_info()
        # We also want the raw info.
        self.file_info = self._raw_file_info()
        self.format = self.file_info.format
        self.readable = self._readable()

    def _up_to_date(self):
        # Stream readers cannot change after initialization, so the check is easy.
        return True

    def __call__(self):
        """Create a dict with information about the stream and the raw file."""
        info = super().__call__()
        # As in __repr__, allow for ``file_info`` not being present, since it
        # is only set when information is collected.
        file_info = getattr(self, 'file_info', None)
        if file_info is not None:
            info['file_info'] = file_info()
        return info

    def __repr__(self):
        result = 'Stream information:\n'
        result += super().__repr__()
        file_info = getattr(self, 'file_info', None)
        if file_info is not None:
            # Add information from the raw file, but only those attributes
            # that were not already shown for the stream.  This is done by
            # temporarily overriding the list of attributes to display.
            raw_attrs = file_info.attr_names
            raw_only_attrs = [attr for attr in raw_attrs
                              if attr not in self.attr_names]
            try:
                file_info.attr_names = raw_only_attrs
                result += '\n' + repr(file_info)
            finally:
                # Always restore the full list, even if repr failed.
                file_info.attr_names = raw_attrs

        return result
# Navigation
#
# Source code for baseband.vlbi_base.file_info