# Source code for baseband.base.encoding

# Licensed under the GPLv3 - see LICENSE
"""Encoders and decoders for generic binary data formats."""
import numpy as np


# Names exported by ``from <this module> import *``.
__all__ = [
    'OPTIMAL_2BIT_HIGH',
    'TWO_BIT_1_SIGMA',
    'FOUR_BIT_1_SIGMA',
    'EIGHT_BIT_1_SIGMA',
    'decoder_levels',
    'encode_1bit_base',
    'encode_2bit_base',
    'encode_4bit_base',
    'decode_8bit',
    'encode_8bit',
]


# Magnitude assigned to the two outer states when reconstructing 2-bit data.
# For reference, mark5access uses OPTIMAL_2BIT_HIGH = 3.3359, which is
# possibly a typo.
OPTIMAL_2BIT_HIGH = 3.316505
r"""Optimal high value for a 2-bit digitizer for which the low value is 1.

The value is picked such that, for a normal distribution in which 68.269%
of all values fall at the low level, it equals the mean of the remaining
values, i.e.,

.. math::

    l = \frac{\int_\sigma^\infty x \exp(-\frac{x^2}{2\sigma^2}) dx}
             {\int_\sigma^\infty \exp(-\frac{x^2}{2\sigma^2}) dx},

with the standard deviation following from the requirement that the low
level is unity:

.. math::

    1 = \frac{\int_0^\sigma x \exp(-\frac{x^2}{2\sigma^2}) dx}
         {\int_0^\sigma \exp(-\frac{x^2}{2\sigma^2}) dx}.

Solving these yields:

.. math::

    \sigma = \frac{\sqrt{\frac{\pi}{2}}\mathrm{erf}
             (\sqrt{1/2})}{1 - \sqrt{1/e}} = 2.174564,

and

.. math::

    l = \frac{1}{(\sqrt{e} - 1)(1/\mathrm{erf}(\sqrt{1/2}) - 1)} = 3.316505
"""

TWO_BIT_1_SIGMA = 2.174564
"""Optimal threshold between the low and high levels that belongs with
OPTIMAL_2BIT_HIGH (the standard deviation derived there)."""

FOUR_BIT_1_SIGMA = 2.95
"""Scaling for four-bit encoding that makes it look like 2 bit."""

EIGHT_BIT_1_SIGMA = 35.5  # i.e., 71 / 2
"""Scaling for eight-bit encoding that makes it look like 2 bit."""

decoder_levels = {
    1: np.array((-1.0, 1.0), dtype=np.float32),
    2: np.array(
        (-OPTIMAL_2BIT_HIGH, -1.0, 1.0, OPTIMAL_2BIT_HIGH),
        dtype=np.float32,
    ),
    # The sixteen 4-bit codes map onto -8 ... 7 before scaling.
    4: np.arange(-8, 8, dtype=np.float32) / FOUR_BIT_1_SIGMA,
}
"""Levels for data encoded with different numbers of bits."""

# Helpers for encode_2bit_base: the offset added to the samples before they
# are divided by TWO_BIT_1_SIGMA, and the symmetric limits they are clipped
# to beforehand.
two_bit_2_sigma = 2 * TWO_BIT_1_SIGMA
clip_high = 1.5 * TWO_BIT_1_SIGMA
clip_low = -clip_high


def encode_1bit_base(values):
    """Generic encoder for data stored using one bit.

    This returns an unsigned integer array containing encoded sample
    values that are either 0 (negative value) or 1 (zero or positive
    value).

    This does not pack the samples into bytes.

    Parameters
    ----------
    values : `~numpy.ndarray`
        Sample values to encode.  Its ``shape`` attribute is used to
        allocate the output.

    Returns
    -------
    bitvalues : `~numpy.ndarray` of uint8
        Array with the same shape as ``values``, holding 1 where
        ``values >= 0`` and 0 elsewhere.
    """
    # Optimized for speed: the result of the comparison is written directly
    # into a pre-allocated uint8 array (hence ``casting='unsafe'``), rather
    # than creating a boolean array and converting it afterwards.
    bitvalues = np.empty(values.shape, np.uint8)
    return np.greater_equal(values, 0., out=bitvalues, casting='unsafe')
def encode_2bit_base(values):
    """Generic encoder for data stored using two bits.

    This returns an unsigned integer array containing encoded sample
    values that range from 0 to 3.  The conversion from floating point
    sample value to unsigned int is given below, with
    ``lv = TWO_BIT_1_SIGMA = 2.1745``:

    ================= ======
    Input range       Output
    ================= ======
          value < -lv   0
    -lv < value <  0.   1
     0. < value <  lv   2
     lv < value         3
    ================= ======

    This does not pack the samples into bytes.

    Parameters
    ----------
    values : `~numpy.ndarray`
        Sample values to encode.  The input is not modified.

    Returns
    -------
    bitvalues : `~numpy.ndarray` of uint8
        Encoded values, with the same shape as the clipped input.
    """
    # Optimized for speed by doing calculations in-place, and ensuring that
    # the dtypes match.
    # Clipping to +/-1.5 lv creates a new array (so the caller's data stay
    # untouched) and guarantees that, after the shift by 2 lv below, every
    # sample lies within [0.5 lv, 3.5 lv], i.e., the quotient is in 0..3.
    values = np.clip(values, clip_low, clip_high)
    values += two_bit_2_sigma
    bitvalues = np.empty(values.shape, np.uint8)
    return np.floor_divide(values, TWO_BIT_1_SIGMA, out=bitvalues,
                           casting='unsafe')
def encode_4bit_base(values):
    """Generic encoder for data stored using four bits.

    This returns an unsigned integer array containing encoded sample
    values that range from 0 to 15.  Floating point sample values are
    converted to unsigned int by first scaling them by
    ``FOUR_BIT_1_SIGMA = 2.95``, then adding 8.5 (the 0.5 to ensure proper
    rounding when typecasting to uint8).  Some sample output levels are:

    ========================= ======
    Input range               Output
    ========================= ======
           value*scale < -7.5   0
    -7.5 < value*scale < -6.5   1
    -0.5 < value*scale < +0.5   8
     6.5 < value*scale          15
    ========================= ======

    This does not pack the samples into bytes.

    Parameters
    ----------
    values : `~numpy.ndarray`
        Sample values to encode.  The input is not modified.

    Returns
    -------
    bitvalues : `~numpy.ndarray` of uint8
        Encoded values in the range 0 to 15.
    """
    # Optimized for speed by doing calculations in-place.
    # The multiplication creates a new array, so the in-place operations
    # that follow do not touch the caller's data.
    values = values * FOUR_BIT_1_SIGMA
    values += 8.5
    # After clipping, all values are non-negative, so the truncation done by
    # the cast to uint8 rounds the scaled samples to the nearest level.
    return np.clip(values, 0., 15., out=values).astype(np.uint8)
def decode_8bit(words):
    """Generic decoder for data stored using 8 bits.

    We follow mark5access, which assumes the values 0 to 255 encode
    -127.5 to 127.5, scaled down to match 2 bit data by a factor of 35.5
    (`~baseband.base.encoding.EIGHT_BIT_1_SIGMA`)

    For comparison, GMRT phased data treats the 8-bit data values simply
    as signed integers.

    Parameters
    ----------
    words : `~numpy.ndarray`
        Encoded data; its bytes are reinterpreted as unsigned 8-bit
        integers.  The input is not modified.

    Returns
    -------
    b : `~numpy.ndarray` of float32
        Decoded samples, ``(byte - 127.5) / EIGHT_BIT_1_SIGMA``.
    """
    # The view reinterprets the underlying bytes without copying; astype
    # then makes a float32 copy on which the in-place arithmetic is safe.
    b = words.view(np.uint8).astype(np.float32)
    b -= 127.5
    b /= EIGHT_BIT_1_SIGMA
    return b
def encode_8bit(values):
    """Generic encoder for data stored using 8 bits.

    We follow mark5access, which assumes the values 0 to 255 encode
    -127.5 to 127.5, scaled down to match 2 bit data by a factor of 35.5
    (`~baseband.base.encoding.EIGHT_BIT_1_SIGMA`)

    For comparison, GMRT phased data treats the 8-bit data values simply
    as signed integers.

    Parameters
    ----------
    values : `~numpy.ndarray`
        Sample values to encode.  The input is not modified.

    Returns
    -------
    bitvalues : `~numpy.ndarray` of uint8
        ``values * EIGHT_BIT_1_SIGMA + 127.5``, rounded with
        `~numpy.rint` and clipped to the range 0 to 255.
    """
    # Inverse of the decoding: scale up, shift to the unsigned range, and
    # round; clipping before the cast keeps out-of-range samples from
    # wrapping around in uint8.
    return (np.clip(np.rint(values * EIGHT_BIT_1_SIGMA + 127.5), 0, 255)
            .astype(np.uint8))