Source code for numcodecs.pcodec

from typing import Literal, Optional

from numcodecs.abc import Codec
from numcodecs.compat import ensure_contiguous_ndarray

try:
    from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
except ImportError:  # pragma: no cover
    standalone = None


DEFAULT_MAX_PAGE_N = 262144


[docs] class PCodec(Codec): """ PCodec (or pco, pronounced "pico") losslessly compresses and decompresses numerical sequences with high compression ratio and fast speed. See `PCodec Repo <https://github.com/mwlon/pcodec>`_ for more information. PCodec supports only the following numerical dtypes: uint16, uint32, uint64, int16, int32, int64, float16, float32, and float64. Parameters ---------- level : int A compression level from 0-12, where 12 take the longest and compresses the most. delta_encoding_order : init or None Either a delta encoding level from 0-7 or None. If set to None, pcodec will try to infer the optimal delta encoding order. mode_spec : {'auto', 'classic'} Configures whether Pcodec should try to infer the best "mode" or structure of the data (e.g. approximate multiples of 0.1) to improve compression ratio, or skip this step and just use the numbers as-is (Classic mode). equal_pages_up_to : int Divide the chunk into equal pages of up to this many numbers. """ codec_id = "pcodec" def __init__( self, level: int = 8, delta_encoding_order: Optional[int] = None, equal_pages_up_to: int = 262144, # TODO one day, add support for the Try* mode specs mode_spec: Literal['auto', 'classic'] = 'auto', ): if standalone is None: # pragma: no cover raise ImportError("pcodec must be installed to use the PCodec codec.") # note that we use `level` instead of `compression_level` to # match other codecs self.level = level self.delta_encoding_order = delta_encoding_order self.equal_pages_up_to = equal_pages_up_to self.mode_spec = mode_spec
[docs] def encode(self, buf): buf = ensure_contiguous_ndarray(buf) match self.mode_spec: case 'auto': mode_spec = ModeSpec.auto() case 'classic': mode_spec = ModeSpec.classic() case _: raise ValueError(f"unknown value for mode_spec: {self.mode_spec}") paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to) config = ChunkConfig( compression_level=self.level, delta_encoding_order=self.delta_encoding_order, mode_spec=mode_spec, paging_spec=paging_spec, ) return standalone.simple_compress(buf, config)
[docs] def decode(self, buf, out=None): if out is not None: out = ensure_contiguous_ndarray(out) standalone.simple_decompress_into(buf, out) return out else: return standalone.simple_decompress(buf)