# Source code for z5py.group

import os
from shutil import rmtree

try:
    from collections.abc import Mapping
except ImportError:
    from collections import Mapping

from ._z5py import create_group, FileMode
from .dataset import Dataset
from .attribute_manager import AttributeManager
from .shape_utils import is_group


class Group(Mapping):
    """ Group inside of a z5py container.

    Corresponds to a directory on the filesystem.
    Supports python dict api.
    Should not be instantiated directly, but rather be created
    or opened via the `create_group`, `require_group` or `[]` operators
    of Group or File.
    """

    # Translation table from the python / h5py mode strings to the internal
    # mode types. No difference between 'w-' and 'x' is known so far, hence
    # both are mapped to the same internal type for now.

    #: available modes for opening files. these correspond to the ``h5py`` file modes
    file_modes = {
        'a': FileMode.a,
        'r': FileMode.r,
        'r+': FileMode.r_p,
        'w': FileMode.w,
        'w-': FileMode.w_m,
        'x': FileMode.w_m,
    }

    def __init__(self, path, is_zarr=True, mode='a'):
        """ Set up a handle for the group located at ``path``.

        Args:
            path (str): filesystem path of the group.
            is_zarr (bool): format flag, stored on the instance and
                forwarded to the ``AttributeManager`` (default: True).
            mode (str): file mode, must be one of the keys of
                ``file_modes`` (default: 'a').

        Raises:
            ValueError: if ``mode`` is not a key of ``file_modes``.
        """
        known_modes = self.file_modes
        if mode not in known_modes:
            raise ValueError("Invalid file mode: %s" % mode)
        internal_mode = known_modes[mode]

        # plain state of the handle
        self.path = path
        self.is_zarr = is_zarr
        self.mode = mode
        self._internal_mode = internal_mode

        # helper objects derived from the state above
        self._permissions = FileMode(internal_mode)
        self._attrs = AttributeManager(path, is_zarr)

    #
    # Magic Methods, Attributes, Keys, Contains
    #

    def __iter__(self):
        """ Yield the name of every sub-directory of the group's directory.

        Regular files inside the directory are skipped.
        """
        root = self.path
        for entry in os.listdir(root):
            if not os.path.isdir(os.path.join(root, entry)):
                continue
            yield entry

    def __len__(self):
        """ Return the number of names produced by iterating the group. """
        return sum(1 for _ in self)

    def __delitem__(self, name):
        """ Delete group or dataset from the container.

        Removes the directory of the given name, including all its content.

        Args:
            name (str): name of group or dataset in container.

        Raises:
            ValueError: if the group was opened without write permissions.
            KeyError: if no directory of the given name exists in the group.
        """
        # deleting modifies the container, so enforce the same permission
        # rule that ``create_dataset`` and ``require_dataset`` apply
        if not self._permissions.can_write():
            raise ValueError("Cannot delete from a group with read-only permissions.")
        path_ = os.path.join(self.path, name)
        # only directories are members of a group (see ``__iter__`` and
        # ``__getitem__``); a plain file is reported as a missing key
        # instead of failing inside ``rmtree``
        if not os.path.isdir(path_):
            raise KeyError("%s does not exist" % name)
        rmtree(path_)

    def __getitem__(self, name):
        """ Open the group or dataset stored under ``name``.

        Args:
            name (str): name of group or dataset in container.

        Returns:
            ``Group`` or ``Dataset``.

        Raises:
            KeyError: if ``name`` does not refer to a directory in this group.
        """
        target = os.path.join(self.path, name)
        if os.path.isdir(target):
            # a directory is either a (sub-)group or a dataset
            if is_group(target, self.is_zarr):
                return Group._open_group(target, self.is_zarr, self.mode)
            return Dataset._open_dataset(target, self._internal_mode)
        raise KeyError("Key %s does not exist" % name)

    @property
    def attrs(self):
        """ The attribute manager of this group.

        Returns:
            ``AttributeManager``: the manager that was created for the
            group's path when the group was constructed.
        """
        return self._attrs

    #
    # Group functionality
    #

    @classmethod
    def _create_group(cls, path, is_zarr, mode):
        """ Create the group at ``path`` and return a handle to it.

        The creation itself is delegated to the ``create_group`` function
        imported from ``._z5py``, which receives the internal mode type
        that corresponds to ``mode``.
        """
        internal_mode = cls.file_modes[mode]
        create_group(path, is_zarr, internal_mode)
        return cls(path, is_zarr, mode)

    @classmethod
    def _open_group(cls, path, is_zarr, mode):
        """ Return a handle to the group at ``path`` without creating it. """
        return cls(path, is_zarr=is_zarr, mode=mode)

[docs]    def create_group(self, name):
        """ Create a new group.

        Create new (sub-)group of the group.
        Fails if a group of this name already exists.

        Args:
            name (str): name of the new group.

        Returns:
            ``Group``: group of the requested name.
        """
        if name in self:
            raise KeyError("Group %s is already existing" % name)
        path = os.path.join(self.path, name)
        return Group._create_group(path, self.is_zarr, self.mode)

[docs]    def require_group(self, name):
        """ Require group.

        Require that a group of the given name exists.
        The group will be created if it does not already exist.

        Args:
            name (str): name of the required group.

        Returns:
            ``Group``: group of the requested name.
        """
        path = os.path.join(self.path, name)
        if os.path.exists(path):
            if not is_group(path, self.is_zarr):
                raise TypeError("Incompatible object (Dataset) already exists")
            return Group._open_group(path, self.is_zarr, self.mode)
        else:
            return self.create_group(name)

    #
    # Dataset functionality
    #

[docs]    def create_dataset(self, name,
                       shape=None, dtype=None,
                       data=None, chunks=None,
                       compression=None, fillvalue=0,
                       n_threads=1, **compression_options):
        """ Create a new dataset.

        Create a new dataset in the group. Syntax and behaviour similar to the
        corresponding ``h5py`` functionality.
        In contrast to ``h5py``, there is no option to store a dataset without chunking
        (if no chunks are given default values, suitable for the dimension of the dataset, will be used).
        Also, if a dataset is created with data and a dtype that is different
        from the data's is specified, the function throws a RuntimeError, instead
        of converting the data.

        Args:
            name (str): name of the new dataset.
            shape (tuple): shape of the new dataset. If no shape is given,
                the ``data`` argument must be given. (default: None).
            dtype (str or np.dtpye): datatype of the new dataset. If no dtype is given,
                the ``data`` argument must be given (default: None).
            data (np.ndarray): data used to infer shape, dtype and fill the dataset
                upon creation (default: None).
            chunks (tuple): chunk sizes of the new dataset. If no chunks are given,
                a suitable default value for the number of dimensions will be used (default: None).
            compression (str): name of the compression library used to compress chunks.
                If no compression is given, the default for the current format is used (default: None).
            fillvalue (float): fillvalue for empty chunks (only zarr) (default: 0).
            n_threads (int): number of threads used for chunk I/O (default: 1).
            **compression_options: options for the compression library.

        Returns:
            ``Dataset``: the new dataset.
        """

        if not self._permissions.can_write():
            raise ValueError("Cannot create dataset with read-only permissions.")
        if name in self:
            raise KeyError("Dataset %s is already existing." % name)
        path = os.path.join(self.path, name)
        return Dataset._create_dataset(path, shape, dtype,
                                       data, chunks, compression,
                                       fillvalue, n_threads,
                                       compression_options,
                                       self.is_zarr, self._internal_mode)

[docs]    def require_dataset(self, name, shape,
                        dtype=None, chunks=None,
                        n_threads=1, **kwargs):
        """ Require dataset.

        Require dataset in the group.
        Will create the dataset if it does not exist, otherwise returns
        existing dataset. If the dataset already exists, consistency with the
        arguments ``shape``, ``dtype`` (if given) and ``chunks`` (if given) is enforced.

        Args:
            name (str): name of the dataset.
            shape (tuple): shape of the dataset.
            dtype (str or np.dtpye): datatype of dataset (default: None).
            chunks (tuple): chunk sizes of the dataset (default: None).
            n_threads (int): number of threads used for chunk I/O (default: 1).
            **kwargs: additional arguments that will only be used for creation
                if the dataset does not exist.

        Returns:
            ``Dataset``: the required dataset.
        """
        if not self._permissions.can_write():
            raise ValueError("Cannot create dataset with read-only permissions.")
        path = os.path.join(self.path, name)
        return Dataset._require_dataset(path, shape, dtype, chunks,
                                        n_threads, self.is_zarr, self._internal_mode,
                                        **kwargs)