import os
from shutil import rmtree
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
from ._z5py import create_group, FileMode
from .dataset import Dataset
from .attribute_manager import AttributeManager
from .shape_utils import is_group
[docs]class Group(Mapping):
""" Group inside of a z5py container.
Corresponds to a directory on the filesystem.
Supports python dict api.
Should not be instantiated directly, but rather be created
or opened via the `create_group`, `request_group` or `[]` operators
of Group or File.
"""
# the python / h5py file modes and the corresponding internal types
# as far as I can tell there is no difference between 'w-' and 'x',
# so for now they get mapped to the same internal type
#: available modes for opening files. these correspond to the ``h5py`` file modes
file_modes = {'a': FileMode.a, 'r': FileMode.r,
'r+': FileMode.r_p, 'w': FileMode.w,
'w-': FileMode.w_m, 'x': FileMode.w_m}
def __init__(self, path, is_zarr=True, mode='a'):
if mode not in self.file_modes:
raise ValueError("Invalid file mode: %s" % mode)
self.mode = mode
self._internal_mode = self.file_modes[mode]
self._permissions = FileMode(self._internal_mode)
self.path = path
self.is_zarr = is_zarr
self._attrs = AttributeManager(path, is_zarr)
#
# Magic Methods, Attributes, Keys, Contains
#
def __iter__(self):
for name in os.listdir(self.path):
if os.path.isdir(os.path.join(self.path, name)):
yield name
def __len__(self):
counter = 0
for _ in self:
counter += 1
return counter
def __delitem__(self, name):
path_ = os.path.join(self.path, name)
if not os.path.exists(path_):
raise KeyError("%s does not exist" % name)
rmtree(path_)
def __getitem__(self, name):
""" Access group or dataset in the container.
Fails if no dataset or group of the specified name exists.
Args:
name (str): name of group or dataset in container.
Returns:
``Group`` or ``Dataset``.
"""
path = os.path.join(self.path, name)
if not os.path.isdir(path):
raise KeyError("Key %s does not exist" % name)
if is_group(path, self.is_zarr):
return Group._open_group(path, self.is_zarr, self.mode)
else:
return Dataset._open_dataset(path, self._internal_mode)
@property
def attrs(self):
""" Access additional attributes.
Returns:
``AttributeManager``.
"""
return self._attrs
#
# Group functionality
#
@classmethod
def _create_group(cls, path, is_zarr, mode):
create_group(path, is_zarr, cls.file_modes[mode])
return cls(path, is_zarr, mode)
@classmethod
def _open_group(cls, path, is_zarr, mode):
return cls(path, is_zarr, mode)
[docs] def create_group(self, name):
""" Create a new group.
Create new (sub-)group of the group.
Fails if a group of this name already exists.
Args:
name (str): name of the new group.
Returns:
``Group``: group of the requested name.
"""
if name in self:
raise KeyError("Group %s is already existing" % name)
path = os.path.join(self.path, name)
return Group._create_group(path, self.is_zarr, self.mode)
[docs] def require_group(self, name):
""" Require group.
Require that a group of the given name exists.
The group will be created if it does not already exist.
Args:
name (str): name of the required group.
Returns:
``Group``: group of the requested name.
"""
path = os.path.join(self.path, name)
if os.path.exists(path):
if not is_group(path, self.is_zarr):
raise TypeError("Incompatible object (Dataset) already exists")
return Group._open_group(path, self.is_zarr, self.mode)
else:
return self.create_group(name)
#
# Dataset functionality
#
[docs] def create_dataset(self, name,
shape=None, dtype=None,
data=None, chunks=None,
compression=None, fillvalue=0,
n_threads=1, **compression_options):
""" Create a new dataset.
Create a new dataset in the group. Syntax and behaviour similar to the
corresponding ``h5py`` functionality.
In contrast to ``h5py``, there is no option to store a dataset without chunking
(if no chunks are given default values, suitable for the dimension of the dataset, will be used).
Also, if a dataset is created with data and a dtype that is different
from the data's is specified, the function throws a RuntimeError, instead
of converting the data.
Args:
name (str): name of the new dataset.
shape (tuple): shape of the new dataset. If no shape is given,
the ``data`` argument must be given. (default: None).
dtype (str or np.dtpye): datatype of the new dataset. If no dtype is given,
the ``data`` argument must be given (default: None).
data (np.ndarray): data used to infer shape, dtype and fill the dataset
upon creation (default: None).
chunks (tuple): chunk sizes of the new dataset. If no chunks are given,
a suitable default value for the number of dimensions will be used (default: None).
compression (str): name of the compression library used to compress chunks.
If no compression is given, the default for the current format is used (default: None).
fillvalue (float): fillvalue for empty chunks (only zarr) (default: 0).
n_threads (int): number of threads used for chunk I/O (default: 1).
**compression_options: options for the compression library.
Returns:
``Dataset``: the new dataset.
"""
if not self._permissions.can_write():
raise ValueError("Cannot create dataset with read-only permissions.")
if name in self:
raise KeyError("Dataset %s is already existing." % name)
path = os.path.join(self.path, name)
return Dataset._create_dataset(path, shape, dtype,
data, chunks, compression,
fillvalue, n_threads,
compression_options,
self.is_zarr, self._internal_mode)
[docs] def require_dataset(self, name, shape,
dtype=None, chunks=None,
n_threads=1, **kwargs):
""" Require dataset.
Require dataset in the group.
Will create the dataset if it does not exist, otherwise returns
existing dataset. If the dataset already exists, consistency with the
arguments ``shape``, ``dtype`` (if given) and ``chunks`` (if given) is enforced.
Args:
name (str): name of the dataset.
shape (tuple): shape of the dataset.
dtype (str or np.dtpye): datatype of dataset (default: None).
chunks (tuple): chunk sizes of the dataset (default: None).
n_threads (int): number of threads used for chunk I/O (default: 1).
**kwargs: additional arguments that will only be used for creation
if the dataset does not exist.
Returns:
``Dataset``: the required dataset.
"""
if not self._permissions.can_write():
raise ValueError("Cannot create dataset with read-only permissions.")
path = os.path.join(self.path, name)
return Dataset._require_dataset(path, shape, dtype, chunks,
n_threads, self.is_zarr, self._internal_mode,
**kwargs)