Skip to content

Commit

Permalink
Merge pull request #194 from constantinpape/zarr-fixes
Browse files Browse the repository at this point in the history
Zarr format updates
  • Loading branch information
constantinpape authored Jan 26, 2022
2 parents 1ac8fc9 + 83a9302 commit 75f087c
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 14 deletions.
33 changes: 32 additions & 1 deletion include/z5/factory.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,60 @@


namespace z5 {
namespace factory_detail {
// Look up the chunk-key separator of a zarr dataset stored on the filesystem.
//
// Reads the json metadata at `root / key / ".zarray"` and, if it contains a
// "dimension_separator" entry, writes that value to `zarrDelimiter`.
// If the metadata file does not exist, or it has no such entry,
// `zarrDelimiter` is left untouched, so the caller's default stays in effect.
inline void getZarrDelimiter(const fs::path & root, const std::string & key, std::string & zarrDelimiter) {
    const fs::path path = root / key / ".zarray";
    if(!fs::exists(path)) {
        return;
    }

    nlohmann::json j;
    {
        // the stream is closed by its destructor when this scope ends
#ifdef WITH_BOOST_FS
        fs::ifstream file(path);
#else
        std::ifstream file(path);
#endif
        file >> j;
    }

    const auto it = j.find("dimension_separator");
    if(it != j.end()) {
        // convert explicitly: assigning a json value to an existing std::string
        // depends on nlohmann's implicit conversions, which may be disabled
        // and can be ambiguous for std::string::operator=
        zarrDelimiter = it->get<std::string>();
    }
}
}


// Open the existing dataset `key` below the group `root`.
//
// Dispatches on the backend of `root`: s3 and gcs groups (when compiled in)
// are opened through their own handles, everything else is opened from the
// filesystem. For filesystem zarr datasets the chunk-key delimiter is read
// from the dataset metadata before the handle is constructed.
template<class GROUP>
inline std::unique_ptr<Dataset> openDataset(const handle::Group<GROUP> & root,
                                            const std::string & key) {

    // check if this is a s3 group
#ifdef WITH_S3
    if(root.isS3()) {
        // TODO support zarr dataset with dimension separator by reading this from s3
        s3::handle::Dataset ds(root, key);
        return s3::openDataset(ds);
    }
#endif
#ifdef WITH_GCS
    if(root.isGcs()) {
        // TODO support zarr dataset with dimension separator by reading this from gcs
        gcs::handle::Dataset ds(root, key);
        return gcs::openDataset(ds);
    }
#endif

    // zarr chunk indices are joined by '.' unless the metadata overrides it;
    // the delimiter must be known before the handle is built, so the handle
    // is declared exactly once, after the lookup
    std::string zarrDelimiter = ".";
    if(root.isZarr()) {
        factory_detail::getZarrDelimiter(root.path(), key, zarrDelimiter);
    }
    filesystem::handle::Dataset ds(root, key, zarrDelimiter);
    return filesystem::openDataset(ds);
}


// TODO support passing zarr delimiter (need to also adapt this upstream)
template<class GROUP>
inline std::unique_ptr<Dataset> createDataset(
const handle::Group<GROUP> & root,
Expand All @@ -61,6 +90,7 @@ namespace z5 {
}


// TODO support passing zarr delimiter (need to also adapt this upstream)
template<class GROUP>
inline std::unique_ptr<Dataset> createDataset(
const handle::Group<GROUP> & root,
Expand Down Expand Up @@ -95,6 +125,7 @@ namespace z5 {
}


// TODO support passing zarr delimiter (need to also adapt this upstream)
// dataset creation from json, because wrapping the CompressionOptions type
// to python is very brittle
template<class GROUP>
Expand Down
8 changes: 4 additions & 4 deletions include/z5/filesystem/handle.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ namespace handle {
typedef z5::handle::Dataset<Dataset> BaseType;

// Construct the handle for the dataset `key` inside `group`.
// The access mode is taken from the group and the path is `group.path() / key`.
// `zarrDelimiter` is forwarded to the base handle; it defaults to ".",
// so existing two-argument callers keep working.
template<class GROUP>
Dataset(const z5::handle::Group<GROUP> & group, const std::string & key, const std::string & zarrDelimiter=".")
    : BaseType(group.mode(), zarrDelimiter), HandleImpl(group.path() / key) {
}

Dataset(const fs::path & path, const FileMode & mode)
Expand Down Expand Up @@ -239,8 +239,8 @@ namespace handle {
const types::ShapeType & chunkIndices,
const types::ShapeType & chunkShape,
const types::ShapeType & shape) : BaseType(chunkIndices, chunkShape, shape, ds.mode()),
dsHandle_(ds),
path_(ds.path() / getChunkKey(ds.isZarr())){}
dsHandle_(ds),
path_(ds.path() / getChunkKey(ds.isZarr(), ds.zarrDelimiter())){}

// make the top level directories for a n5 chunk
inline void create() const {
Expand Down
17 changes: 10 additions & 7 deletions include/z5/handle.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,13 @@ namespace handle {
// Base class for dataset handles; DATASET is the deriving handle type
// (the filesystem handle instantiates it with itself).
// In addition to the file mode kept by Handle, it stores the delimiter that
// is used to join chunk indices into chunk keys for zarr datasets.
template<class DATASET>
class Dataset : public Handle {
public:
    // A single constructor with a defaulted delimiter: keeping a separate
    // one-argument overload next to it would make `Dataset(mode)` ambiguous.
    Dataset(const FileMode mode, const std::string & zarrDelimiter=".")
        : Handle(mode), zarrDelimiter_(zarrDelimiter) {}
    virtual ~Dataset() {}

    // delimiter between chunk indices in zarr chunk keys ("." by default)
    const std::string & zarrDelimiter() const {return zarrDelimiter_;}

private:
    std::string zarrDelimiter_;
};


Expand Down Expand Up @@ -147,19 +152,17 @@ namespace handle {
}

protected:
inline std::string getChunkKey(const bool isZarr) const {
inline std::string getChunkKey(const bool isZarr, const std::string & zarrDelimiter=".") const {
const auto & indices = chunkIndices();
std::string name;

// if we have the zarr-format, chunk indices
// are separated by a '.'
// are separated by a '.' by default, but the delimiter may be changed in the metadata
if(isZarr) {
std::string delimiter = ".";
util::join(indices.begin(), indices.end(), name, delimiter);
util::join(indices.begin(), indices.end(), name, zarrDelimiter);
}

// otherwise (n5-format), each chunk index has
// its own directory
// in n5 each chunk index has its own directory, i.e. the delimiter is '/'
else {
std::string delimiter = "/";
// N5-Axis order: we need to read the chunks in reverse order
Expand Down
2 changes: 1 addition & 1 deletion include/z5/metadata.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ namespace z5 {

jIt = j.find("filters");
if(jIt != j.end()) {
if(!j["filters"].is_null()) {
if(!j["filters"].is_null() && j["filters"].size() > 0) {
throw std::runtime_error(
"Invalid Filters: Z5 does not support filters"
);
Expand Down
11 changes: 10 additions & 1 deletion src/python/test/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_read_zarr(self):
compressor=zarr_compressors[compression])
ar[:] = data
# read with z5py
out = z5py.File(self.path)[key][:]
out = f_z5[key][:]
self.assertEqual(data.shape, out.shape)
self.assertTrue(np.allclose(data, out))

Expand Down Expand Up @@ -137,6 +137,15 @@ def test_fillvalue(self):
self.assertEqual(self.shape, out.shape)
self.assertTrue(np.allclose(val, out))

# Compare (major, minor) as a tuple so that e.g. zarr 3.0 is not mistaken for
# an old release, and short-circuit when zarr is not available so evaluating
# the decorator does not fail at import time.
@unittest.skipIf(
    not zarr or tuple(int(part) for part in zarr.__version__.split(".")[:2]) < (2, 10),
    "Need zarr >= 2.10 for supported of nested storage"
)
def test_zarr_nested(self):
    """Write a dataset with zarr using '/' as dimension separator and
    check that z5py reads back the same data."""
    data = np.random.rand(128, 128)
    # write with zarr, storing the chunks in nested directories
    with zarr.open(self.path, mode="a") as f:
        f.create_dataset("data", data=data, chunks=(16, 16), dimension_separator="/")
    # read with z5py
    with z5py.File(self.path, mode="r") as f:
        res = f["data"][:]
    self.assertEqual(data.shape, res.shape)
    self.assertTrue(np.allclose(data, res))


@unittest.skipUnless(zarr, 'Requires zarr package')
class TestZarrN5(ZarrTestMixin, unittest.TestCase):
Expand Down

0 comments on commit 75f087c

Please sign in to comment.