From 275b5e39606ad673076a25f98506e91cfd0a0633 Mon Sep 17 00:00:00 2001 From: Constantin Pape <c.pape@gmx.net> Date: Wed, 26 Jan 2022 22:05:46 +0100 Subject: [PATCH 1/3] Enable reading zarr arrays with empty list of filters --- include/z5/metadata.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/z5/metadata.hxx b/include/z5/metadata.hxx index b7adfb3..3417434 100644 --- a/include/z5/metadata.hxx +++ b/include/z5/metadata.hxx @@ -271,7 +271,7 @@ namespace z5 { jIt = j.find("filters"); if(jIt != j.end()) { - if(!j["filters"].is_null()) { + if(!j["filters"].is_null() && j["filters"].size() > 0) { throw std::runtime_error( "Invalid Filters: Z5 does not support filters" ); From 332713996a118a459b60c155c19defd36e10e2f9 Mon Sep 17 00:00:00 2001 From: Constantin Pape <c.pape@gmx.net> Date: Wed, 26 Jan 2022 23:00:21 +0100 Subject: [PATCH 2/3] Support opening zarr datasets with dimension separator --- include/z5/factory.hxx | 31 ++++++++++++++++++++++++++++++- include/z5/filesystem/handle.hxx | 8 ++++---- include/z5/handle.hxx | 17 ++++++++++------- src/python/test/test_zarr.py | 11 ++++++++++- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/include/z5/factory.hxx b/include/z5/factory.hxx index 238e07b..d849910 100644 --- a/include/z5/factory.hxx +++ b/include/z5/factory.hxx @@ -12,31 +12,58 @@ namespace z5 { + namespace factory_detail { + inline void getZarrDelimiter(const fs::path & root, const std::string & key, std::string & zarrDelimiter) { + const fs::path path = root / key / ".zarray"; + if(!fs::exists(path)) { + return; + } + nlohmann::json j; + + #ifdef WITH_BOOST_FS + fs::ifstream file(path); + #else + std::ifstream file(path); + #endif + file >> j; + file.close(); + + const auto it = j.find("dimension_separator"); + if(it != j.end()) { + zarrDelimiter = *it; + } + } + } template<class GROUP> inline std::unique_ptr<Dataset> openDataset(const handle::Group<GROUP> & root, const std::string & key) { + std::string zarrDelimiter = "."; // check if this is a s3 group #ifdef WITH_S3 if(root.isS3()) { + // TODO support zarr dataset with dimension separator by reading this from s3 s3::handle::Dataset ds(root, key); return s3::openDataset(ds); } #endif #ifdef WITH_GCS if(root.isGcs()) { + // TODO support zarr dataset with dimension separator by reading this from gcs gcs::handle::Dataset ds(root, key); return gcs::openDataset(ds); } #endif - filesystem::handle::Dataset ds(root, key); + factory_detail::getZarrDelimiter(root.path(), key, zarrDelimiter); + filesystem::handle::Dataset ds(root, key, zarrDelimiter); return filesystem::openDataset(ds); } + // TODO support passing zarr delimiter (need to also adapt this upstream) template<class GROUP> inline std::unique_ptr<Dataset> createDataset( const handle::Group<GROUP> & root, @@ -61,6 +88,7 @@ namespace z5 { } + // TODO support passing zarr delimiter (need to also adapt this upstream) template<class GROUP> inline std::unique_ptr<Dataset> createDataset( const handle::Group<GROUP> & root, @@ -95,6 +123,7 @@ namespace z5 { } + // TODO support passing zarr delimiter (need to also adapt this upstream) // dataset creation from json, because wrapping the CompressionOptions type // to python is very brittle template<class GROUP> diff --git a/include/z5/filesystem/handle.hxx b/include/z5/filesystem/handle.hxx index 93ad36e..a2d2654 100644 --- a/include/z5/filesystem/handle.hxx +++ b/include/z5/filesystem/handle.hxx @@ -188,8 +188,8 @@ namespace handle { typedef z5::handle::Dataset<Dataset> BaseType; template<class GROUP> - Dataset(const z5::handle::Group<GROUP> & group, const std::string & key) - : BaseType(group.mode()), HandleImpl(group.path() / key) { + Dataset(const z5::handle::Group<GROUP> & group, const std::string & key, const std::string & zarrDelimiter=".") + : BaseType(group.mode(), zarrDelimiter), HandleImpl(group.path() / key) { } Dataset(const fs::path & path, const FileMode & mode) @@ -239,8 +239,8 @@ namespace handle { const types::ShapeType & chunkIndices, const types::ShapeType & chunkShape, const types::ShapeType & shape) : BaseType(chunkIndices, chunkShape, shape, ds.mode()), - dsHandle_(ds), - path_(ds.path() / getChunkKey(ds.isZarr())){} + dsHandle_(ds), + path_(ds.path() / getChunkKey(ds.isZarr(), ds.zarrDelimiter())){} // make the top level directories for a n5 chunk inline void create() const { diff --git a/include/z5/handle.hxx b/include/z5/handle.hxx index 220b7c2..29bc2e0 100644 --- a/include/z5/handle.hxx +++ b/include/z5/handle.hxx @@ -93,8 +93,13 @@ namespace handle { template<class DATASET> class Dataset : public Handle { public: - Dataset(const FileMode mode) : Handle(mode){} + Dataset(const FileMode mode, const std::string zarrDelimiter=".") : Handle(mode), zarrDelimiter_(zarrDelimiter){} virtual ~Dataset() {} + + const std::string & zarrDelimiter() const {return zarrDelimiter_;} + + private: + std::string zarrDelimiter_; }; @@ -147,19 +152,17 @@ namespace handle { } protected: - inline std::string getChunkKey(const bool isZarr) const { + inline std::string getChunkKey(const bool isZarr, const std::string & zarrDelimiter=".") const { const auto & indices = chunkIndices(); std::string name; // if we have the zarr-format, chunk indices - // are separated by a '.' + // are separated by a '.' by default, but the delimiter may be changed in the metadata if(isZarr) { - std::string delimiter = "."; - util::join(indices.begin(), indices.end(), name, delimiter); + util::join(indices.begin(), indices.end(), name, zarrDelimiter); } - // otherwise (n5-format), each chunk index has - // its own directory + // in n5 each chunk index has its own directory, i.e. the delimiter is '/' else { std::string delimiter = "/"; // N5-Axis order: we need to read the chunks in reverse order diff --git a/src/python/test/test_zarr.py b/src/python/test/test_zarr.py index 3e61169..a953e61 100644 --- a/src/python/test/test_zarr.py +++ b/src/python/test/test_zarr.py @@ -79,7 +79,7 @@ def test_read_zarr(self): compressor=zarr_compressors[compression]) ar[:] = data # read with z5py - out = z5py.File(self.path)[key][:] + out = f_z5[key][:] self.assertEqual(data.shape, out.shape) self.assertTrue(np.allclose(data, out)) @@ -137,6 +137,15 @@ def test_fillvalue(self): self.assertEqual(self.shape, out.shape) self.assertTrue(np.allclose(val, out)) + @unittest.skipIf(int(zarr.__version__.split(".")[1]) < 10, "Need zarr >= 2.10 for supported of nested storage") + def test_zarr_nested(self): + data = np.random.rand(128, 128) + with zarr.open(self.path, mode="a") as f: + f.create_dataset("data", data=data, chunks=(16, 16), dimension_separator="/") + with z5py.File(self.path, mode="r") as f: + res = f["data"][:] + self.assertTrue(np.allclose(data, res)) + @unittest.skipUnless(zarr, 'Requires zarr package') class TestZarrN5(ZarrTestMixin, unittest.TestCase): From 83a9302f21bd2b97d1d7b29701369baf6eea6548 Mon Sep 17 00:00:00 2001 From: Constantin Pape <c.pape@gmx.net> Date: Wed, 26 Jan 2022 23:19:44 +0100 Subject: [PATCH 3/3] Only read dimension_separator for zarr format --- include/z5/factory.hxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/z5/factory.hxx b/include/z5/factory.hxx index d849910..65f573b 100644 --- a/include/z5/factory.hxx +++ b/include/z5/factory.hxx @@ -57,7 +57,9 @@ namespace z5 { } #endif - factory_detail::getZarrDelimiter(root.path(), key, zarrDelimiter); + if(root.isZarr()) { + factory_detail::getZarrDelimiter(root.path(), key, zarrDelimiter); + } filesystem::handle::Dataset ds(root, key, zarrDelimiter); return filesystem::openDataset(ds); }