diff --git a/autotest/gdrivers/data/zarr/v3/test.zr3/ar/zarr.json b/autotest/gdrivers/data/zarr/v3/test.zr3/ar/zarr.json index 8dec6c80d052..aa255257d709 100644 --- a/autotest/gdrivers/data/zarr/v3/test.zr3/ar/zarr.json +++ b/autotest/gdrivers/data/zarr/v3/test.zr3/ar/zarr.json @@ -19,5 +19,6 @@ "separator":"\/" } }, - "fill_value": 255 + "fill_value": 255, + "codecs": [{"name":"bytes"}] } diff --git a/autotest/gdrivers/data/zarr/v3/test.zr3/marvin/android/zarr.json b/autotest/gdrivers/data/zarr/v3/test.zr3/marvin/android/zarr.json index 1e423fa8f742..7ccfe257d12d 100644 --- a/autotest/gdrivers/data/zarr/v3/test.zr3/marvin/android/zarr.json +++ b/autotest/gdrivers/data/zarr/v3/test.zr3/marvin/android/zarr.json @@ -18,5 +18,6 @@ "chunk_key_encoding":{ "name":"v2" }, - "fill_value":255 + "fill_value":255, + "codecs": [{"name":"bytes"}] } diff --git a/autotest/gdrivers/data/zarr/v3/test.zr3/marvin/paranoid/zarr.json b/autotest/gdrivers/data/zarr/v3/test.zr3/marvin/paranoid/zarr.json new file mode 100644 index 000000000000..c9c096a15b06 --- /dev/null +++ b/autotest/gdrivers/data/zarr/v3/test.zr3/marvin/paranoid/zarr.json @@ -0,0 +1,4 @@ +{ + "zarr_format":3, + "node_type":"group" +} diff --git a/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/ar/c/0 b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/ar/c/0 new file mode 100644 index 000000000000..71bd63e62027 --- /dev/null +++ b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/ar/c/0 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/ar/zarr.json b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/ar/zarr.json new file mode 100644 index 000000000000..8dec6c80d052 --- /dev/null +++ b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/ar/zarr.json @@ -0,0 +1,23 @@ +{ + "zarr_format":3, + "node_type":"array", + "shape":[ + 2 + ], + "data_type":"uint8", + "chunk_grid":{ + "name":"regular", + 
"configuration":{ + "chunk_shape":[ + 2 + ] + } + }, + "chunk_key_encoding":{ + "name":"default", + "configuration":{ + "separator":"\/" + } + }, + "fill_value": 255 +} diff --git a/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/android/0.0 b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/android/0.0 new file mode 100644 index 000000000000..b0a3118514f3 --- /dev/null +++ b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/android/0.0 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/android/zarr.json b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/android/zarr.json new file mode 100644 index 000000000000..1e423fa8f742 --- /dev/null +++ b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/android/zarr.json @@ -0,0 +1,22 @@ +{ + "zarr_format":3, + "node_type":"array", + "shape":[ + 5, + 4 + ], + "data_type":"uint8", + "chunk_grid":{ + "name":"regular", + "configuration":{ + "chunk_shape":[ + 5, + 4 + ] + } + }, + "chunk_key_encoding":{ + "name":"v2" + }, + "fill_value":255 +} diff --git a/autotest/gdrivers/data/zarr/v3/test.zr3/marvin/paranoid/DO_NOT_REMOVE_ME b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/paranoid/DO_NOT_REMOVE_ME similarity index 100% rename from autotest/gdrivers/data/zarr/v3/test.zr3/marvin/paranoid/DO_NOT_REMOVE_ME rename to autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/paranoid/DO_NOT_REMOVE_ME diff --git a/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/zarr.json b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/zarr.json new file mode 100644 index 000000000000..410a0e31303f --- /dev/null +++ b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/marvin/zarr.json @@ -0,0 +1,7 @@ +{ + "zarr_format":3, + "node_type":"group", + "attributes":{ + "foo":"bar" + } +} \ No newline at end 
of file diff --git a/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/zarr.json b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/zarr.json new file mode 100644 index 000000000000..164d1a096862 --- /dev/null +++ b/autotest/gdrivers/data/zarr/v3/test_deprecated_no_codecs.zr3/zarr.json @@ -0,0 +1,7 @@ +{ + "zarr_format":3, + "node_type":"group", + "attributes":{ + "root_foo":"bar" + } +} \ No newline at end of file diff --git a/autotest/gdrivers/zarr_driver.py b/autotest/gdrivers/zarr_driver.py index e3292285a128..a0587b58e9b4 100644 --- a/autotest/gdrivers/zarr_driver.py +++ b/autotest/gdrivers/zarr_driver.py @@ -982,11 +982,13 @@ def test_zarr_read_ARRAY_DIMENSIONS(use_zmetadata, filename): assert len(rg.GetDimensions()) == 2 +@pytest.mark.parametrize( + "ds_name", ["data/zarr/v3/test_deprecated_no_codecs.zr3", "data/zarr/v3/test.zr3"] +) @pytest.mark.parametrize("use_get_names", [True, False]) -def test_zarr_read_v3(use_get_names): +def test_zarr_read_v3(ds_name, use_get_names): - filename = "data/zarr/v3/test.zr3" - ds = gdal.OpenEx(filename, gdal.OF_MULTIDIM_RASTER) + ds = gdal.OpenEx(ds_name, gdal.OF_MULTIDIM_RASTER) assert ds is not None rg = ds.GetRootGroup() assert rg.GetName() == "/" @@ -1930,21 +1932,28 @@ def create(): @pytest.mark.parametrize( "compressor,options,expected_json", [ - ["NONE", [], None], + ["NONE", [], [{"name": "bytes", "configuration": {"endian": "little"}}]], [ "gzip", [], - [{"name": "gzip", "configuration": {"level": 6}}], + [ + {"name": "bytes", "configuration": {"endian": "little"}}, + {"name": "gzip", "configuration": {"level": 6}}, + ], ], [ "gzip", ["GZIP_LEVEL=1"], - [{"name": "gzip", "configuration": {"level": 1}}], + [ + {"name": "bytes", "configuration": {"endian": "little"}}, + {"name": "gzip", "configuration": {"level": 1}}, + ], ], [ "blosc", [], [ + {"name": "bytes", "configuration": {"endian": "little"}}, { "name": "blosc", "configuration": { @@ -1954,7 +1963,7 @@ def create(): "typesize": 1, 
"blocksize": 0, }, - } + }, ], ], [ @@ -1966,6 +1975,7 @@ def create(): "BLOSC_BLOCKSIZE=2", ], [ + {"name": "bytes", "configuration": {"endian": "little"}}, { "name": "blosc", "configuration": { @@ -1974,7 +1984,23 @@ def create(): "shuffle": "noshuffle", "blocksize": 2, }, - } + }, + ], + ], + [ + "zstd", + ["ZSTD_LEVEL=20"], + [ + {"name": "bytes", "configuration": {"endian": "little"}}, + {"name": "zstd", "configuration": {"level": 20, "checksum": False}}, + ], + ], + [ + "zstd", + ["ZSTD_CHECKSUM=YES"], + [ + {"name": "bytes", "configuration": {"endian": "little"}}, + {"name": "zstd", "configuration": {"level": 13, "checksum": True}}, ], ], ], @@ -2032,28 +2058,28 @@ def read(): [ [ ["@ENDIAN=little"], - [{"configuration": {"endian": "little"}, "name": "endian"}], + [{"configuration": {"endian": "little"}, "name": "bytes"}], ], - [["@ENDIAN=big"], [{"configuration": {"endian": "big"}, "name": "endian"}]], + [["@ENDIAN=big"], [{"configuration": {"endian": "big"}, "name": "bytes"}]], [ ["@ENDIAN=little", "CHUNK_MEMORY_LAYOUT=F"], [ {"name": "transpose", "configuration": {"order": "F"}}, - {"configuration": {"endian": "little"}, "name": "endian"}, + {"configuration": {"endian": "little"}, "name": "bytes"}, ], ], [ ["@ENDIAN=big", "CHUNK_MEMORY_LAYOUT=F"], [ {"name": "transpose", "configuration": {"order": "F"}}, - {"configuration": {"endian": "big"}, "name": "endian"}, + {"configuration": {"endian": "big"}, "name": "bytes"}, ], ], [ ["@ENDIAN=big", "CHUNK_MEMORY_LAYOUT=F", "COMPRESS=GZIP"], [ {"name": "transpose", "configuration": {"order": "F"}}, - {"name": "endian", "configuration": {"endian": "big"}}, + {"name": "bytes", "configuration": {"endian": "big"}}, {"name": "gzip", "configuration": {"level": 6}}, ], ], @@ -3180,6 +3206,7 @@ def create(): assert "codecs" in j assert j["codecs"] == [ {"name": "transpose", "configuration": {"order": "F"}}, + {"name": "bytes", "configuration": {"endian": "little"}}, {"name": "gzip", "configuration": {"level": 6}}, ] @@ 
-5512,7 +5539,7 @@ def test_zarr_driver_copy_files(format): assert gdal.Open(filename) assert gdal.VSIStatL(newfilename) - print(gdal.ReadDirRecursive(newfilename)) + # print(gdal.ReadDirRecursive(newfilename)) assert gdal.Open(newfilename) finally: diff --git a/doc/source/drivers/raster/zarr.rst b/doc/source/drivers/raster/zarr.rst index 9f636a6813cb..06dbc93a9bab 100644 --- a/doc/source/drivers/raster/zarr.rst +++ b/doc/source/drivers/raster/zarr.rst @@ -348,6 +348,9 @@ with ``ARRAY:`` using :program:`gdalmdimtranslate`): Compression method. + For FORMAT=ZARR_V3, only ``NONE``, ``BLOSC``, ``GZIP`` and ``ZSTD`` are + supported. + - .. co:: FILTER :choices: NONE, DELTA :default: NONE diff --git a/frmts/zarr/zarr.h b/frmts/zarr/zarr.h index 8cd675b46dd1..3285da6be97b 100644 --- a/frmts/zarr/zarr.h +++ b/frmts/zarr/zarr.h @@ -1207,25 +1207,24 @@ class ZarrV3Codec CPL_NON_FINAL }; /************************************************************************/ -/* ZarrV3CodecGZip */ +/* ZarrV3CodecAbstractCompressor */ /************************************************************************/ -// Implements https://zarr-specs.readthedocs.io/en/latest/v3/codecs/gzip/v1.0.html -class ZarrV3CodecGZip final : public ZarrV3Codec +class ZarrV3CodecAbstractCompressor CPL_NON_FINAL : public ZarrV3Codec { + protected: CPLStringList m_aosCompressorOptions{}; const CPLCompressor *m_pDecompressor = nullptr; const CPLCompressor *m_pCompressor = nullptr; - ZarrV3CodecGZip(const ZarrV3CodecGZip &) = delete; - ZarrV3CodecGZip &operator=(const ZarrV3CodecGZip &) = delete; - - public: - static constexpr const char *NAME = "gzip"; + explicit ZarrV3CodecAbstractCompressor(const std::string &osName); - ZarrV3CodecGZip(); - ~ZarrV3CodecGZip() override; + ZarrV3CodecAbstractCompressor(const ZarrV3CodecAbstractCompressor &) = + delete; + ZarrV3CodecAbstractCompressor & + operator=(const ZarrV3CodecAbstractCompressor &) = delete; + public: IOType GetInputType() const override { return 
IOType::BYTES; @@ -1236,6 +1235,24 @@ class ZarrV3CodecGZip final : public ZarrV3Codec return IOType::BYTES; } + bool Encode(const ZarrByteVectorQuickResize &abySrc, + ZarrByteVectorQuickResize &abyDst) const override; + bool Decode(const ZarrByteVectorQuickResize &abySrc, + ZarrByteVectorQuickResize &abyDst) const override; +}; + +/************************************************************************/ +/* ZarrV3CodecGZip */ +/************************************************************************/ + +// Implements https://zarr-specs.readthedocs.io/en/latest/v3/codecs/gzip/v1.0.html +class ZarrV3CodecGZip final : public ZarrV3CodecAbstractCompressor +{ + public: + static constexpr const char *NAME = "gzip"; + + ZarrV3CodecGZip(); + static CPLJSONObject GetConfiguration(int nLevel); bool @@ -1244,11 +1261,6 @@ class ZarrV3CodecGZip final : public ZarrV3Codec ZarrArrayMetadata &oOutputArrayMetadata) override; std::unique_ptr Clone() const override; - - bool Encode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const override; - bool Decode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const override; }; /************************************************************************/ @@ -1256,30 +1268,12 @@ class ZarrV3CodecGZip final : public ZarrV3Codec /************************************************************************/ // Implements https://zarr-specs.readthedocs.io/en/latest/v3/codecs/blosc/v1.0.html -class ZarrV3CodecBlosc final : public ZarrV3Codec +class ZarrV3CodecBlosc final : public ZarrV3CodecAbstractCompressor { - CPLStringList m_aosCompressorOptions{}; - const CPLCompressor *m_pDecompressor = nullptr; - const CPLCompressor *m_pCompressor = nullptr; - - ZarrV3CodecBlosc(const ZarrV3CodecBlosc &) = delete; - ZarrV3CodecBlosc &operator=(const ZarrV3CodecBlosc &) = delete; - public: static constexpr const char *NAME = "blosc"; ZarrV3CodecBlosc(); - ~ZarrV3CodecBlosc() override; - - IOType 
GetInputType() const override - { - return IOType::BYTES; - } - - IOType GetOutputType() const override - { - return IOType::BYTES; - } static CPLJSONObject GetConfiguration(const char *cname, int clevel, const char *shuffle, int typesize, @@ -1291,27 +1285,43 @@ class ZarrV3CodecBlosc final : public ZarrV3Codec ZarrArrayMetadata &oOutputArrayMetadata) override; std::unique_ptr Clone() const override; +}; - bool Encode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const override; - bool Decode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const override; +/************************************************************************/ +/* ZarrV3CodecZstd */ +/************************************************************************/ + +// Implements https://github.com/zarr-developers/zarr-specs/pull/256 +class ZarrV3CodecZstd final : public ZarrV3CodecAbstractCompressor +{ + public: + static constexpr const char *NAME = "zstd"; + + ZarrV3CodecZstd(); + + static CPLJSONObject GetConfiguration(int level, bool checksum); + + bool + InitFromConfiguration(const CPLJSONObject &configuration, + const ZarrArrayMetadata &oInputArrayMetadata, + ZarrArrayMetadata &oOutputArrayMetadata) override; + + std::unique_ptr Clone() const override; }; /************************************************************************/ -/* ZarrV3CodecEndian */ +/* ZarrV3CodecBytes */ /************************************************************************/ -// Implements https://zarr-specs.readthedocs.io/en/latest/v3/codecs/endian/v1.0.html -class ZarrV3CodecEndian final : public ZarrV3Codec +// Implements https://zarr-specs.readthedocs.io/en/latest/v3/codecs/bytes/v1.0.html +class ZarrV3CodecBytes final : public ZarrV3Codec { bool m_bLittle = true; public: - static constexpr const char *NAME = "endian"; + static constexpr const char *NAME = "bytes"; - ZarrV3CodecEndian(); - ~ZarrV3CodecEndian() override; + ZarrV3CodecBytes(); IOType 
GetInputType() const override { @@ -1374,7 +1384,6 @@ class ZarrV3CodecTranspose final : public ZarrV3Codec static constexpr const char *NAME = "transpose"; ZarrV3CodecTranspose(); - ~ZarrV3CodecTranspose() override; IOType GetInputType() const override { diff --git a/frmts/zarr/zarr_v3_codec.cpp b/frmts/zarr/zarr_v3_codec.cpp index 739d623da555..b2ba0418e9ef 100644 --- a/frmts/zarr/zarr_v3_codec.cpp +++ b/frmts/zarr/zarr_v3_codec.cpp @@ -29,18 +29,76 @@ ZarrV3Codec::ZarrV3Codec(const std::string &osName) : m_osName(osName) ZarrV3Codec::~ZarrV3Codec() = default; /************************************************************************/ -/* ZarrV3CodecGZip() */ +/* ZarrV3CodecAbstractCompressor() */ +/************************************************************************/ + +ZarrV3CodecAbstractCompressor::ZarrV3CodecAbstractCompressor( + const std::string &osName) + : ZarrV3Codec(osName) +{ +} + +/************************************************************************/ +/* ZarrV3CodecAbstractCompressor::Encode() */ +/************************************************************************/ + +bool ZarrV3CodecAbstractCompressor::Encode( + const ZarrByteVectorQuickResize &abySrc, + ZarrByteVectorQuickResize &abyDst) const +{ + abyDst.resize(abyDst.capacity()); + void *pOutputData = abyDst.data(); + size_t nOutputSize = abyDst.size(); + bool bRet = m_pCompressor->pfnFunc( + abySrc.data(), abySrc.size(), &pOutputData, &nOutputSize, + m_aosCompressorOptions.List(), m_pCompressor->user_data); + if (bRet) + { + abyDst.resize(nOutputSize); + } + else if (nOutputSize > abyDst.size()) + { + CPLError(CE_Failure, CPLE_AppDefined, + "%s codec:Encode(): output buffer too small", + m_osName.c_str()); + } + return bRet; +} + +/************************************************************************/ +/* ZarrV3CodecAbstractCompressor::Decode() */ /************************************************************************/ -ZarrV3CodecGZip::ZarrV3CodecGZip() : ZarrV3Codec(NAME) 
+bool ZarrV3CodecAbstractCompressor::Decode( + const ZarrByteVectorQuickResize &abySrc, + ZarrByteVectorQuickResize &abyDst) const { + abyDst.resize(abyDst.capacity()); + void *pOutputData = abyDst.data(); + size_t nOutputSize = abyDst.size(); + bool bRet = m_pDecompressor->pfnFunc(abySrc.data(), abySrc.size(), + &pOutputData, &nOutputSize, nullptr, + m_pDecompressor->user_data); + if (bRet) + { + abyDst.resize(nOutputSize); + } + else if (nOutputSize > abyDst.size()) + { + CPLError(CE_Failure, CPLE_AppDefined, + "%s codec:Decode(): output buffer too small", + m_osName.c_str()); + } + return bRet; } /************************************************************************/ -/* ~ZarrV3CodecGZip() */ +/* ZarrV3CodecGZip() */ /************************************************************************/ -ZarrV3CodecGZip::~ZarrV3CodecGZip() = default; +ZarrV3CodecGZip::ZarrV3CodecGZip() : ZarrV3CodecAbstractCompressor(NAME) +{ +} /************************************************************************/ /* GetConfiguration() */ @@ -136,68 +194,130 @@ std::unique_ptr ZarrV3CodecGZip::Clone() const } /************************************************************************/ -/* ZarrV3CodecGZip::Encode() */ +/* ZarrV3CodecZstd() */ /************************************************************************/ -bool ZarrV3CodecGZip::Encode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const +ZarrV3CodecZstd::ZarrV3CodecZstd() : ZarrV3CodecAbstractCompressor(NAME) { - abyDst.resize(abyDst.capacity()); - void *pOutputData = abyDst.data(); - size_t nOutputSize = abyDst.size(); - bool bRet = m_pCompressor->pfnFunc( - abySrc.data(), abySrc.size(), &pOutputData, &nOutputSize, - m_aosCompressorOptions.List(), m_pCompressor->user_data); - if (bRet) - { - abyDst.resize(nOutputSize); - } - else if (nOutputSize > abyDst.size()) - { - CPLError(CE_Failure, CPLE_AppDefined, - "ZarrV3CodecGZip::Encode(): output buffer too small"); - } - return bRet; } 
/************************************************************************/ -/* ZarrV3CodecGZip::Decode() */ +/* GetConfiguration() */ /************************************************************************/ -bool ZarrV3CodecGZip::Decode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const +/* static */ CPLJSONObject ZarrV3CodecZstd::GetConfiguration(int nLevel, + bool checksum) { - abyDst.resize(abyDst.capacity()); - void *pOutputData = abyDst.data(); - size_t nOutputSize = abyDst.size(); - bool bRet = m_pDecompressor->pfnFunc(abySrc.data(), abySrc.size(), - &pOutputData, &nOutputSize, nullptr, - m_pDecompressor->user_data); - if (bRet) + CPLJSONObject oConfig; + oConfig.Add("level", nLevel); + oConfig.Add("checksum", checksum); + return oConfig; +} + +/************************************************************************/ +/* ZarrV3CodecZstd::InitFromConfiguration() */ +/************************************************************************/ + +bool ZarrV3CodecZstd::InitFromConfiguration( + const CPLJSONObject &configuration, + const ZarrArrayMetadata &oInputArrayMetadata, + ZarrArrayMetadata &oOutputArrayMetadata) +{ + m_pCompressor = CPLGetCompressor("zstd"); + m_pDecompressor = CPLGetDecompressor("zstd"); + if (!m_pCompressor || !m_pDecompressor) { - abyDst.resize(nOutputSize); + CPLError(CE_Failure, CPLE_AppDefined, "zstd compressor not available"); + return false; } - else if (nOutputSize > abyDst.size()) + + m_oConfiguration = configuration.Clone(); + m_oInputArrayMetadata = oInputArrayMetadata; + // byte->byte codec + oOutputArrayMetadata = oInputArrayMetadata; + + int nLevel = 13; + bool bChecksum = false; + + if (configuration.IsValid()) { - CPLError(CE_Failure, CPLE_AppDefined, - "ZarrV3CodecGZip::Decode(): output buffer too small"); + if (configuration.GetType() != CPLJSONObject::Type::Object) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Codec zstd: configuration is not an object"); + return false; + } + + for 
(const auto &oChild : configuration.GetChildren()) + { + if (oChild.GetName() != "level" && oChild.GetName() != "checksum") + { + CPLError( + CE_Failure, CPLE_AppDefined, + "Codec zstd: configuration contains a unhandled member: %s", + oChild.GetName().c_str()); + return false; + } + } + + const auto oLevel = configuration.GetObj("level"); + if (oLevel.IsValid()) + { + if (oLevel.GetType() != CPLJSONObject::Type::Integer) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Codec zstd: level is not an integer"); + return false; + } + nLevel = oLevel.ToInteger(); + if (nLevel < 0 || nLevel > 22) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Codec zstd: invalid value for level: %d", nLevel); + return false; + } + } + + const auto oChecksum = configuration.GetObj("checksum"); + if (oChecksum.IsValid()) + { + if (oChecksum.GetType() != CPLJSONObject::Type::Boolean) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Codec zstd: checksum is not a boolean"); + return false; + } + bChecksum = oChecksum.ToBool(); + } } - return bRet; + + m_aosCompressorOptions.SetNameValue("LEVEL", CPLSPrintf("%d", nLevel)); + if (bChecksum) + m_aosCompressorOptions.SetNameValue("CHECKSUM", "YES"); + + return true; } /************************************************************************/ -/* ZarrV3CodecBlosc() */ +/* ZarrV3CodecZstd::Clone() */ /************************************************************************/ -ZarrV3CodecBlosc::ZarrV3CodecBlosc() : ZarrV3Codec(NAME) +std::unique_ptr ZarrV3CodecZstd::Clone() const { + auto psClone = std::make_unique(); + ZarrArrayMetadata oOutputArrayMetadata; + psClone->InitFromConfiguration(m_oConfiguration, m_oInputArrayMetadata, + oOutputArrayMetadata); + return psClone; } /************************************************************************/ -/* ~ZarrV3CodecBlosc() */ +/* ZarrV3CodecBlosc() */ /************************************************************************/ -ZarrV3CodecBlosc::~ZarrV3CodecBlosc() = default; 
+ZarrV3CodecBlosc::ZarrV3CodecBlosc() : ZarrV3CodecAbstractCompressor(NAME) +{ +} /************************************************************************/ /* GetConfiguration() */ @@ -355,74 +475,18 @@ std::unique_ptr ZarrV3CodecBlosc::Clone() const } /************************************************************************/ -/* ZarrV3CodecBlosc::Encode() */ -/************************************************************************/ - -bool ZarrV3CodecBlosc::Encode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const -{ - abyDst.resize(abyDst.capacity()); - void *pOutputData = abyDst.data(); - size_t nOutputSize = abyDst.size(); - bool bRet = m_pCompressor->pfnFunc( - abySrc.data(), abySrc.size(), &pOutputData, &nOutputSize, - m_aosCompressorOptions.List(), m_pCompressor->user_data); - if (bRet) - { - abyDst.resize(nOutputSize); - } - else if (nOutputSize > abyDst.size()) - { - CPLError(CE_Failure, CPLE_AppDefined, - "ZarrV3CodecBlosc::Encode(): output buffer too small"); - } - return bRet; -} - -/************************************************************************/ -/* ZarrV3CodecBlosc::Decode() */ -/************************************************************************/ - -bool ZarrV3CodecBlosc::Decode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const -{ - abyDst.resize(abyDst.capacity()); - void *pOutputData = abyDst.data(); - size_t nOutputSize = abyDst.size(); - bool bRet = m_pDecompressor->pfnFunc(abySrc.data(), abySrc.size(), - &pOutputData, &nOutputSize, nullptr, - m_pDecompressor->user_data); - if (bRet) - { - abyDst.resize(nOutputSize); - } - else if (nOutputSize > abyDst.size()) - { - CPLError(CE_Failure, CPLE_AppDefined, - "ZarrV3CodecBlosc::Decode(): output buffer too small"); - } - return bRet; -} - -/************************************************************************/ -/* ZarrV3CodecEndian() */ +/* ZarrV3CodecBytes() */ 
/************************************************************************/ -ZarrV3CodecEndian::ZarrV3CodecEndian() : ZarrV3Codec(NAME) +ZarrV3CodecBytes::ZarrV3CodecBytes() : ZarrV3Codec(NAME) { } -/************************************************************************/ -/* ~ZarrV3CodecEndian() */ -/************************************************************************/ - -ZarrV3CodecEndian::~ZarrV3CodecEndian() = default; - /************************************************************************/ /* GetConfiguration() */ /************************************************************************/ -/* static */ CPLJSONObject ZarrV3CodecEndian::GetConfiguration(bool bLittle) +/* static */ CPLJSONObject ZarrV3CodecBytes::GetConfiguration(bool bLittle) { CPLJSONObject oConfig; oConfig.Add("endian", bLittle ? "little" : "big"); @@ -430,10 +494,10 @@ ZarrV3CodecEndian::~ZarrV3CodecEndian() = default; } /************************************************************************/ -/* ZarrV3CodecEndian::InitFromConfiguration() */ +/* ZarrV3CodecBytes::InitFromConfiguration() */ /************************************************************************/ -bool ZarrV3CodecEndian::InitFromConfiguration( +bool ZarrV3CodecBytes::InitFromConfiguration( const CPLJSONObject &configuration, const ZarrArrayMetadata &oInputArrayMetadata, ZarrArrayMetadata &oOutputArrayMetadata) @@ -490,12 +554,12 @@ bool ZarrV3CodecEndian::InitFromConfiguration( } /************************************************************************/ -/* ZarrV3CodecEndian::Clone() */ +/* ZarrV3CodecBytes::Clone() */ /************************************************************************/ -std::unique_ptr ZarrV3CodecEndian::Clone() const +std::unique_ptr ZarrV3CodecBytes::Clone() const { - auto psClone = std::make_unique(); + auto psClone = std::make_unique(); ZarrArrayMetadata oOutputArrayMetadata; psClone->InitFromConfiguration(m_oConfiguration, m_oInputArrayMetadata, oOutputArrayMetadata); @@ -503,11 +567,11 
@@ std::unique_ptr ZarrV3CodecEndian::Clone() const } /************************************************************************/ -/* ZarrV3CodecEndian::Encode() */ +/* ZarrV3CodecBytes::Encode() */ /************************************************************************/ -bool ZarrV3CodecEndian::Encode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const +bool ZarrV3CodecBytes::Encode(const ZarrByteVectorQuickResize &abySrc, + ZarrByteVectorQuickResize &abyDst) const { CPLAssert(!IsNoOp()); @@ -566,11 +630,11 @@ bool ZarrV3CodecEndian::Encode(const ZarrByteVectorQuickResize &abySrc, } /************************************************************************/ -/* ZarrV3CodecEndian::Decode() */ +/* ZarrV3CodecBytes::Decode() */ /************************************************************************/ -bool ZarrV3CodecEndian::Decode(const ZarrByteVectorQuickResize &abySrc, - ZarrByteVectorQuickResize &abyDst) const +bool ZarrV3CodecBytes::Decode(const ZarrByteVectorQuickResize &abySrc, + ZarrByteVectorQuickResize &abyDst) const { return Encode(abySrc, abyDst); } @@ -583,12 +647,6 @@ ZarrV3CodecTranspose::ZarrV3CodecTranspose() : ZarrV3Codec(NAME) { } -/************************************************************************/ -/* ~ZarrV3CodecTranspose() */ -/************************************************************************/ - -ZarrV3CodecTranspose::~ZarrV3CodecTranspose() = default; - /************************************************************************/ /* IsNoOp() */ /************************************************************************/ @@ -930,26 +988,33 @@ bool ZarrV3CodecSequence::InitFromJson(const CPLJSONObject &oCodecs) ZarrV3Codec::IOType eLastType = ZarrV3Codec::IOType::ARRAY; std::string osLastCodec; -#if !CPL_IS_LSB const auto InsertImplicitEndianCodecIfNeeded = - [this, &oInputArrayMetadata, &eLastType, &osLastCodec]() + [ +#if !CPL_IS_LSB + this, +#endif + &oInputArrayMetadata, &eLastType, &osLastCodec]() { 
- // Insert a little endian codec if we are on a big endian target if (eLastType == ZarrV3Codec::IOType::ARRAY && oInputArrayMetadata.oElt.nativeSize > 1) { - auto poEndianCodec = std::make_unique(); + CPLError(CE_Warning, CPLE_AppDefined, + "'bytes' codec missing. Assuming little-endian storage, " + "but such tolerance may be removed in future versions"); + auto poEndianCodec = std::make_unique(); ZarrArrayMetadata oOutputArrayMetadata; poEndianCodec->InitFromConfiguration( - ZarrV3CodecEndian::GetConfiguration(true), oInputArrayMetadata, + ZarrV3CodecBytes::GetConfiguration(true), oInputArrayMetadata, oOutputArrayMetadata); oInputArrayMetadata = oOutputArrayMetadata; eLastType = poEndianCodec->GetOutputType(); osLastCodec = poEndianCodec->GetName(); +#if !CPL_IS_LSB + // Insert a little endian codec if we are on a big endian target m_apoCodecs.emplace_back(std::move(poEndianCodec)); +#endif } }; -#endif for (const auto &oCodec : oCodecsArray) { @@ -964,8 +1029,11 @@ bool ZarrV3CodecSequence::InitFromJson(const CPLJSONObject &oCodecs) poCodec = std::make_unique(); else if (osName == "blosc") poCodec = std::make_unique(); - else if (osName == "endian") - poCodec = std::make_unique(); + else if (osName == "zstd") + poCodec = std::make_unique(); + else if (osName == "bytes" || + osName == "endian" /* endian is the old name */) + poCodec = std::make_unique(); else if (osName == "transpose") poCodec = std::make_unique(); else @@ -985,12 +1053,10 @@ bool ZarrV3CodecSequence::InitFromJson(const CPLJSONObject &oCodecs) return false; } } -#if !CPL_IS_LSB else { InsertImplicitEndianCodecIfNeeded(); } -#endif ZarrArrayMetadata oOutputArrayMetadata; if (!poCodec->InitFromConfiguration(oCodec["configuration"], @@ -1007,16 +1073,14 @@ bool ZarrV3CodecSequence::InitFromJson(const CPLJSONObject &oCodecs) m_apoCodecs.emplace_back(std::move(poCodec)); } -#if !CPL_IS_LSB InsertImplicitEndianCodecIfNeeded(); -#endif m_oCodecArray = oCodecs.Clone(); return true; } 
/************************************************************************/ -/* ZarrV3CodecEndian::AllocateBuffer() */ +/* ZarrV3CodecSequence::AllocateBuffer() */ /************************************************************************/ bool ZarrV3CodecSequence::AllocateBuffer(ZarrByteVectorQuickResize &abyBuffer) diff --git a/frmts/zarr/zarr_v3_group.cpp b/frmts/zarr/zarr_v3_group.cpp index 4a340778a3be..e149df5b84dc 100644 --- a/frmts/zarr/zarr_v3_group.cpp +++ b/frmts/zarr/zarr_v3_group.cpp @@ -248,6 +248,11 @@ ZarrV3Group::OpenZarrGroup(const std::string &osName, CSLConstList) const // Implicit group if (VSIStatL(osSubDir.c_str(), &sStat) == 0 && VSI_ISDIR(sStat.st_mode)) { + // Note: Python zarr v3.0.2 still generates implicit groups + // See https://github.com/zarr-developers/zarr-python/issues/2794 + CPLError(CE_Warning, CPLE_AppDefined, + "Support for Zarr V3 implicit group is now deprecated, and " + "may be removed in a future version"); auto poSubGroup = ZarrV3Group::Create(m_poSharedResource, GetFullName(), osName, osSubDir); poSubGroup->m_poParent = @@ -580,13 +585,13 @@ std::shared_ptr ZarrV3Group::CreateMDArray( oCodecs.Add(oCodec); } - // Not documented - const char *pszEndian = CSLFetchNameValue(papszOptions, "@ENDIAN"); - if (pszEndian) + // Not documented option, but 'bytes' codec is required + const char *pszEndian = + CSLFetchNameValueDef(papszOptions, "@ENDIAN", "little"); { CPLJSONObject oCodec; - oCodec.Add("name", "endian"); - oCodec.Add("configuration", ZarrV3CodecEndian::GetConfiguration( + oCodec.Add("name", "bytes"); + oCodec.Add("configuration", ZarrV3CodecBytes::GetConfiguration( EQUAL(pszEndian, "little"))); oCodecs.Add(oCodec); } @@ -655,6 +660,18 @@ std::shared_ptr ZarrV3Group::CreateMDArray( typesize, blocksize)); oCodecs.Add(oCodec); } + else if (EQUAL(pszCompressor, "ZSTD")) + { + CPLJSONObject oCodec; + oCodec.Add("name", "zstd"); + const char *pszLevel = + CSLFetchNameValueDef(papszOptions, "ZSTD_LEVEL", "13"); + const bool 
bChecksum = CPLTestBool( + CSLFetchNameValueDef(papszOptions, "ZSTD_CHECKSUM", "FALSE")); + oCodec.Add("configuration", ZarrV3CodecZstd::GetConfiguration( + atoi(pszLevel), bChecksum)); + oCodecs.Add(oCodec); + } else if (!EQUAL(pszCompressor, "NONE")) { CPLError(CE_Failure, CPLE_AppDefined, diff --git a/fuzzers/build_seed_corpus.sh b/fuzzers/build_seed_corpus.sh index 291fc011250d..f107e010e1c2 100755 --- a/fuzzers/build_seed_corpus.sh +++ b/fuzzers/build_seed_corpus.sh @@ -334,6 +334,7 @@ rm -f $OUT/zarr_fuzzer_seed_corpus.zip CUR_DIR=$PWD cd $(dirname $0)/../autotest/gdrivers/data/zarr for dirname in *.zarr v3/*.zr3; do + CUR_DIR2=$PWD cd $dirname { filelist=$(find . -type f) @@ -343,7 +344,7 @@ for dirname in *.zarr v3/*.zr3; do cat $f done } > $CUR_DIR/$(basename $dirname).tar - cd .. + cd $CUR_DIR2 done cd $CUR_DIR zip -r $OUT/zarr_fuzzer_seed_corpus.zip ./*.zarr.tar ./*.zr3.tar >/dev/null diff --git a/port/cpl_compressor.cpp b/port/cpl_compressor.cpp index 5f7a8b2b38b2..f721868020ed 100644 --- a/port/cpl_compressor.cpp +++ b/port/cpl_compressor.cpp @@ -373,7 +373,6 @@ static bool CPLZSTDCompressor(const void *input_data, size_t input_size, if (output_data != nullptr && *output_data != nullptr && output_size != nullptr && *output_size != 0) { - const int level = atoi(CSLFetchNameValueDef(options, "LEVEL", "13")); ZSTD_CCtx *ctx = ZSTD_createCCtx(); if (ctx == nullptr) { @@ -381,8 +380,24 @@ static bool CPLZSTDCompressor(const void *input_data, size_t input_size, return false; } - size_t ret = ZSTD_compressCCtx(ctx, *output_data, *output_size, - input_data, input_size, level); + const int level = atoi(CSLFetchNameValueDef(options, "LEVEL", "13")); + if (ZSTD_isError( + ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, level))) + { + CPLError(CE_Failure, CPLE_AppDefined, "Invalid compression level"); + ZSTD_freeCCtx(ctx); + *output_size = 0; + return false; + } + + if (CPLTestBool(CSLFetchNameValueDef(options, "CHECKSUM", "NO"))) + { + CPL_IGNORE_RET_VAL( + 
ZSTD_CCtx_setParameter(ctx, ZSTD_c_checksumFlag, 1)); + } + + size_t ret = ZSTD_compress2(ctx, *output_data, *output_size, input_data, + input_size); ZSTD_freeCCtx(ctx); if (ZSTD_isError(ret)) { @@ -1370,6 +1385,9 @@ static void CPLAddBuiltinCompressors() "OPTIONS=" " "; const char *const apszMetadata[] = {pszOptions, nullptr}; sComp.papszMetadata = apszMetadata;