Skip to content

Commit

Permalink
Use another filter ID for fixed bytedelta
Browse files Browse the repository at this point in the history
  • Loading branch information
FrancescAlted committed Jul 3, 2023
1 parent f14aa15 commit e5ff89e
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 79 deletions.
2 changes: 1 addition & 1 deletion include/blosc2.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ enum {
BLOSC2_GLOBAL_REGISTERED_FILTERS_START = 32,
BLOSC2_GLOBAL_REGISTERED_FILTERS_STOP = 159,
//!< Blosc-registered filters must be between 32 - 159.
BLOSC2_GLOBAL_REGISTERED_FILTERS = 3,
BLOSC2_GLOBAL_REGISTERED_FILTERS = 4,
//!< Number of Blosc-registered filters at the moment.
BLOSC2_USER_REGISTERED_FILTERS_START = 160,
BLOSC2_USER_REGISTERED_FILTERS_STOP = 255,
Expand Down
3 changes: 2 additions & 1 deletion include/blosc2/filters-registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ extern "C" {
enum {
BLOSC_FILTER_NDCELL = 32,
BLOSC_FILTER_NDMEAN = 33,
BLOSC_FILTER_BYTEDELTA = 34,
BLOSC_FILTER_BYTEDELTA_V1 = 34, // buggy version. See #524
BLOSC_FILTER_BYTEDELTA = 35, // fixed version
};

void register_filters(void);
Expand Down
94 changes: 92 additions & 2 deletions plugins/filters/bytedelta/bytedelta.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ int bytedelta_forward(const uint8_t *input, uint8_t *output, int32_t length, uin
}
if (stream_len > 15) {
_v2 = simd_get_last(v2);

}
#endif // #if defined(CPU_HAS_SIMD)
// scalar leftover
Expand All @@ -129,7 +128,7 @@ int bytedelta_forward(const uint8_t *input, uint8_t *output, int32_t length, uin

// Fetch 16b from N streams, sum SIMD undelta
int bytedelta_backward(const uint8_t *input, uint8_t *output, int32_t length, uint8_t meta,
blosc2_dparams *dparams, uint8_t id) {
blosc2_dparams *dparams, uint8_t id) {
BLOSC_UNUSED_PARAM(id);

int typesize = meta;
Expand Down Expand Up @@ -173,3 +172,94 @@ int bytedelta_backward(const uint8_t *input, uint8_t *output, int32_t length, ui

return BLOSC2_ERROR_SUCCESS;
}

// This is the original (and buggy) version of bytedelta. It is kept here for backwards compatibility.
// See #524 for details.
int bytedelta_forward_v1(const uint8_t *input, uint8_t *output, int32_t length, uint8_t meta,
blosc2_cparams *cparams, uint8_t id) {
BLOSC_UNUSED_PARAM(id);

int typesize = meta;
if (typesize == 0) {
if (cparams->schunk == NULL) {
BLOSC_TRACE_ERROR("When meta is 0, you need to be on a schunk!");
BLOSC_ERROR(BLOSC2_ERROR_FAILURE);
}
blosc2_schunk* schunk = (blosc2_schunk*)(cparams->schunk);
typesize = schunk->typesize;
}

const int stream_len = length / typesize;
for (int ich = 0; ich < typesize; ++ich) {
int ip = 0;
// SIMD delta within each channel, store
#if defined(CPU_HAS_SIMD)
bytes16 v2 = {0};
for (; ip < stream_len - 15; ip += 16) {
bytes16 v = simd_load(input);
input += 16;
bytes16 delta = simd_sub(v, simd_concat(v, v2));
simd_store(output, delta);
output += 16;
v2 = v;
}
#endif // #if defined(CPU_HAS_SIMD)
// scalar leftover
uint8_t _v2 = 0;
for (; ip < stream_len ; ip++) {
uint8_t v = *input;
input++;
*output = v - _v2;
output++;
_v2 = v;
}
}

return BLOSC2_ERROR_SUCCESS;
}


// This is the original (and buggy) version of bytedelta. It is kept here for backwards compatibility.
// See #524 for details.
int bytedelta_backward_v1(const uint8_t *input, uint8_t *output, int32_t length, uint8_t meta,
blosc2_dparams *dparams, uint8_t id) {
BLOSC_UNUSED_PARAM(id);

int typesize = meta;
if (typesize == 0) {
if (dparams->schunk == NULL) {
BLOSC_TRACE_ERROR("When meta is 0, you need to be on a schunk!");
BLOSC_ERROR(BLOSC2_ERROR_FAILURE);
}
blosc2_schunk* schunk = (blosc2_schunk*)(dparams->schunk);
typesize = schunk->typesize;
}

const int stream_len = length / typesize;
for (int ich = 0; ich < typesize; ++ich) {
int ip = 0;
// SIMD fetch 16 bytes from each channel, prefix-sum un-delta
#if defined(CPU_HAS_SIMD)
bytes16 v2 = {0};
for (; ip < stream_len - 15; ip += 16) {
bytes16 v = simd_load(input);
input += 16;
// un-delta via prefix sum
v2 = simd_add(simd_prefix_sum(v), simd_duplane15(v2));
simd_store(output, v2);
output += 16;
}
#endif // #if defined(CPU_HAS_SIMD)
// scalar leftover
uint8_t _v2 = 0;
for (; ip < stream_len; ip++) {
uint8_t v = *input + _v2;
input++;
*output = v;
output++;
_v2 = v;
}
}

return BLOSC2_ERROR_SUCCESS;
}
10 changes: 8 additions & 2 deletions plugins/filters/bytedelta/bytedelta.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ int bytedelta_forward(const uint8_t* input, uint8_t* output, int32_t length, uin
blosc2_cparams* cparams, uint8_t id);

int bytedelta_backward(const uint8_t* input, uint8_t* output, int32_t length, uint8_t meta,
blosc2_dparams* dparams, uint8_t id);
blosc2_dparams* dparams, uint8_t id);

#endif /* BLOSC_PLUGINS_FILTERS_BYTEDELTA_BYTEDELTA_H*/
int bytedelta_forward_v1(const uint8_t* input, uint8_t* output, int32_t length, uint8_t meta,
blosc2_cparams* cparams, uint8_t id);

int bytedelta_backward_v1(const uint8_t* input, uint8_t* output, int32_t length, uint8_t meta,
blosc2_dparams* dparams, uint8_t id);

#endif /* BLOSC_PLUGINS_FILTERS_BYTEDELTA_BYTEDELTA_H */
79 changes: 8 additions & 71 deletions plugins/filters/bytedelta/test_bytedelta.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,63 +49,6 @@
#include <stdio.h>
#include <stdint.h>

/* The original implmentation of the bytedelta filter had incorrect
* roundtrip behavior between SIMD and non-SIMD binaries. This filter provides
* the correct behavior regardless of SIMD availability.
* See https://github.com/Blosc/c-blosc2/issues/524 */
int correct_bytedelta_forward(const uint8_t *input, uint8_t *output, int32_t length, uint8_t meta, blosc2_cparams *cparams, uint8_t id)
{
int typesize = meta;
if (typesize == 0) {
if (cparams->schunk == NULL) {
BLOSC_TRACE_ERROR("When meta is 0, you need to be on a schunk!");
BLOSC_ERROR(BLOSC2_ERROR_FAILURE);
}
blosc2_schunk* schunk = (blosc2_schunk*)(cparams->schunk);
typesize = schunk->typesize;
}
int stream_len = length / typesize;
for (int ich = 0; ich < typesize; ++ich) {
int ip = 0;
uint8_t _v2 = 0;
for (; ip < stream_len ; ip++) {
uint8_t v = *input;
input++;
*output = v - _v2;
output++;
_v2 = v;
}
}
return BLOSC2_ERROR_SUCCESS;
}

int correct_bytedelta_backward(const uint8_t *input, uint8_t *output, int32_t length, uint8_t meta,
blosc2_dparams *dparams, uint8_t id) {
int typesize = meta;
if (typesize == 0) {
if (dparams->schunk == NULL) {
BLOSC_TRACE_ERROR("When meta is 0, you need to be on a schunk!");
BLOSC_ERROR(BLOSC2_ERROR_FAILURE);
}
blosc2_schunk* schunk = (blosc2_schunk*)(dparams->schunk);
typesize = schunk->typesize;
}

const int stream_len = length / typesize;
for (int ich = 0; ich < typesize; ++ich) {
int ip = 0;
uint8_t _v2 = 0;
for (; ip < stream_len; ip++) {
uint8_t v = *input + _v2;
input++;
*output = v;
output++;
_v2 = v;
}
}

return BLOSC2_ERROR_SUCCESS;
}

static int test_bytedelta(blosc2_schunk *schunk) {

Expand All @@ -119,14 +62,6 @@ static int test_bytedelta(blosc2_schunk *schunk) {
uint8_t *data_out = malloc(chunksize + BLOSC2_MAX_OVERHEAD);
uint8_t *data_dest = malloc(chunksize);

blosc2_filter correct_bytedelta = {.id = 250, .name = "bytedelta_correct", .version = 1,
.forward = correct_bytedelta_forward, .backward = correct_bytedelta_backward};
if (blosc2_register_filter(&correct_bytedelta)) {
printf("Cannot register bytedelta filter!");
return -1;
}


// Test each pair of possible forward/backward implementations:
for (int direction = 0; direction < 4; direction++) {
bool write_correct = (direction % 2 == 0);
Expand All @@ -139,9 +74,9 @@ static int test_bytedelta(blosc2_schunk *schunk) {
cparams.compcode = BLOSC_LZ4;
cparams.filters[BLOSC2_MAX_FILTERS - 2] = BLOSC_SHUFFLE;
if (write_correct) {
cparams.filters[BLOSC2_MAX_FILTERS - 1] = 250;
} else {
cparams.filters[BLOSC2_MAX_FILTERS - 1] = BLOSC_FILTER_BYTEDELTA;
} else {
cparams.filters[BLOSC2_MAX_FILTERS - 1] = BLOSC_FILTER_BYTEDELTA_V1;
}
cparams.filters_meta[BLOSC2_MAX_FILTERS - 1] = 0; // 0 means typesize when using schunks
cparams.clevel = 9;
Expand Down Expand Up @@ -174,7 +109,8 @@ static int test_bytedelta(blosc2_schunk *schunk) {
}

/* Compress with clevel=5 and shuffle active */
csize = blosc2_compress_ctx(cctx, data_in, chunksize, data_out, chunksize + BLOSC2_MAX_OVERHEAD);
csize = blosc2_compress_ctx(cctx, data_in, chunksize, data_out,
chunksize + BLOSC2_MAX_OVERHEAD);
if (csize == 0) {
printf("Buffer is incompressible. Giving up.\n");
return -1;
Expand All @@ -186,13 +122,14 @@ static int test_bytedelta(blosc2_schunk *schunk) {

// Force usage of alternative decoder
if (read_correct) {
data_out[BLOSC2_CHUNK_FILTER_CODES + BLOSC2_MAX_FILTERS - 1] = 250;
} else {
data_out[BLOSC2_CHUNK_FILTER_CODES + BLOSC2_MAX_FILTERS - 1] = BLOSC_FILTER_BYTEDELTA;
} else {
data_out[BLOSC2_CHUNK_FILTER_CODES + BLOSC2_MAX_FILTERS - 1] = BLOSC_FILTER_BYTEDELTA_V1;
}

/* Decompress */
dsize = blosc2_decompress_ctx(dctx, data_out, chunksize + BLOSC2_MAX_OVERHEAD, data_dest, chunksize);
dsize = blosc2_decompress_ctx(dctx, data_out, chunksize + BLOSC2_MAX_OVERHEAD,
data_dest, chunksize);
if (dsize <= 0) {
printf("Decompression error. Error code: %" PRId64 "\n", dsize);
return (int) dsize;
Expand Down
14 changes: 12 additions & 2 deletions plugins/filters/filters-registry.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (c) 2021 The Blosc Development Team <[email protected]>
Copyright (c) 2021 The Blosc Development Team <[email protected]>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
*/
Expand Down Expand Up @@ -29,11 +29,21 @@ void register_filters(void) {
ndmean.backward = &ndmean_backward;
register_filter_private(&ndmean);

// Buggy version. See #524
blosc2_filter bytedelta_v1;
bytedelta_v1.id = BLOSC_FILTER_BYTEDELTA_V1;
bytedelta_v1.name = "bytedelta_v1";
bytedelta_v1.version = 1;
bytedelta_v1.forward = &bytedelta_forward_v1;
bytedelta_v1.backward = &bytedelta_backward_v1;
register_filter_private(&bytedelta_v1);

// Fixed version. See #524
blosc2_filter bytedelta;
bytedelta.id = BLOSC_FILTER_BYTEDELTA;
bytedelta.name = "bytedelta";
bytedelta.version = 1;
bytedelta.forward = &bytedelta_forward;
bytedelta.backward = &bytedelta_backward;
register_filter_private(&bytedelta);

}

0 comments on commit e5ff89e

Please sign in to comment.