Skip to content

Commit

Permalink
Merge pull request #32 from osamu620/develop
Browse files: browse the repository at this point in the history
Improve zigzag scan
  • Loading branch information
osamu620 authored Oct 6, 2023
2 parents f788f96 + 8afe6a8 commit 436efcd
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 14 deletions.
24 changes: 16 additions & 8 deletions lib/block_coding_128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,14 @@ auto row4_ne_0 = VecFromMask(s16, Eq(row4, zero));
auto row5_ne_0 = VecFromMask(s16, Eq(row5, zero));
auto row6_ne_0 = VecFromMask(s16, Eq(row6, zero));
auto row7_ne_0 = VecFromMask(s16, Eq(row7, zero));
auto row10_ne_0 = ConcatEven(u8, BitCast(u8, row0_ne_0), BitCast(u8, row1_ne_0));
auto row32_ne_0 = ConcatEven(u8, BitCast(u8, row2_ne_0), BitCast(u8, row3_ne_0));
auto row54_ne_0 = ConcatEven(u8, BitCast(u8, row4_ne_0), BitCast(u8, row5_ne_0));
auto row76_ne_0 = ConcatEven(u8, BitCast(u8, row6_ne_0), BitCast(u8, row7_ne_0));
auto row10_ne_0 = OrderedTruncate2To(u8, BitCast(u16, row1_ne_0), BitCast(u16, row0_ne_0));
auto row32_ne_0 = OrderedTruncate2To(u8, BitCast(u16, row3_ne_0), BitCast(u16, row2_ne_0));
auto row54_ne_0 = OrderedTruncate2To(u8, BitCast(u16, row5_ne_0), BitCast(u16, row4_ne_0));
auto row76_ne_0 = OrderedTruncate2To(u8, BitCast(u16, row7_ne_0), BitCast(u16, row6_ne_0));
// auto row10_ne_0 = ConcatEven(u8, BitCast(u8, row0_ne_0), BitCast(u8, row1_ne_0));
// auto row32_ne_0 = ConcatEven(u8, BitCast(u8, row2_ne_0), BitCast(u8, row3_ne_0));
// auto row54_ne_0 = ConcatEven(u8, BitCast(u8, row4_ne_0), BitCast(u8, row5_ne_0));
// auto row76_ne_0 = ConcatEven(u8, BitCast(u8, row6_ne_0), BitCast(u8, row7_ne_0));

/* { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 } */
HWY_ALIGN constexpr uint64_t bm[] = {0x0102040810204080, 0x0102040810204080};
Expand Down Expand Up @@ -129,10 +133,14 @@ auto row6_lz = LeadingZeroCount(abs_row6);
auto row7_lz = LeadingZeroCount(abs_row7);

/* Narrow leading zero count to 8 bits. */
auto row01_lz = ConcatEven(u8, BitCast(u8, row1_lz), BitCast(u8, row0_lz));
auto row23_lz = ConcatEven(u8, BitCast(u8, row3_lz), BitCast(u8, row2_lz));
auto row45_lz = ConcatEven(u8, BitCast(u8, row5_lz), BitCast(u8, row4_lz));
auto row67_lz = ConcatEven(u8, BitCast(u8, row7_lz), BitCast(u8, row6_lz));
auto row01_lz = OrderedTruncate2To(u8, BitCast(u16, row0_lz), BitCast(u16, row1_lz));
auto row23_lz = OrderedTruncate2To(u8, BitCast(u16, row2_lz), BitCast(u16, row3_lz));
auto row45_lz = OrderedTruncate2To(u8, BitCast(u16, row4_lz), BitCast(u16, row5_lz));
auto row67_lz = OrderedTruncate2To(u8, BitCast(u16, row6_lz), BitCast(u16, row7_lz));
// auto row01_lz = ConcatEven(u8, BitCast(u8, row1_lz), BitCast(u8, row0_lz));
// auto row23_lz = ConcatEven(u8, BitCast(u8, row3_lz), BitCast(u8, row2_lz));
// auto row45_lz = ConcatEven(u8, BitCast(u8, row5_lz), BitCast(u8, row4_lz));
// auto row67_lz = ConcatEven(u8, BitCast(u8, row7_lz), BitCast(u8, row6_lz));
/* Compute nbits needed to specify magnitude of each coefficient. */
const auto sixteen = Set(u8, 16);
auto row01_nbits = Sub(sixteen, row01_lz);
Expand Down
8 changes: 4 additions & 4 deletions lib/block_coding_256.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ auto row01_ne_0 = VecFromMask(s16, Eq(row01, zero));
auto row23_ne_0 = VecFromMask(s16, Eq(row23, zero));
auto row45_ne_0 = VecFromMask(s16, Eq(row45, zero));
auto row67_ne_0 = VecFromMask(s16, Eq(row67, zero));
auto row3210_ne_0 = ConcatEven(u8, BitCast(u8, row23_ne_0), BitCast(u8, row01_ne_0));
auto row7654_ne_0 = ConcatEven(u8, BitCast(u8, row67_ne_0), BitCast(u8, row45_ne_0));
auto row3210_ne_0 = OrderedTruncate2To(u8, BitCast(u16, row01_ne_0), BitCast(u16, row23_ne_0));
auto row7654_ne_0 = OrderedTruncate2To(u8, BitCast(u16, row45_ne_0), BitCast(u16, row67_ne_0));

/* { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 } */
HWY_ALIGN constexpr uint64_t bm[] = {0x0102040810204080, 0x0102040810204080, 0x0102040810204080,
Expand Down Expand Up @@ -96,8 +96,8 @@ auto row23_lz = LeadingZeroCount(abs_row23);
auto row45_lz = LeadingZeroCount(abs_row45);
auto row67_lz = LeadingZeroCount(abs_row67);
/* Narrow leading zero count to 8 bits. */
auto row0123_lz = ConcatEven(u8, BitCast(u8, row23_lz), BitCast(u8, row01_lz));
auto row4567_lz = ConcatEven(u8, BitCast(u8, row67_lz), BitCast(u8, row45_lz));
auto row0123_lz = OrderedTruncate2To(u8, BitCast(u16, row01_lz), BitCast(u16, row23_lz));
auto row4567_lz = OrderedTruncate2To(u8, BitCast(u16, row45_lz), BitCast(u16, row67_lz));
/* Compute nbits needed to specify magnitude of each coefficient. */
auto row0123_nbits = Sub(Set(u8, 16), row0123_lz);
auto row4567_nbits = Sub(Set(u8, 16), row4567_lz);
Expand Down
4 changes: 2 additions & 2 deletions lib/block_coding_512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ auto row4567 = TwoTablesLookupLanes(s16, v0, v1, SetTableIndices(s16, &indices[1
auto zero = Zero(s16);
auto row0123_ne_0 = VecFromMask(s16, Eq(row0123, zero));
auto row4567_ne_0 = VecFromMask(s16, Eq(row4567, zero));
auto row76543210_ne_0 = ConcatEven(u8, BitCast(u8, row4567_ne_0), BitCast(u8, row0123_ne_0));
auto row76543210_ne_0 = OrderedTruncate2To(u8, BitCast(u16, row0123_ne_0), BitCast(u16, row4567_ne_0));

/* { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 } */
HWY_ALIGN constexpr uint64_t bm[] = {0x0102040810204080, 0x0102040810204080, 0x0102040810204080,
Expand All @@ -50,7 +50,7 @@ auto abs_row4567 = Abs(row4567);
auto row0123_lz = LeadingZeroCount(abs_row0123);
auto row4567_lz = LeadingZeroCount(abs_row4567);
/* Narrow leading zero count to 8 bits. */
auto row01234567_lz = ConcatEven(u8, BitCast(u8, row4567_lz), BitCast(u8, row0123_lz));
auto row01234567_lz = OrderedTruncate2To(u8, BitCast(u16, row0123_lz), BitCast(u16, row4567_lz));
/* Compute nbits needed to specify magnitude of each coefficient. */
auto row01234567_nbits = Sub(Set(u8, 16), row01234567_lz);
/* Store nbits. */
Expand Down

0 comments on commit 436efcd

Please sign in to comment.