From ff6102fadb0cf845690d89e044b4f8c84fdeb128 Mon Sep 17 00:00:00 2001
From: Kristof <kristofr@gmail.com>
Date: Mon, 10 Jun 2024 17:19:16 +0200
Subject: [PATCH 1/3] nonzero divisors

---
 src/structs/lepton_decoder.rs      |  4 ++--
 src/structs/lepton_encoder.rs      |  4 ++--
 src/structs/probability_tables.rs  | 11 ++---------
 src/structs/quantization_tables.rs | 25 +++++++++++++++++++++++++
 4 files changed, 31 insertions(+), 13 deletions(-)
diff --git a/src/structs/lepton_decoder.rs b/src/structs/lepton_decoder.rs
index ae2caf88..4a25a614 100644
--- a/src/structs/lepton_decoder.rs
+++ b/src/structs/lepton_decoder.rs
@@ -529,13 +529,13 @@ fn decode_one_edge<R: Read, const ALL_PRESENT: bool, const HORIZONTAL: bool>(
 
     let mut coord_tr = delta;
 
-    for _lane in 0..7 {
+    for lane in 0..7 {
         if num_non_zeros_edge == 0 {
             break;
         }
 
         let best_prior =
-            pt.calc_coefficient_context8_lak::<ALL_PRESENT, HORIZONTAL>(qt, coord_tr, pred);
+            pt.calc_coefficient_context8_lak::<ALL_PRESENT, HORIZONTAL>(qt, lane, pred);
 
         let coef = model_per_color.read_edge_coefficient(
             bool_reader,
diff --git a/src/structs/lepton_encoder.rs b/src/structs/lepton_encoder.rs
index 71e4db6a..5ef16e46 100644
--- a/src/structs/lepton_encoder.rs
+++ b/src/structs/lepton_encoder.rs
@@ -586,13 +586,13 @@ fn encode_one_edge<W: Write, const ALL_PRESENT: bool, const HORIZONTAL: bool>(
 
     let mut coord_tr = delta;
 
-    for _lane in 0..7 {
+    for lane in 0..7 {
         if num_non_zeros_edge == 0 {
             break;
         }
 
         let best_prior =
-            pt.calc_coefficient_context8_lak::<ALL_PRESENT, HORIZONTAL>(qt, coord_tr, pred);
+            pt.calc_coefficient_context8_lak::<ALL_PRESENT, HORIZONTAL>(qt, lane, pred);
 
         let coef = block.get_coefficient(coord_tr);
 
diff --git a/src/structs/probability_tables.rs b/src/structs/probability_tables.rs
index 8c0f18df..9bd50534 100644
--- a/src/structs/probability_tables.rs
+++ b/src/structs/probability_tables.rs
@@ -208,7 +208,7 @@ impl ProbabilityTables {
     pub fn calc_coefficient_context8_lak<const ALL_PRESENT: bool, const HORIZONTAL: bool>(
         &self,
         qt: &QuantizationTables,
-        coefficient_tr: usize,
+        lane: usize,
         pred: &[i32; 8],
     ) -> i32 {
         if !ALL_PRESENT
@@ -217,14 +217,7 @@ impl ProbabilityTables {
             return 0;
         }
 
-        let mut best_prior: i32 = pred[if HORIZONTAL {
-            coefficient_tr >> 3
-        } else {
-            coefficient_tr
-        }];
-        best_prior /= (qt.get_quantization_table_transposed()[coefficient_tr] as i32) << 13;
-
-        best_prior
+        pred[lane + 1] / qt.get_quantization_table_divisors::<HORIZONTAL>()[lane + 1].get()
     }
 
     pub fn adv_predict_dc_pix<const ALL_PRESENT: bool>(
diff --git a/src/structs/quantization_tables.rs b/src/structs/quantization_tables.rs
index 34093b6f..311a0701 100644
--- a/src/structs/quantization_tables.rs
+++ b/src/structs/quantization_tables.rs
@@ -4,6 +4,8 @@
  *  This software incorporates material from third parties. See NOTICE.txt for details.
  *--------------------------------------------------------------------------------------------*/
 
+use std::num::NonZeroI32;
+
 use crate::consts::*;
 use crate::helpers::*;
 
@@ -12,6 +14,10 @@ use super::jpeg_header::JPegHeader;
 pub struct QuantizationTables {
     quantization_table: [u16; 64],
     quantization_table_transposed: [u16; 64],
+
+    quantization_table_divisors_horiz: [NonZeroI32; 8],
+    quantization_table_divisors_vert: [NonZeroI32; 8],
+
     // Values for discrimination between "regular" and "noise" part of
     // edge AC coefficients, used in `read/write_edge_coefficient`.
     // Calculated using approximate maximal magnitudes
@@ -31,6 +37,8 @@ impl QuantizationTables {
             quantization_table: [0; 64],
             quantization_table_transposed: [0; 64],
             min_noise_threshold: [0; 14],
+            quantization_table_divisors_horiz: [NonZeroI32::new(1).unwrap(); 8],
+            quantization_table_divisors_vert: [NonZeroI32::new(1).unwrap(); 8],
         };
 
         for pixel_row in 0..8 {
@@ -41,6 +49,16 @@ impl QuantizationTables {
 
                 retval.quantization_table[coord] = q;
                 retval.quantization_table_transposed[coord_tr] = q;
+
+                if pixel_row == 0 {
+                    retval.quantization_table_divisors_horiz[pixel_column] =
+                        NonZeroI32::new(i32::from(q) << 13).unwrap();
+                }
+
+                if pixel_column == 0 {
+                    retval.quantization_table_divisors_vert[pixel_row] =
+                        NonZeroI32::new(i32::from(q) << 13).unwrap();
+                }
             }
         }
 
@@ -70,6 +88,13 @@ impl QuantizationTables {
         &self.quantization_table_transposed
     }
 
+    /// Precomputed lak divisors: top row when `HORIZONTAL`, otherwise left column.
+    pub fn get_quantization_table_divisors<const HORIZONTAL: bool>(&self) -> &[NonZeroI32; 8] {
+        match HORIZONTAL {
+            true => &self.quantization_table_divisors_horiz,
+            false => &self.quantization_table_divisors_vert,
+        }
+    }
     pub fn get_min_noise_threshold(&self, coef: usize) -> u8 {
         self.min_noise_threshold[coef]
     }

From 5714798b5c5bda028ce3333559df65f2694f758f Mon Sep 17 00:00:00 2001
From: Kristof <kristofr@gmail.com>
Date: Tue, 11 Jun 2024 15:50:30 +0200
Subject: [PATCH 2/3] added comments

---
 src/structs/quantization_tables.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/structs/quantization_tables.rs b/src/structs/quantization_tables.rs
index 311a0701..fe15713f 100644
--- a/src/structs/quantization_tables.rs
+++ b/src/structs/quantization_tables.rs
@@ -13,9 +13,16 @@ use super::jpeg_header::JPegHeader;
 
 pub struct QuantizationTables {
     quantization_table: [u16; 64],
+
+    /// transposed version of quantization table
     quantization_table_transposed: [u16; 64],
 
+    /// Top-row quantization values pre-multiplied by 8192 (`q << 13`), used as the divisors in the final step of the lak calculation.
+    /// Stored as `NonZeroI32` so the compiler knows they are non-zero and can skip the division-by-zero check.
     quantization_table_divisors_horiz: [NonZeroI32; 8],
+
+    /// Left-column quantization values pre-multiplied by 8192 (`q << 13`), used as the divisors in the final step of the lak calculation.
+    /// Stored as `NonZeroI32` so the compiler knows they are non-zero and can skip the division-by-zero check.
     quantization_table_divisors_vert: [NonZeroI32; 8],
 
     // Values for discrimination between "regular" and "noise" part of

From 89424afef62505736305c88715a442a0bcf8e653 Mon Sep 17 00:00:00 2001
From: Kristof <kristofr@gmail.com>
Date: Tue, 11 Jun 2024 15:58:07 +0200
Subject: [PATCH 3/3] keep simd type

---
 src/structs/lepton_decoder.rs     | 8 ++++----
 src/structs/lepton_encoder.rs     | 8 ++++----
 src/structs/probability_tables.rs | 4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/structs/lepton_decoder.rs b/src/structs/lepton_decoder.rs
index 4a25a614..99d86e03 100644
--- a/src/structs/lepton_decoder.rs
+++ b/src/structs/lepton_decoder.rs
@@ -475,7 +475,7 @@ fn decode_edge<R: Read, const ALL_PRESENT: bool>(
     decode_one_edge::<R, ALL_PRESENT, true>(
         model_per_color,
         bool_reader,
-        &curr_horiz_pred.to_array(),
+        &curr_horiz_pred,
         here_mut,
         qt,
         pt,
@@ -486,7 +486,7 @@ fn decode_edge<R: Read, const ALL_PRESENT: bool>(
     decode_one_edge::<R, ALL_PRESENT, false>(
         model_per_color,
         bool_reader,
-        &curr_vert_pred.to_array(),
+        &curr_vert_pred,
         here_mut,
         qt,
         pt,
@@ -504,7 +504,7 @@ fn decode_edge<R: Read, const ALL_PRESENT: bool>(
 fn decode_one_edge<R: Read, const ALL_PRESENT: bool, const HORIZONTAL: bool>(
     model_per_color: &mut ModelPerColor,
     bool_reader: &mut VPXBoolReader<R>,
-    pred: &[i32; 8],
+    pred: &i32x8,
     here_mut: &mut AlignedBlock,
     qt: &QuantizationTables,
     pt: &ProbabilityTables,
@@ -529,7 +529,7 @@ fn decode_one_edge<R: Read, const ALL_PRESENT: bool, const HORIZONTAL: bool>(
 
     let mut coord_tr = delta;
 
-    for lane in 0..7 {
+    for lane in 1..8 {
         if num_non_zeros_edge == 0 {
             break;
         }
diff --git a/src/structs/lepton_encoder.rs b/src/structs/lepton_encoder.rs
index 5ef16e46..2f7fa476 100644
--- a/src/structs/lepton_encoder.rs
+++ b/src/structs/lepton_encoder.rs
@@ -500,7 +500,7 @@ fn encode_edge<W: Write, const ALL_PRESENT: bool>(
         here_tr,
         model_per_color,
         bool_writer,
-        &curr_horiz_pred.to_array(),
+        &curr_horiz_pred,
         qt,
         pt,
         num_non_zeros_bin,
@@ -512,7 +512,7 @@ fn encode_edge<W: Write, const ALL_PRESENT: bool>(
         here_tr,
         model_per_color,
         bool_writer,
-        &curr_vert_pred.to_array(),
+        &curr_vert_pred,
         qt,
         pt,
         num_non_zeros_bin,
@@ -538,7 +538,7 @@ fn encode_one_edge<W: Write, const ALL_PRESENT: bool, const HORIZONTAL: bool>(
     block: &AlignedBlock,
     model_per_color: &mut ModelPerColor,
     bool_writer: &mut VPXBoolWriter<W>,
-    pred: &[i32; 8],
+    pred: &i32x8,
     qt: &QuantizationTables,
     pt: &ProbabilityTables,
     num_non_zeros_bin: u8,
@@ -586,7 +586,7 @@ fn encode_one_edge<W: Write, const ALL_PRESENT: bool, const HORIZONTAL: bool>(
 
     let mut coord_tr = delta;
 
-    for lane in 0..7 {
+    for lane in 1..8 {
         if num_non_zeros_edge == 0 {
             break;
         }
diff --git a/src/structs/probability_tables.rs b/src/structs/probability_tables.rs
index 9bd50534..db1ee8b7 100644
--- a/src/structs/probability_tables.rs
+++ b/src/structs/probability_tables.rs
@@ -209,7 +209,7 @@ impl ProbabilityTables {
         &self,
         qt: &QuantizationTables,
         lane: usize,
-        pred: &[i32; 8],
+        pred: &i32x8,
     ) -> i32 {
         if !ALL_PRESENT
             && ((HORIZONTAL && !self.above_present) || (!HORIZONTAL && !self.left_present))
@@ -217,7 +217,7 @@ impl ProbabilityTables {
             return 0;
         }
 
-        pred[lane + 1] / qt.get_quantization_table_divisors::<HORIZONTAL>()[lane + 1].get()
+        pred.as_array_ref()[lane] / qt.get_quantization_table_divisors::<HORIZONTAL>()[lane].get()
     }
 
     pub fn adv_predict_dc_pix<const ALL_PRESENT: bool>(