From b49c9f2f1997d891ba94d40d007c49c9348779aa Mon Sep 17 00:00:00 2001
From: Gabe Rodriguez <grod220@gmail.com>
Date: Thu, 30 Oct 2025 14:07:54 -0400
Subject: [PATCH 1/8] stake program float to fixed-point

---
 ...91-replace-stake-program-floating-point.md | 138 ++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 proposals/0391-replace-stake-program-floating-point.md

diff --git a/proposals/0391-replace-stake-program-floating-point.md b/proposals/0391-replace-stake-program-floating-point.md
new file mode 100644
index 000000000..db54f2ba7
--- /dev/null
+++ b/proposals/0391-replace-stake-program-floating-point.md
@@ -0,0 +1,138 @@
+---
+simd: "0391"
+title: Stake Program Float to Fixed-Point
+authors:
+  - Gabe (Anza)
+  - Pete (Anza)
+category: Standard
+type: Core
+status: Idea
+created: 2025-10-23
+feature: (to be assigned upon acceptance)
+---
+
+## Summary
+
+Replace of all floating-point (`f64`) arithmetic within the Solana
+Stake Program's warmup and cooldown logic with a deterministic
+fixed-point implementation using integer arithmetic. This change is a
+prerequisite to the Stake Program's migration to a `no_std`
+Pinocchio-based implementation and ensures compatibility with upstream
+eBPF toolchains, which do not support floating-point operations.
+
+## Motivation
+
+The Stake Program's use of `f64` presents two blockers to the upcoming roadmap:
+
+1. **Upstream eBPF incompatibility:** Standard eBPF strictly forbids
+   floating-point operations. While the solana fork (SBF) currently
+   supports `f64` via a deterministic (and inefficient) `soft-float`
+   compiler built-in, aligning with upstream standards requires
+   removing all `f64` usage from the program.
+
+2. **Pinocchio migration inconsistency:** There is appetite for
+   converting the Stake Program to a highly efficient, `no_std`
+   Pinocchio implementation (reducing CU usage by +90%). These efforts
+   are undermined by the immense cost of soft-float operations.
+   [Benchmarking shows](https://solana.com/docs/programs/limitations#limited-float-support:~:text=Recent%20results%20show,Divide%20%20%20%20%20%209%20%20%20219)
+   a 22x performance penalty for a single multiplication of an `f32`
+   versus a `u64`. Using an `f64` with an operation
+   like division is even more complex. Further, doing the float
+   migration independently allows p-stake to enforce semantic
+   equivalence for its migration.
+
+## Requirements
+
+The new implementation must be a replacement that precisely models the
+intent of the original logic. Any resulting differences in output
+should be minor and a direct result of improved numerical precision.
+
+## New Terminology
+
+None
+
+## Detailed Design
+
+### Proposed Fixed-Point Implementation
+
+This proposal replaces `f64` with _rational arithmetic_, expressing
+the `warmup_cooldown_rate` as a fraction and reordering operations to
+preserve precision while using integer arithmetic.
+
+- The floating-point rates will be converted to their fractional
+  equivalents:
+  - `DEFAULT_WARMUP_COOLDOWN_RATE` (`0.25`) becomes a fraction of
+    **(numerator: 1, denominator: 4)**.
+  - `NEW_WARMUP_COOLDOWN_RATE` (`0.09`) becomes a fraction of
+    **(numerator: 9, denominator: 100)**.
+
+- The current flow
+  `(account_portion / cluster_portion) * (cluster_effective * rate)` is
+  reordered to
+  `(account_portion * cluster_effective * rate_numerator) / (cluster_portion * rate_denominator)`.
+  Instead of performing divisions early in the process (which truncates
+  intermediate results), all multiplications are performed first.
+
+- All intermediate multiplications are performed using `u128` to
+  prevent overflow.
+
+### State Compatibility
+
+To maintain backwards compatibility with on-chain stake account data,
+the `Delegation` struct will be modified:
+
+```diff
+pub struct Delegation {
+    pub voter_pubkey: Pubkey,
+    pub stake: u64,
+    pub activation_epoch: Epoch,
+    pub deactivation_epoch: Epoch,
+-   pub warmup_cooldown_rate: f64,
++    _reserved: [u8; 8],
+}
+```
+
+This preserves the exact memory size and layout of existing accounts.
+It is a legacy field anyway, with the actual rate being determined
+dynamically in functions.
+
+## Alternatives Considered
+
+Decimal scaling factor. Uses a uniform scaling factor represent rates
+as integers like BPS (e.g 0.25 = 2500). Rejected as rational
+arithmetic is more precise. It may be easier to use for external
+consumers, but these values are really only used internally to the
+stake interface crate.
+
+## Impact
+
+- **Stake Program:** The program must be updated to use the new
+  integer-based calculation helpers from `solana-stake-interface`. It is
+  doing so mostly through its use of
+  `stake.delegation.stake_activating_and_deactivating()`. Also, the new
+  `Delegation` struct definition (with its private field), will likely
+  impact how its being instantiated in a few areas.
+
+- **Agave:** Update the workspace dependency on
+  `solana-stake-interface`. Runtime stake processing already funnels
+  through Delegation::stake_activating_and_deactivating, so a dependency
+  bump automatically picks up the fixed-point math without touching
+  Agave code.
+
+- **Firedancer:** Will also need to update their stake interface
+  dependency in lockstep with Agave.
+
+## Security Considerations
+
+1. **Unit tests:** Baseline of correctness by testing specific, known
+   scenarios and edge cases.
+2. **Differential Fuzzing (`proptest`):**
+   - An oracle function preserving the original `f64` logic will be
+     maintained for testing purposes only.
+   - The test will run the new integer implementation against the oracle
+     with millions of random inputs.
+   - Assert that the results are within a relative tolerance to account
+     for the increased precision.
+3. **External Audit:** A comprehensive audit from an auditor with good
+   skills in numerical audits to validate arithmetic equivalence or
+   regressions.

From 1eff97a83bdd933944fba06c8d7e1ab9c2c13592 Mon Sep 17 00:00:00 2001
From: Gabe Rodriguez <grod220@gmail.com>
Date: Mon, 17 Nov 2025 16:14:29 +0100
Subject: [PATCH 2/8] Review updates

---
 ...91-replace-stake-program-floating-point.md | 292 +++++++++++++-----
 1 file changed, 222 insertions(+), 70 deletions(-)

diff --git a/proposals/0391-replace-stake-program-floating-point.md b/proposals/0391-replace-stake-program-floating-point.md
index db54f2ba7..c5bc07bb8 100644
--- a/proposals/0391-replace-stake-program-floating-point.md
+++ b/proposals/0391-replace-stake-program-floating-point.md
@@ -13,39 +13,40 @@ feature: (to be assigned upon acceptance)
 
 ## Summary
 
-Replace of all floating-point (`f64`) arithmetic within the Solana
-Stake Program's warmup and cooldown logic with a deterministic
-fixed-point implementation using integer arithmetic. This change is a
-prerequisite to the Stake Program's migration to a `no_std`
-Pinocchio-based implementation and ensures compatibility with upstream
-eBPF toolchains, which do not support floating-point operations.
+Replace all floating-point (`f64`) arithmetic within the Solana Stake Program's
+warmup and cooldown logic with a fixed-point implementation using integer
+arithmetic. The new logic expresses the warmup/cooldown rate in basis points
+(bps) and performs all proportional stake calculations using `u128`
+intermediates.
+
+This change is a prerequisite to the Stake Program's migration to a `no_std`
+Pinocchio-based implementation and ensures compatibility with upstream eBPF
+toolchains, which do not support floating-point operations.
 
 ## Motivation
 
 The Stake Program's use of `f64` presents two blockers to the upcoming roadmap:
 
 1. **Upstream eBPF incompatibility:** Standard eBPF strictly forbids
-   floating-point operations. While the solana fork (SBF) currently
-   supports `f64` via a deterministic (and inefficient) `soft-float`
-   compiler built-in, aligning with upstream standards requires
-   removing all `f64` usage from the program.
+   floating-point operations. While the solana fork (SBF) currently supports `f64`
+   via a deterministic (and inefficient) `soft-float` compiler built-in, aligning
+   with upstream standards requires removing all `f64` usage from the program.
 
 2. **Pinocchio migration inconsistency:** There is appetite for
-   converting the Stake Program to a highly efficient, `no_std`
-   Pinocchio implementation (reducing CU usage by +90%). These efforts
-   are undermined by the immense cost of soft-float operations.
-   [Benchmarking shows](https://solana.com/docs/programs/limitations#limited-float-support:~:text=Recent%20results%20show,Divide%20%20%20%20%20%209%20%20%20219)
-   a 22x performance penalty for a single multiplication of an `f32`
-   versus a `u64`. Using an `f64` with an operation
-   like division is even more complex. Further, doing the float
-   migration independently allows p-stake to enforce semantic
-   equivalence for its migration.
+   converting the Stake Program to a highly efficient, `no_std` Pinocchio
+   implementation (reducing CU usage by +90%). These efforts are undermined by the
+   immense cost of soft-float operations. [Benchmarking
+   shows](https://solana.com/docs/programs/limitations#limited-float-support:~:text=Recent%20results%20show,Divide%20%20%20%20%20%209%20%20%20219)
+   a 22x performance penalty for a single multiplication of an `f32` versus a
+   `u64`. Using an `f64` with an operation like division is even more complex.
+   Further, doing the float migration independently allows p-stake to enforce
+   semantic equivalence for its migration.
 
 ## Requirements
 
-The new implementation must be a replacement that precisely models the
-intent of the original logic. Any resulting differences in output
-should be minor and a direct result of improved numerical precision.
+The new implementation must be a replacement that precisely models the intent of
+the original logic. Any resulting differences in output should be minor and a
+direct result of improved numerical precision.
 
 ## New Terminology
 
@@ -55,31 +56,120 @@ None
 
 ### Proposed Fixed-Point Implementation
 
-This proposal replaces `f64` with _rational arithmetic_, expressing
-the `warmup_cooldown_rate` as a fraction and reordering operations to
-preserve precision while using integer arithmetic.
+This proposal replaces `f64` with fixed-point arithmetic in basis points and
+reorders operations to preserve precision while using integer arithmetic only.
+
+#### Rate representation (basis points)
+
+Instead of storing the warmup/cooldown rate as an `f64`, it is represented in
+basis points (bps). The default (`0.25`) and new warmup (`0.09`) are encoded as:
+
+```rust
+pub const BASIS_POINTS_PER_UNIT: u64 = 10_000;
+pub const ORIGINAL_WARMUP_COOLDOWN_RATE_BPS: u64 = 2_500; // 25%
+pub const TOWER_WARMUP_COOLDOWN_RATE_BPS: u64 = 900; // 9%
+```
+
+with a new helper that determines the active rate based on the epoch:
+
+```rust
+pub fn warmup_cooldown_rate_bps(
+    epoch: Epoch,
+    new_rate_activation_epoch: Option<Epoch>,
+) -> u64
+```
+
+The legacy `f64` constants and function are preserved but marked deprecated:
+
+```rust
+// All marked as deprecated as of 2.0.1
+pub const DEFAULT_WARMUP_COOLDOWN_RATE: f64 = 0.25;
+pub const NEW_WARMUP_COOLDOWN_RATE: f64 = 0.09;
+pub fn warmup_cooldown_rate(
+    current_epoch: Epoch,
+    new_rate_activation_epoch: Option<Epoch>,
+) -> f64
+```
+
+#### Reordered proportional stake formula
+
+The original float logic computed:
+
+```text
+(account_portion / cluster_portion) * (cluster_effective * rate)
+```
+
+This is algebraically equivalent to the fixed-point re-ordering:
+
+```text
+change =
+    (account_portion * cluster_effective * rate_bps) /
+    (cluster_portion * BASIS_POINTS_PER_UNIT)
+```
+
+All multiplications are performed first in `u128` to maximize precision and
+delay truncation.
+
+In the case the `u128` product overflows, the helper falls back to allowing the
+account's entire portion to move and returns `account_portion`. This is not
+expected under normal network conditions. It requires extremely large values
+where even the legacy `f64` implementation is highly imprecise.
+
+#### New methods
+
+The Delegation/Stake implementation uses the new integer math helpers in the
+primary methods:
+
+```rust
+// === Integer math used under-the-hood === 
+impl Delegation {
+    pub fn stake_activating_and_deactivating<T: StakeHistoryGetEntry>(
+        ...
+    ) -> StakeActivationStatus
+    fn stake_and_activating<T: StakeHistoryGetEntry>(...) -> (u64, u64)
+}
+
+impl Stake {
+    pub fn stake<T: StakeHistoryGetEntry>(...) -> u64
+}
+```
+
+This means the integer version is now the primary API under the original names.
+For consumers that need to feature-gate the change, the legacy f64 path is preserved
+under deprecated methods:
+
+```rust
+impl Delegation {
+    #[deprecated(since = "2.0.1", note = "Use stake() instead")]
+    pub fn stake_v1_legacy<T: StakeHistoryGetEntry>(...) -> u64
+
+    #[deprecated(
+        since = "2.0.1",
+        note = "Use stake_activating_and_deactivating() instead",
+    )]
+    pub fn stake_activating_and_deactivating_v1_legacy<
+        T: StakeHistoryGetEntry,
+    >(...) -> StakeActivationStatus
+}
 
-- The floating-point rates will be converted to their fractional
-  equivalents:
-  - `DEFAULT_WARMUP_COOLDOWN_RATE` (`0.25`) becomes a fraction of
-    **(numerator: 1, denominator: 4)**.
-  - `NEW_WARMUP_COOLDOWN_RATE` (`0.09`) becomes a fraction of
-    **(numerator: 9, denominator: 100)**.
+impl Stake {
+    #[deprecated(since = "2.0.1", note = "Use stake() instead")]
+    pub fn stake_v1_legacy<T: StakeHistoryGetEntry>(...) -> u64
+}
+```
 
-- The current flow
-  `(account_portion / cluster_portion) * (cluster_effective * rate)` is
-  reordered to
-  `(account_portion * cluster_effective * rate_numerator) / (cluster_portion * rate_denominator)`.
-  Instead of performing divisions early in the process (which truncates
-  intermediate results), all multiplications are performed first.
+#### Minimum Progress Clamp (`max(1)`)
 
-- All intermediate multiplications are performed using `u128` to
-  prevent overflow.
+To match legacy behavior, the fixed-point implementation preserves a minimum
+per-epoch change of 1 lamport for non-zero stake. This preserves the "always
+make forward progress" invariant for both warmup and cooldown, ensuring small
+delegations do not get stuck in activating/deactivating states due to
+truncation.
 
 ### State Compatibility
 
-To maintain backwards compatibility with on-chain stake account data,
-the `Delegation` struct will be modified:
+To maintain backwards compatibility with on-chain stake account data, the
+`Delegation` struct is modified as follows:
 
 ```diff
 pub struct Delegation {
@@ -88,51 +178,113 @@ pub struct Delegation {
     pub activation_epoch: Epoch,
     pub deactivation_epoch: Epoch,
 -   pub warmup_cooldown_rate: f64,
-+    _reserved: [u8; 8],
++   pub _reserved: [u8; 8],
 }
 ```
 
-This preserves the exact memory size and layout of existing accounts.
-It is a legacy field anyway, with the actual rate being determined
-dynamically in functions.
+This preserves the exact memory size and layout of existing accounts. It is a
+legacy field anyway, with the actual rate being determined dynamically in
+functions.
 
 ## Alternatives Considered
 
-Decimal scaling factor. Uses a uniform scaling factor represent rates
-as integers like BPS (e.g 0.25 = 2500). Rejected as rational
-arithmetic is more precise. It may be easier to use for external
-consumers, but these values are really only used internally to the
-stake interface crate.
+A number of other libraries [have been
+tested](https://github.com/grod220/stake-ebpf-check) for upstream bpf
+compatibility.
+
+| Method        | Result  | Notes                              |
+|---------------|---------|------------------------------------|
+| bnum          | Success | Requires using `u32` limbs         |
+| crypto-bigint | Failure | Composite return types not allowed |
+| fixed-bigint  | Failure | Composite return types not allowed |
+| uint          | Failure | `__multi3` is not supported        |
+
+Note also that this SIMD recommends using `u128` arithmetic. Currently, this is
+_not_ supported in upstream bpf (`__multi3` error is
+raised). [Llvm-project PR#168442](https://github.com/llvm/llvm-project/pull/168442)
+is currently up to get upstream bpf support for it, and VM maintainers feel
+confident it will be merged and included in the next release. For that reason,
+scaled math (without a library) is preferred.
 
 ## Impact
 
-- **Stake Program:** The program must be updated to use the new
-  integer-based calculation helpers from `solana-stake-interface`. It is
-  doing so mostly through its use of
-  `stake.delegation.stake_activating_and_deactivating()`. Also, the new
-  `Delegation` struct definition (with its private field), will likely
-  impact how its being instantiated in a few areas.
+### Entities
+
+- **Stake Program:** The on-chain program is updated to use the new
+  integer-based calculation helpers from `solana-stake-interface`. It is doing so
+  mostly through its use of
+  `stake.delegation.stake_activating_and_deactivating()`.
 
 - **Agave:** Update the workspace dependency on
-  `solana-stake-interface`. Runtime stake processing already funnels
-  through Delegation::stake_activating_and_deactivating, so a dependency
-  bump automatically picks up the fixed-point math without touching
-  Agave code.
+  `solana-stake-interface` & feature gate the use of the new
+  `Stake::stake()` w/ `Stake::stake_v1_legacy()` & `Delegation::stake_activating_and_deactivating()`
+  with `Delegation::stake_activating_and_deactivating_v1_legacy()`.
+
+- **Firedancer:** Will need to update their stake calculations in
+  lock-step with the above integer-math changes.
+
+### Differential Fuzzing
+
+To quantify the numerical differences between the fixed-point implementation and
+the legacy `f64` path, we run an additional prop test that:
+
+- samples random non-zero `account_portion`, `cluster_portion`, and
+  `cluster_effective` values across the full `u64` range,
+- exercises both the legacy `f64` formula and the new integer
+  implementation at the current 9% rate
+
+For 100,000 samples at the 9% rate we observe:
+
+| Metric           | Value                    | Notes                        |
+|------------------|--------------------------|------------------------------|
+| Avg. abs. diff.  | 0.505 lamports           | Mean abs(candidate − oracle) |
+| Avg. diff (ULPs) | 0.218 ULPs               | Avg. ULP distance of `f64`   |
+| p50/p90/p95/p99  | 0 / 1 / 1 / 6 lamports   | Percentiles of abs. diff.    |
+| Worst-case diff. | 932 lamports (1.82 ULPs) | Float imprecision at high #s |
+
+In short, there is high agreement and minimal deviation in outputs. Over 50% of
+results were identical, and 95% of results differed by at most 1 lamport. In the
+worst case, the difference was still only 1.82 ULPs, confirming it is an
+expected artifact of `f64` precision limitations, not a logic error.
+
+#### Note on ULPs
+
+A "Unit in the Last Place" (ULP) measures the gap between adjacent representable
+`f64` values. We use this metric to compare our new integer implementation
+against the legacy float implementation. Because an `f64` cannot represent every
+integer precisely past 2^53, the float-based result can differ slightly from the
+integer-based one, even when both are logically correct. Measuring this
+difference in ULPs allows us to verify the discrepancy is due to expected
+floating-point artifacts, not a bug.
+
+### Performance
+
+For a sample configuration (`account_portion = 1_000_000_000`, `cluster_portion
+= 100_000_000_000`, `cluster_effective = 5_000_000_000_000`,
+`new_rate_activation_epoch = 50`), the results show a minor increase in CU
+consumption for the new logic:
+
+- **Legacy (`f64`) Implementation:** 985 CUs
+- **New (`u128`) Implementation:** 1046 CUs
 
-- **Firedancer:** Will also need to update their stake interface
-  dependency in lockstep with Agave.
+The fixed-point implementation is **6.2% more expensive** for this benchmark.
+This result is due to the type widening to `u128` and checked math. However,
+this is acceptable given the vast majority of CU costs are due to serialization
+(improved by [zero-copy
+p-stake](https://github.com/solana-foundation/solana-improvement-documents/pull/401)).
 
 ## Security Considerations
 
 1. **Unit tests:** Baseline of correctness by testing specific, known
    scenarios and edge cases.
 2. **Differential Fuzzing (`proptest`):**
-   - An oracle function preserving the original `f64` logic will be
-     maintained for testing purposes only.
-   - The test will run the new integer implementation against the oracle
-     with millions of random inputs.
-   - Assert that the results are within a relative tolerance to account
-     for the increased precision.
+    - Maintains an oracle implementation that preserves the original
+      `f64` logic, used only in tests.
+    - Runs the new integer implementation against the oracle over
+      thousands of randomly generated inputs spanning the full `u64` domain.
+    - Uses a ULP-based tolerance (`4 × ULP`) to account for the
+      accumulated rounding error inherent in the float-based path while
+      ensuring the integer implementation never deviates more than expected
+      from the float oracle.
 3. **External Audit:** A comprehensive audit from an auditor with good
-   skills in numerical audits to validate arithmetic equivalence or
-   regressions.
+   skills in numerical audits to validate arithmetic equivalence or regressions.

From a73ecf7b4d91a8b669cf98af840a1cce43956bb9 Mon Sep 17 00:00:00 2001
From: Gabe Rodriguez <grod220@gmail.com>
Date: Tue, 2 Dec 2025 12:01:02 +0100
Subject: [PATCH 3/8] Update to v2 methods

---
 ...91-replace-stake-program-floating-point.md | 50 +++++++++----------
 1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/proposals/0391-replace-stake-program-floating-point.md b/proposals/0391-replace-stake-program-floating-point.md
index c5bc07bb8..39289ec76 100644
--- a/proposals/0391-replace-stake-program-floating-point.md
+++ b/proposals/0391-replace-stake-program-floating-point.md
@@ -108,53 +108,50 @@ change =
 ```
 
 All multiplications are performed first in `u128` to maximize precision and
-delay truncation.
-
-In the case the `u128` product overflows, the helper falls back to allowing the
-account's entire portion to move and returns `account_portion`. This is not
-expected under normal network conditions. It requires extremely large values
-where even the legacy `f64` implementation is highly imprecise.
+delay truncation. If the intermediate product would overflow, the numerator
+saturates to `u128::MAX` before division and the final result is clamped to the
+account's stake (`account_portion`), so the overflow path remains rate-limited
+(fail-safe rather than fail-open).
 
 #### New methods
 
-The Delegation/Stake implementation uses the new integer math helpers in the
-primary methods:
+The Delegation/Stake implementation exposes the integer math helpers under
+new `_v2` entrypoints:
 
 ```rust
-// === Integer math used under-the-hood === 
+// === Integer math used under-the-hood ===
 impl Delegation {
-    pub fn stake_activating_and_deactivating<T: StakeHistoryGetEntry>(
+    pub fn stake_activating_and_deactivating_v2<T: StakeHistoryGetEntry>(
         ...
     ) -> StakeActivationStatus
-    fn stake_and_activating<T: StakeHistoryGetEntry>(...) -> (u64, u64)
+    fn stake_and_activating_v2<T: StakeHistoryGetEntry>(...) -> (u64, u64)
 }
 
 impl Stake {
-    pub fn stake<T: StakeHistoryGetEntry>(...) -> u64
+    pub fn stake_v2<T: StakeHistoryGetEntry>(...) -> u64
 }
 ```
 
-This means the integer version is now the primary API under the original names.
-For consumers that need to feature-gate the change, the legacy f64 path is preserved
-under deprecated methods:
+The pre-existing float-based functions remain under their original names for
+API compatibility but are marked deprecated in favor of the `_v2` versions:
 
 ```rust
 impl Delegation {
-    #[deprecated(since = "2.0.1", note = "Use stake() instead")]
-    pub fn stake_v1_legacy<T: StakeHistoryGetEntry>(...) -> u64
+    #[deprecated(since = "2.0.1", note = "Use stake_v2() instead")]
+    pub fn stake<T: StakeHistoryGetEntry>(...) -> u64
 
     #[deprecated(
         since = "2.0.1",
-        note = "Use stake_activating_and_deactivating() instead",
+        note = "Use stake_activating_and_deactivating_v2() instead",
     )]
-    pub fn stake_activating_and_deactivating_v1_legacy<
+    pub fn stake_activating_and_deactivating<
         T: StakeHistoryGetEntry,
     >(...) -> StakeActivationStatus
 }
 
 impl Stake {
-    #[deprecated(since = "2.0.1", note = "Use stake() instead")]
-    pub fn stake_v1_legacy<T: StakeHistoryGetEntry>(...) -> u64
+    #[deprecated(since = "2.0.1", note = "Use stake_v2() instead")]
+    pub fn stake<T: StakeHistoryGetEntry>(...) -> u64
 }
 ```
 
@@ -211,14 +208,13 @@ scaled math (without a library) is preferred.
 ### Entities
 
 - **Stake Program:** The on-chain program is updated to use the new
-  integer-based calculation helpers from `solana-stake-interface`. It is doing so
-  mostly through its use of
-  `stake.delegation.stake_activating_and_deactivating()`.
+  integer-based calculation helpers from `solana-stake-interface`. It now
+  routes through `stake.delegation.stake_activating_and_deactivating_v2()`.
 
 - **Agave:** Update the workspace dependency on
-  `solana-stake-interface` & feature gate the use of the new
-  `Stake::stake()` w/ `Stake::stake_v1_legacy()` & `Delegation::stake_activating_and_deactivating()`
-  with `Delegation::stake_activating_and_deactivating_v1_legacy()`.
+  `solana-stake-interface` and adopt the integer entrypoints
+  (`Stake::stake_v2()` and `Delegation::stake_activating_and_deactivating_v2()`)
+  behind a feature gate.
 
 - **Firedancer:** Will need to update their stake calculations in
   lock-step with the above integer-math changes.

From d4468702ada0df09a20cadea0f27538724d390aa Mon Sep 17 00:00:00 2001
From: Gabe Rodriguez <grod220@gmail.com>
Date: Thu, 11 Dec 2025 14:39:58 +0100
Subject: [PATCH 4/8] Implementation agnostic rework

---
 ...91-replace-stake-program-floating-point.md | 357 ++++++++----------
 1 file changed, 148 insertions(+), 209 deletions(-)

diff --git a/proposals/0391-replace-stake-program-floating-point.md b/proposals/0391-replace-stake-program-floating-point.md
index 39289ec76..8a4b1de76 100644
--- a/proposals/0391-replace-stake-program-floating-point.md
+++ b/proposals/0391-replace-stake-program-floating-point.md
@@ -13,274 +13,213 @@ feature: (to be assigned upon acceptance)
 
 ## Summary
 
-Replace all floating-point (`f64`) arithmetic within the Solana Stake Program's
-warmup and cooldown logic with a fixed-point implementation using integer
-arithmetic. The new logic expresses the warmup/cooldown rate in basis points
-(bps) and performs all proportional stake calculations using `u128`
-intermediates.
-
-This change is a prerequisite to the Stake Program's migration to a `no_std`
-Pinocchio-based implementation and ensures compatibility with upstream eBPF
-toolchains, which do not support floating-point operations.
+This SIMD proposes replacing all IEEE-754 double-precision floating-point
+arithmetic within the Solana Stake Program & validator client's warmup/
+cooldown logic with a fixed-point implementation using integer arithmetic.
+The new logic expresses the warmup/cooldown rate in basis points (bps) and
+performs stake calculations using unsigned 128-bit integers to maintain
+precision.
 
 ## Motivation
 
-The Stake Program's use of `f64` presents two blockers to the upcoming roadmap:
-
-1. **Upstream eBPF incompatibility:** Standard eBPF strictly forbids
-   floating-point operations. While the solana fork (SBF) currently supports `f64`
-   via a deterministic (and inefficient) `soft-float` compiler built-in, aligning
-   with upstream standards requires removing all `f64` usage from the program.
-
-2. **Pinocchio migration inconsistency:** There is appetite for
-   converting the Stake Program to a highly efficient, `no_std` Pinocchio
-   implementation (reducing CU usage by +90%). These efforts are undermined by the
-   immense cost of soft-float operations. [Benchmarking
-   shows](https://solana.com/docs/programs/limitations#limited-float-support:~:text=Recent%20results%20show,Divide%20%20%20%20%20%209%20%20%20219)
-   a 22x performance penalty for a single multiplication of an `f32` versus a
-   `u64`. Using an `f64` with an operation like division is even more complex.
-   Further, doing the float migration independently allows p-stake to enforce
-   semantic equivalence for its migration.
-
-## Requirements
+This change is a prerequisite to the Stake Program's migration to a `no_std`
+& upstream eBPF-toolchain friendly implementation. Standard eBPF strictly
+forbids floating-point operations. While the solana fork (SBF) allows for it
+via a deterministic (and inefficient) `soft-float` compiler built-in,
+aligning with upstream standards requires removing all floating-point usage
+from the program.
 
-The new implementation must be a replacement that precisely models the intent of
-the original logic. Any resulting differences in output should be minor and a
-direct result of improved numerical precision.
+The validator client shares the same warmup/cooldown calculation logic with
+the on-chain program, so it is also in need of a lock-step update to stay in
+sync.
 
 ## New Terminology
 
-None
+- **Basis points (bps)**: An integer representation of a percentage where
+  `bps = percent × 100`.
+  - 1 bps = 0.01%
+  - 1% = 100 bps
+
+- Formula variables
+  - **account_portion**: The amount of stake (in lamports) for a single
+    account that is eligible to warm up or cool down in a given epoch.
+  - **cluster_portion**: The total amount of stake (in lamports) across the
+    cluster that is in the same warmup/cooldown phase as `account_portion`
+    for the previous epoch.
+  - **cluster_effective**: The total effective stake in the cluster (in
+    lamports) for the previous epoch.
 
 ## Detailed Design
 
-### Proposed Fixed-Point Implementation
+### Rate representation (basis points)
 
-This proposal replaces `f64` with fixed-point arithmetic in basis points and
-reorders operations to preserve precision while using integer arithmetic only.
+The current network warmup/cooldown rate is 9%. This means that, in any given
+epoch, at most 9% of the previous epoch's effective stake can be activated or
+deactivated.
 
-#### Rate representation (basis points)
+Currently, this figure is represented in floating-point: `0.09`. The new
+representation is an integer of basis points: `900`.
 
-Instead of storing the warmup/cooldown rate as an `f64`, it is represented in
-basis points (bps). The default (`0.25`) and new warmup (`0.09`) are encoded as:
+### Maintaining precision
 
-```rust
-pub const BASIS_POINTS_PER_UNIT: u64 = 10_000;
-pub const ORIGINAL_WARMUP_COOLDOWN_RATE_BPS: u64 = 2_500; // 25%
-pub const TOWER_WARMUP_COOLDOWN_RATE_BPS: u64 = 900; // 9%
-```
+The original float logic computes:
 
-with a new helper that determines the active rate based on the epoch:
+```text
+RATE_FLOAT = 0.09
 
-```rust
-pub fn warmup_cooldown_rate_bps(
-    epoch: Epoch,
-    new_rate_activation_epoch: Option<Epoch>,
-) -> u64
+allowed_change = (account_portion / cluster_portion) * (cluster_effective * RATE_FLOAT)
 ```
 
-The legacy `f64` constants and function are preserved but marked deprecated:
+For an integer implementation, it's important to re-arrange the formula so
+that the division happens last to maintain the highest precision. This is
+achieved via an algebraically equivalent re-ordering:
+
+```text
+BASIS_POINTS_PER_UNIT = 10_000
+RATE_BPS = 900
 
-```rust
-// All marked as deprecated as of 2.0.1
-pub const DEFAULT_WARMUP_COOLDOWN_RATE: f64 = 0.25;
-pub const NEW_WARMUP_COOLDOWN_RATE: f64 = 0.09;
-pub fn warmup_cooldown_rate(
-    current_epoch: Epoch,
-    new_rate_activation_epoch: Option<Epoch>,
-) -> f64
+allowed_change =
+    (account_portion * cluster_effective * RATE_BPS) /
+    (cluster_portion * BASIS_POINTS_PER_UNIT)
 ```
 
-#### Reordered proportional stake formula
+Note: any truncation in the division that occurs should truncate toward zero.
 
-The original float logic computed:
+#### Widening arithmetic to 128-bit integers
 
-```text
-(account_portion / cluster_portion) * (cluster_effective * rate)
-```
+Because of the extra multiplication, 64-bit integer math is not sufficient for
+safety. For that reason, all values used in the formula should be widened to
+128-bit integers. The final value should then be cast back down to a 64-bit
+integer.
 
-This is algebraically equivalent to the fixed-point re-ordering:
+Implementations that do not offer native unsigned 128-bit arithmetic must
+emulate it (for example via fixed-width limb arithmetic).
 
-```text
-change =
-    (account_portion * cluster_effective * rate_bps) /
-    (cluster_portion * BASIS_POINTS_PER_UNIT)
-```
+#### Saturation and fail-safe behavior
 
-All multiplications are performed first in `u128` to maximize precision and
-delay truncation. If the intermediate product would overflow, the numerator
-saturates to `u128::MAX` before division and the final result is clamped to the
-account's stake (`account_portion`), so the overflow path remains rate-limited
+If the intermediate multiplication overflows the maximum representable
+unsigned 128-bit value, the numerator saturates to the maximum 128-bit value
+before division. The result is then clamped to `account_portion`. This ensures
+that overflow cannot amplify a stake change beyond the account's own portion
 (fail-safe rather than fail-open).
 
-#### New methods
-
-The Delegation/Stake implementation exposes the integer math helpers under
-new `_v2` entrypoints:
+### Minimum progress clamp
 
-```rust
-// === Integer math used under-the-hood ===
-impl Delegation {
-    pub fn stake_activating_and_deactivating_v2<T: StakeHistoryGetEntry>(
-        ...
-    ) -> StakeActivationStatus
-    fn stake_and_activating_v2<T: StakeHistoryGetEntry>(...) -> (u64, u64)
-}
-
-impl Stake {
-    pub fn stake_v2<T: StakeHistoryGetEntry>(...) -> u64
-}
-```
+Currently, when `account_portion > 0`, there is a guaranteed minimum change of 1
+lamport per epoch so that small delegations do not get stuck in activating/
+deactivating states due to truncation. The new implementation keeps this
+behavior.
 
-The pre-existing float-based functions remain under their original names for
-API compatibility but are marked deprecated in favor of the `_v2` versions:
-
-```rust
-impl Delegation {
-    #[deprecated(since = "2.0.1", note = "Use stake_v2() instead")]
-    pub fn stake<T: StakeHistoryGetEntry>(...) -> u64
-
-    #[deprecated(
-        since = "2.0.1",
-        note = "Use stake_activating_and_deactivating_v2() instead",
-    )]
-    pub fn stake_activating_and_deactivating<
-        T: StakeHistoryGetEntry,
-    >(...) -> StakeActivationStatus
-}
-
-impl Stake {
-    #[deprecated(since = "2.0.1", note = "Use stake_v2() instead")]
-    pub fn stake<T: StakeHistoryGetEntry>(...) -> u64
-}
-```
+### Pseudocode guidance
 
-#### Minimum Progress Clamp (`max(1)`)
+#### Current implementation
 
-To match legacy behavior, the fixed-point implementation preserves a minimum
-per-epoch change of 1 lamport for non-zero stake. This preserves the "always
-make forward progress" invariant for both warmup and cooldown, ensuring small
-delegations do not get stuck in activating/deactivating states due to
-truncation.
+```text
+RATE_FLOAT = 0.09
 
-### State Compatibility
+# All params 64-bit integer
+function rate_limited_stake_change(account_portion, cluster_portion, cluster_effective):
+    if account_portion == 0 or cluster_portion == 0 or cluster_effective == 0:
+        return 0
 
-To maintain backwards compatibility with on-chain stake account data, the
-`Delegation` struct is modified as follows:
+    # Cast all params to double
+    weight_float = account_portion_float / cluster_portion_float
+    allowed_change_float = weight_float * cluster_effective_float * RATE_FLOAT
 
-```diff
-pub struct Delegation {
-    pub voter_pubkey: Pubkey,
-    pub stake: u64,
-    pub activation_epoch: Epoch,
-    pub deactivation_epoch: Epoch,
--   pub warmup_cooldown_rate: f64,
-+   pub _reserved: [u8; 8],
-}
-```
+    # Truncate toward zero via cast
+    allowed_change = allowed_change_float as 64-bit integer
 
-This preserves the exact memory size and layout of existing accounts. It is a
-legacy field anyway, with the actual rate being determined dynamically in
-functions.
+    # Never allow more than the account's own portion to change
+    if allowed_change > account_portion:
+        allowed_change = account_portion
 
-## Alternatives Considered
+    # Minimum progress clamp
+    if allowed_change == 0:
+        return 1
 
-Tested a number of other libraries [have been
-tested](https://github.com/grod220/stake-ebpf-check) for upstream bpf
-compatibility.
+    return allowed_change
+```
 
-| Method        | Result  | Notes                              |
-|---------------|---------|------------------------------------|
-| bnum          | Success | Requires using `u32` limbs         |
-| crypto-bigint | Failure | Composite return types not allowed |
-| fixed-bigint  | Failure | Composite return types not allowed |
-| uint          | Failure | `__multi3` is not supported        |
+#### Proposed new implementation
 
-Note also that this SIMD recommends using `u128` arithmetic. Currently, this is
-_not_ supported in upstream bpf (`__multi3` error is
-raised). [Llvm-project PR#168442](https://github.com/llvm/llvm-project/pull/168442)
-is currently up to get upstream bpf support for it, and VM maintainers feel
-confident it will be merged and included in the next release. For that reason,
-scaled math (without a library) is preferred.
+```text
+BASIS_POINTS_PER_UNIT = 10_000
+RATE_BPS = 900
 
-## Impact
+# All params 64-bit integer
+function rate_limited_stake_change(account_portion, cluster_portion, cluster_effective):
+    if account_portion == 0 or cluster_portion == 0 or cluster_effective == 0:
+        return 0
 
-### Entities
+    # Cast all params to 128-bit integer
+    # All multiplications saturate
+    numerator = account_portion_128 * cluster_effective_128 * RATE_BPS_128
 
-- **Stake Program:** The on-chain program is updated to use the new
-  integer-based calculation helpers from `solana-stake-interface`. It now
-  routes through `stake.delegation.stake_activating_and_deactivating_v2()`.
+    denominator = cluster_portion_128 * BASIS_POINTS_PER_UNIT_128
 
-- **Agave:** Update the workspace dependency on
-  `solana-stake-interface` and adopt the integer entrypoints
-  (`Stake::stake_v2()` and `Delegation::stake_activating_and_deactivating_v2()`).
-  behind feature gate.
+    allowed_change_128 = numerator / denominator
 
-- **Firedancer:** Will need to update their stake calculations in
-  lock-step with the above integer-math changes.
+    # Never allow more than the account's own portion to change
+    if allowed_change_128 > account_portion_128:
+        allowed_change_128 = account_portion_128
 
-### Differential Fuzzing
+    # Narrow back to 64-bit integer
+    allowed_change = allowed_change_128 as 64-bit integer
 
-To quantify the numerical differences between the fixed-point implementation and
-the legacy `f64` path, we run an additional prop test that:
+    # Minimum progress clamp
+    if allowed_change == 0:
+        return 1
 
-- samples random non-zero `account`, `cluster_portion`, and
-  `cluster_effective` values across the full `u64` range,
-- exercises both the legacy `f64` formula and the new integer
-  implementation at the current 9% rate
+    return allowed_change
+```
 
-For 100,000 samples at the 9% rate we observe:
+## State compatibility
 
-| Metric           | Value                    | Notes                        |
-|------------------|--------------------------|------------------------------|
-| Avg. abs. diff.  | 0.505 lamports           | Mean abs(candidate − oracle) |
-| Avg. diff (ULPs) | 0.218 ULPs               | Avg. ULP distance of `f64`   |
-| p50/p90/p95/p99  | 0 / 1 / 1 / 6 lamports   | Percentiles of abs. diff.    |
-| Worst-case diff. | 932 lamports (1.82 ULPs) | Float imprecision at high #s |
+In existing stake account data, there is an 8-byte field that historically
+stored warmup/cooldown rate value as a double-precision float. It is legacy
+and currently unused by any part of the program. To preserve backwards
+compatibility with existing stake account state, this SIMD does not change
+stake account layout or size. Instead, it reclassifies that field as 8 bytes
+of reserved data.
 
-In short, there is high agreement and minimal deviation in outputs. Over 50% of
-results were identical, and 95% of results differed by at most 1 lamport. In the
-worst case, the difference was still only a difference of 1.82 ULPs, confirming it
-is an expected artifact of f64 precision limitations, not a logic error.
+The implementations should continue not using this field when computing warmup/
+cooldown values and setting it to zero when creating new stake accounts.
 
-#### Note on ULPs
+## Alternatives Considered
 
-A "Unit in the Last Place" (ULP) measures the gap between adjacent representable
-`f64` values. We use this metric to compare our new integer implementation
-against the legacy float implementation. Because a `f64` cannot represent every
-integer precisely past 2^53, the float-based result can differ slightly from the
-integer-based one, even when both are logically correct. Measuring this
-difference in ULPs allows us to verify the discrepancy is due to expected
-floating-point artifacts, not a bug.
+The primary alternative is to continue using floating-point arithmetic. For
+reasons given in the motivation section, this blocks upstream eBPF-toolchain
+usage, which just puts the technical debt off to handle later.
 
-### Performance
+## Impact
 
-For a sample configuration (`account_portion = 1_000_000_000`, `cluster_portion
-= 100_000_000_000`, `cluster_effective = 5_000_000_000_000`,
-`new_rate_activation_epoch = 50`), the results show a minor increase in CU
-consumption for the new logic:
+- **Stake Interface**:
+  - Export new integer-based stake activation and deactivation logic for rust
+    consumers
+  - Deprecate the floating-point rate field while preserving binary layout
+    compatibility
 
-- **Legacy (`f64`) Implementation:** 985 CUs
-- **New (`u128`) Implementation:** 1046 CUs
+- **Stake Program**: Feature gate v2 interface helpers in:
+  - Stake Merging
+  - Stake Splitting
+  - Stake Redelegation
 
-The fixed-point implementation is **6.2% more expensive** for this benchmark.
-This result is due to the type widening to `u128` and checked math. However,
-this is acceptable given the vast majority of CU costs are due to serialization
-(improved by [zero-copy
-p-stake](https://github.com/solana-foundation/solana-improvement-documents/pull/401)).
+- **Validator Clients (Agave & Firedancer)**: Feature gate fixed-point math
+  in:
+  - Runtime & stake logic for stake activation and deactivation calculations
+  - Stake cache & history for effective stake derivation and history
+    aggregation
+  - Inflation rewards for calculation of stake-weighted rewards
 
 ## Security Considerations
 
+All implementations must adhere to the following standards:
+
 1. **Unit tests:** Baseline of correctness by testing specific, known
    scenarios and edge cases.
-2. **Differential Fuzzing (`proptest`):**
-    - Maintains an oracle implementation that preserves the original
-      `f64` logic, used only in tests.
-    - Runs the new integer implementation against the oracle over
-      thousands of randomly generated inputs spanning the full `u64` domain.
-    - Uses a ULP-based tolerance (`4 × ULP`) to account for the
-      accumulated rounding error inherent in the float-based path while
-      ensuring the integer implementation never deviates more than expected
-      from the float oracle.
-3. **External Audit:** A comprehensive audit from an auditor with good
-   skills in numerical audits to validate arithmetic equivalence or regressions.
+2. **Differential Fuzzing:** Maintain an oracle implementation that preserves
+   the original floating-point logic, used only in tests. The integer
+   implementation should then be run against this oracle to ensure a
+   difference of no more than `4 x ULP` (units in the last place).
+3. **External Audit:** A comprehensive audit from an auditor with good skills
+   in numerical audits to validate arithmetic equivalence or regressions.

From 785b0b0da9ce058ca4a810d8279777ae4422906b Mon Sep 17 00:00:00 2001
From: Gabe Rodriguez <grod220@gmail.com>
Date: Fri, 19 Dec 2025 17:40:33 +0100
Subject: [PATCH 5/8] More detail on validator client reqs

---
 ...91-replace-stake-program-floating-point.md | 83 ++++++++++++++-----
 1 file changed, 60 insertions(+), 23 deletions(-)

diff --git a/proposals/0391-replace-stake-program-floating-point.md b/proposals/0391-replace-stake-program-floating-point.md
index 8a4b1de76..01a00fa07 100644
--- a/proposals/0391-replace-stake-program-floating-point.md
+++ b/proposals/0391-replace-stake-program-floating-point.md
@@ -37,17 +37,17 @@ sync.
 
 - **Basis points (bps)**: An integer representation of a percentage where
   `bps = percent × 100`.
-  - 1 bps = 0.01%
-  - 1% = 100 bps
+    - 1 bps = 0.01%
+    - 1% = 100 bps
 
 - Formula variables
-  - **account_portion**: The amount of stake (in lamports) for a single
-    account that is eligible to warm up or cool down in a given epoch.
-  - **cluster_portion**: The total amount of stake (in lamports) across the
-    cluster that is in the same warmup/cooldown phase as `account_portion`
-    for the previous epoch.
-  - **cluster_effective**: The total effective stake in the cluster (in
-    lamports) for the previous epoch.
+    - **account_portion**: The amount of stake (in lamports) for a single
+      account that is eligible to warm up or cool down in a given epoch.
+    - **cluster_portion**: The total amount of stake (in lamports) across the
+      cluster that is in the same warmup/cooldown phase as `account_portion`
+      for the previous epoch.
+    - **cluster_effective**: The total effective stake in the cluster (in
+      lamports) for the previous epoch.
 
 ## Detailed Design
 
@@ -194,22 +194,59 @@ usage, which just puts the technical debt off to handle later.
 ## Impact
 
 - **Stake Interface**:
-  - Export new integer-based stake activation and deactivation logic for rust
-    consumers
-  - Deprecate the floating-point rate field while preserving binary layout
-    compatibility
+    - Export new integer-based stake activation and deactivation logic for Rust
+      consumers
+    - Deprecate the floating-point rate field while preserving binary layout
+      compatibility
 
 - **Stake Program**: Feature gate v2 interface helpers in:
-  - Stake Merging
-  - Stake Splitting
-  - Stake Redelegation
-
-- **Validator Clients (Agave & Firedancer)**: Feature gate fixed-point math
-  in:
-  - Runtime & stake logic for stake activation and deactivation calculations
-  - Stake cache & history for effective stake derivation and history
-    aggregation
-  - Inflation rewards for calculation of stake-weighted rewards
+    - **Stake Merging**: Stake calculations are used to determine if the
+      account is in a transient state, ensuring that merges are rejected if the
+      account is not effectively fully active or inactive.
+    - **Stake Splitting**: Stake calculations are used to determine if the
+      source stake is currently active (effective stake > 0). This status is
+      required to correctly enforce rent-exempt reserve prefunding
+      requirements for the destination account.
+    - **Stake Redelegation**: The account's cooldown status is determined with
+      stake calculations and confirms that effective stake is exactly zero
+      before allowing redelegation.
+    - **Stake Withdrawal**: When withdrawing from a deactivated account, stake
+      calculations are used to determine the remaining effective stake.
+
+- **Validator Clients (Agave & Firedancer)**: Clients must feature gate the
+  transition from floating-point to fixed-point arithmetic in all
+  consensus-critical operations involving effective, activating, or
+  deactivating stake. The following operations require updates:
+    - **Stake Activation and Deactivation**: When querying a stake delegation's
+      status for a given epoch, the validator _computes how much of the
+      delegation's stake has completed warmup or cooldown_. This requires
+      walking through epochs from the delegation's activation or deactivation
+      point, computing the allowed stake change at each epoch boundary to
+      determine the portion that transitioned. The result categorizes the
+      delegation's lamports into effective, activating, and deactivating
+      buckets.
+    - **Epoch Boundary Stake History**: At each epoch boundary, the validator
+      iterates over all stake delegations and _computes their activation status_
+      as of the concluding epoch. These per-delegation values are summed to
+      produce the cluster-wide totals (effective/activating/deactivating) that
+      form the new stake history entry. This entry is then used as input for
+      subsequent epoch calculations.
+    - **Stake Cache Updates**: The validator maintains a cache mapping vote
+      accounts to their delegated stake. When a stake account is
+      created/modified/closed, the cache entry for the associated vote account
+      must be updated. This requires _computing the delegation's effective stake_
+      contribution before and after the change to correctly adjust the cached
+      totals.
+    - **Vote Account Stake Totals**: At epoch boundaries, the validator
+      refreshes the stake distribution across vote accounts for the upcoming
+      epoch. For each vote account, it _sums the effective stake_ of all
+      delegations pointing to that account. These totals determine leader
+      schedule weights and fork choice voting power.
+    - **Inflation Rewards**: Reward calculation iterates over each epoch in a
+      vote account's credit history. For each epoch, the validator _computes the
+      delegation's effective stake_ at that epoch, multiplies by the earned vote
+      credits to produce points and accumulates these across epochs. The final
+      reward is proportional to the delegation's share of total cluster points.
 
 ## Security Considerations
 

From 4e67205f6a3ad84c0b7711ef8f34a15d23e8408f Mon Sep 17 00:00:00 2001
From: Gabe Rodriguez <grod220@gmail.com>
Date: Wed, 7 Jan 2026 10:54:52 +0100
Subject: [PATCH 6/8] Signedness + clamp clarifications

---
 ...91-replace-stake-program-floating-point.md | 42 +++++++++----------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/proposals/0391-replace-stake-program-floating-point.md b/proposals/0391-replace-stake-program-floating-point.md
index 01a00fa07..c8fdd41c0 100644
--- a/proposals/0391-replace-stake-program-floating-point.md
+++ b/proposals/0391-replace-stake-program-floating-point.md
@@ -83,14 +83,14 @@ allowed_change =
     (cluster_portion * BASIS_POINTS_PER_UNIT)
 ```
 
-Note: any truncation in the division that occurs should truncate toward zero.
+Note: The division uses unsigned integer division so it truncates (rounds down).
 
 #### Widening arithmetic to 128-bit integers
 
-Because of the extra multiplication, 64-bit integer math is not sufficient for
-safety. For that reason, all values used in the formula should be widened to
-128-bit integers. The final value should then be cast back down to a 64-bit
-integer.
+All inputs are unsigned 64-bit lamport quantities. Because of the extra
+multiplication, all values used in the formula should be widened to unsigned
+128-bit integers (or an exact emulation). The final value should then be cast
+back down to an unsigned 64-bit integer.
 
 Implementations that do not offer native unsigned 128-bit arithmetic must
 emulate it (for example via fixed-width limb arithmetic).
@@ -110,6 +110,11 @@ lamport per epoch so that small delegations do not get stuck in activating/
 deactivating states due to truncation. The new implementation keeps this
 behavior.
 
+**Note:** This clamp applies only to stake activation/deactivation
+transitions, not to inflation reward payouts. Reward distribution has a
+separate mechanism that defers sub-lamport payouts by not advancing
+`credits_observed` until a full lamport can be paid.
+
 ### Pseudocode guidance
 
 #### Current implementation
@@ -117,7 +122,7 @@ behavior.
 ```text
 RATE_FLOAT = 0.09
 
-# All params 64-bit integer
+# All params are unsigned 64-bit integers
 function rate_limited_stake_change(account_portion, cluster_portion, cluster_effective):
     if account_portion == 0 or cluster_portion == 0 or cluster_effective == 0:
         return 0
@@ -127,7 +132,7 @@ function rate_limited_stake_change(account_portion, cluster_portion, cluster_eff
     allowed_change_float = weight_float * cluster_effective_float * RATE_FLOAT
 
     # Truncate toward zero via cast
-    allowed_change = allowed_change_float as 64-bit integer
+    allowed_change = allowed_change_float as unsigned 64-bit integer
 
     # Never allow more than the account's own portion to change
     if allowed_change > account_portion:
@@ -146,12 +151,12 @@ function rate_limited_stake_change(account_portion, cluster_portion, cluster_eff
 BASIS_POINTS_PER_UNIT = 10_000
 RATE_BPS = 900
 
-# All params 64-bit integer
+# All params are unsigned 64-bit integers
 function rate_limited_stake_change(account_portion, cluster_portion, cluster_effective):
     if account_portion == 0 or cluster_portion == 0 or cluster_effective == 0:
         return 0
 
-    # Cast all params to 128-bit integer
+    # Cast all params to unsigned 128-bit integer
     # All multiplications saturate
     numerator = account_portion_128 * cluster_effective_128 * RATE_BPS_128
 
@@ -163,8 +168,8 @@ function rate_limited_stake_change(account_portion, cluster_portion, cluster_eff
     if allowed_change_128 > account_portion_128:
         allowed_change_128 = account_portion_128
 
-    # Narrow back to 64-bit integer
-    allowed_change = allowed_change_128 as 64-bit integer
+    # Narrow back to unsigned 64-bit integer
+    allowed_change = allowed_change_128 as unsigned 64-bit integer
 
     # Minimum progress clamp
     if allowed_change == 0:
@@ -173,18 +178,6 @@ function rate_limited_stake_change(account_portion, cluster_portion, cluster_eff
     return allowed_change
 ```
 
-## State compatibility
-
-In existing stake account data, there is an 8-byte field that historically
-stored warmup/cooldown rate value as a double-precision float. It is legacy
-and currently unused by any part of the program. To preserve backwards
-compatibility with existing stake account state, this SIMD does not change
-stake account layout or size. Instead, it reclassifies that field as 8 bytes
-of reserved data.
-
-The implementations should continue not using this field when computing warmup/
-cooldown values and setting it to zero when creating new stake accounts.
-
 ## Alternatives Considered
 
 The primary alternative is to continue using floating-point arithmetic. For
@@ -247,6 +240,9 @@ usage, which just puts the technical debt off to handle later.
       delegation's effective stake_ at that epoch, multiplies by the earned vote
       credits to produce points and accumulates these across epochs. The final
       reward is proportional to the delegation's share of total cluster points.
+        - Note: Only the effective stake computation (warmup/cooldown) is
+          affected by this SIMD. The downstream reward-to-lamport conversion
+          and sub-lamport deferral logic remain unchanged.
 
 ## Security Considerations
 

From d5faf7402f9a2464e7b449a273b9d1c144371a0d Mon Sep 17 00:00:00 2001
From: Gabe Rodriguez <grod220@gmail.com>
Date: Fri, 9 Jan 2026 21:55:37 +0100
Subject: [PATCH 7/8] Fix imperatives + sequence text improvement

---
 ...91-replace-stake-program-floating-point.md | 61 +++++++++++--------
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/proposals/0391-replace-stake-program-floating-point.md b/proposals/0391-replace-stake-program-floating-point.md
index c8fdd41c0..f33305dc2 100644
--- a/proposals/0391-replace-stake-program-floating-point.md
+++ b/proposals/0391-replace-stake-program-floating-point.md
@@ -70,9 +70,9 @@ RATE_FLOAT = 0.09
 allowed_change = (account_portion / cluster_portion) * (cluster_effective * RATE_FLOAT)
 ```
 
-For an integer implementation, it's important to re-arrange the formula so
-that the division happens last to maintain the highest precision. This is
-achieved via an algebraically equivalent re-ordering:
+For an integer implementation, the division MUST occur last (after all
+multiplications) to maintain the highest precision. This is achieved via an
+algebraically equivalent re-ordering:
 
 ```text
 BASIS_POINTS_PER_UNIT = 10_000
@@ -81,37 +81,50 @@ RATE_BPS = 900
 allowed_change =
     (account_portion * cluster_effective * RATE_BPS) /
     (cluster_portion * BASIS_POINTS_PER_UNIT)
+
+# Note: all multiplications saturate
 ```
 
-Note: The division uses unsigned integer division so it truncates (rounds down).
+Note: The division MUST use unsigned integer division and truncate (round down).
+
+#### Widening arithmetic and safety
 
-#### Widening arithmetic to 128-bit integers
+All inputs are unsigned 64-bit integers. To maintain precision and bound
+overflow behavior, all values used in the formula MUST be widened to unsigned
+128-bit integers (or an exact emulation) prior to any multiplication or
+division.
 
-All inputs are unsigned 64-bit lamport quantities. Because of the extra
-multiplication, all values used in the formula should be widened to unsigned
-128-bit integers (or an exact emulation). The final value should then be cast
-back down to an unsigned 64-bit integer.
+Implementations MUST NOT fault or abort due to overflow in intermediate
+arithmetic. Instead, the computation MUST adhere to the following sequence:
 
-Implementations that do not offer native unsigned 128-bit arithmetic must
-emulate it (for example via fixed-width limb arithmetic).
+1. Saturate: All intermediate 128-bit multiplications in the computation
+   (including both numerator and denominator multiplications) MUST use
+   saturating arithmetic, capping at the maximum representable unsigned 128-bit
+   value.
+2. Divide: The division MUST use unsigned integer division and truncate
+   (round down).
+3. Clamp: The post-division result MUST be clamped to `account_portion`.
+4. Narrow: The clamped value MUST be converted back to an unsigned 64-bit
+   integer. Because the value is capped at `account_portion`, this conversion
+   MUST be exact (lossless) and MUST NOT truncate, wrap, or otherwise alter the
+   clamped value.
 
-#### Saturation and fail-safe behavior
+Rationale: Saturating multiplication combined with post-division clamping
+ensures that overflow cannot amplify a stake change beyond the account’s own
+portion (fail-safe rather than fail-open) and avoids introducing a fault/abort
+path.
 
-If the intermediate multiplication overflows the maximum representable
-unsigned 128-bit value, the numerator saturates to the maximum 128-bit value
-before division. The result is then clamped to `account_portion`. This ensures
-that overflow cannot amplify a stake change beyond the account's own portion
-(fail-safe rather than fail-open).
+Implementations without native 128-bit support MUST emulate these semantics exactly.
 
 ### Minimum progress clamp
 
 Currently, when `account_portion > 0`, there is a granted minimum change of 1
 lamport per epoch so that small delegations do not get stuck in activating/
-deactivating states due to truncation. The new implementation keeps this
+deactivating states due to truncation. The new implementation MUST keep this
 behavior.
 
-**Note:** This clamp applies only to stake activation/deactivation
-transitions, not to inflation reward payouts. Reward distribution has a
+**Note:** This clamp MUST apply only to stake activation/deactivation
+transitions and NOT to inflation reward payouts. Reward distribution has a
 separate mechanism that defers sub-lamport payouts by not advancing
 `credits_observed` until a full lamport can be paid.
 
@@ -196,7 +209,7 @@ usage, which just puts the technical debt off to handle later.
     - **Stake Merging**: Stake calculations are used to determine if the
       account is in a transient state, ensuring that merges are rejected if the
       account is not effectively fully active or inactive.
-    - **Stake Splitting**: Stake calculations are used determine if the source
+    - **Stake Splitting**: Stake calculations are used to determine if the source
       stake is currently active (effective stake > 0). This status is required
       to correctly enforce rent-exempt reserve prefunding requirements for the
       destination account.
@@ -206,7 +219,7 @@ usage, which just puts the technical debt off to handle later.
     - **Stake Withdrawal**: When withdrawing from a deactivated account, stake
       calculations are used to determine the remaining effective stake.
 
-- **Validator Clients (Agave & Firedancer)**: Clients must feature gate the
+- **Validator Clients (Agave & Firedancer)**: Clients MUST feature gate the
   transition from floating-point to fixed-point arithmetic in all
   consensus-critical operations involving effective, activating, or
   deactivating stake. The following operations require updates:
@@ -227,7 +240,7 @@ usage, which just puts the technical debt off to handle later.
     - **Stake Cache Updates**: The validator maintains a cache mapping vote
       accounts to their delegated stake. When a stake account is
       created/modified/closed, the cache entry for the associated vote account
-      must be updated. This requires _computing the delegation's effective stake_
+      MUST be updated. This requires _computing the delegation's effective stake_
       contribution before and after the change to correctly adjust the cached
       totals.
     - **Vote Account Stake Totals**: At epoch boundaries, the validator
@@ -246,7 +259,7 @@ usage, which just puts the technical debt off to handle later.
 
 ## Security Considerations
 
-All implementations must adhere to the following standards:
+All implementations MUST adhere to the following standards:
 
 1. **Unit tests:** Baseline of correctness by testing specific, known
    scenarios and edge cases.

From cf9869634f2f6d1f3be4c2de69854faf2efdb7d4 Mon Sep 17 00:00:00 2001
From: Gabe Rodriguez <grod220@gmail.com>
Date: Fri, 16 Jan 2026 11:36:28 +0100
Subject: [PATCH 8/8] Update pseudocode conventions

---
 ...91-replace-stake-program-floating-point.md | 56 +++++++++++--------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/proposals/0391-replace-stake-program-floating-point.md b/proposals/0391-replace-stake-program-floating-point.md
index f33305dc2..b6ca506c7 100644
--- a/proposals/0391-replace-stake-program-floating-point.md
+++ b/proposals/0391-replace-stake-program-floating-point.md
@@ -51,6 +51,18 @@ sync.
 
 ## Detailed Design
 
+### Pseudocode conventions
+
+This document uses the following notation to describe arithmetic operations
+with explicit bit-width and overflow semantics:
+
+- Uint64 / Uint128: Unsigned 64-bit / 128-bit integer types
+- widen(x): Zero-extend a Uint64 to Uint128 (lossless)
+- narrow(x): Convert a Uint128 to Uint64 (caller must ensure x ≤ 2^64−1)
+- sat_mul(a, b): Saturating multiplication—returns a × b or 2^128−1 if
+  the result would overflow
+- trunc_div(a, b): Truncating unsigned integer division (rounds toward zero)
+
 ### Rate representation (basis points)
 
 The current network warmup/cooldown rate is 9%. This means that, in any given
@@ -78,11 +90,10 @@ algebraically equivalent re-ordering:
 BASIS_POINTS_PER_UNIT = 10_000
 RATE_BPS = 900
 
-allowed_change =
-    (account_portion * cluster_effective * RATE_BPS) /
-    (cluster_portion * BASIS_POINTS_PER_UNIT)
+numerator = sat_mul(sat_mul(account_portion, cluster_effective), RATE_BPS)
+denominator = sat_mul(cluster_portion, BASIS_POINTS_PER_UNIT)
 
-# Note: all multiplications saturate
+allowed_change = trunc_div(numerator, denominator)
 ```
 
 Note: The division MUST use unsigned integer division and truncate (round down).
@@ -110,7 +121,7 @@ arithmetic. Instead, the computation MUST adhere to the following sequence:
    clamped value.
 
 Rationale: Saturating multiplication combined with post-division clamping
-ensures that overflow cannot amplify a stake change beyond the account’s own
+ensures that overflow cannot amplify a stake change beyond the account's own
 portion (fail-safe rather than fail-open) and avoids introducing a fault/abort
 path.
 
@@ -135,7 +146,7 @@ separate mechanism that defers sub-lamport payouts by not advancing
 ```text
 RATE_FLOAT = 0.09
 
-# All params are unsigned 64-bit integers
+# All params are Uint64
 function rate_limited_stake_change(account_portion, cluster_portion, cluster_effective):
     if account_portion == 0 or cluster_portion == 0 or cluster_effective == 0:
         return 0
@@ -145,7 +156,7 @@ function rate_limited_stake_change(account_portion, cluster_portion, cluster_eff
     allowed_change_float = weight_float * cluster_effective_float * RATE_FLOAT
 
     # Truncate toward zero via cast
-    allowed_change = allowed_change_float as unsigned 64-bit integer
+    allowed_change = allowed_change_float as Uint64
 
     # Never allow more than the account's own portion to change
     if allowed_change > account_portion:
@@ -161,34 +172,35 @@ function rate_limited_stake_change(account_portion, cluster_portion, cluster_eff
 #### Proposed new implementation
 
 ```text
-BASIS_POINTS_PER_UNIT = 10_000
-RATE_BPS = 900
+BASIS_POINTS_PER_UNIT: Uint128 = 10_000
+RATE_BPS: Uint128 = 900
 
-# All params are unsigned 64-bit integers
+# All params are Uint64
 function rate_limited_stake_change(account_portion, cluster_portion, cluster_effective):
     if account_portion == 0 or cluster_portion == 0 or cluster_effective == 0:
         return 0
 
-    # Cast all params to unsigned 128-bit integer
-    # All multiplications saturate
-    numerator = account_portion_128 * cluster_effective_128 * RATE_BPS_128
-
-    denominator = cluster_portion_128 * BASIS_POINTS_PER_UNIT_128
+    # Widen inputs to Uint128
+    numerator = sat_mul(
+                    sat_mul(widen(account_portion), widen(cluster_effective)),
+                    RATE_BPS
+                )
+    denominator = sat_mul(widen(cluster_portion), BASIS_POINTS_PER_UNIT)
 
-    allowed_change_128 = numerator / denominator
+    allowed_change = trunc_div(numerator, denominator)
 
     # Never allow more than the account's own portion to change
-    if allowed_change_128 > account_portion_128:
-        allowed_change_128 = account_portion_128
+    if allowed_change > widen(account_portion):
+        allowed_change = widen(account_portion)
 
-    # Narrow back to unsigned 64-bit integer
-    allowed_change = allowed_change_128 as unsigned 64-bit integer
+    # Narrow back to Uint64
+    result = narrow(allowed_change)
 
     # Minimum progress clamp
-    if allowed_change == 0:
+    if result == 0:
         return 1
 
-    return allowed_change
+    return result
 ```
 
 ## Alternatives Considered