diff --git a/chronos/ratelimit.nim b/chronos/ratelimit.nim
index ad66c067e..508e6a262 100644
--- a/chronos/ratelimit.nim
+++ b/chronos/ratelimit.nim
@@ -14,6 +14,12 @@ import timer
 export timer
 
 type
+  ReplenishMode* = enum  
+    Continuous  
+      # Tokens are continuously replenished at a rate of `capacity / fillDuration`, up to the configured capacity  
+    Discrete  
+      # Up to `capacity` tokens are replenished once every `fillDuration`, in discrete steps, such that at the beginning of every `fillDuration` period, there are `capacity` tokens available  
+
   BucketWaiter = object
     future: Future[void]
     value: int
@@ -21,38 +27,88 @@ type
 
   TokenBucket* = ref object
     budget: int
-    budgetCap: int
+    capacity: int
     lastUpdate: Moment
     fillDuration: Duration
     workFuture: Future[void]
     pendingRequests: seq[BucketWaiter]
     manuallyReplenished: AsyncEvent
+    replenishMode: ReplenishMode
 
-proc update(bucket: TokenBucket, currentTime: Moment) =
+func periodDistance(bucket: TokenBucket, currentTime: Moment): float =
+  if currentTime <= bucket.lastUpdate or bucket.fillDuration == default(Duration):
+    return 0.0
+
+  nanoseconds(currentTime - bucket.lastUpdate).float / nanoseconds(bucket.fillDuration).float
+
+proc calcUpdateDiscrete(bucket: TokenBucket, currentTime: Moment): tuple[budget: int, lastUpdate: Moment] =
   if bucket.fillDuration == default(Duration):
-    bucket.budget = min(bucket.budgetCap, bucket.budget)
-    return
+    # with zero fillDuration we only allow manual replenish till capacity
+    return (min(bucket.capacity, bucket.budget), bucket.lastUpdate)
 
-  if currentTime < bucket.lastUpdate:
-    return
+  let distance = periodDistance(bucket, currentTime)
+  if distance < 1.0:
+    return (bucket.budget, bucket.lastUpdate)
 
-  let
-    timeDelta = currentTime - bucket.lastUpdate
-    fillPercent = timeDelta.milliseconds.float / bucket.fillDuration.milliseconds.float
-    replenished =
-      int(bucket.budgetCap.float * fillPercent)
-    deltaFromReplenished =
-      int(bucket.fillDuration.milliseconds.float *
-      replenished.float / bucket.budgetCap.float)
+  (bucket.capacity, bucket.lastUpdate + (bucket.fillDuration * int(distance)))
 
-  bucket.lastUpdate += milliseconds(deltaFromReplenished)
-  bucket.budget = min(bucket.budgetCap, bucket.budget + replenished)
+proc calcUpdateContinuous(bucket: TokenBucket, currentTime: Moment): tuple[budget: int, lastUpdate: Moment]  =
+  if bucket.fillDuration == default(Duration):
+    # with zero fillDuration we only allow manual replenish till capacity
+    return (min(bucket.capacity, bucket.budget), bucket.lastUpdate)
+
+  if currentTime <= bucket.lastUpdate:
+    # don't allow backward timing
+    return (bucket.budget, bucket.lastUpdate)
+
+  let timeDelta = currentTime - bucket.lastUpdate
+  let capacity = bucket.capacity
+  let periodNs = bucket.fillDuration.nanoseconds.int64
+  let deltaNs = timeDelta.nanoseconds.int64
+
+  # How many whole tokens could be produced by the elapsed time.
+  let possibleTokens = int((deltaNs * capacity.int64) div periodNs)
+  if possibleTokens <= 0:
+    return (bucket.budget, bucket.lastUpdate)
+
+  let budgetLeft = capacity - bucket.budget
+  if budgetLeft <= 0:
+    # Bucket already full the entire elapsed time: burn the elapsed time
+    # so we do not accumulate implicit credit and do not allow over budgeting
+    return (capacity, currentTime)
+
+  let toAdd = min(possibleTokens, budgetLeft)
+
+  # Advance lastUpdate only by the fraction of time actually “spent” to mint toAdd tokens.
+  # (toAdd / capacity) * period = time used
+  let usedNs = (periodNs * toAdd.int64) div capacity.int64
+  let newbudget = bucket.budget + toAdd
+  var newLastUpdate = bucket.lastUpdate + nanoseconds(usedNs)
+  if toAdd == budgetLeft and possibleTokens > budgetLeft:
+    # We hit the capacity; discard leftover elapsed time to prevent multi-call burst inflation
+    newLastUpdate = currentTime
+
+  (newbudget, newLastUpdate)
+
+proc calcUpdate(bucket: TokenBucket, currentTime: Moment): tuple[budget: int, lastUpdate: Moment] =
+  if bucket.replenishMode == ReplenishMode.Discrete:
+    return bucket.calcUpdateDiscrete(currentTime)
+  else:
+    return bucket.calcUpdateContinuous(currentTime)
+
+proc update(bucket: TokenBucket, currentTime: Moment) =
+  let (newBudget, newLastUpdate) = bucket.calcUpdate(currentTime)
+  bucket.budget = newBudget
+  bucket.lastUpdate = newLastUpdate
 
 proc tryConsume*(bucket: TokenBucket, tokens: int, now = Moment.now()): bool =
   ## If `tokens` are available, consume them,
-  ## Otherwhise, return false.
+  ## Otherwise, return false.
 
   if bucket.budget >= tokens:
+    # If the bucket is full, treat this moment as the period start and discard any idle time before it
+    if bucket.budget == bucket.capacity:
+      bucket.lastUpdate = now
     bucket.budget -= tokens
     return true
 
@@ -80,8 +136,8 @@ proc worker(bucket: TokenBucket) {.async.} =
       let eventWaiter = bucket.manuallyReplenished.wait()
       if bucket.fillDuration.milliseconds > 0:
         let
-          nextCycleValue = float(min(waiter.value, bucket.budgetCap))
-          budgetRatio = nextCycleValue.float / bucket.budgetCap.float
+          nextCycleValue = float(min(waiter.value, bucket.capacity))
+          budgetRatio = nextCycleValue.float / bucket.capacity.float
           timeToTarget = int(budgetRatio * bucket.fillDuration.milliseconds.float) + 1
           #TODO this will create a timer for each blocked bucket,
           #which may cause performance issue when creating many
@@ -119,7 +175,7 @@ proc consume*(bucket: TokenBucket, tokens: int, now = Moment.now()): Future[void
   if isNil(bucket.workFuture) or bucket.workFuture.finished():
     bucket.workFuture = worker(bucket)
 
-  return retFuture
+  retFuture
 
 proc replenish*(bucket: TokenBucket, tokens: int, now = Moment.now()) =
   ## Add `tokens` to the budget (capped to the bucket capacity)
@@ -127,16 +183,33 @@ proc replenish*(bucket: TokenBucket, tokens: int, now = Moment.now()) =
   bucket.update(now)
   bucket.manuallyReplenished.fire()
 
+proc getAvailableCapacity*(
+    bucket: TokenBucket, currentTime: Moment = Moment.now()
+): tuple[budget: int, capacity: int, lastUpdate: Moment] =
+  let (assumedBudget, assumedLastUpdate) = bucket.calcUpdate(currentTime)
+  (assumedBudget, bucket.capacity, assumedLastUpdate)
+
 proc new*(
   T: type[TokenBucket],
-  budgetCap: int,
-  fillDuration: Duration = 1.seconds): T =
+  capacity: int,
+  fillDuration: Duration = 1.seconds,
+  replenishMode: ReplenishMode = ReplenishMode.Continuous): T =
 
   ## Create a TokenBucket
   T(
-    budget: budgetCap,
-    budgetCap: budgetCap,
+    budget: capacity,
+    capacity: capacity,
     fillDuration: fillDuration,
     lastUpdate: Moment.now(),
-    manuallyReplenished: newAsyncEvent()
+    manuallyReplenished: newAsyncEvent(),
+    replenishMode: replenishMode
   )
+
+proc setState*(bucket: TokenBucket, budget: int, lastUpdate: Moment) =
+  bucket.budget = budget
+  bucket.lastUpdate = lastUpdate
+
+func `$`*(b: TokenBucket): string {.inline.} =
+  if isNil(b):
+    return "nil"
+  $b.capacity & "/" & $b.fillDuration
diff --git a/docs/src/ratelimit.md b/docs/src/ratelimit.md
new file mode 100644
index 000000000..796312300
--- /dev/null
+++ b/docs/src/ratelimit.md
@@ -0,0 +1,109 @@
+# TokenBucket — Usage Modes (Overview)
+
+TokenBucket provides several usage modes and patterns depending on how you want to rate-limit:
+
+- Continuous mode (default):
+	- Mints tokens proportionally to elapsed time at a constant rate (`capacity / fillDuration`), adding only whole tokens.
+	- When the bucket is full for an interval, the elapsed time is burned (no “credit banking”).
+	- If an update would overfill, budget is clamped to capacity and leftover elapsed time is discarded; `lastUpdate` is set to the current time.
+	- Nanosecond-level accounting for precise behavior.
+
+- Discrete mode:
+	- Replenishes only after a full `fillDuration` has elapsed (step-like refill behavior).
+	- Before the period boundary, budget does not increase; after the boundary, budget jumps to capacity.
+	- Use when you need hard period boundaries rather than proportional accrual.
+
+- Manual-only replenish (fillDuration = 0):
+	- Disables automatic minting; tokens can only be added via `replenish(tokens)`.
+	- Replenish is capped at capacity and wakes pending consumers.
+
+- Synchronous consumption: `tryConsume(tokens, now)`
+	- Attempts to consume immediately; returns `true` on success, `false` otherwise.
+	- If consuming from full, `lastUpdate` is set to `now` (prevents idle-at-full credit banking in Continuous mode).
+
+- Asynchronous consumption: `consume(tokens, now) -> Future[void]`
+	- Returns a future that completes when tokens become available; the future can also be cancelled.
+	- Internally, the waiter is woken around the time enough tokens are expected to accrue, or earlier if `replenish()` is called.
+
+- Capacity and timing introspection: `getAvailableCapacity(now)`
+	- Computes the budget as of `now` without mutating bucket state.
+
+- Manual replenishment: `replenish(tokens, now)`
+	- Adds tokens (capped to capacity), updates timing, and wakes waiters.
+
+The sections below illustrate Continuous semantics with concrete timelines.
+
+# Example Scenarios for Continuous Mode
+## TokenBucket Continuous Mode — Scenario 1 Timeline
+
+Assumptions:
+- Capacity `C = 10`
+- `fillDuration = 1s` (per-token time: 100ms)
+- Start: `t = 0ms`, `budget = 10`, `lastUpdate = 0ms`
+
+Legend:
+- Minted tokens (TA, "tokens added"): tokens added by the Continuous update at that step
+- Budget after mint: budget after minting, before the consume at that row
+- Budget after consume: budget left after processing the request at that row
+- LU set?: whether `lastUpdate` changes at that step (reason)
+
+Only request events are listed below (no passive availability checks):
+
+| Time    | Elapsed from LU | Budget (in) | Request tokens | Minted tokens (TA) | Budget after mint | Budget after consume | LU set?                          |
+|---------|------------------|-------------|----------------|--------------------|-------------------|----------------------|-----------------------------------|
+| 0 ms    | n/a              | 10          | 7              | 0                  | 10                | 3                    | yes (consume/full → 0 ms)         |
+| 200 ms  | 200 ms           | 3           | 5              | 2                  | 5                 | 0                    | yes (update → 200 ms)             |
+| 650 ms  | 450 ms           | 0           | 3              | 4                  | 4                 | 1                    | yes (update → 600 ms)             |
+| 1200 ms | 600 ms           | 1           | 6              | 6                  | 7                 | 1                    | yes (update → 1200 ms)            |
+| 1800 ms | 600 ms           | 1           | 5              | 6                  | 7                 | 2                    | yes (update → 1800 ms)            |
+| 2100 ms | 300 ms           | 2           | 10             | 3                  | 5                 | 5 (insufficient)     | yes (update → 2100 ms)            |
+| 2600 ms | 500 ms           | 5           | 10             | 5 (to cap)         | 10 (hit cap)      | 0                    | yes (update hit cap → 2600 ms); yes (consume/full → 2600 ms) |
+
+Notes:
+- When an update would overfill the bucket, it is clamped to capacity and `lastUpdate` is set to the current time; leftover elapsed time is discarded.
+- Consuming from a full bucket sets `lastUpdate` to the consume time (prevents idle-at-full credit banking).
+
+### Consumption Summary (0–3s window)
+
+Per `fillDuration` period (1s each):
+
+| Period         | Requests within period                         | Tokens consumed |
+|----------------|-------------------------------------------------|-----------------|
+| 0–1000 ms      | 0ms:7, 200ms:5, 650ms:3                        | 15              |
+| 1000–2000 ms   | 1200ms:6, 1800ms:5                             | 11              |
+| 2000–3000 ms   | 2100ms:10 (insufficient), 2600ms:10 (consumed) | 10              |
+
+Total consumed over 3 seconds: 15 + 11 + 10 = 36 tokens.
+
+## High-rate single-token requests (Continuous)
+
+Settings:
+- Capacity `C = 10`, `fillDuration = 10ms` (per-token time: 1ms)
+- Window to observe: `0–40ms` (4 full periods)
+- Requests are 1 token each; batches occur at specific timestamps.
+
+We show how the bucket rejects attempts that exceed the available budget at each instant, ensuring no more than `capacity + minted` tokens are usable in any time frame. Over `0–40ms`, at most `10 (initial capacity) + 4 × 10 (mint) = 50` tokens can be consumed. In the table, `PT → TA` means possible tokens for the elapsed time → tokens actually added.
+
+Request batches and outcomes:
+
+| Time   | Elapsed from LU | Budget before | Minted (PT→TA) | Budget after mint | Requests (×1) | Accepted | Rejected | Budget after consume | LU after |
+|--------|------------------|---------------|-----------------|-------------------|---------------|----------|----------|----------------------|---------|
+| 0 ms   | n/a              | 10            | 0               | 10                | 12            | 10       | 2        | 0                    | 0 ms    |
+| 5 ms   | 5 ms             | 0             | 5 → 5           | 5                 | 7             | 5        | 2        | 0                    | 5 ms    |
+| 10 ms  | 5 ms             | 0             | 5 → 5           | 5                 | 15            | 5        | 10       | 0                    | 10 ms   |
+| 12 ms  | 2 ms             | 0             | 2 → 2           | 2                 | 3             | 2        | 1        | 0                    | 12 ms   |
+| 20 ms  | 8 ms             | 0             | 8 → 8           | 8                 | 25            | 8        | 17       | 0                    | 20 ms   |
+| 30 ms  | 10 ms            | 0             | 10 → 10         | 10                | 9             | 9        | 0        | 1                    | 30 ms   |
+| 31 ms  | 1 ms             | 1             | 1 → 1           | 2                 | 3             | 2        | 1        | 0                    | 31 ms   |
+| 40 ms  | 9 ms             | 0             | 9 → 9           | 9                 | 20            | 9        | 11       | 0                    | 40 ms   |
+
+Totals over 0–40ms:
+- Attempted: 12 + 7 + 15 + 3 + 25 + 9 + 3 + 20 = 94 requests
+- Accepted: 10 + 5 + 5 + 2 + 8 + 9 + 2 + 9 = 50 tokens (matches `10 + 4×10`)
+- Rejected: 94 − 50 = 44 requests
+
+Why the rejections happen (preventing overuse):
+- At any given instant, you can only consume up to the tokens currently in the bucket.
+- Between instants, tokens mint continuously at `capacity / fillDuration = 1 token/ms`; the table shows how many become available just before each batch.
+- When a batch demands more than available, the excess is rejected (or would be queued with `consume()`), enforcing the rate limit.
+- Over any observation window, the maximum consumable tokens = initial available (up to capacity) + tokens minted during that window; here, that cap is `10 + (40ms × 1/ms) = 50`.
\ No newline at end of file
diff --git a/tests/testratelimit.nim b/tests/testratelimit.nim
index d28492874..f8a34f69f 100644
--- a/tests/testratelimit.nim
+++ b/tests/testratelimit.nim
@@ -117,14 +117,308 @@ suite "Token Bucket":
     check bucket.tryConsume(1, fakeNow) == true
 
   test "Short replenish":
-    skip()
-    # TODO (cheatfate): This test was disabled, because it continuosly fails in
-    # Github Actions Windows x64 CI when using Nim 1.6.14 version.
-    # Unable to reproduce failure locally.
+    var bucket = TokenBucket.new(15000, 1.milliseconds)
+    let start = Moment.now()
+    check bucket.tryConsume(15000, start)
+    check bucket.tryConsume(1, start) == false
+
+    check bucket.tryConsume(15000, start + 1.milliseconds) == true
+
+  # Edge-case: ensure only one refill can occur for the same timestamp even if
+  # multiple tryConsume calls are made that would otherwise appear to have large
+  # elapsed time credit. This prevents multi-call burst inflation at a single time.
+  test "No double refill at same timestamp":
+    var bucket = TokenBucket.new(10, 100.milliseconds)
+    let t0 = Moment.now()
+    # Consume from full so lastUpdate is stamped at t0
+    check bucket.tryConsume(5, t0) == true  # budget now 5
+    # Long idle period (simulate large elapsed time)
+    let idle = t0 + 5.seconds
+    # First large request triggers an update + refill limited by space (5)
+    check bucket.tryConsume(6, idle) == true  # budget after = 4 (5 minted -> 10 then -6)
+    # Second request at the SAME timestamp cannot refill again
+    check bucket.tryConsume(5, idle) == false
+    # Prove only 4 remain: consuming 4 succeeds, then 1 more fails at same timestamp
+    check bucket.tryConsume(4, idle) == true
+    check bucket.tryConsume(1, idle) == false
+
+  # Edge-case fairness: partial usage should only mint up to available space, not
+  # more than cap, and leftover elapsed time is burned once cap is reached.
+  test "Refill limited by available space":
+    var bucket = TokenBucket.new(10, 100.milliseconds)
+    let t0 = Moment.now()
+    # Spend a portion (from full) -> lastUpdate = t0, budget 4
+    check bucket.tryConsume(6, t0) == true
+    # Mid-period small consume without triggering update (still before refill point)
+    let mid = t0 + 50.milliseconds
+    check bucket.tryConsume(1, mid) == true  # budget 3
+    # At the 100ms boundary request more than remaining budget to force update
+    let boundary = t0 + 100.milliseconds
+    # Space is 7; even though 100ms elapsed corresponds to 10 possible tokens,
+    # only 7 are minted and leftover elapsed time credit is discarded.
+    check bucket.tryConsume(6, boundary) == true  # leaves 4
+    # A second consume at identical boundary timestamp cannot mint more than residual
+    check bucket.tryConsume(5, boundary) == false
+    # 40ms later the 4 tokens left over from the boundary are still in the bucket, so request 4 succeeds without an update
+    let late = boundary + 40.milliseconds
+    check bucket.tryConsume(4, late) == true  # should deplete
+    # A subsequent call at the same timestamp may mint remaining fractional time credit (fair catch-up)
+    # so a small consume can still succeed.
+    check bucket.tryConsume(1, late) == true
+
+  test "Discrete replenish mode does not refill before period elapsed":
+    var bucket = TokenBucket.new(10, 100.milliseconds, ReplenishMode.Discrete)
+    let t0 = Moment.now()
+    # Spend a portion (from full, i.e. budget 10) -> lastUpdate = t0
+    check bucket.tryConsume(9, t0) == true # leaves 1
+
+    var cap = bucket.getAvailableCapacity(t0)
+    check cap.budget == 1
+    check cap.lastUpdate == t0
+    check cap.capacity == 10
+
+    let mid = t0 + 50.milliseconds
+
+    cap = bucket.getAvailableCapacity(mid)
+    check cap.budget == 1
+    check cap.lastUpdate == t0
+    check cap.capacity == 10
+
+    check bucket.tryConsume(2, mid) == false  # no update before period boundary passed, budget 1
+
+    let boundary = t0 + 100.milliseconds
+
+    cap = bucket.getAvailableCapacity(boundary)
+    check cap.budget == 10
+    check cap.lastUpdate == boundary
+    check cap.capacity == 10
+
+    check bucket.tryConsume(2, boundary) == true  # ok, we passed the period boundary now, leaves 8
+
+  test "Continuous high-rate single-token 10/10ms over 40ms":
+    # Capacity 10, fillDuration 10ms (1 token/ms). Only 1-token requests are made
+    # at specific timestamps within 0–40ms (4 full periods). We verify that no
+    # more than 50 tokens can be consumed in total and that per-batch accept/reject
+    # counts and lastUpdate values match expectations.
+    var bucket = TokenBucket.new(10, 10.milliseconds)
+    let t0 = Moment.now()
+    let t5 = t0 + 5.milliseconds
+    let t10 = t0 + 10.milliseconds
+    let t12 = t0 + 12.milliseconds
+    let t20 = t0 + 20.milliseconds
+    let t30 = t0 + 30.milliseconds
+    let t31 = t0 + 31.milliseconds
+    let t40 = t0 + 40.milliseconds
+
+    proc attempt(count: int, now: Moment): tuple[accepted, rejected: int] =
+      var acc = 0
+      var rej = 0
+      for _ in 0..<count:
+        if bucket.tryConsume(1, now):
+          inc acc
+        else:
+          inc rej
+      (acc, rej)
+
+    # 0ms: 12 attempts -> accept 10, reject 2; budget ends 0; LU=0ms (consume-from-full)
+    var r = attempt(12, t0)
+    check r.accepted == 10
+    check r.rejected == 2
+    var cap = bucket.getAvailableCapacity(t0)
+    check cap.budget == 0
+    check cap.lastUpdate == t0
+
+    # 5ms: 7 attempts -> mint 5 then accept 5, reject 2; budget 0; LU=5ms
+    r = attempt(7, t5)
+    check r.accepted == 5
+    check r.rejected == 2
+    cap = bucket.getAvailableCapacity(t5)
+    check cap.budget == 0
+    check cap.lastUpdate == t5
+
+    # 10ms: 15 attempts -> mint 5 then accept 5, reject 10; budget 0; LU=10ms
+    r = attempt(15, t10)
+    check r.accepted == 5
+    check r.rejected == 10
+    cap = bucket.getAvailableCapacity(t10)
+    check cap.budget == 0
+    check cap.lastUpdate == t10
+
+    # 12ms: 3 attempts -> mint 2 then accept 2, reject 1; budget 0; LU=12ms
+    r = attempt(3, t12)
+    check r.accepted == 2
+    check r.rejected == 1
+    cap = bucket.getAvailableCapacity(t12)
+    check cap.budget == 0
+    check cap.lastUpdate == t12
+
+    # 20ms: 25 attempts -> mint 8 then accept 8, reject 17; budget 0; LU=20ms
+    r = attempt(25, t20)
+    check r.accepted == 8
+    check r.rejected == 17
+    cap = bucket.getAvailableCapacity(t20)
+    check cap.budget == 0
+    check cap.lastUpdate == t20
+
+    # 30ms: 9 attempts -> mint 10 then accept 9, budget ends 1; LU=30ms
+    r = attempt(9, t30)
+    check r.accepted == 9
+    check r.rejected == 0
+    cap = bucket.getAvailableCapacity(t30)
+    check cap.budget == 1
+    check cap.lastUpdate == t30
+
+    # 31ms: 3 attempts -> mint 1 then accept 2, reject 1; budget 0; LU=31ms
+    r = attempt(3, t31)
+    check r.accepted == 2
+    check r.rejected == 1
+    cap = bucket.getAvailableCapacity(t31)
+    check cap.budget == 0
+    check cap.lastUpdate == t31
+
+    # 40ms: 20 attempts -> mint 9 then accept 9, reject 11; budget 0; LU=40ms
+    r = attempt(20, t40)
+    check r.accepted == 9
+    check r.rejected == 11
+    cap = bucket.getAvailableCapacity(t40)
+    check cap.budget == 0
+    check cap.lastUpdate == t40
+
+    # Totals across 0–40ms window
+    let totalAccepted = 10 + 5 + 5 + 2 + 8 + 9 + 2 + 9
+    let totalRejected = 2 + 2 + 10 + 1 + 17 + 0 + 1 + 11
+    check totalAccepted == 50
+    check totalRejected == 44
+
+  test "Continuous high-rate single-token 10/10ms over 40ms (advancing time)":
+    # Variant of the high-rate test where each tryConsume occurs at a timestamp that
+    # advances by ~1ms when possible, simulating a more realistic stream of requests.
+    # We still demand more than can be provided to ensure rejections, and we verify
+    # that across 0–40ms only 50 tokens can be accepted.
+    var bucket = TokenBucket.new(10, 10.milliseconds)
+    let t0 = Moment.now()
+
+    var accepted = 0
+    var rejected = 0
+    # Perform 94 attempts; for the first 41 attempts we increase the timestamp by 1ms
+    # per attempt (up to t0+40ms). After that, we keep attempting at t0+40ms to
+    # simulate concurrent bursts at the end of the window.
+    for i in 0..<94:
+      let ts = t0 + min(i, 40).milliseconds
+      if bucket.tryConsume(1, ts):
+        inc accepted
+      else:
+        inc rejected
+
+    # At most 10 (initial) + 40 (minted over 40ms) can be accepted
+    check accepted == 50
+    check rejected == 44
+
+    let t40 = t0 + 40.milliseconds
+    let cap = bucket.getAvailableCapacity(t40)
+    # All available tokens within the window should have been consumed by our attempts
+    check cap.budget == 0
+
+  # Continuous-mode scenario reproductions and timeline validation
+  test "Continuous Scenario 1 timeline":
+    # Capacity 10, fillDuration 1s, per-token time 100ms
+    var bucket = TokenBucket.new(10, 1.seconds)
+
+    let t0 = Moment.now()
+    let t200 = t0 + 200.milliseconds
+    let t600 = t0 + 600.milliseconds
+    let t650 = t0 + 650.milliseconds
+    let t1000 = t0 + 1000.milliseconds
+    let t1200 = t0 + 1200.milliseconds
+    let t1800 = t0 + 1800.milliseconds
+    let t2100 = t0 + 2100.milliseconds
+    let t2600 = t0 + 2600.milliseconds
+    let t3000 = t0 + 3000.milliseconds
+
+    # 1) t=0ms: consume 7 from full -> LU set to t0, budget 3
+    check bucket.tryConsume(7, t0) == true
+    var cap = bucket.getAvailableCapacity(t0)
+    check cap.budget == 3
+    check cap.lastUpdate == t0
+
+    # 2) t=200ms: request 5 -> mint 2, then consume -> budget 0, LU=200ms
+    check bucket.tryConsume(5, t200) == true
+    cap = bucket.getAvailableCapacity(t200)
+    check cap.budget == 0
+    check cap.lastUpdate == t200
+
+    # 3) t=650ms: request 3 -> mint 4 (to t600), then consume -> budget 1, LU=600ms
+    check bucket.tryConsume(3, t650) == true
+    cap = bucket.getAvailableCapacity(t600)
+    check cap.budget == 1
+    check cap.lastUpdate == t600
+
+    # 4) t=1000ms: availability check only (does not mutate); expected 4 minted -> budget 5, LU=1000ms
+    cap = bucket.getAvailableCapacity(t1000)
+    check cap.budget == 5
+    check cap.lastUpdate == t1000
+
+    # 5) t=1200ms: request 6 -> net minted since LU to here totals 6 -> budget ends 1, LU=1200ms
+    check bucket.tryConsume(6, t1200) == true
+    cap = bucket.getAvailableCapacity(t1200)
+    check cap.budget == 1
+    check cap.lastUpdate == t1200
+
+    # 6) t=1800ms: request 5 -> mint 6 then consume -> budget 2, LU=1800ms
+    check bucket.tryConsume(5, t1800) == true
+    cap = bucket.getAvailableCapacity(t1800)
+    check cap.budget == 2
+    check cap.lastUpdate == t1800
+
+    # 7) t=2100ms: request 10 -> mint 3 to reach 5, still insufficient -> false, LU=2100ms
+    check bucket.tryConsume(10, t2100) == false
+    cap = bucket.getAvailableCapacity(t2100)
+    check cap.budget == 5
+    check cap.lastUpdate == t2100
+
+    # 8) t=2600ms: enough time for 5 more -> reach full and then consume 10 -> budget 0, LU=2600ms
+    check bucket.tryConsume(10, t2600) == true
+    cap = bucket.getAvailableCapacity(t2600)
+    check cap.budget == 0
+    check cap.lastUpdate == t2600
+
+    # 9) t=3000ms: availability check -> mint 4 -> budget 4, LU=3000ms
+    cap = bucket.getAvailableCapacity(t3000)
+    check cap.budget == 4
+    check cap.lastUpdate == t3000
+
+  test "Continuous: idle while full burns time":
+    # Capacity 5, fillDuration 1s, per-token time 200ms
+    var bucket = TokenBucket.new(5, 1.seconds)
+    let t0 = Moment.now()
+    let t2_5 = t0 + 2500.milliseconds
+    # Consume 1 after long idle at full -> LU set to now
+    check bucket.tryConsume(1, t2_5) == true
+    var cap = bucket.getAvailableCapacity(t2_5)
+    check cap.budget == 4
+    check cap.lastUpdate == t2_5
+    # 100ms later is below per-token time -> no mint
+    let t2_6 = t0 + 2600.milliseconds
+    cap = bucket.getAvailableCapacity(t2_6)
+    check cap.budget == 4
+    check cap.lastUpdate == t2_5
+
+  test "Continuous: large jump clamps to capacity and LU=now when capped":
+    # Capacity 8, fillDuration 4s, per-token time 0.5s
+    var bucket = TokenBucket.new(8, 4.seconds)
+    let t0 = Moment.now()
+    # Spend 6 from full so LU = t0, budget = 2
+    check bucket.tryConsume(6, t0) == true
 
-    # var bucket = TokenBucket.new(15000, 1.milliseconds)
-    # let start = Moment.now()
-    # check bucket.tryConsume(15000, start)
-    # check bucket.tryConsume(1, start) == false
+    let t5 = t0 + 5.seconds
+    # Availability check: should reach cap and set LU to t5 (hit-cap path burns leftover time)
+    var cap2 = bucket.getAvailableCapacity(t5)
+    check cap2.budget == 8
+    check cap2.lastUpdate == t5
 
-    # check bucket.tryConsume(15000, start + 1.milliseconds) == true
+    # Consume 3 slightly later; update will also clamp and set LU to now, then consume from full
+    let t5_2 = t0 + 5200.milliseconds
+    check bucket.tryConsume(3, t5_2) == true
+    cap2 = bucket.getAvailableCapacity(t5_2)
+    check cap2.budget == 5
+    check cap2.lastUpdate == t5_2