From a666bbaf546966b92e500df3f9583d711d286305 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?=
 <dr.yuhao.zhu@outlook.com>
Date: Tue, 8 Jul 2025 20:18:06 +0200
Subject: [PATCH 1/7] Avoid uninitialized biguint

---
 src/decimojo/biguint/biguint.mojo | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/decimojo/biguint/biguint.mojo b/src/decimojo/biguint/biguint.mojo
index 6877dbd..9fc17ec 100644
--- a/src/decimojo/biguint/biguint.mojo
+++ b/src/decimojo/biguint/biguint.mojo
@@ -114,7 +114,7 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable):
 
     fn __init__(out self, owned words: List[UInt32]):
         """Initializes a BigUInt from a list of UInt32 words.
-        It does not verify whether the list is empty or the words are invalid.
+        It does not verify whether the words are within the valid range
         See `from_list()` for safer initialization.
 
         Args:
@@ -124,11 +124,13 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable):
 
         Notes:
 
-        This method does not check whether
-        (1) the list is empty.
-        (2) the words are smaller than `999_999_999`.
+        This method does not check whether the words are smaller than
+        `999_999_999`.
         """
-        self.words = words^
+        if len(words) == 0:
+            self.words = List[UInt32](UInt32(0))
+        else:
+            self.words = words^
 
     fn __init__(out self, owned *words: UInt32):
         """Initializes a BigUInt from raw words without validating the words.

From 69bc86d387085b3e0703ee76436845393bd1de23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?=
 <dr.yuhao.zhu@outlook.com>
Date: Tue, 8 Jul 2025 21:02:11 +0200
Subject: [PATCH 2/7] create a function for normalization

---
 pixi.toml                             |  2 +-
 src/decimojo/biguint/arithmetics.mojo | 62 ++++++++++++++++-----------
 2 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/pixi.toml b/pixi.toml
index 3049b67..77c03d2 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -14,7 +14,7 @@ max = "==25.4"
 
 [tasks]
 # format the code
-format = "pixi run mojo format ./"
+format = "pixi run mojo format ./src && pixi run mojo format ./benches && pixi run mojo format ./tests && pixi run mojo format ./docs"
 
 # compile the package
 package_decimojo = "pixi run mojo package src/decimojo && cp decimojo.mojopkg tests/ && cp decimojo.mojopkg benches/ && rm decimojo.mojopkg"
diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo
index be3dbdb..6725cdf 100644
--- a/src/decimojo/biguint/arithmetics.mojo
+++ b/src/decimojo/biguint/arithmetics.mojo
@@ -1305,31 +1305,7 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
     # as large as possible
     # I use table lookup to find the normalization factor
     var normalization_factor: Int  # Number of digits to shift
-    var msw = x2.words[len(x2.words) - 1]
-    if msw < 10_000:
-        if msw < 100:
-            if msw < 10:
-                normalization_factor = 8  # Shift by 8 digits
-            else:  # 10 <= msw < 100
-                normalization_factor = 7  # Shift by 7 digits
-        else:  # 100 <= msw < 10_000
-            if msw < 1_000:  # 100 <= msw < 1_000
-                normalization_factor = 6  # Shift by 6 digits
-            else:  # 1_000 <= msw < 10_000:
-                normalization_factor = 5  # Shift by 5 digits
-    elif msw < 100_000_000:  # 10_000 <= msw < 100_000_000
-        if msw < 1_000_000:
-            if msw < 100_000:  # 10_000 <= msw < 100_000
-                normalization_factor = 4  # Shift by 4 digits
-            else:  # 100_000 <= msw < 1_000_000
-                normalization_factor = 3  # Shift by 3 digits
-        else:  # 1_000_000 <= msw < 100_000_000
-            if msw < 10_000_000:  # 1_000_000 <= msw < 10_000_000
-                normalization_factor = 2  # Shift by 2 digits
-            else:  # 10_000_000 <= msw < 100_000_000
-                normalization_factor = 1  # Shift by 1 digit
-    else:  # 100_000_000 <= msw < 1_000_000_000
-        normalization_factor = 0  # No shift needed
+    normalization_factor = calculate_normalization_factor(x2.words[-1])
 
     if normalization_factor == 0:
         # No normalization needed, just use the general division algorithm
@@ -1921,3 +1897,39 @@ fn power_of_10(n: Int) raises -> BigUInt:
         result.words.append(1)
 
     return result^
+
+
+fn calculate_normalization_factor(msw: UInt32) -> Int:
+    """Calculates the normalization factor based on the most significant word.
+    The normalized word should be as close to BASE as possible.
+
+    Notes:
+
+    This is a helper function for division algorithms.
+    """
+    if msw < 10_000:
+        if msw < 100:
+            if msw < 10:
+                normalization_factor = 8  # Shift by 8 digits
+            else:  # 10 <= msw < 100
+                normalization_factor = 7  # Shift by 7 digits
+        else:  # 100 <= msw < 10_000
+            if msw < 1_000:  # 100 <= msw < 1_000
+                normalization_factor = 6  # Shift by 6 digits
+            else:  # 1_000 <= msw < 10_000:
+                normalization_factor = 5  # Shift by 5 digits
+    elif msw < 100_000_000:  # 10_000 <= msw < 100_000_000
+        if msw < 1_000_000:
+            if msw < 100_000:  # 10_000 <= msw < 100_000
+                normalization_factor = 4  # Shift by 4 digits
+            else:  # 100_000 <= msw < 1_000_000
+                normalization_factor = 3  # Shift by 3 digits
+        else:  # 1_000_000 <= msw < 100_000_000
+            if msw < 10_000_000:  # 1_000_000 <= msw < 10_000_000
+                normalization_factor = 2  # Shift by 2 digits
+            else:  # 10_000_000 <= msw < 100_000_000
+                normalization_factor = 1  # Shift by 1 digit
+    else:  # 100_000_000 <= msw < 1_000_000_000
+        normalization_factor = 0  # No shift needed
+
+    return normalization_factor

From fb77e356e9d75091e46dd14ba98f774cd363ca19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?=
 <dr.yuhao.zhu@outlook.com>
Date: Tue, 8 Jul 2025 20:17:43 +0200
Subject: [PATCH 3/7] Enable 2n / n

---
 division.mojo | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 division.mojo

diff --git a/division.mojo b/division.mojo
new file mode 100644
index 0000000..929857f
--- /dev/null
+++ b/division.mojo
@@ -0,0 +1,182 @@
+from decimojo.prelude import *
+import time
+
+
+fn divide_three_words_by_two(
+    a2: UInt32, a1: UInt32, a0: UInt32, b1: UInt32, b0: UInt32
+) raises -> Tuple[UInt32, UInt32, UInt32]:
+    """Divides a 3-word number by a 2-word number.
+    b1 must be at least 500_000_000.
+
+    Args:
+        a2: The most significant word of the dividend.
+        a1: The middle word of the dividend.
+        a0: The least significant word of the dividend.
+        b1: The most significant word of the divisor.
+        b0: The least significant word of the divisor.
+
+    Returns:
+        A tuple containing
+        (1) the quotient (as UInt32)
+        (2) the most significant word of the remainder (as UInt32)
+        (3) the least significant word of the remainder (as UInt32).
+
+    Notes:
+        a = a2 * BASE^2 + a1 * BASE + a0.
+        b = b1 * BASE + b0.
+    """
+    if b1 < 500_000_000:
+        raise Error("b1 must be at least 500_000_000")
+
+    var a2a1 = UInt64(a2) * 1_000_000_000 + UInt64(a1)
+
+    var q: UInt64 = UInt64(a2a1) // UInt64(b1)
+    var c = a2a1 - q * UInt64(b1)
+    var d: UInt64 = q * UInt64(b0)
+    var r = UInt64(c * 1_000_000_000) + UInt64(a0)
+
+    if r < UInt64(d):
+        var b = UInt64(b1) * 1_000_000_000 + UInt64(b0)
+        q -= 1
+        r += b
+        if r < UInt64(d):
+            q -= 1
+            r += b
+
+    r -= d
+    var r1: UInt32 = UInt32(r // 1_000_000_000)
+    var r0: UInt32 = UInt32(r % 1_000_000_000)
+
+    return (UInt32(q), r1, r0)
+
+
+fn divide_four_words_by_two(
+    a3: UInt32,
+    a2: UInt32,
+    a1: UInt32,
+    a0: UInt32,
+    b1: UInt32,
+    b0: UInt32,
+) raises -> Tuple[UInt32, UInt32, UInt32, UInt32]:
+    """Divides a 4-word number by a 2-word number.
+
+    Args:
+        a3: The most significant word of the dividend.
+        a2: The second most significant word of the dividend.
+        a1: The second least significant word of the dividend.
+        a0: The least significant word of the dividend.
+        b1: The most significant word of the divisor.
+        b0: The least significant word of the divisor.
+
+    Returns:
+        A tuple containing
+        (1) the most significant word of the quotient (as UInt32)
+        (2) the least significant word of the quotient (as UInt32)
+        (3) the most significant word of the remainder (as UInt32)
+        (4) the least significant word of the remainder (as UInt32).
+    """
+
+    if b1 < 500_000_000:
+        raise Error("b1 must be at least 500_000_000")
+    if a3 > b1:
+        raise Error("a must be less than b * 10^18")
+    elif a3 == b1:
+        if a2 > b0:
+            raise Error("a must be less than b * 10^18")
+        elif a2 == b0:
+            if a1 > 0:
+                raise Error("a must be less than b * 10^18")
+            elif a1 == 0:
+                if a0 >= 0:
+                    raise Error("a must be less than b * 10^18")
+
+    var q1, r1, r0 = divide_three_words_by_two(a3, a2, a1, b1, b0)
+    var q0, s1, s0 = divide_three_words_by_two(r1, r0, a0, b1, b0)
+    return (q1, q0, s1, s0)
+
+
+fn divide_three_by_two(
+    a2: BigUInt, a1: BigUInt, a0: BigUInt, b1: BigUInt, b0: BigUInt, n: Int
+) raises -> Tuple[BigUInt, BigUInt]:
+    if b1.words[-1] < 500_000_000:
+        raise Error("b[-1] must be at least 500_000_000")
+
+    var a2a1: BigUInt
+    if a2.is_zero():
+        a2a1 = a1
+    else:
+        a2a1 = a2
+        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+            a2a1, n
+        )
+        a2a1 += a1
+    var q, c = divide_recursive(a2a1, b1, n)
+    var d = q * b0
+    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n)
+    var r = c + a0
+
+    if r < d:
+        var b = b1
+        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(b, n)
+        b += b0
+        q -= BigUInt.ONE
+        r += b
+        if r < d:
+            q -= BigUInt.ONE
+            r += b
+
+    r -= d
+    return (q, r)
+
+
+fn divide_recursive(
+    a: BigUInt, b: BigUInt, n: Int
+) raises -> Tuple[BigUInt, BigUInt]:
+    if (n & 1) != 0 or n <= 2:
+        return (a // b, a % b)
+
+    else:
+        a0 = BigUInt(a.words[0 : n // 2])
+        a1 = BigUInt(a.words[n // 2 : n])
+        a2 = BigUInt(a.words[n : n + n // 2])
+        a3 = BigUInt(a.words[n + n // 2 : n + n])
+
+        b0 = BigUInt(b.words[0 : n // 2])
+        b1 = BigUInt(b.words[n // 2 : n])
+
+        print("b:", b1, b0)
+        q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2)
+        r0 = BigUInt(r.words[0 : n // 2])
+        r1 = BigUInt(r.words[n // 2 : n])
+        q0, s = divide_three_by_two(r1, r0, a0, b1, b0, n // 2)
+
+        q = q1
+        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+            q, n // 2
+        )
+        q += q0
+
+    return (q, s)
+
+
+fn main() raises:
+    n = 2**4
+
+    var a = BUInt("123_456_789" * 2 * n)
+    var b = BUInt("987654321" + "000000000" * (n - 1))
+
+    var result1 = List[BigUInt]()
+    t0 = time.perf_counter_ns()
+    for _ in range(1):
+        q = divide_recursive(a, b, n)
+        result1.append(q[0])
+    print("time taken: ", time.perf_counter_ns() - t0, " ns")
+
+    var result2 = List[BigUInt]()
+    t0 = time.perf_counter_ns()
+    for _ in range(1):
+        p = a // b
+        result2.append(p)
+    print("time taken: ", time.perf_counter_ns() - t0, " ns")
+
+    print("result1 == result2: ", result1[0] == result2[0])

From 69bbfbf3ee5e2d9e985548a9d0b3facf353335b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?=
 <dr.yuhao.zhu@outlook.com>
Date: Tue, 8 Jul 2025 22:53:07 +0200
Subject: [PATCH 4/7] Finish a preliminary implementation

---
 division.mojo | 141 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 128 insertions(+), 13 deletions(-)

diff --git a/division.mojo b/division.mojo
index 929857f..cf8f866 100644
--- a/division.mojo
+++ b/division.mojo
@@ -1,5 +1,6 @@
 from decimojo.prelude import *
 import time
+import math
 
 
 fn divide_three_words_by_two(
@@ -98,9 +99,6 @@ fn divide_four_words_by_two(
 fn divide_three_by_two(
     a2: BigUInt, a1: BigUInt, a0: BigUInt, b1: BigUInt, b0: BigUInt, n: Int
 ) raises -> Tuple[BigUInt, BigUInt]:
-    if b1.words[-1] < 500_000_000:
-        raise Error("b[-1] must be at least 500_000_000")
-
     var a2a1: BigUInt
     if a2.is_zero():
         a2a1 = a1
@@ -110,7 +108,7 @@ fn divide_three_by_two(
             a2a1, n
         )
         a2a1 += a1
-    var q, c = divide_recursive(a2a1, b1, n)
+    var q, c = divide_two_by_one(a2a1, b1, n)
     var d = q * b0
     decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n)
     var r = c + a0
@@ -129,12 +127,30 @@ fn divide_three_by_two(
     return (q, r)
 
 
-fn divide_recursive(
+fn divide_two_by_one(
     a: BigUInt, b: BigUInt, n: Int
 ) raises -> Tuple[BigUInt, BigUInt]:
-    if (n & 1) != 0 or n <= 2:
+    """Divides a BigUInt by another BigUInt using a recursive approach.
+    The divisor has n words and the dividend has 2n words.
+
+    Args:
+        a: The dividend as a BigUInt.
+        b: The divisor as a BigUInt.
+        n: The number of words in the divisor.
+    """
+    if (n & 1) != 0 or n <= 2:  # n can be 16 or 32
         return (a // b, a % b)
 
+    if b.words[-1] < 500_000_000:
+        raise Error("b[-1] must be at least 500_000_000")
+
+    # b_modified = b
+    # decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+    #     b_modified, n
+    # )
+    # if a.compare(b_modified) >= 0:
+    #     raise Error("a must be less than b * 10^18")
+
     else:
         a0 = BigUInt(a.words[0 : n // 2])
         a1 = BigUInt(a.words[n // 2 : n])
@@ -144,7 +160,6 @@ fn divide_recursive(
         b0 = BigUInt(b.words[0 : n // 2])
         b1 = BigUInt(b.words[n // 2 : n])
 
-        print("b:", b1, b0)
         q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2)
         r0 = BigUInt(r.words[0 : n // 2])
         r1 = BigUInt(r.words[n // 2 : n])
@@ -159,17 +174,115 @@ fn divide_recursive(
     return (q, s)
 
 
-fn main() raises:
-    n = 2**4
+fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt:
+    """Divides BigUInt using the Burnikel-Ziegler algorithm."""
+
+    # Yuhao Zhu:
+    # This implementation is based on the research report
+    # "Fast Recursive Division" by Christoph Burnikel and Joachim Ziegler.
+    # MPI-I-98-1-022, October 1998.
+
+    alias BLOCK_SIZE_OF_WORDS = 2
 
-    var a = BUInt("123_456_789" * 2 * n)
-    var b = BUInt("987654321" + "000000000" * (n - 1))
+    # STEP 1:
+    # Normalize the divisor b to n words so that
+    # (1) it is of the form j*2^k and
+    # (2) the most significant word is at least 500_000_000.
+
+    var normalized_b = b
+    var normalized_a = a
+    var normalization_factor: Int
+
+    if normalized_b.words[-1] == 0:
+        normalized_b.remove_leading_empty_words()
+    if normalized_b.words[-1] < 500_000_000:
+        normalization_factor = (
+            decimojo.biguint.arithmetics.calculate_normalization_factor(
+                normalized_b.words[-1]
+            )
+        )
+    else:
+        normalization_factor = 0
+
+    # The targeted number of blocks should be the smallest 2^k such that
+    # 2^k >= number of words in normalized_b ceil divided by BLOCK_SIZE_OF_WORDS.
+    # k is the depth of the recursion.
+    # n is the final number of words in the normalized b.
+    var n_blocks_divisor = math.ceildiv(
+        len(normalized_b.words), BLOCK_SIZE_OF_WORDS
+    )
+    var depth = Int(math.ceil(math.log2(Float64(n_blocks_divisor))))
+    n_blocks_divisor = 2**depth
+    var n = n_blocks_divisor * BLOCK_SIZE_OF_WORDS
+
+    var n_digits_to_scale_up = (
+        n - len(normalized_b.words)
+    ) * 9 + normalization_factor
+
+    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten(
+        normalized_b, n_digits_to_scale_up
+    )
+    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten(
+        normalized_a, n_digits_to_scale_up
+    )
+
+    # The normalized_b is now 9 digits, but may still be smaller than 500_000_000.
+    var gap_ratio = BUInt.BASE // normalized_b.words[-1]
+    if gap_ratio > 2:
+        decimojo.biguint.arithmetics.multiply_inplace_by_uint32(
+            normalized_b, gap_ratio
+        )
+        decimojo.biguint.arithmetics.multiply_inplace_by_uint32(
+            normalized_a, gap_ratio
+        )
+
+    # STEP 2: Split the normalized a into blocks of size n.
+    # t is the number of blocks in the dividend.
+    var t = math.ceildiv(len(normalized_a.words), n)
+    if len(a.words) == t * n:
+        # If the number of words in a is already a multiple of n
+        # We check if the most significant word is >= 500_000_000.
+        # If it is, we need to add one more block to the dividend.
+        # This ensures that the most significant word of the dividend
+        # is smaller than 500_000_000.
+        if normalized_a.words[-1] >= 500_000_000:
+            t += 1
+
+    var z = BigUInt(normalized_a.words[(t - 2) * n : t * n])
+    var q = BigUInt()
+    for i in range(t - 2, -1, -1):
+        var q_i, r = divide_two_by_one(z, normalized_b, n)
+        # print(z, "//", normalized_b, "=", q_i, "mod", r)
+        if i == t - 2:
+            q = q_i
+        else:
+            decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+                q, n
+            )
+            q += q_i
+        if i > 0:
+            decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+                r, n
+            )
+            z = r + BigUInt(normalized_a.words[(i - 1) * n : i * n])
+
+    return q
+
+
+fn main() raises:
+    n = 2**14
+    var a = BUInt(String("987_654_321" * 3 * n))
+    var b = BUInt(String("3_141_592" * n))
+    # var a = BigUInt(
+    #     "123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789000000"
+    # )
+    # var b = BigUInt("678678678678000000")
 
     var result1 = List[BigUInt]()
     t0 = time.perf_counter_ns()
     for _ in range(1):
-        q = divide_recursive(a, b, n)
-        result1.append(q[0])
+        q = divide_burnikel_ziegler(a, b)
+        result1.append(q)
     print("time taken: ", time.perf_counter_ns() - t0, " ns")
 
     var result2 = List[BigUInt]()
@@ -179,4 +292,6 @@ fn main() raises:
         result2.append(p)
     print("time taken: ", time.perf_counter_ns() - t0, " ns")
 
+    # print("result1: ", result1[0])
+    # print("result2: ", result2[0])
     print("result1 == result2: ", result1[0] == result2[0])

From 2fbcb01ed4ca52f3e3c07fe685c590f41f6bcfe9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?=
 <dr.yuhao.zhu@outlook.com>
Date: Tue, 8 Jul 2025 23:16:08 +0200
Subject: [PATCH 5/7] Add benches

---
 .../bench_biguint_truncate_divide_bz.mojo     | 555 ++++++++++++++++++
 .../decimojo/biguint/division.mojo            |  84 +--
 2 files changed, 604 insertions(+), 35 deletions(-)
 create mode 100644 benches/biguint/bench_biguint_truncate_divide_bz.mojo
 rename division.mojo => src/decimojo/biguint/division.mojo (81%)

diff --git a/benches/biguint/bench_biguint_truncate_divide_bz.mojo b/benches/biguint/bench_biguint_truncate_divide_bz.mojo
new file mode 100644
index 0000000..712b85b
--- /dev/null
+++ b/benches/biguint/bench_biguint_truncate_divide_bz.mojo
@@ -0,0 +1,555 @@
+"""
+Comprehensive benchmarks for BigUInt truncate_divide operation.
+Compares performance against Python's built-in int division with 20 diverse test cases.
+BigUInt is an unsigned integer type, so all test cases use positive numbers only.
+"""
+
+from decimojo.biguint.biguint import BigUInt
+import decimojo.biguint.arithmetics
+from python import Python, PythonObject
+from time import perf_counter_ns
+import time
+import os
+from collections import List
+
+
+fn open_log_file() raises -> PythonObject:
+    """
+    Creates and opens a log file with a timestamp in the filename.
+
+    Returns:
+        A file object opened for writing.
+    """
+    var python = Python.import_module("builtins")
+    var datetime = Python.import_module("datetime")
+    var pysys = Python.import_module("sys")
+    pysys.set_int_max_str_digits(1000000)
+
+    # Create logs directory if it doesn't exist
+    var log_dir = "./logs"
+    if not os.path.exists(log_dir):
+        os.makedirs(log_dir)
+
+    # Generate a timestamp for the filename
+    var timestamp = String(datetime.datetime.now().isoformat())
+    var log_filename = (
+        log_dir + "/benchmark_biguint_truncate_divide_" + timestamp + ".log"
+    )
+
+    print("Saving benchmark results to:", log_filename)
+    return python.open(log_filename, "w")
+
+
+fn log_print(msg: String, log_file: PythonObject) raises:
+    """
+    Prints a message to both the console and the log file.
+
+    Args:
+        msg: The message to print.
+        log_file: The file object to write to.
+    """
+    print(msg)
+    log_file.write(msg + "\n")
+    log_file.flush()  # Ensure the message is written immediately
+
+
+fn run_benchmark_truncate_divide(
+    name: String,
+    dividend: String,
+    divisor: String,
+    iterations: Int,
+    log_file: PythonObject,
+    mut speedup_factors: List[Float64],
+) raises:
+    """
+    Run a benchmark comparing Mojo BigUInt truncate_divide with Python int division.
+
+    Args:
+        name: Name of the benchmark case.
+        dividend: String representation of the dividend.
+        divisor: String representation of the divisor.
+        iterations: Number of iterations to run.
+        log_file: File object for logging results.
+        speedup_factors: Mojo List to store speedup factors for averaging.
+    """
+    log_print("\nBenchmark:       " + name, log_file)
+    log_print("Dividend:        " + dividend, log_file)
+    log_print("Divisor:         " + divisor, log_file)
+
+    # Set up Mojo and Python values
+    var mojo_dividend = BigUInt(dividend)
+    var mojo_divisor = BigUInt(divisor)
+    var py = Python.import_module("builtins")
+    var py_dividend = py.int(dividend)
+    var py_divisor = py.int(divisor)
+
+    # Execute the operations once to verify correctness
+    try:
+        var mojo_result = decimojo.biguint.division.divide_burnikel_ziegler(
+            mojo_dividend, mojo_divisor
+        )
+        var py_result = py_dividend // py_divisor
+
+        # Display results for verification
+        log_print("Mojo result:     " + String(mojo_result), log_file)
+        log_print("Python result:   " + String(py_result), log_file)
+
+        # Benchmark Mojo implementation
+        var t0 = perf_counter_ns()
+        for _ in range(iterations):
+            _ = decimojo.biguint.division.divide_burnikel_ziegler(
+                mojo_dividend, mojo_divisor
+            )
+        var mojo_time = (perf_counter_ns() - t0) / iterations
+        if mojo_time == 0:
+            mojo_time = 1  # Prevent division by zero
+
+        # Benchmark Python implementation
+        t0 = perf_counter_ns()
+        for _ in range(iterations):
+            _ = py_dividend // py_divisor
+        var python_time = (perf_counter_ns() - t0) / iterations
+
+        # Calculate speedup factor
+        var speedup = python_time / mojo_time
+        speedup_factors.append(Float64(speedup))
+
+        # Print results with speedup comparison
+        log_print(
+            "Mojo division:   " + String(mojo_time) + " ns per iteration",
+            log_file,
+        )
+        log_print(
+            "Python division: " + String(python_time) + " ns per iteration",
+            log_file,
+        )
+        log_print("Speedup factor:  " + String(speedup), log_file)
+    except e:
+        log_print("Error occurred during benchmark: " + String(e), log_file)
+        log_print("Skipping this benchmark case", log_file)
+
+
+fn main() raises:
+    # Open log file
+    var log_file = open_log_file()
+    var datetime = Python.import_module("datetime")
+
+    # Create a Mojo List to store speedup factors for averaging later
+    var speedup_factors = List[Float64]()
+
+    # Display benchmark header with system information
+    log_print("=== DeciMojo BigUInt Truncate Division Benchmark ===", log_file)
+    log_print("Time: " + String(datetime.datetime.now().isoformat()), log_file)
+
+    # Try to get system info
+    try:
+        var platform = Python.import_module("platform")
+        log_print(
+            "System: "
+            + String(platform.system())
+            + " "
+            + String(platform.release()),
+            log_file,
+        )
+        log_print("Processor: " + String(platform.processor()), log_file)
+        log_print(
+            "Python version: " + String(platform.python_version()), log_file
+        )
+    except:
+        log_print("Could not retrieve system information", log_file)
+
+    var iterations = 100
+
+    # Define benchmark cases (all positive numbers for BigUInt)
+    log_print(
+        "\nRunning truncate division benchmarks with "
+        + String(iterations)
+        + " iterations each",
+        log_file,
+    )
+
+    # Case 1: Simple division with no remainder
+    run_benchmark_truncate_divide(
+        "Simple division, no remainder",
+        "100",
+        "10",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 2: Division with remainder
+    run_benchmark_truncate_divide(
+        "Division with remainder",
+        "10",
+        "3",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 3: Division of small numbers
+    run_benchmark_truncate_divide(
+        "Division of small numbers",
+        "7",
+        "2",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 4: Division resulting in zero
+    run_benchmark_truncate_divide(
+        "Division resulting in zero",
+        "5",
+        "10",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 5: Division by one
+    run_benchmark_truncate_divide(
+        "Division by one",
+        "12345",
+        "1",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 6: Zero dividend
+    run_benchmark_truncate_divide(
+        "Zero dividend",
+        "0",
+        "5",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 7: Large number division
+    run_benchmark_truncate_divide(
+        "Large number division",
+        "9999999999",  # 10 billion
+        "333",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 8: Very large number division
+    run_benchmark_truncate_divide(
+        "Very large number division",
+        "1" + "0" * 50,  # 10^50
+        "7",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 9: Division of large numbers with exact result
+    run_benchmark_truncate_divide(
+        "Division of large numbers with exact result",
+        "1" + "0" * 30,  # 10^30
+        "1" + "0" * 10,  # 10^10
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 10: Division by large number
+    run_benchmark_truncate_divide(
+        "Division by large number",
+        "12345",
+        "9" * 20,  # 20 nines
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 11: Fibonacci number division
+    run_benchmark_truncate_divide(
+        "Fibonacci number division",
+        "6765",  # Fib(20)
+        "4181",  # Fib(19)
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 12: Prime number division
+    run_benchmark_truncate_divide(
+        "Prime number division",
+        "2147483647",  # Mersenne prime (2^31 - 1)
+        "997",  # Prime
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 13: Division of numbers near UInt64 limit
+    run_benchmark_truncate_divide(
+        "Division near UInt64 limit",
+        "18446744073709551615",  # UInt64.MAX
+        "2",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 14: Division with divisor just below dividend
+    run_benchmark_truncate_divide(
+        "Division with divisor just below dividend",
+        "1000",
+        "999",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 15: Division with exact powers of 10
+    run_benchmark_truncate_divide(
+        "Division with exact powers of 10",
+        "1" + "0" * 20,  # 10^20
+        "1" + "0" * 5,  # 10^5
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 16: Division of repeated digits
+    run_benchmark_truncate_divide(
+        "Division of repeated digits",
+        "9" * 30,  # 30 nines
+        "9" * 15,  # 15 nines
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 17: Division with extremely large dividend and small divisor
+    run_benchmark_truncate_divide(
+        "Extreme large dividend and small divisor",
+        "9" * 100,  # 100 nines
+        "3",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 18: Division with powers of 2
+    run_benchmark_truncate_divide(
+        "Division with powers of 2",
+        "1" + "0" * 50,  # 10^50
+        "256",  # 2^8
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 19: Division yielding a single-digit result
+    run_benchmark_truncate_divide(
+        "Division yielding a single-digit result",
+        "123456789",
+        "123456780",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 20: Division with random large numbers
+    run_benchmark_truncate_divide(
+        "Division with random large numbers",
+        "8675309123456789098765432112345678909876543211234567",
+        "12345678901234567890",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 21: Division with around 50 digits and with divisor just below dividend
+    run_benchmark_truncate_divide(
+        "Division with around 50 digits divisor just below dividend",
+        "12345" * 10,
+        "6789" * 12,
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 22: Division of very large repeated digits
+    run_benchmark_truncate_divide(
+        "Division of repeated digits",
+        "990132857498314692374162398217" * 10,  # 30 * 10 = 300 digits
+        "85172390413429847239" * 10,  # 20 * 10 = 200 digits
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 23: Division of large numbers
+    run_benchmark_truncate_divide(
+        "Division of large numbers (270 digits vs 135 digits)",
+        "123456789" * 30,
+        "987654321" * 15,
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 24: Division of very large numbers
+    run_benchmark_truncate_divide(
+        "Division of very large numbers (2250 digits vs 900 digits)",
+        "123456789" * 250,
+        "987654321" * 100,
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 25: Division of very, very large numbers
+    # x1 is more than 400 words long (>= 10^3600)
+    # x2 is more than 200 words long (>= 10^1800)
+    run_benchmark_truncate_divide(
+        "Division of very, very large numbers (3600 digits vs 1800 digits)",
+        "123456789" * 400,
+        "987654321" * 200,
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 26: Division of large numbers
+    run_benchmark_truncate_divide(
+        "Division of very large numbers (256 digits vs 128 digits)",
+        "1234567890193287491287508917213097405916874098123705160923812345678901932874912875089172130974059168740981237051609238749875089170984701759832708497029875019837409871085709813749870897510749875089170984701759832708497029875019837409871085709813749870897510",
+        "68740981237051609238123456789019328749128750891721309740591687409812370516092387498750879548759387959978279541709847017598327084",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 27: Division of numbers with around 200 and 100 digits
+    run_benchmark_truncate_divide(
+        "Division of large numbers (200 digits vs 100 digits)",
+        (
+            "314159265358979323846264338327950288419716939937510"
+            "582097494459230781640628620899862803482534211706798214808651"
+            "328230664709384460955058223172535940812848111745028410270193"
+            "852110555964462294895493038196"
+        ),
+        (
+            "271828182845904523536028747135266249775724709369995"
+            "95749669676277240766303535475945713821785251664274"
+        ),
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 28: Division of numbers with around 200 and 100 digits
+    run_benchmark_truncate_divide(
+        "Division of large numbers (200 digits vs 100 digits)",
+        (
+            "314159265358979323846264338327950288419716939937510"
+            "582097494459230781640628620899862803482534211706798214808651"
+            "328230664709384460955058223172535940812848111745028410270193"
+            "852110555964462294895493038196"
+        ),
+        (
+            "141421356237309504880168872420969807856967187537694"
+            "80731766797379907324784621070388503875343276400719"
+        ),
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 29: Division of numbers with around 150 and 50 digits
+    run_benchmark_truncate_divide(
+        "Division of large numbers (150 digits vs 50 digits)",
+        (
+            "314159265358979323846264338327950288419716939937510"
+            "582097494459230781640628620899862803482534211706798214808651"
+            "3282306647093844609550582231725359408128"
+        ),
+        "141421356237309504880168872420969807856967187537694",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 30: Division of numbers with around 150 and 50 digits
+    run_benchmark_truncate_divide(
+        "Division of large numbers (150 digits vs 50 digits)",
+        (
+            "316227766016824890583648059893174009579947593530458"
+            "382628078540989121552735792899961040720792717368862335475063"
+            "5167610057579407944886251958020310186466"
+        ),
+        "141421356237309504880168872420969807856967187537694",
+        iterations,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 31: Division of large numbers
+    run_benchmark_truncate_divide(
+        "Division of large numbers (1000 words vs 100 digits)",
+        "316227766_016824890_583648059_893174009_579947593" * 1000,
+        "141421356_237309504_880168872_420969807_856967187" * 100,
+        3,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 32: Division of large numbers
+    run_benchmark_truncate_divide(
+        "Division of large numbers (10000 words vs 1234 digits)",
+        "316227766_016824890_583648059_893174009_579947593" * 10000,
+        "141421356_237309504_880168872_420969807_856967187" * 1234,
+        3,
+        log_file,
+        speedup_factors,
+    )
+
+    # Calculate average speedup factor (ignoring any cases that might have failed)
+    if len(speedup_factors) > 0:
+        var sum_speedup: Float64 = 0.0
+        for i in range(len(speedup_factors)):
+            sum_speedup += speedup_factors[i]
+        var average_speedup = sum_speedup / Float64(len(speedup_factors))
+
+        # Display summary
+        log_print(
+            "\n=== BigUInt Truncate Division Benchmark Summary ===", log_file
+        )
+        log_print(
+            "Benchmarked:      "
+            + String(len(speedup_factors))
+            + " different division cases",
+            log_file,
+        )
+        log_print(
+            "Each case ran:    " + String(iterations) + " iterations", log_file
+        )
+        log_print(
+            "Average speedup:  " + String(average_speedup) + "×", log_file
+        )
+
+        # List all speedup factors
+        log_print("\nIndividual speedup factors:", log_file)
+        for i in range(len(speedup_factors)):
+            log_print(
+                String("Case {}: {}×").format(
+                    i + 1, round(speedup_factors[i], 2)
+                ),
+                log_file,
+            )
+    else:
+        log_print("\nNo valid benchmark cases were completed", log_file)
+
+    # Close the log file
+    log_file.close()
+    print("Benchmark completed. Log file closed.")
diff --git a/division.mojo b/src/decimojo/biguint/division.mojo
similarity index 81%
rename from division.mojo
rename to src/decimojo/biguint/division.mojo
index cf8f866..31f9f5e 100644
--- a/division.mojo
+++ b/src/decimojo/biguint/division.mojo
@@ -97,7 +97,13 @@ fn divide_four_words_by_two(
 
 
 fn divide_three_by_two(
-    a2: BigUInt, a1: BigUInt, a0: BigUInt, b1: BigUInt, b0: BigUInt, n: Int
+    a2: BigUInt,
+    a1: BigUInt,
+    a0: BigUInt,
+    b1: BigUInt,
+    b0: BigUInt,
+    n: Int,
+    cut_off: Int,
 ) raises -> Tuple[BigUInt, BigUInt]:
     var a2a1: BigUInt
     if a2.is_zero():
@@ -108,7 +114,7 @@ fn divide_three_by_two(
             a2a1, n
         )
         a2a1 += a1
-    var q, c = divide_two_by_one(a2a1, b1, n)
+    var q, c = divide_two_by_one(a2a1, b1, n, cut_off)
     var d = q * b0
     decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n)
     var r = c + a0
@@ -128,7 +134,7 @@ fn divide_three_by_two(
 
 
 fn divide_two_by_one(
-    a: BigUInt, b: BigUInt, n: Int
+    a: BigUInt, b: BigUInt, n: Int, cut_off: Int
 ) raises -> Tuple[BigUInt, BigUInt]:
     """Divides a BigUInt by another BigUInt using a recursive approach.
     The divisor has n words and the dividend has 2n words.
@@ -137,8 +143,9 @@ fn divide_two_by_one(
         a: The dividend as a BigUInt.
         b: The divisor as a BigUInt.
         n: The number of words in the divisor.
+        cut_off: The minimum number of words for the recursive division.
     """
-    if (n & 1) != 0 or n <= 2:  # n can be 16 or 32
+    if (n & 1) != 0 or n <= cut_off:
         return (a // b, a % b)
 
     if b.words[-1] < 500_000_000:
@@ -160,10 +167,10 @@ fn divide_two_by_one(
         b0 = BigUInt(b.words[0 : n // 2])
         b1 = BigUInt(b.words[n // 2 : n])
 
-        q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2)
+        q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2, cut_off)
         r0 = BigUInt(r.words[0 : n // 2])
         r1 = BigUInt(r.words[n // 2 : n])
-        q0, s = divide_three_by_two(r1, r0, a0, b1, b0, n // 2)
+        q0, s = divide_three_by_two(r1, r0, a0, b1, b0, n // 2, cut_off)
 
         q = q1
         decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
@@ -174,7 +181,9 @@ fn divide_two_by_one(
     return (q, s)
 
 
-fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt:
+fn divide_burnikel_ziegler(
+    a: BigUInt, b: BigUInt, cut_off: Int = 16
+) raises -> BigUInt:
     """Divides BigUInt using the Burnikel-Ziegler algorithm."""
 
     # Yuhao Zhu:
@@ -182,7 +191,12 @@ fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt:
     # "Fast Recursive Division" by Christoph Burnikel and Joachim Ziegler.
     # MPI-I-98-1-022, October 1998.
 
-    alias BLOCK_SIZE_OF_WORDS = 2
+    var BLOCK_SIZE_OF_WORDS = cut_off
+
+    if (len(a.words) <= cut_off) or (len(b.words) <= cut_off):
+        # If the number of words in a or b is less than or equal to 64,
+        # we can use the schoolbook division algorithm.
+        return a // b
 
     # STEP 1:
     # Normalize the divisor b to n words so that
@@ -251,7 +265,7 @@ fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt:
     var z = BigUInt(normalized_a.words[(t - 2) * n : t * n])
     var q = BigUInt()
     for i in range(t - 2, -1, -1):
-        var q_i, r = divide_two_by_one(z, normalized_b, n)
+        var q_i, r = divide_two_by_one(z, normalized_b, n, cut_off)
         # print(z, "//", normalized_b, "=", q_i, "mod", r)
         if i == t - 2:
             q = q_i
@@ -269,29 +283,29 @@ fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt:
     return q
 
 
-fn main() raises:
-    n = 2**14
-    var a = BUInt(String("987_654_321" * 3 * n))
-    var b = BUInt(String("3_141_592" * n))
-    # var a = BigUInt(
-    #     "123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789000000"
-    # )
-    # var b = BigUInt("678678678678000000")
-
-    var result1 = List[BigUInt]()
-    t0 = time.perf_counter_ns()
-    for _ in range(1):
-        q = divide_burnikel_ziegler(a, b)
-        result1.append(q)
-    print("time taken: ", time.perf_counter_ns() - t0, " ns")
-
-    var result2 = List[BigUInt]()
-    t0 = time.perf_counter_ns()
-    for _ in range(1):
-        p = a // b
-        result2.append(p)
-    print("time taken: ", time.perf_counter_ns() - t0, " ns")
-
-    # print("result1: ", result1[0])
-    # print("result2: ", result2[0])
-    print("result1 == result2: ", result1[0] == result2[0])
+# fn main() raises:
+#     n = 2**14
+#     var a = BUInt(String("987_654_321" * 3 * n))
+#     var b = BUInt(String("3_141_592" * n))
+#     # var a = BigUInt(
+#     #     "123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789000000"
+#     # )
+#     # var b = BigUInt("678678678678000000")
+
+#     var result1 = List[BigUInt]()
+#     t0 = time.perf_counter_ns()
+#     for _ in range(1):
+#         q = divide_burnikel_ziegler(a, b)
+#         result1.append(q)
+#     print("time taken: ", time.perf_counter_ns() - t0, " ns")
+
+#     var result2 = List[BigUInt]()
+#     t0 = time.perf_counter_ns()
+#     for _ in range(1):
+#         p = a // b
+#         result2.append(p)
+#     print("time taken: ", time.perf_counter_ns() - t0, " ns")
+
+#     # print("result1: ", result1[0])
+#     # print("result2: ", result2[0])
+#     print("result1 == result2: ", result1[0] == result2[0])

From a1d5885b84aef4f2ad8cc52c0a8dc96d05739d85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?=
 <dr.yuhao.zhu@outlook.com>
Date: Wed, 9 Jul 2025 11:05:23 +0200
Subject: [PATCH 6/7] Integrate functions of BZ algorithm into arithmetics
 module

---
 .../bench_biguint_divide_complexity.mojo      |  12 +-
 .../bench_biguint_truncate_divide_bz.mojo     |   8 +-
 src/decimojo/biguint/arithmetics.mojo         | 394 ++++++++++++++++--
 src/decimojo/biguint/division.mojo            | 311 --------------
 4 files changed, 382 insertions(+), 343 deletions(-)
 delete mode 100644 src/decimojo/biguint/division.mojo

diff --git a/benches/biguint/bench_biguint_divide_complexity.mojo b/benches/biguint/bench_biguint_divide_complexity.mojo
index cb270fb..29bc8bb 100644
--- a/benches/biguint/bench_biguint_divide_complexity.mojo
+++ b/benches/biguint/bench_biguint_divide_complexity.mojo
@@ -1,6 +1,6 @@
 # ===----------------------------------------------------------------------=== #
 # Benchmark for BigUInt division time complexity analysis
-# Testing word sizes from 32 to 65536 words (powers of 2)
+# Testing word sizes from 32 to 2**18 words (powers of 2)
 # ===----------------------------------------------------------------------=== #
 
 from time import perf_counter_ns
@@ -157,7 +157,7 @@ fn main() raises:
     )
     log_print("", log_file)
 
-    # Test sizes: powers of 2 from 32 to 65536
+    # Test sizes: powers of 2 from 32 to 2**18 words
     var test_sizes = List[Int]()
     test_sizes.append(32)
     test_sizes.append(64)
@@ -171,6 +171,8 @@ fn main() raises:
     test_sizes.append(16384)
     test_sizes.append(32768)
     test_sizes.append(65536)
+    test_sizes.append(131072)  # 2^17
+    test_sizes.append(262144)  # 2^18
 
     # Test Case 1: Large / Small division (2n / n)
     log_print("=== TEST CASE 1: LARGE / SMALL DIVISION (2n / n) ===", log_file)
@@ -183,7 +185,7 @@ fn main() raises:
     for i in range(len(test_sizes)):
         var divisor_size = test_sizes[i]
         var dividend_size = divisor_size * 2
-        if dividend_size <= 65536:  # Stay within our limit
+        if dividend_size <= 2**18:  # Stay within our limit
             var avg_time = benchmark_divide_at_size(
                 dividend_size, divisor_size, 5, log_file
             )
@@ -206,7 +208,7 @@ fn main() raises:
     for i in range(len(test_sizes)):
         var divisor_size = test_sizes[i]
         var dividend_size = divisor_size * 4
-        if dividend_size <= 65536:  # Stay within our limit
+        if dividend_size <= 2**18:  # Stay within our limit
             var avg_time = benchmark_divide_at_size(
                 dividend_size, divisor_size, 5, log_file
             )
@@ -274,7 +276,7 @@ fn main() raises:
     for i in range(len(test_sizes)):
         var divisor_size = test_sizes[i]
         var dividend_size = divisor_size * 4
-        if dividend_size <= 65536:  # Only show results within our limit
+        if dividend_size <= 2**18:  # Only show results within our limit
             var time_taken = very_large_small_results[i]
 
             if time_taken > 0.0:  # Only show valid results
diff --git a/benches/biguint/bench_biguint_truncate_divide_bz.mojo b/benches/biguint/bench_biguint_truncate_divide_bz.mojo
index 712b85b..4c590cc 100644
--- a/benches/biguint/bench_biguint_truncate_divide_bz.mojo
+++ b/benches/biguint/bench_biguint_truncate_divide_bz.mojo
@@ -85,8 +85,10 @@ fn run_benchmark_truncate_divide(
 
     # Execute the operations once to verify correctness
     try:
-        var mojo_result = decimojo.biguint.division.divide_burnikel_ziegler(
-            mojo_dividend, mojo_divisor
+        var mojo_result = (
+            decimojo.biguint.arithmetics.floor_divide_burnikel_ziegler(
+                mojo_dividend, mojo_divisor
+            )
         )
         var py_result = py_dividend // py_divisor
 
@@ -97,7 +99,7 @@ fn run_benchmark_truncate_divide(
         # Benchmark Mojo implementation
         var t0 = perf_counter_ns()
         for _ in range(iterations):
-            _ = decimojo.biguint.division.divide_burnikel_ziegler(
+            _ = decimojo.biguint.arithmetics.floor_divide_burnikel_ziegler(
                 mojo_dividend, mojo_divisor
             )
         var mojo_time = (perf_counter_ns() - t0) / iterations
diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo
index 6725cdf..4bae6c9 100644
--- a/src/decimojo/biguint/arithmetics.mojo
+++ b/src/decimojo/biguint/arithmetics.mojo
@@ -19,6 +19,7 @@ Implements basic arithmetic functions for the BigUInt type.
 """
 
 from algorithm import vectorize
+import math
 import time
 import testing
 
@@ -51,7 +52,7 @@ from decimojo.rounding_mode import RoundingMode
 # multiply_inplace_by_power_of_billion(mut x: BigUInt, n: Int)
 #
 # floor_divide(x1: BigUInt, x2: BigUInt) -> BigUInt
-# floor_divide_general(x1: BigUInt, x2: BigUInt) -> BigUInt
+# floor_divide_school(x1: BigUInt, x2: BigUInt) -> BigUInt
 # floor_divide_estimate_quotient(x1: BigUInt, x2: BigUInt, j: Int, m: Int) -> UInt64
 # floor_divide_inplace_by_single_word(x1: BigUInt, x2: BigUInt) -> None
 # floor_divide_inplace_by_double_words(x1: BigUInt, x2: BigUInt) -> None
@@ -59,7 +60,7 @@ from decimojo.rounding_mode import RoundingMode
 # floor_divide_by_power_of_ten(x: BigUInt, n: Int) -> BigUInt
 #
 # truncate_divide(x1: BigUInt, x2: BigUInt) -> BigUInt
-# ceil_divide(x1: BigUInt, x2: BigUInt) -> BigUIntulo(x1: BigUIn# floor_divide_general(x1: BigUInt, x2: BigUInt) -> BigUInt
+# ceil_divide(x1: BigUInt, x2: BigUInt) -> BigUIntulo(x1: BigUIn# floor_divide_school(x1: BigUInt, x2: BigUInt) -> BigUInt
 #
 # floor_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt
 # ceil_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt
@@ -1191,7 +1192,9 @@ fn multiply_inplace_by_power_of_billion(mut x: BigUInt, n: Int):
 
 # ===----------------------------------------------------------------------=== #
 # Division Algorithms
-# floor_divide_general, floor_divide_inplace_by_2
+# floor_divide
+# floor_divide_school
+# floor_divide_burnikel_ziegler
 # ===----------------------------------------------------------------------=== #
 
 
@@ -1212,6 +1215,8 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
         It is equal to truncated division for positive numbers.
     """
 
+    alias CUTOFF_BURNIKEL_ZIEGLER = 64
+
     # CASE: x2 is single word
     if len(x2.words) == 1:
         # SUB-CASE: Division by zero
@@ -1296,29 +1301,41 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
         result.remove_leading_empty_words()
         return result^
 
-    # CASE: division of very, very large numbers
-    # Use Newton-Raphson division for large numbers?
+    # CASE: Division of small numbers
+    # If the number of words in a or b is small enough,
+    # we can use the schoolbook division algorithm.
+    if (len(x1.words) <= CUTOFF_BURNIKEL_ZIEGLER) and (
+        len(x2.words) <= CUTOFF_BURNIKEL_ZIEGLER
+    ):
+        # I will normalize the divisor to improve quotient estimation
+        var normalization_factor: Int  # Number of digits to shift
+        # Calculate normalization factor to make leading digit of divisor
+        # as large as possible
+        normalization_factor = calculate_normalization_factor(x2.words[-1])
+
+        if normalization_factor == 0:
+            # No normalization needed, just use the general division algorithm
+            return floor_divide_school(x1, x2)
+        else:
+            # Normalize the divisor and dividend
+            var normalized_x1 = multiply_by_power_of_ten(
+                x1, normalization_factor
+            )
+            var normalized_x2 = multiply_by_power_of_ten(
+                x2, normalization_factor
+            )
+            return floor_divide_school(normalized_x1, normalized_x2)
 
-    # CASE: all other situations
-    # Normalize divisor to improve quotient estimation
-    # Calculate normalization factor to make leading digit of divisor
-    # as large as possible
-    # I use table lookup to find the normalization factor
-    var normalization_factor: Int  # Number of digits to shift
-    normalization_factor = calculate_normalization_factor(x2.words[-1])
-
-    if normalization_factor == 0:
-        # No normalization needed, just use the general division algorithm
-        return floor_divide_general(x1, x2)
-    else:
-        # Normalize the divisor and dividend
-        var normalized_x1 = multiply_by_power_of_ten(x1, normalization_factor)
-        var normalized_x2 = multiply_by_power_of_ten(x2, normalization_factor)
-        return floor_divide_general(normalized_x1, normalized_x2)
+    # CASE: division of very, very large numbers
+    # Use the Burnikel-Ziegler division algorithm
+    print("Using Burnikel-Ziegler division algorithm for large numbers")
+    return floor_divide_burnikel_ziegler(
+        x1, x2, cut_off=CUTOFF_BURNIKEL_ZIEGLER
+    )
 
 
-fn floor_divide_general(dividend: BigUInt, divisor: BigUInt) raises -> BigUInt:
-    """General division algorithm for BigInt numbers.
+fn floor_divide_school(dividend: BigUInt, divisor: BigUInt) raises -> BigUInt:
+    """General schoolbook division algorithm for BigInt numbers.
 
     Args:
         dividend: The dividend.
@@ -1333,7 +1350,7 @@ fn floor_divide_general(dividend: BigUInt, divisor: BigUInt) raises -> BigUInt:
 
     if divisor.is_zero():
         raise Error(
-            "`biguint.arithmetics.floor_divide_general()`: Division by zero"
+            "`biguint.arithmetics.floor_divide_school()`: Division by zero"
         )
 
     # Initialize result and remainder
@@ -1625,6 +1642,335 @@ fn floor_divide_by_power_of_ten(x: BigUInt, n: Int) raises -> BigUInt:
     return result^
 
 
+# FAST RUCURSIVE DIVISION ALGORITHM
+# =============================== #
+# The following functions implement the Burnikel-Ziegler algorithm.
+#
+# floor_divide_burnikel_ziegler
+# floor_divide_two_by_one
+# floor_divide_three_by_two
+# floor_divide_three_by_two_uint32
+# floor_divide_four_by_two_uint32
+#
+# Yuhao Zhu:
+# I tried to write this implementation based on the research report
+# "Fast Recursive Division" by Christoph Burnikel and Joachim Ziegler.
+# MPI-I-98-1-022, October 1998.
+# The paper is mainly based on 2^k-based integers, and therefore, some tricks
+# cannot be applied to 10^k-based integers. For example, when normalizing the
+# divisor to let its most significant word be at least BASE//2, we cannot simply
+# shift the bits until the most significant bit is 1.
+# TODO:
+# Some optimization needs to be done in future to
+# (1) accelerate the normalization process
+# (2) avoid unnecessary memory allocations and copies
+
+
+fn floor_divide_burnikel_ziegler(
+    a: BigUInt, b: BigUInt, cut_off: Int = 16
+) raises -> BigUInt:
+    """Divides BigUInt using the Burnikel-Ziegler algorithm.
+
+    Args:
+        a: The dividend.
+        b: The divisor.
+        cut_off: The cutoff value for the number of words in the divisor to use
+            the schoolbook division algorithm. It also determines the size of
+            the blocks used in the recursive division algorithm.
+    """
+
+    var BLOCK_SIZE_OF_WORDS = cut_off
+
+    # STEP 1:
+    # Normalize the divisor b to n words so that
+    # (1) it is of the form j*2^k and
+    # (2) the most significant word is at least 500_000_000.
+
+    var normalized_b = b
+    var normalized_a = a
+    var normalization_factor: Int
+
+    if normalized_b.words[-1] == 0:
+        normalized_b.remove_leading_empty_words()
+    if normalized_b.words[-1] < 500_000_000:
+        normalization_factor = (
+            decimojo.biguint.arithmetics.calculate_normalization_factor(
+                normalized_b.words[-1]
+            )
+        )
+    else:
+        normalization_factor = 0
+
+    # The targeted number of blocks should be the smallest 2^k such that
+    # 2^k >= number of words in normalized_b ceil divided by BLOCK_SIZE_OF_WORDS.
+    # k is the depth of the recursion.
+    # n is the final number of words in the normalized b.
+    var n_blocks_divisor = math.ceildiv(
+        len(normalized_b.words), BLOCK_SIZE_OF_WORDS
+    )
+    var depth = Int(math.ceil(math.log2(Float64(n_blocks_divisor))))
+    n_blocks_divisor = 2**depth
+    var n = n_blocks_divisor * BLOCK_SIZE_OF_WORDS
+
+    var n_digits_to_scale_up = (
+        n - len(normalized_b.words)
+    ) * 9 + normalization_factor
+
+    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten(
+        normalized_b, n_digits_to_scale_up
+    )
+    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten(
+        normalized_a, n_digits_to_scale_up
+    )
+
+    # The normalized_b is now 9 digits, but may still be smaller than 500_000_000.
+    var gap_ratio = BigUInt.BASE // normalized_b.words[-1]
+    if gap_ratio > 2:
+        decimojo.biguint.arithmetics.multiply_inplace_by_uint32(
+            normalized_b, gap_ratio
+        )
+        decimojo.biguint.arithmetics.multiply_inplace_by_uint32(
+            normalized_a, gap_ratio
+        )
+
+    # STEP 2: Split the normalized a into blocks of size n.
+    # t is the number of blocks in the dividend.
+    var t = math.ceildiv(len(normalized_a.words), n)
+    if len(a.words) == t * n:
+        # If the number of words in a is already a multiple of n
+        # We check if the most significant word is >= 500_000_000.
+        # If it is, we need to add one more block to the dividend.
+        # This ensures that the most significant word of the dividend
+        # is smaller than 500_000_000.
+        if normalized_a.words[-1] >= 500_000_000:
+            t += 1
+
+    var z = BigUInt(normalized_a.words[(t - 2) * n : t * n])
+    var q = BigUInt()
+    for i in range(t - 2, -1, -1):
+        var q_i, r = floor_divide_two_by_one(z, normalized_b, n, cut_off)
+        # print(z, "//", normalized_b, "=", q_i, "mod", r)
+        if i == t - 2:
+            q = q_i
+        else:
+            decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+                q, n
+            )
+            q += q_i
+        if i > 0:
+            decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+                r, n
+            )
+            z = r + BigUInt(normalized_a.words[(i - 1) * n : i * n])
+
+    return q
+
+
+fn floor_divide_three_by_two(
+    a2: BigUInt,
+    a1: BigUInt,
+    a0: BigUInt,
+    b1: BigUInt,
+    b0: BigUInt,
+    n: Int,
+    cut_off: Int,
+) raises -> Tuple[BigUInt, BigUInt]:
+    """Divides a 3-word number by a 2-word number.
+
+    Args:
+        a2: The most significant word of the dividend.
+        a1: The middle word of the dividend.
+        a0: The least significant word of the dividend.
+        b1: The most significant word of the divisor.
+        b0: The least significant word of the divisor.
+        n: The number of words in the divisor.
+        cut_off: The minimum number of words for the recursive division.
+
+    Returns:
+        A tuple containing the quotient and the remainder as BigUInt.
+    """
+
+    var a2a1: BigUInt
+    if a2.is_zero():
+        a2a1 = a1
+    else:
+        a2a1 = a2
+        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+            a2a1, n
+        )
+        a2a1 += a1
+    var q, c = floor_divide_two_by_one(a2a1, b1, n, cut_off)
+    var d = q * b0
+    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n)
+    var r = c + a0
+
+    if r < d:
+        var b = b1
+        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(b, n)
+        b += b0
+        q -= BigUInt.ONE
+        r += b
+        if r < d:
+            q -= BigUInt.ONE
+            r += b
+
+    r -= d
+    return (q, r)
+
+
+fn floor_divide_two_by_one(
+    a: BigUInt, b: BigUInt, n: Int, cut_off: Int
+) raises -> Tuple[BigUInt, BigUInt]:
+    """Divides a BigUInt by another BigUInt using a recursive approach.
+    The divisor has n words and the dividend has 2n words.
+
+    Args:
+        a: The dividend as a BigUInt.
+        b: The divisor as a BigUInt. The most significant word must be at least
+           500_000_000.
+        n: The number of words in the divisor.
+        cut_off: The minimum number of words for the recursive division.
+
+    Returns:
+        A tuple containing the quotient and the remainder as BigUInt.
+
+    Notes:
+
+    You need to ensure that n is even to continue with the algorithm.
+    Otherwise, it will use the schoolbook division algorithm.
+    """
+    if (n & 1 == 1) or (n <= cut_off):
+        var q = floor_divide_school(a, b)
+        var r = a - q * b
+        return (q^, r^)
+
+    if b.words[-1] < 500_000_000:
+        raise Error("b[-1] must be at least 500_000_000")
+
+    # b_modified = b
+    # decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+    #     b_modified, n
+    # )
+    # if a.compare(b_modified) >= 0:
+    #     raise Error("a must be less than b * 10^18")
+
+    else:
+        a0 = BigUInt(a.words[0 : n // 2])
+        a1 = BigUInt(a.words[n // 2 : n])
+        a2 = BigUInt(a.words[n : n + n // 2])
+        a3 = BigUInt(a.words[n + n // 2 : n + n])
+
+        b0 = BigUInt(b.words[0 : n // 2])
+        b1 = BigUInt(b.words[n // 2 : n])
+
+        q1, r = floor_divide_three_by_two(a3, a2, a1, b1, b0, n // 2, cut_off)
+        r0 = BigUInt(r.words[0 : n // 2])
+        r1 = BigUInt(r.words[n // 2 : n])
+        q0, s = floor_divide_three_by_two(r1, r0, a0, b1, b0, n // 2, cut_off)
+
+        q = q1
+        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
+            q, n // 2
+        )
+        q += q0
+
+    return (q, s)
+
+
+fn floor_divide_three_by_two_uint32(
+    a2: UInt32, a1: UInt32, a0: UInt32, b1: UInt32, b0: UInt32
+) raises -> Tuple[UInt32, UInt32, UInt32]:
+    """Divides a 3-word number by a 2-word number.
+    b1 must be at least 500_000_000.
+
+    Args:
+        a2: The most significant word of the dividend.
+        a1: The middle word of the dividend.
+        a0: The least significant word of the dividend.
+        b1: The most significant word of the divisor.
+        b0: The least significant word of the divisor.
+
+    Returns:
+        A tuple containing
+        (1) the quotient (as UInt32)
+        (2) the most significant word of the remainder (as UInt32)
+        (3) the least significant word of the remainder (as UInt32).
+
+    Notes:
+
+    a = a2 * BASE^2 + a1 * BASE + a0.
+    b = b1 * BASE + b0.
+    """
+    if b1 < 500_000_000:
+        raise Error("b1 must be at least 500_000_000")
+
+    var a2a1 = UInt64(a2) * 1_000_000_000 + UInt64(a1)
+
+    var q: UInt64 = UInt64(a2a1) // UInt64(b1)
+    var c = a2a1 - q * UInt64(b1)
+    var d: UInt64 = q * UInt64(b0)
+    var r = UInt64(c * 1_000_000_000) + UInt64(a0)
+
+    if r < UInt64(d):
+        var b = UInt64(b1) * 1_000_000_000 + UInt64(b0)
+        q -= 1
+        r += b
+        if r < UInt64(d):
+            q -= 1
+            r += b
+
+    r -= d
+    var r1: UInt32 = UInt32(r // 1_000_000_000)
+    var r0: UInt32 = UInt32(r % 1_000_000_000)
+
+    return (UInt32(q), r1, r0)
+
+
+fn floor_divide_four_by_two_uint32(
+    a3: UInt32,
+    a2: UInt32,
+    a1: UInt32,
+    a0: UInt32,
+    b1: UInt32,
+    b0: UInt32,
+) raises -> Tuple[UInt32, UInt32, UInt32, UInt32]:
+    """Divides a 4-word number by a 2-word number.
+
+    Args:
+        a3: The most significant word of the dividend.
+        a2: The second most significant word of the dividend.
+        a1: The second least significant word of the dividend.
+        a0: The least significant word of the dividend.
+        b1: The most significant word of the divisor.
+        b0: The least significant word of the divisor.
+
+    Returns:
+        A tuple containing
+        (1) the most significant word of the quotient (as UInt32)
+        (2) the least significant word of the quotient (as UInt32)
+        (3) the most significant word of the remainder (as UInt32)
+        (4) the least significant word of the remainder (as UInt32).
+    """
+
+    if b1 < 500_000_000:
+        raise Error("b1 must be at least 500_000_000")
+    if a3 > b1:
+        raise Error("a must be less than b * 10^18")
+    elif a3 == b1:
+        if a2 > b0:
+            raise Error("a must be less than b * 10^18")
+        elif a2 == b0:
+            if a1 > 0:
+                raise Error("a must be less than b * 10^18")
+            elif a1 == 0:
+                if a0 >= 0:
+                    raise Error("a must be less than b * 10^18")
+
+    var q1, r1, r0 = floor_divide_three_by_two_uint32(a3, a2, a1, b1, b0)
+    var q0, s1, s0 = floor_divide_three_by_two_uint32(r1, r0, a0, b1, b0)
+    return (q1, q0, s1, s0)
+
+
 @always_inline
 fn truncate_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
     """Returns the quotient of two BigUInt numbers, truncating toward zero.
diff --git a/src/decimojo/biguint/division.mojo b/src/decimojo/biguint/division.mojo
deleted file mode 100644
index 31f9f5e..0000000
--- a/src/decimojo/biguint/division.mojo
+++ /dev/null
@@ -1,311 +0,0 @@
-from decimojo.prelude import *
-import time
-import math
-
-
-fn divide_three_words_by_two(
-    a2: UInt32, a1: UInt32, a0: UInt32, b1: UInt32, b0: UInt32
-) raises -> Tuple[UInt32, UInt32, UInt32]:
-    """Divides a 3-word number by a 2-word number.
-    b1 must be at least 500_000_000.
-
-    Args:
-        a2: The most significant word of the dividend.
-        a1: The middle word of the dividend.
-        a0: The least significant word of the dividend.
-        b1: The most significant word of the divisor.
-        b0: The least significant word of the divisor.
-
-    Returns:
-        A tuple containing
-        (1) the quotient (as UInt32)
-        (2) the most significant word of the remainder (as UInt32)
-        (3) the least significant word of the remainder (as UInt32).
-
-    Notes:
-        a = a2 * BASE^2 + a1 * BASE + a0.
-        b = b1 * BASE + b0.
-    """
-    if b1 < 500_000_000:
-        raise Error("b1 must be at least 500_000_000")
-
-    var a2a1 = UInt64(a2) * 1_000_000_000 + UInt64(a1)
-
-    var q: UInt64 = UInt64(a2a1) // UInt64(b1)
-    var c = a2a1 - q * UInt64(b1)
-    var d: UInt64 = q * UInt64(b0)
-    var r = UInt64(c * 1_000_000_000) + UInt64(a0)
-
-    if r < UInt64(d):
-        var b = UInt64(b1) * 1_000_000_000 + UInt64(b0)
-        q -= 1
-        r += b
-        if r < UInt64(d):
-            q -= 1
-            r += b
-
-    r -= d
-    var r1: UInt32 = UInt32(r // 1_000_000_000)
-    var r0: UInt32 = UInt32(r % 1_000_000_000)
-
-    return (UInt32(q), r1, r0)
-
-
-fn divide_four_words_by_two(
-    a3: UInt32,
-    a2: UInt32,
-    a1: UInt32,
-    a0: UInt32,
-    b1: UInt32,
-    b0: UInt32,
-) raises -> Tuple[UInt32, UInt32, UInt32, UInt32]:
-    """Divides a 4-word number by a 2-word number.
-
-    Args:
-        a3: The most significant word of the dividend.
-        a2: The second most significant word of the dividend.
-        a1: The second least significant word of the dividend.
-        a0: The least significant word of the dividend.
-        b1: The most significant word of the divisor.
-        b0: The least significant word of the divisor.
-
-    Returns:
-        A tuple containing
-        (1) the most significant word of the quotient (as UInt32)
-        (2) the least significant word of the quotient (as UInt32)
-        (3) the most significant word of the remainder (as UInt32)
-        (4) the least significant word of the remainder (as UInt32).
-    """
-
-    if b1 < 500_000_000:
-        raise Error("b1 must be at least 500_000_000")
-    if a3 > b1:
-        raise Error("a must be less than b * 10^18")
-    elif a3 == b1:
-        if a2 > b0:
-            raise Error("a must be less than b * 10^18")
-        elif a2 == b0:
-            if a1 > 0:
-                raise Error("a must be less than b * 10^18")
-            elif a1 == 0:
-                if a0 >= 0:
-                    raise Error("a must be less than b * 10^18")
-
-    var q1, r1, r0 = divide_three_words_by_two(a3, a2, a1, b1, b0)
-    var q0, s1, s0 = divide_three_words_by_two(r1, r0, a0, b1, b0)
-    return (q1, q0, s1, s0)
-
-
-fn divide_three_by_two(
-    a2: BigUInt,
-    a1: BigUInt,
-    a0: BigUInt,
-    b1: BigUInt,
-    b0: BigUInt,
-    n: Int,
-    cut_off: Int,
-) raises -> Tuple[BigUInt, BigUInt]:
-    var a2a1: BigUInt
-    if a2.is_zero():
-        a2a1 = a1
-    else:
-        a2a1 = a2
-        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
-            a2a1, n
-        )
-        a2a1 += a1
-    var q, c = divide_two_by_one(a2a1, b1, n, cut_off)
-    var d = q * b0
-    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n)
-    var r = c + a0
-
-    if r < d:
-        var b = b1
-        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(b, n)
-        b += b0
-        q -= BigUInt.ONE
-        r += b
-        if r < d:
-            q -= BigUInt.ONE
-            r += b
-
-    r -= d
-    return (q, r)
-
-
-fn divide_two_by_one(
-    a: BigUInt, b: BigUInt, n: Int, cut_off: Int
-) raises -> Tuple[BigUInt, BigUInt]:
-    """Divides a BigUInt by another BigUInt using a recursive approach.
-    The divisor has n words and the dividend has 2n words.
-
-    Args:
-        a: The dividend as a BigUInt.
-        b: The divisor as a BigUInt.
-        n: The number of words in the divisor.
-        cut_off: The minimum number of words for the recursive division.
-    """
-    if (n & 1) != 0 or n <= cut_off:
-        return (a // b, a % b)
-
-    if b.words[-1] < 500_000_000:
-        raise Error("b[-1] must be at least 500_000_000")
-
-    # b_modified = b
-    # decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
-    #     b_modified, n
-    # )
-    # if a.compare(b_modified) >= 0:
-    #     raise Error("a must be less than b * 10^18")
-
-    else:
-        a0 = BigUInt(a.words[0 : n // 2])
-        a1 = BigUInt(a.words[n // 2 : n])
-        a2 = BigUInt(a.words[n : n + n // 2])
-        a3 = BigUInt(a.words[n + n // 2 : n + n])
-
-        b0 = BigUInt(b.words[0 : n // 2])
-        b1 = BigUInt(b.words[n // 2 : n])
-
-        q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2, cut_off)
-        r0 = BigUInt(r.words[0 : n // 2])
-        r1 = BigUInt(r.words[n // 2 : n])
-        q0, s = divide_three_by_two(r1, r0, a0, b1, b0, n // 2, cut_off)
-
-        q = q1
-        decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
-            q, n // 2
-        )
-        q += q0
-
-    return (q, s)
-
-
-fn divide_burnikel_ziegler(
-    a: BigUInt, b: BigUInt, cut_off: Int = 16
-) raises -> BigUInt:
-    """Divides BigUInt using the Burnikel-Ziegler algorithm."""
-
-    # Yuhao Zhu:
-    # This implementation is based on the research report
-    # "Fast Recursive Division" by Christoph Burnikel and Joachim Ziegler.
-    # MPI-I-98-1-022, October 1998.
-
-    var BLOCK_SIZE_OF_WORDS = cut_off
-
-    if (len(a.words) <= cut_off) or (len(b.words) <= cut_off):
-        # If the number of words in a or b is less than or equal to 64,
-        # we can use the schoolbook division algorithm.
-        return a // b
-
-    # STEP 1:
-    # Normalize the divisor b to n words so that
-    # (1) it is of the form j*2^k and
-    # (2) the most significant word is at least 500_000_000.
-
-    var normalized_b = b
-    var normalized_a = a
-    var normalization_factor: Int
-
-    if normalized_b.words[-1] == 0:
-        normalized_b.remove_leading_empty_words()
-    if normalized_b.words[-1] < 500_000_000:
-        normalization_factor = (
-            decimojo.biguint.arithmetics.calculate_normalization_factor(
-                normalized_b.words[-1]
-            )
-        )
-    else:
-        normalization_factor = 0
-
-    # The targeted number of blocks should be the smallest 2^k such that
-    # 2^k >= number of words in normalized_b ceil divided by BLOCK_SIZE_OF_WORDS.
-    # k is the depth of the recursion.
-    # n is the final number of words in the normalized b.
-    var n_blocks_divisor = math.ceildiv(
-        len(normalized_b.words), BLOCK_SIZE_OF_WORDS
-    )
-    var depth = Int(math.ceil(math.log2(Float64(n_blocks_divisor))))
-    n_blocks_divisor = 2**depth
-    var n = n_blocks_divisor * BLOCK_SIZE_OF_WORDS
-
-    var n_digits_to_scale_up = (
-        n - len(normalized_b.words)
-    ) * 9 + normalization_factor
-
-    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten(
-        normalized_b, n_digits_to_scale_up
-    )
-    decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten(
-        normalized_a, n_digits_to_scale_up
-    )
-
-    # The normalized_b is now 9 digits, but may still be smaller than 500_000_000.
-    var gap_ratio = BUInt.BASE // normalized_b.words[-1]
-    if gap_ratio > 2:
-        decimojo.biguint.arithmetics.multiply_inplace_by_uint32(
-            normalized_b, gap_ratio
-        )
-        decimojo.biguint.arithmetics.multiply_inplace_by_uint32(
-            normalized_a, gap_ratio
-        )
-
-    # STEP 2: Split the normalized a into blocks of size n.
-    # t is the number of blocks in the dividend.
-    var t = math.ceildiv(len(normalized_a.words), n)
-    if len(a.words) == t * n:
-        # If the number of words in a is already a multiple of n
-        # We check if the most significant word is >= 500_000_000.
-        # If it is, we need to add one more block to the dividend.
-        # This ensures that the most significant word of the dividend
-        # is smaller than 500_000_000.
-        if normalized_a.words[-1] >= 500_000_000:
-            t += 1
-
-    var z = BigUInt(normalized_a.words[(t - 2) * n : t * n])
-    var q = BigUInt()
-    for i in range(t - 2, -1, -1):
-        var q_i, r = divide_two_by_one(z, normalized_b, n, cut_off)
-        # print(z, "//", normalized_b, "=", q_i, "mod", r)
-        if i == t - 2:
-            q = q_i
-        else:
-            decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
-                q, n
-            )
-            q += q_i
-        if i > 0:
-            decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(
-                r, n
-            )
-            z = r + BigUInt(normalized_a.words[(i - 1) * n : i * n])
-
-    return q
-
-
-# fn main() raises:
-#     n = 2**14
-#     var a = BUInt(String("987_654_321" * 3 * n))
-#     var b = BUInt(String("3_141_592" * n))
-#     # var a = BigUInt(
-#     #     "123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789000000"
-#     # )
-#     # var b = BigUInt("678678678678000000")
-
-#     var result1 = List[BigUInt]()
-#     t0 = time.perf_counter_ns()
-#     for _ in range(1):
-#         q = divide_burnikel_ziegler(a, b)
-#         result1.append(q)
-#     print("time taken: ", time.perf_counter_ns() - t0, " ns")
-
-#     var result2 = List[BigUInt]()
-#     t0 = time.perf_counter_ns()
-#     for _ in range(1):
-#         p = a // b
-#         result2.append(p)
-#     print("time taken: ", time.perf_counter_ns() - t0, " ns")
-
-#     # print("result1: ", result1[0])
-#     # print("result2: ", result2[0])
-#     print("result1 == result2: ", result1[0] == result2[0])

From 78aa2e6fe0cf76fca5e3e57d050b160cdea7b846 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?=
 <dr.yuhao.zhu@outlook.com>
Date: Wed, 9 Jul 2025 14:35:30 +0200
Subject: [PATCH 7/7] Remove bench file for bz

---
 .../bench_biguint_truncate_divide.mojo        |  40 +-
 .../bench_biguint_truncate_divide_bz.mojo     | 557 ------------------
 src/decimojo/biguint/arithmetics.mojo         |   2 +-
 3 files changed, 36 insertions(+), 563 deletions(-)
 delete mode 100644 benches/biguint/bench_biguint_truncate_divide_bz.mojo

diff --git a/benches/biguint/bench_biguint_truncate_divide.mojo b/benches/biguint/bench_biguint_truncate_divide.mojo
index 39f7724..a097038 100644
--- a/benches/biguint/bench_biguint_truncate_divide.mojo
+++ b/benches/biguint/bench_biguint_truncate_divide.mojo
@@ -23,7 +23,7 @@ fn open_log_file() raises -> PythonObject:
     var python = Python.import_module("builtins")
     var datetime = Python.import_module("datetime")
     var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(1000000)
+    pysys.set_int_max_str_digits(10000000)
 
     # Create logs directory if it doesn't exist
     var log_dir = "./logs"
@@ -88,6 +88,15 @@ fn run_benchmark_truncate_divide(
         var mojo_result = mojo_dividend // mojo_divisor
         var py_result = py_dividend // py_divisor
 
+        if String(mojo_result) != String(py_result):
+            log_print(
+                "Error: Mojo and Python results do not match!",
+                log_file,
+            )
+            log_print("Mojo result:     " + String(mojo_result), log_file)
+            log_print("Python result:   " + String(py_result), log_file)
+            return  # Skip this benchmark case if results don't match
+
         # Display results for verification
         log_print("Mojo result:     " + String(mojo_result), log_file)
         log_print("Python result:   " + String(py_result), log_file)
@@ -155,6 +164,7 @@ fn main() raises:
         log_print("Could not retrieve system information", log_file)
 
     var iterations = 100
+    var iterations_large_numbers = 10
 
     # Define benchmark cases (all positive numbers for BigUInt)
     log_print(
@@ -492,20 +502,40 @@ fn main() raises:
 
     # Case 31: Division of large numbers
     run_benchmark_truncate_divide(
-        "Division of large numbers (1000 words vs 100 digits)",
+        "Division of large numbers (5000 words vs words digits)",
         "316227766_016824890_583648059_893174009_579947593" * 1000,
         "141421356_237309504_880168872_420969807_856967187" * 100,
-        3,
+        iterations_large_numbers,
         log_file,
         speedup_factors,
     )
 
     # Case 32: Division of large numbers
     run_benchmark_truncate_divide(
-        "Division of large numbers (10000 words vs 1234 digits)",
+        "Division of large numbers (50000 words vs 6170 words)",
         "316227766_016824890_583648059_893174009_579947593" * 10000,
         "141421356_237309504_880168872_420969807_856967187" * 1234,
-        3,
+        iterations_large_numbers,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 33: Division of large numbers
+    run_benchmark_truncate_divide(
+        "Division of large numbers (2**16 words vs 2**12 digits)",
+        "123456789" * 2**16,
+        "987654321" * 2**12,
+        iterations_large_numbers,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 34: Division of large numbers
+    run_benchmark_truncate_divide(
+        "Division of large numbers (2**18 words vs 2**14 digits)",
+        "123456789" * 2**18,
+        "987654321" * 2**14,
+        iterations_large_numbers,
         log_file,
         speedup_factors,
     )
diff --git a/benches/biguint/bench_biguint_truncate_divide_bz.mojo b/benches/biguint/bench_biguint_truncate_divide_bz.mojo
deleted file mode 100644
index 4c590cc..0000000
--- a/benches/biguint/bench_biguint_truncate_divide_bz.mojo
+++ /dev/null
@@ -1,557 +0,0 @@
-"""
-Comprehensive benchmarks for BigUInt truncate_divide operation.
-Compares performance against Python's built-in int division with 20 diverse test cases.
-BigUInt is an unsigned integer type, so all test cases use positive numbers only.
-"""
-
-from decimojo.biguint.biguint import BigUInt
-import decimojo.biguint.arithmetics
-from python import Python, PythonObject
-from time import perf_counter_ns
-import time
-import os
-from collections import List
-
-
-fn open_log_file() raises -> PythonObject:
-    """
-    Creates and opens a log file with a timestamp in the filename.
-
-    Returns:
-        A file object opened for writing.
-    """
-    var python = Python.import_module("builtins")
-    var datetime = Python.import_module("datetime")
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(1000000)
-
-    # Create logs directory if it doesn't exist
-    var log_dir = "./logs"
-    if not os.path.exists(log_dir):
-        os.makedirs(log_dir)
-
-    # Generate a timestamp for the filename
-    var timestamp = String(datetime.datetime.now().isoformat())
-    var log_filename = (
-        log_dir + "/benchmark_biguint_truncate_divide_" + timestamp + ".log"
-    )
-
-    print("Saving benchmark results to:", log_filename)
-    return python.open(log_filename, "w")
-
-
-fn log_print(msg: String, log_file: PythonObject) raises:
-    """
-    Prints a message to both the console and the log file.
-
-    Args:
-        msg: The message to print.
-        log_file: The file object to write to.
-    """
-    print(msg)
-    log_file.write(msg + "\n")
-    log_file.flush()  # Ensure the message is written immediately
-
-
-fn run_benchmark_truncate_divide(
-    name: String,
-    dividend: String,
-    divisor: String,
-    iterations: Int,
-    log_file: PythonObject,
-    mut speedup_factors: List[Float64],
-) raises:
-    """
-    Run a benchmark comparing Mojo BigUInt truncate_divide with Python int division.
-
-    Args:
-        name: Name of the benchmark case.
-        dividend: String representation of the dividend.
-        divisor: String representation of the divisor.
-        iterations: Number of iterations to run.
-        log_file: File object for logging results.
-        speedup_factors: Mojo List to store speedup factors for averaging.
-    """
-    log_print("\nBenchmark:       " + name, log_file)
-    log_print("Dividend:        " + dividend, log_file)
-    log_print("Divisor:         " + divisor, log_file)
-
-    # Set up Mojo and Python values
-    var mojo_dividend = BigUInt(dividend)
-    var mojo_divisor = BigUInt(divisor)
-    var py = Python.import_module("builtins")
-    var py_dividend = py.int(dividend)
-    var py_divisor = py.int(divisor)
-
-    # Execute the operations once to verify correctness
-    try:
-        var mojo_result = (
-            decimojo.biguint.arithmetics.floor_divide_burnikel_ziegler(
-                mojo_dividend, mojo_divisor
-            )
-        )
-        var py_result = py_dividend // py_divisor
-
-        # Display results for verification
-        log_print("Mojo result:     " + String(mojo_result), log_file)
-        log_print("Python result:   " + String(py_result), log_file)
-
-        # Benchmark Mojo implementation
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = decimojo.biguint.arithmetics.floor_divide_burnikel_ziegler(
-                mojo_dividend, mojo_divisor
-            )
-        var mojo_time = (perf_counter_ns() - t0) / iterations
-        if mojo_time == 0:
-            mojo_time = 1  # Prevent division by zero
-
-        # Benchmark Python implementation
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = py_dividend // py_divisor
-        var python_time = (perf_counter_ns() - t0) / iterations
-
-        # Calculate speedup factor
-        var speedup = python_time / mojo_time
-        speedup_factors.append(Float64(speedup))
-
-        # Print results with speedup comparison
-        log_print(
-            "Mojo division:   " + String(mojo_time) + " ns per iteration",
-            log_file,
-        )
-        log_print(
-            "Python division: " + String(python_time) + " ns per iteration",
-            log_file,
-        )
-        log_print("Speedup factor:  " + String(speedup), log_file)
-    except e:
-        log_print("Error occurred during benchmark: " + String(e), log_file)
-        log_print("Skipping this benchmark case", log_file)
-
-
-fn main() raises:
-    # Open log file
-    var log_file = open_log_file()
-    var datetime = Python.import_module("datetime")
-
-    # Create a Mojo List to store speedup factors for averaging later
-    var speedup_factors = List[Float64]()
-
-    # Display benchmark header with system information
-    log_print("=== DeciMojo BigUInt Truncate Division Benchmark ===", log_file)
-    log_print("Time: " + String(datetime.datetime.now().isoformat()), log_file)
-
-    # Try to get system info
-    try:
-        var platform = Python.import_module("platform")
-        log_print(
-            "System: "
-            + String(platform.system())
-            + " "
-            + String(platform.release()),
-            log_file,
-        )
-        log_print("Processor: " + String(platform.processor()), log_file)
-        log_print(
-            "Python version: " + String(platform.python_version()), log_file
-        )
-    except:
-        log_print("Could not retrieve system information", log_file)
-
-    var iterations = 100
-
-    # Define benchmark cases (all positive numbers for BigUInt)
-    log_print(
-        "\nRunning truncate division benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    # Case 1: Simple division with no remainder
-    run_benchmark_truncate_divide(
-        "Simple division, no remainder",
-        "100",
-        "10",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 2: Division with remainder
-    run_benchmark_truncate_divide(
-        "Division with remainder",
-        "10",
-        "3",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 3: Division of small numbers
-    run_benchmark_truncate_divide(
-        "Division of small numbers",
-        "7",
-        "2",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 4: Division resulting in zero
-    run_benchmark_truncate_divide(
-        "Division resulting in zero",
-        "5",
-        "10",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 5: Division by one
-    run_benchmark_truncate_divide(
-        "Division by one",
-        "12345",
-        "1",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 6: Zero dividend
-    run_benchmark_truncate_divide(
-        "Zero dividend",
-        "0",
-        "5",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 7: Large number division
-    run_benchmark_truncate_divide(
-        "Large number division",
-        "9999999999",  # 10 billion
-        "333",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 8: Very large number division
-    run_benchmark_truncate_divide(
-        "Very large number division",
-        "1" + "0" * 50,  # 10^50
-        "7",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 9: Division of large numbers with exact result
-    run_benchmark_truncate_divide(
-        "Division of large numbers with exact result",
-        "1" + "0" * 30,  # 10^30
-        "1" + "0" * 10,  # 10^10
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 10: Division by large number
-    run_benchmark_truncate_divide(
-        "Division by large number",
-        "12345",
-        "9" * 20,  # 20 nines
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 11: Fibonacci number division
-    run_benchmark_truncate_divide(
-        "Fibonacci number division",
-        "6765",  # Fib(20)
-        "4181",  # Fib(19)
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 12: Prime number division
-    run_benchmark_truncate_divide(
-        "Prime number division",
-        "2147483647",  # Mersenne prime (2^31 - 1)
-        "997",  # Prime
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 13: Division of numbers near UInt64 limit
-    run_benchmark_truncate_divide(
-        "Division near UInt64 limit",
-        "18446744073709551615",  # UInt64.MAX
-        "2",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 14: Division with divisor just below dividend
-    run_benchmark_truncate_divide(
-        "Division with divisor just below dividend",
-        "1000",
-        "999",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 15: Division with exact powers of 10
-    run_benchmark_truncate_divide(
-        "Division with exact powers of 10",
-        "1" + "0" * 20,  # 10^20
-        "1" + "0" * 5,  # 10^5
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 16: Division of repeated digits
-    run_benchmark_truncate_divide(
-        "Division of repeated digits",
-        "9" * 30,  # 30 nines
-        "9" * 15,  # 15 nines
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 17: Division with extremely large dividend and small divisor
-    run_benchmark_truncate_divide(
-        "Extreme large dividend and small divisor",
-        "9" * 100,  # 100 nines
-        "3",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 18: Division with powers of 2
-    run_benchmark_truncate_divide(
-        "Division with powers of 2",
-        "1" + "0" * 50,  # 10^50
-        "256",  # 2^8
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 19: Division yielding a single-digit result
-    run_benchmark_truncate_divide(
-        "Division yielding a single-digit result",
-        "123456789",
-        "123456780",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 20: Division with random large numbers
-    run_benchmark_truncate_divide(
-        "Division with random large numbers",
-        "8675309123456789098765432112345678909876543211234567",
-        "12345678901234567890",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 21: Division with around 50 digits and with divisor just below dividend
-    run_benchmark_truncate_divide(
-        "Division with around 50 digits divisor just below dividend",
-        "12345" * 10,
-        "6789" * 12,
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 22: Division of very large repeated digits
-    run_benchmark_truncate_divide(
-        "Division of repeated digits",
-        "990132857498314692374162398217" * 10,  # 30 * 10 = 300 digits
-        "85172390413429847239" * 10,  # 20 * 10 = 200 digits
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 23: Division of large numbers
-    run_benchmark_truncate_divide(
-        "Division of large numbers (270 digits vs 135 digits)",
-        "123456789" * 30,
-        "987654321" * 15,
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 24: Division of very large numbers
-    run_benchmark_truncate_divide(
-        "Division of very large numbers (2250 digits vs 900 digits)",
-        "123456789" * 250,
-        "987654321" * 100,
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 25: Division of very, very large numbers
-    # x1 is more than 400 words long (>= 10^3600)
-    # x2 is more than 200 words long (>= 10^1800)
-    run_benchmark_truncate_divide(
-        "Division of very, very large numbers (3600 digits vs 1800 digits)",
-        "123456789" * 400,
-        "987654321" * 200,
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 26: Division of large numbers
-    run_benchmark_truncate_divide(
-        "Division of very large numbers (256 digits vs 128 digits)",
-        "1234567890193287491287508917213097405916874098123705160923812345678901932874912875089172130974059168740981237051609238749875089170984701759832708497029875019837409871085709813749870897510749875089170984701759832708497029875019837409871085709813749870897510",
-        "68740981237051609238123456789019328749128750891721309740591687409812370516092387498750879548759387959978279541709847017598327084",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 27: Division of numbers with around 200 and 100 digits
-    run_benchmark_truncate_divide(
-        "Division of large numbers (200 digits vs 100 digits)",
-        (
-            "314159265358979323846264338327950288419716939937510"
-            "582097494459230781640628620899862803482534211706798214808651"
-            "328230664709384460955058223172535940812848111745028410270193"
-            "852110555964462294895493038196"
-        ),
-        (
-            "271828182845904523536028747135266249775724709369995"
-            "95749669676277240766303535475945713821785251664274"
-        ),
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 28: Division of numbers with around 200 and 100 digits
-    run_benchmark_truncate_divide(
-        "Division of large numbers (200 digits vs 100 digits)",
-        (
-            "314159265358979323846264338327950288419716939937510"
-            "582097494459230781640628620899862803482534211706798214808651"
-            "328230664709384460955058223172535940812848111745028410270193"
-            "852110555964462294895493038196"
-        ),
-        (
-            "141421356237309504880168872420969807856967187537694"
-            "80731766797379907324784621070388503875343276400719"
-        ),
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 29: Division of numbers with around 150 and 50 digits
-    run_benchmark_truncate_divide(
-        "Division of large numbers (150 digits vs 50 digits)",
-        (
-            "314159265358979323846264338327950288419716939937510"
-            "582097494459230781640628620899862803482534211706798214808651"
-            "3282306647093844609550582231725359408128"
-        ),
-        "141421356237309504880168872420969807856967187537694",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 30: Division of numbers with around 150 and 50 digits
-    run_benchmark_truncate_divide(
-        "Division of large numbers (150 digits vs 50 digits)",
-        (
-            "316227766016824890583648059893174009579947593530458"
-            "382628078540989121552735792899961040720792717368862335475063"
-            "5167610057579407944886251958020310186466"
-        ),
-        "141421356237309504880168872420969807856967187537694",
-        iterations,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 31: Division of large numbers
-    run_benchmark_truncate_divide(
-        "Division of large numbers (1000 words vs 100 digits)",
-        "316227766_016824890_583648059_893174009_579947593" * 1000,
-        "141421356_237309504_880168872_420969807_856967187" * 100,
-        3,
-        log_file,
-        speedup_factors,
-    )
-
-    # Case 32: Division of large numbers
-    run_benchmark_truncate_divide(
-        "Division of large numbers (10000 words vs 1234 digits)",
-        "316227766_016824890_583648059_893174009_579947593" * 10000,
-        "141421356_237309504_880168872_420969807_856967187" * 1234,
-        3,
-        log_file,
-        speedup_factors,
-    )
-
-    # Calculate average speedup factor (ignoring any cases that might have failed)
-    if len(speedup_factors) > 0:
-        var sum_speedup: Float64 = 0.0
-        for i in range(len(speedup_factors)):
-            sum_speedup += speedup_factors[i]
-        var average_speedup = sum_speedup / Float64(len(speedup_factors))
-
-        # Display summary
-        log_print(
-            "\n=== BigUInt Truncate Division Benchmark Summary ===", log_file
-        )
-        log_print(
-            "Benchmarked:      "
-            + String(len(speedup_factors))
-            + " different division cases",
-            log_file,
-        )
-        log_print(
-            "Each case ran:    " + String(iterations) + " iterations", log_file
-        )
-        log_print(
-            "Average speedup:  " + String(average_speedup) + "×", log_file
-        )
-
-        # List all speedup factors
-        log_print("\nIndividual speedup factors:", log_file)
-        for i in range(len(speedup_factors)):
-            log_print(
-                String("Case {}: {}×").format(
-                    i + 1, round(speedup_factors[i], 2)
-                ),
-                log_file,
-            )
-    else:
-        log_print("\nNo valid benchmark cases were completed", log_file)
-
-    # Close the log file
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo
index 4bae6c9..84d38f5 100644
--- a/src/decimojo/biguint/arithmetics.mojo
+++ b/src/decimojo/biguint/arithmetics.mojo
@@ -1328,7 +1328,7 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
 
     # CASE: division of very, very large numbers
     # Use the Burnikel-Ziegler division algorithm
-    print("Using Burnikel-Ziegler division algorithm for large numbers")
+    # print("Using Burnikel-Ziegler division algorithm for large numbers")
     return floor_divide_burnikel_ziegler(
         x1, x2, cut_off=CUTOFF_BURNIKEL_ZIEGLER
     )