From a666bbaf546966b92e500df3f9583d711d286305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 8 Jul 2025 20:18:06 +0200 Subject: [PATCH 1/7] Avoid uninitialized biguint --- src/decimojo/biguint/biguint.mojo | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/decimojo/biguint/biguint.mojo b/src/decimojo/biguint/biguint.mojo index 6877dbd..9fc17ec 100644 --- a/src/decimojo/biguint/biguint.mojo +++ b/src/decimojo/biguint/biguint.mojo @@ -114,7 +114,7 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable): fn __init__(out self, owned words: List[UInt32]): """Initializes a BigUInt from a list of UInt32 words. - It does not verify whether the list is empty or the words are invalid. + It does not verify whether the words are within the valid range See `from_list()` for safer initialization. Args: @@ -124,11 +124,13 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable): Notes: - This method does not check whether - (1) the list is empty. - (2) the words are smaller than `999_999_999`. + This method does not check whether the words are smaller than + `999_999_999`. """ - self.words = words^ + if len(words) == 0: + self.words = List[UInt32](UInt32(0)) + else: + self.words = words^ fn __init__(out self, owned *words: UInt32): """Initializes a BigUInt from raw words without validating the words. From 69bc86d387085b3e0703ee76436845393bd1de23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 8 Jul 2025 21:02:11 +0200 Subject: [PATCH 2/7] create a function for normalization --- pixi.toml | 2 +- src/decimojo/biguint/arithmetics.mojo | 62 ++++++++++++++++----------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/pixi.toml b/pixi.toml index 3049b67..77c03d2 100644 --- a/pixi.toml +++ b/pixi.toml @@ -14,7 +14,7 @@ max = "==25.4" [tasks] # format the code -format = "pixi run mojo format ./" +format = "pixi run mojo format ./src && pixi run mojo format ./benches && pixi run mojo format ./tests && pixi run mojo format ./docs" # compile the package package_decimojo = "pixi run mojo package src/decimojo && cp decimojo.mojopkg tests/ && cp decimojo.mojopkg benches/ && rm decimojo.mojopkg" diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo index be3dbdb..6725cdf 100644 --- a/src/decimojo/biguint/arithmetics.mojo +++ b/src/decimojo/biguint/arithmetics.mojo @@ -1305,31 +1305,7 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt: # as large as possible # I use table lookup to find the normalization factor var normalization_factor: Int # Number of digits to shift - var msw = x2.words[len(x2.words) - 1] - if msw < 10_000: - if msw < 100: - if msw < 10: - normalization_factor = 8 # Shift by 8 digits - else: # 10 <= msw < 100 - normalization_factor = 7 # Shift by 7 digits - else: # 100 <= msw < 10_000 - if msw < 1_000: # 100 <= msw < 1_000 - normalization_factor = 6 # Shift by 6 digits - else: # 1_000 <= msw < 10_000: - normalization_factor = 5 # Shift by 5 digits - elif msw < 100_000_000: # 10_000 <= msw < 100_000_000 - if msw < 1_000_000: - if msw < 100_000: # 10_000 <= msw < 100_000 - normalization_factor = 4 # Shift by 4 digits - else: # 100_000 <= msw < 1_000_000 - normalization_factor = 3 # Shift by 3 digits - else: # 1_000_000 <= msw < 100_000_000 - if msw < 10_000_000: # 1_000_000 <= msw < 10_000_000 - normalization_factor = 2 # Shift by 2 digits - else: # 10_000_000 <= msw < 100_000_000 - normalization_factor = 1 # Shift by 1 digit - else: # 100_000_000 <= msw < 1_000_000_000 - normalization_factor = 0 # No shift needed + normalization_factor = calculate_normalization_factor(x2.words[-1]) if normalization_factor == 0: # No normalization needed, just use the general division algorithm @@ -1921,3 +1897,39 @@ fn power_of_10(n: Int) raises -> BigUInt: result.words.append(1) return result^ + + +fn calculate_normalization_factor(msw: UInt32) -> Int: + """Calculates the normalization factor based on the most significant word. + The normalized word should be as close to BASE as possible. + + Notes: + + This is a helper function for division algorithms. + """ + if msw < 10_000: + if msw < 100: + if msw < 10: + normalization_factor = 8 # Shift by 8 digits + else: # 10 <= msw < 100 + normalization_factor = 7 # Shift by 7 digits + else: # 100 <= msw < 10_000 + if msw < 1_000: # 100 <= msw < 1_000 + normalization_factor = 6 # Shift by 6 digits + else: # 1_000 <= msw < 10_000: + normalization_factor = 5 # Shift by 5 digits + elif msw < 100_000_000: # 10_000 <= msw < 100_000_000 + if msw < 1_000_000: + if msw < 100_000: # 10_000 <= msw < 100_000 + normalization_factor = 4 # Shift by 4 digits + else: # 100_000 <= msw < 1_000_000 + normalization_factor = 3 # Shift by 3 digits + else: # 1_000_000 <= msw < 100_000_000 + if msw < 10_000_000: # 1_000_000 <= msw < 10_000_000 + normalization_factor = 2 # Shift by 2 digits + else: # 10_000_000 <= msw < 100_000_000 + normalization_factor = 1 # Shift by 1 digit + else: # 100_000_000 <= msw < 1_000_000_000 + normalization_factor = 0 # No shift needed + + return normalization_factor From fb77e356e9d75091e46dd14ba98f774cd363ca19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 8 Jul 2025 20:17:43 +0200 Subject: [PATCH 3/7] Enable 2n / n --- division.mojo | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 division.mojo diff --git a/division.mojo b/division.mojo new file mode 100644 index 0000000..929857f --- /dev/null +++ b/division.mojo @@ -0,0 +1,182 @@ +from decimojo.prelude import * +import time + + +fn divide_three_words_by_two( + a2: UInt32, a1: UInt32, a0: UInt32, b1: UInt32, b0: UInt32 +) raises -> Tuple[UInt32, UInt32, UInt32]: + """Divides a 3-word number by a 2-word number. + b1 must be at least 500_000_000. + + Args: + a2: The most significant word of the dividend. + a1: The middle word of the dividend. + a0: The least significant word of the dividend. + b1: The most significant word of the divisor. + b0: The least significant word of the divisor. + + Returns: + A tuple containing + (1) the quotient (as UInt32) + (2) the most significant word of the remainder (as UInt32) + (3) the least significant word of the remainder (as UInt32). + + Notes: + a = a2 * BASE^2 + a1 * BASE + a0. + b = b1 * BASE + b0. + """ + if b1 < 500_000_000: + raise Error("b1 must be at least 500_000_000") + + var a2a1 = UInt64(a2) * 1_000_000_000 + UInt64(a1) + + var q: UInt64 = UInt64(a2a1) // UInt64(b1) + var c = a2a1 - q * UInt64(b1) + var d: UInt64 = q * UInt64(b0) + var r = UInt64(c * 1_000_000_000) + UInt64(a0) + + if r < UInt64(d): + var b = UInt64(b1) * 1_000_000_000 + UInt64(b0) + q -= 1 + r += b + if r < UInt64(d): + q -= 1 + r += b + + r -= d + var r1: UInt32 = UInt32(r // 1_000_000_000) + var r0: UInt32 = UInt32(r % 1_000_000_000) + + return (UInt32(q), r1, r0) + + +fn divide_four_words_by_two( + a3: UInt32, + a2: UInt32, + a1: UInt32, + a0: UInt32, + b1: UInt32, + b0: UInt32, +) raises -> Tuple[UInt32, UInt32, UInt32, UInt32]: + """Divides a 4-word number by a 2-word number. + + Args: + a3: The most significant word of the dividend. + a2: The second most significant word of the dividend. + a1: The second least significant word of the dividend. + a0: The least significant word of the dividend. + b1: The most significant word of the divisor. + b0: The least significant word of the divisor. + + Returns: + A tuple containing + (1) the most significant word of the quotient (as UInt32) + (2) the least significant word of the quotient (as UInt32) + (3) the most significant word of the remainder (as UInt32) + (4) the least significant word of the remainder (as UInt32). + """ + + if b1 < 500_000_000: + raise Error("b1 must be at least 500_000_000") + if a3 > b1: + raise Error("a must be less than b * 10^18") + elif a3 == b1: + if a2 > b0: + raise Error("a must be less than b * 10^18") + elif a2 == b0: + if a1 > 0: + raise Error("a must be less than b * 10^18") + elif a1 == 0: + if a0 >= 0: + raise Error("a must be less than b * 10^18") + + var q1, r1, r0 = divide_three_words_by_two(a3, a2, a1, b1, b0) + var q0, s1, s0 = divide_three_words_by_two(r1, r0, a0, b1, b0) + return (q1, q0, s1, s0) + + +fn divide_three_by_two( + a2: BigUInt, a1: BigUInt, a0: BigUInt, b1: BigUInt, b0: BigUInt, n: Int +) raises -> Tuple[BigUInt, BigUInt]: + if b1.words[-1] < 500_000_000: + raise Error("b[-1] must be at least 500_000_000") + + var a2a1: BigUInt + if a2.is_zero(): + a2a1 = a1 + else: + a2a1 = a2 + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + a2a1, n + ) + a2a1 += a1 + var q, c = divide_recursive(a2a1, b1, n) + var d = q * b0 + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n) + var r = c + a0 + + if r < d: + var b = b1 + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(b, n) + b += b0 + q -= BigUInt.ONE + r += b + if r < d: + q -= BigUInt.ONE + r += b + + r -= d + return (q, r) + + +fn divide_recursive( + a: BigUInt, b: BigUInt, n: Int +) raises -> Tuple[BigUInt, BigUInt]: + if (n & 1) != 0 or n <= 2: + return (a // b, a % b) + + else: + a0 = BigUInt(a.words[0 : n // 2]) + a1 = BigUInt(a.words[n // 2 : n]) + a2 = BigUInt(a.words[n : n + n // 2]) + a3 = BigUInt(a.words[n + n // 2 : n + n]) + + b0 = BigUInt(b.words[0 : n // 2]) + b1 = BigUInt(b.words[n // 2 : n]) + + print("b:", b1, b0) + q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2) + r0 = BigUInt(r.words[0 : n // 2]) + r1 = BigUInt(r.words[n // 2 : n]) + q0, s = divide_three_by_two(r1, r0, a0, b1, b0, n // 2) + + q = q1 + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + q, n // 2 + ) + q += q0 + + return (q, s) + + +fn main() raises: + n = 2**4 + + var a = BUInt("123_456_789" * 2 * n) + var b = BUInt("987654321" + "000000000" * (n - 1)) + + var result1 = List[BigUInt]() + t0 = time.perf_counter_ns() + for _ in range(1): + q = divide_recursive(a, b, n) + result1.append(q[0]) + print("time taken: ", time.perf_counter_ns() - t0, " ns") + + var result2 = List[BigUInt]() + t0 = time.perf_counter_ns() + for _ in range(1): + p = a // b + result2.append(p) + print("time taken: ", time.perf_counter_ns() - t0, " ns") + + print("result1 == result2: ", result1[0] == result2[0]) From 69bbfbf3ee5e2d9e985548a9d0b3facf353335b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 8 Jul 2025 22:53:07 +0200 Subject: [PATCH 4/7] Finish a preliminary implementation --- division.mojo | 141 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 128 insertions(+), 13 deletions(-) diff --git a/division.mojo b/division.mojo index 929857f..cf8f866 100644 --- a/division.mojo +++ b/division.mojo @@ -1,5 +1,6 @@ from decimojo.prelude import * import time +import math fn divide_three_words_by_two( @@ -98,9 +99,6 @@ fn divide_four_words_by_two( fn divide_three_by_two( a2: BigUInt, a1: BigUInt, a0: BigUInt, b1: BigUInt, b0: BigUInt, n: Int ) raises -> Tuple[BigUInt, BigUInt]: - if b1.words[-1] < 500_000_000: - raise Error("b[-1] must be at least 500_000_000") - var a2a1: BigUInt if a2.is_zero(): a2a1 = a1 @@ -110,7 +108,7 @@ fn divide_three_by_two( a2a1, n ) a2a1 += a1 - var q, c = divide_recursive(a2a1, b1, n) + var q, c = divide_two_by_one(a2a1, b1, n) var d = q * b0 decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n) var r = c + a0 @@ -129,12 +127,30 @@ fn divide_three_by_two( return (q, r) -fn divide_recursive( +fn divide_two_by_one( a: BigUInt, b: BigUInt, n: Int ) raises -> Tuple[BigUInt, BigUInt]: - if (n & 1) != 0 or n <= 2: + """Divides a BigUInt by another BigUInt using a recursive approach. + The divisor has n words and the dividend has 2n words. + + Args: + a: The dividend as a BigUInt. + b: The divisor as a BigUInt. + n: The number of words in the divisor. + """ + if (n & 1) != 0 or n <= 2: # n can be 16 or 32 return (a // b, a % b) + if b.words[-1] < 500_000_000: + raise Error("b[-1] must be at least 500_000_000") + + # b_modified = b + # decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + # b_modified, n + # ) + # if a.compare(b_modified) >= 0: + # raise Error("a must be less than b * 10^18") + else: a0 = BigUInt(a.words[0 : n // 2]) a1 = BigUInt(a.words[n // 2 : n]) @@ -144,7 +160,6 @@ fn divide_recursive( b0 = BigUInt(b.words[0 : n // 2]) b1 = BigUInt(b.words[n // 2 : n]) - print("b:", b1, b0) q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2) r0 = BigUInt(r.words[0 : n // 2]) r1 = BigUInt(r.words[n // 2 : n]) @@ -159,17 +174,115 @@ fn divide_recursive( return (q, s) -fn main() raises: - n = 2**4 +fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt: + """Divides BigUInt using the Burnikel-Ziegler algorithm.""" + + # Yuhao Zhu: + # This implementation is based on the research report + # "Fast Recursive Division" by Christoph Burnikel and Joachim Ziegler. + # MPI-I-98-1-022, October 1998. + + alias BLOCK_SIZE_OF_WORDS = 2 - var a = BUInt("123_456_789" * 2 * n) - var b = BUInt("987654321" + "000000000" * (n - 1)) + # STEP 1: + # Normalize the divisor b to n words so that + # (1) it is of the form j*2^k and + # (2) the most significant word is at least 500_000_000. + + var normalized_b = b + var normalized_a = a + var normalization_factor: Int + + if normalized_b.words[-1] == 0: + normalized_b.remove_leading_empty_words() + if normalized_b.words[-1] < 500_000_000: + normalization_factor = ( + decimojo.biguint.arithmetics.calculate_normalization_factor( + normalized_b.words[-1] + ) + ) + else: + normalization_factor = 0 + + # The targeted number of blocks should be the smallest 2^k such that + # 2^k >= number of words in normalized_b ceil divided by BLOCK_SIZE_OF_WORDS. + # k is the depth of the recursion. + # n is the final number of words in the normalized b. + var n_blocks_divisor = math.ceildiv( + len(normalized_b.words), BLOCK_SIZE_OF_WORDS + ) + var depth = Int(math.ceil(math.log2(Float64(n_blocks_divisor)))) + n_blocks_divisor = 2**depth + var n = n_blocks_divisor * BLOCK_SIZE_OF_WORDS + + var n_digits_to_scale_up = ( + n - len(normalized_b.words) + ) * 9 + normalization_factor + + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten( + normalized_b, n_digits_to_scale_up + ) + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten( + normalized_a, n_digits_to_scale_up + ) + + # The normalized_b is now 9 digits, but may still be smaller than 500_000_000. + var gap_ratio = BUInt.BASE // normalized_b.words[-1] + if gap_ratio > 2: + decimojo.biguint.arithmetics.multiply_inplace_by_uint32( + normalized_b, gap_ratio + ) + decimojo.biguint.arithmetics.multiply_inplace_by_uint32( + normalized_a, gap_ratio + ) + + # STEP 2: Split the normalized a into blocks of size n. + # t is the number of blocks in the dividend. + var t = math.ceildiv(len(normalized_a.words), n) + if len(a.words) == t * n: + # If the number of words in a is already a multiple of n + # We check if the most significant word is >= 500_000_000. + # If it is, we need to add one more block to the dividend. + # This ensures that the most significant word of the dividend + # is smaller than 500_000_000. + if normalized_a.words[-1] >= 500_000_000: + t += 1 + + var z = BigUInt(normalized_a.words[(t - 2) * n : t * n]) + var q = BigUInt() + for i in range(t - 2, -1, -1): + var q_i, r = divide_two_by_one(z, normalized_b, n) + # print(z, "//", normalized_b, "=", q_i, "mod", r) + if i == t - 2: + q = q_i + else: + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + q, n + ) + q += q_i + if i > 0: + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + r, n + ) + z = r + BigUInt(normalized_a.words[(i - 1) * n : i * n]) + + return q + + +fn main() raises: + n = 2**14 + var a = BUInt(String("987_654_321" * 3 * n)) + var b = BUInt(String("3_141_592" * n)) + # var a = BigUInt( + # "123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789000000" + # ) + # var b = BigUInt("678678678678000000") var result1 = List[BigUInt]() t0 = time.perf_counter_ns() for _ in range(1): - q = divide_recursive(a, b, n) - result1.append(q[0]) + q = divide_burnikel_ziegler(a, b) + result1.append(q) print("time taken: ", time.perf_counter_ns() - t0, " ns") var result2 = List[BigUInt]() @@ -179,4 +292,6 @@ fn main() raises: result2.append(p) print("time taken: ", time.perf_counter_ns() - t0, " ns") + # print("result1: ", result1[0]) + # print("result2: ", result2[0]) print("result1 == result2: ", result1[0] == result2[0]) From 2fbcb01ed4ca52f3e3c07fe685c590f41f6bcfe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 8 Jul 2025 23:16:08 +0200 Subject: [PATCH 5/7] Add benches --- .../bench_biguint_truncate_divide_bz.mojo | 555 ++++++++++++++++++ .../decimojo/biguint/division.mojo | 84 +-- 2 files changed, 604 insertions(+), 35 deletions(-) create mode 100644 benches/biguint/bench_biguint_truncate_divide_bz.mojo rename division.mojo => src/decimojo/biguint/division.mojo (81%) diff --git a/benches/biguint/bench_biguint_truncate_divide_bz.mojo b/benches/biguint/bench_biguint_truncate_divide_bz.mojo new file mode 100644 index 0000000..712b85b --- /dev/null +++ b/benches/biguint/bench_biguint_truncate_divide_bz.mojo @@ -0,0 +1,555 @@ +""" +Comprehensive benchmarks for BigUInt truncate_divide operation. +Compares performance against Python's built-in int division with 20 diverse test cases. +BigUInt is an unsigned integer type, so all test cases use positive numbers only. +""" + +from decimojo.biguint.biguint import BigUInt +import decimojo.biguint.arithmetics +from python import Python, PythonObject +from time import perf_counter_ns +import time +import os +from collections import List + + +fn open_log_file() raises -> PythonObject: + """ + Creates and opens a log file with a timestamp in the filename. + + Returns: + A file object opened for writing. + """ + var python = Python.import_module("builtins") + var datetime = Python.import_module("datetime") + var pysys = Python.import_module("sys") + pysys.set_int_max_str_digits(1000000) + + # Create logs directory if it doesn't exist + var log_dir = "./logs" + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Generate a timestamp for the filename + var timestamp = String(datetime.datetime.now().isoformat()) + var log_filename = ( + log_dir + "/benchmark_biguint_truncate_divide_" + timestamp + ".log" + ) + + print("Saving benchmark results to:", log_filename) + return python.open(log_filename, "w") + + +fn log_print(msg: String, log_file: PythonObject) raises: + """ + Prints a message to both the console and the log file. + + Args: + msg: The message to print. + log_file: The file object to write to. + """ + print(msg) + log_file.write(msg + "\n") + log_file.flush() # Ensure the message is written immediately + + +fn run_benchmark_truncate_divide( + name: String, + dividend: String, + divisor: String, + iterations: Int, + log_file: PythonObject, + mut speedup_factors: List[Float64], +) raises: + """ + Run a benchmark comparing Mojo BigUInt truncate_divide with Python int division. + + Args: + name: Name of the benchmark case. + dividend: String representation of the dividend. + divisor: String representation of the divisor. + iterations: Number of iterations to run. + log_file: File object for logging results. + speedup_factors: Mojo List to store speedup factors for averaging. + """ + log_print("\nBenchmark: " + name, log_file) + log_print("Dividend: " + dividend, log_file) + log_print("Divisor: " + divisor, log_file) + + # Set up Mojo and Python values + var mojo_dividend = BigUInt(dividend) + var mojo_divisor = BigUInt(divisor) + var py = Python.import_module("builtins") + var py_dividend = py.int(dividend) + var py_divisor = py.int(divisor) + + # Execute the operations once to verify correctness + try: + var mojo_result = decimojo.biguint.division.divide_burnikel_ziegler( + mojo_dividend, mojo_divisor + ) + var py_result = py_dividend // py_divisor + + # Display results for verification + log_print("Mojo result: " + String(mojo_result), log_file) + log_print("Python result: " + String(py_result), log_file) + + # Benchmark Mojo implementation + var t0 = perf_counter_ns() + for _ in range(iterations): + _ = decimojo.biguint.division.divide_burnikel_ziegler( + mojo_dividend, mojo_divisor + ) + var mojo_time = (perf_counter_ns() - t0) / iterations + if mojo_time == 0: + mojo_time = 1 # Prevent division by zero + + # Benchmark Python implementation + t0 = perf_counter_ns() + for _ in range(iterations): + _ = py_dividend // py_divisor + var python_time = (perf_counter_ns() - t0) / iterations + + # Calculate speedup factor + var speedup = python_time / mojo_time + speedup_factors.append(Float64(speedup)) + + # Print results with speedup comparison + log_print( + "Mojo division: " + String(mojo_time) + " ns per iteration", + log_file, + ) + log_print( + "Python division: " + String(python_time) + " ns per iteration", + log_file, + ) + log_print("Speedup factor: " + String(speedup), log_file) + except e: + log_print("Error occurred during benchmark: " + String(e), log_file) + log_print("Skipping this benchmark case", log_file) + + +fn main() raises: + # Open log file + var log_file = open_log_file() + var datetime = Python.import_module("datetime") + + # Create a Mojo List to store speedup factors for averaging later + var speedup_factors = List[Float64]() + + # Display benchmark header with system information + log_print("=== DeciMojo BigUInt Truncate Division Benchmark ===", log_file) + log_print("Time: " + String(datetime.datetime.now().isoformat()), log_file) + + # Try to get system info + try: + var platform = Python.import_module("platform") + log_print( + "System: " + + String(platform.system()) + + " " + + String(platform.release()), + log_file, + ) + log_print("Processor: " + String(platform.processor()), log_file) + log_print( + "Python version: " + String(platform.python_version()), log_file + ) + except: + log_print("Could not retrieve system information", log_file) + + var iterations = 100 + + # Define benchmark cases (all positive numbers for BigUInt) + log_print( + "\nRunning truncate division benchmarks with " + + String(iterations) + + " iterations each", + log_file, + ) + + # Case 1: Simple division with no remainder + run_benchmark_truncate_divide( + "Simple division, no remainder", + "100", + "10", + iterations, + log_file, + speedup_factors, + ) + + # Case 2: Division with remainder + run_benchmark_truncate_divide( + "Division with remainder", + "10", + "3", + iterations, + log_file, + speedup_factors, + ) + + # Case 3: Division of small numbers + run_benchmark_truncate_divide( + "Division of small numbers", + "7", + "2", + iterations, + log_file, + speedup_factors, + ) + + # Case 4: Division resulting in zero + run_benchmark_truncate_divide( + "Division resulting in zero", + "5", + "10", + iterations, + log_file, + speedup_factors, + ) + + # Case 5: Division by one + run_benchmark_truncate_divide( + "Division by one", + "12345", + "1", + iterations, + log_file, + speedup_factors, + ) + + # Case 6: Zero dividend + run_benchmark_truncate_divide( + "Zero dividend", + "0", + "5", + iterations, + log_file, + speedup_factors, + ) + + # Case 7: Large number division + run_benchmark_truncate_divide( + "Large number division", + "9999999999", # 10 billion + "333", + iterations, + log_file, + speedup_factors, + ) + + # Case 8: Very large number division + run_benchmark_truncate_divide( + "Very large number division", + "1" + "0" * 50, # 10^50 + "7", + iterations, + log_file, + speedup_factors, + ) + + # Case 9: Division of large numbers with exact result + run_benchmark_truncate_divide( + "Division of large numbers with exact result", + "1" + "0" * 30, # 10^30 + "1" + "0" * 10, # 10^10 + iterations, + log_file, + speedup_factors, + ) + + # Case 10: Division by large number + run_benchmark_truncate_divide( + "Division by large number", + "12345", + "9" * 20, # 20 nines + iterations, + log_file, + speedup_factors, + ) + + # Case 11: Fibonacci number division + run_benchmark_truncate_divide( + "Fibonacci number division", + "6765", # Fib(20) + "4181", # Fib(19) + iterations, + log_file, + speedup_factors, + ) + + # Case 12: Prime number division + run_benchmark_truncate_divide( + "Prime number division", + "2147483647", # Mersenne prime (2^31 - 1) + "997", # Prime + iterations, + log_file, + speedup_factors, + ) + + # Case 13: Division of numbers near UInt64 limit + run_benchmark_truncate_divide( + "Division near UInt64 limit", + "18446744073709551615", # UInt64.MAX + "2", + iterations, + log_file, + speedup_factors, + ) + + # Case 14: Division with divisor just below dividend + run_benchmark_truncate_divide( + "Division with divisor just below dividend", + "1000", + "999", + iterations, + log_file, + speedup_factors, + ) + + # Case 15: Division with exact powers of 10 + run_benchmark_truncate_divide( + "Division with exact powers of 10", + "1" + "0" * 20, # 10^20 + "1" + "0" * 5, # 10^5 + iterations, + log_file, + speedup_factors, + ) + + # Case 16: Division of repeated digits + run_benchmark_truncate_divide( + "Division of repeated digits", + "9" * 30, # 30 nines + "9" * 15, # 15 nines + iterations, + log_file, + speedup_factors, + ) + + # Case 17: Division with extremely large dividend and small divisor + run_benchmark_truncate_divide( + "Extreme large dividend and small divisor", + "9" * 100, # 100 nines + "3", + iterations, + log_file, + speedup_factors, + ) + + # Case 18: Division with powers of 2 + run_benchmark_truncate_divide( + "Division with powers of 2", + "1" + "0" * 50, # 10^50 + "256", # 2^8 + iterations, + log_file, + speedup_factors, + ) + + # Case 19: Division yielding a single-digit result + run_benchmark_truncate_divide( + "Division yielding a single-digit result", + "123456789", + "123456780", + iterations, + log_file, + speedup_factors, + ) + + # Case 20: Division with random large numbers + run_benchmark_truncate_divide( + "Division with random large numbers", + "8675309123456789098765432112345678909876543211234567", + "12345678901234567890", + iterations, + log_file, + speedup_factors, + ) + + # Case 21: Division with around 50 digits and with divisor just below dividend + run_benchmark_truncate_divide( + "Division with around 50 digits divisor just below dividend", + "12345" * 10, + "6789" * 12, + iterations, + log_file, + speedup_factors, + ) + + # Case 22: Division of very large repeated digits + run_benchmark_truncate_divide( + "Division of repeated digits", + "990132857498314692374162398217" * 10, # 30 * 10 = 300 digits + "85172390413429847239" * 10, # 20 * 10 = 200 digits + iterations, + log_file, + speedup_factors, + ) + + # Case 23: Division of large numbers + run_benchmark_truncate_divide( + "Division of large numbers (270 digits vs 135 digits)", + "123456789" * 30, + "987654321" * 15, + iterations, + log_file, + speedup_factors, + ) + + # Case 24: Division of very large numbers + run_benchmark_truncate_divide( + "Division of very large numbers (2250 digits vs 900 digits)", + "123456789" * 250, + "987654321" * 100, + iterations, + log_file, + speedup_factors, + ) + + # Case 25: Division of very, very large numbers + # x1 is more than 400 words long (>= 10^3600) + # x2 is more than 200 words long (>= 10^1800) + run_benchmark_truncate_divide( + "Division of very, very large numbers (3600 digits vs 1800 digits)", + "123456789" * 400, + "987654321" * 200, + iterations, + log_file, + speedup_factors, + ) + + # Case 26: Division of large numbers + run_benchmark_truncate_divide( + "Division of very large numbers (256 digits vs 128 digits)", + "1234567890193287491287508917213097405916874098123705160923812345678901932874912875089172130974059168740981237051609238749875089170984701759832708497029875019837409871085709813749870897510749875089170984701759832708497029875019837409871085709813749870897510", + "68740981237051609238123456789019328749128750891721309740591687409812370516092387498750879548759387959978279541709847017598327084", + iterations, + log_file, + speedup_factors, + ) + + # Case 27: Division of numbers with around 200 and 100 digits + run_benchmark_truncate_divide( + "Division of large numbers (200 digits vs 100 digits)", + ( + "314159265358979323846264338327950288419716939937510" + "582097494459230781640628620899862803482534211706798214808651" + "328230664709384460955058223172535940812848111745028410270193" + "852110555964462294895493038196" + ), + ( + "271828182845904523536028747135266249775724709369995" + "95749669676277240766303535475945713821785251664274" + ), + iterations, + log_file, + speedup_factors, + ) + + # Case 28: Division of numbers with around 200 and 100 digits + run_benchmark_truncate_divide( + "Division of large numbers (200 digits vs 100 digits)", + ( + "314159265358979323846264338327950288419716939937510" + "582097494459230781640628620899862803482534211706798214808651" + "328230664709384460955058223172535940812848111745028410270193" + "852110555964462294895493038196" + ), + ( + "141421356237309504880168872420969807856967187537694" + "80731766797379907324784621070388503875343276400719" + ), + iterations, + log_file, + speedup_factors, + ) + + # Case 29: Division of numbers with around 150 and 50 digits + run_benchmark_truncate_divide( + "Division of large numbers (150 digits vs 50 digits)", + ( + "314159265358979323846264338327950288419716939937510" + "582097494459230781640628620899862803482534211706798214808651" + "3282306647093844609550582231725359408128" + ), + "141421356237309504880168872420969807856967187537694", + iterations, + log_file, + speedup_factors, + ) + + # Case 30: Division of numbers with around 150 and 50 digits + run_benchmark_truncate_divide( + "Division of large numbers (150 digits vs 50 digits)", + ( + "316227766016824890583648059893174009579947593530458" + "382628078540989121552735792899961040720792717368862335475063" + "5167610057579407944886251958020310186466" + ), + "141421356237309504880168872420969807856967187537694", + iterations, + log_file, + speedup_factors, + ) + + # Case 31: Division of large numbers + run_benchmark_truncate_divide( + "Division of large numbers (1000 words vs 100 digits)", + "316227766_016824890_583648059_893174009_579947593" * 1000, + "141421356_237309504_880168872_420969807_856967187" * 100, + 3, + log_file, + speedup_factors, + ) + + # Case 32: Division of large numbers + run_benchmark_truncate_divide( + "Division of large numbers (10000 words vs 1234 digits)", + "316227766_016824890_583648059_893174009_579947593" * 10000, + "141421356_237309504_880168872_420969807_856967187" * 1234, + 3, + log_file, + speedup_factors, + ) + + # Calculate average speedup factor (ignoring any cases that might have failed) + if len(speedup_factors) > 0: + var sum_speedup: Float64 = 0.0 + for i in range(len(speedup_factors)): + sum_speedup += speedup_factors[i] + var average_speedup = sum_speedup / Float64(len(speedup_factors)) + + # Display summary + log_print( + "\n=== BigUInt Truncate Division Benchmark Summary ===", log_file + ) + log_print( + "Benchmarked: " + + String(len(speedup_factors)) + + " different division cases", + log_file, + ) + log_print( + "Each case ran: " + String(iterations) + " iterations", log_file + ) + log_print( + "Average speedup: " + String(average_speedup) + "×", log_file + ) + + # List all speedup factors + log_print("\nIndividual speedup factors:", log_file) + for i in range(len(speedup_factors)): + log_print( + String("Case {}: {}×").format( + i + 1, round(speedup_factors[i], 2) + ), + log_file, + ) + else: + log_print("\nNo valid benchmark cases were completed", log_file) + + # Close the log file + log_file.close() + print("Benchmark completed. Log file closed.") diff --git a/division.mojo b/src/decimojo/biguint/division.mojo similarity index 81% rename from division.mojo rename to src/decimojo/biguint/division.mojo index cf8f866..31f9f5e 100644 --- a/division.mojo +++ b/src/decimojo/biguint/division.mojo @@ -97,7 +97,13 @@ fn divide_four_words_by_two( fn divide_three_by_two( - a2: BigUInt, a1: BigUInt, a0: BigUInt, b1: BigUInt, b0: BigUInt, n: Int + a2: BigUInt, + a1: BigUInt, + a0: BigUInt, + b1: BigUInt, + b0: BigUInt, + n: Int, + cut_off: Int, ) raises -> Tuple[BigUInt, BigUInt]: var a2a1: BigUInt if a2.is_zero(): @@ -108,7 +114,7 @@ fn divide_three_by_two( a2a1, n ) a2a1 += a1 - var q, c = divide_two_by_one(a2a1, b1, n) + var q, c = divide_two_by_one(a2a1, b1, n, cut_off) var d = q * b0 decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n) var r = c + a0 @@ -128,7 +134,7 @@ fn divide_three_by_two( fn divide_two_by_one( - a: BigUInt, b: BigUInt, n: Int + a: BigUInt, b: BigUInt, n: Int, cut_off: Int ) raises -> Tuple[BigUInt, BigUInt]: """Divides a BigUInt by another BigUInt using a recursive approach. The divisor has n words and the dividend has 2n words. @@ -137,8 +143,9 @@ fn divide_two_by_one( a: The dividend as a BigUInt. b: The divisor as a BigUInt. n: The number of words in the divisor. + cut_off: The minimum number of words for the recursive division. """ - if (n & 1) != 0 or n <= 2: # n can be 16 or 32 + if (n & 1) != 0 or n <= cut_off: return (a // b, a % b) if b.words[-1] < 500_000_000: @@ -160,10 +167,10 @@ fn divide_two_by_one( b0 = BigUInt(b.words[0 : n // 2]) b1 = BigUInt(b.words[n // 2 : n]) - q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2) + q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2, cut_off) r0 = BigUInt(r.words[0 : n // 2]) r1 = BigUInt(r.words[n // 2 : n]) - q0, s = divide_three_by_two(r1, r0, a0, b1, b0, n // 2) + q0, s = divide_three_by_two(r1, r0, a0, b1, b0, n // 2, cut_off) q = q1 decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( @@ -174,7 +181,9 @@ fn divide_two_by_one( return (q, s) -fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt: +fn divide_burnikel_ziegler( + a: BigUInt, b: BigUInt, cut_off: Int = 16 +) raises -> BigUInt: """Divides BigUInt using the Burnikel-Ziegler algorithm.""" # Yuhao Zhu: @@ -182,7 +191,12 @@ fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt: # "Fast Recursive Division" by Christoph Burnikel and Joachim Ziegler. # MPI-I-98-1-022, October 1998. - alias BLOCK_SIZE_OF_WORDS = 2 + var BLOCK_SIZE_OF_WORDS = cut_off + + if (len(a.words) <= cut_off) or (len(b.words) <= cut_off): + # If the number of words in a or b is less than or equal to 64, + # we can use the schoolbook division algorithm. + return a // b # STEP 1: # Normalize the divisor b to n words so that @@ -251,7 +265,7 @@ fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt: var z = BigUInt(normalized_a.words[(t - 2) * n : t * n]) var q = BigUInt() for i in range(t - 2, -1, -1): - var q_i, r = divide_two_by_one(z, normalized_b, n) + var q_i, r = divide_two_by_one(z, normalized_b, n, cut_off) # print(z, "//", normalized_b, "=", q_i, "mod", r) if i == t - 2: q = q_i @@ -269,29 +283,29 @@ fn divide_burnikel_ziegler(a: BigUInt, b: BigUInt) raises -> BigUInt: return q -fn main() raises: - n = 2**14 - var a = BUInt(String("987_654_321" * 3 * n)) - var b = BUInt(String("3_141_592" * n)) - # var a = BigUInt( - # "123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789000000" - # ) - # var b = BigUInt("678678678678000000") - - var result1 = List[BigUInt]() - t0 = time.perf_counter_ns() - for _ in range(1): - q = divide_burnikel_ziegler(a, b) - result1.append(q) - print("time taken: ", time.perf_counter_ns() - t0, " ns") - - var result2 = List[BigUInt]() - t0 = time.perf_counter_ns() - for _ in range(1): - p = a // b - result2.append(p) - print("time taken: ", time.perf_counter_ns() - t0, " ns") - - # print("result1: ", result1[0]) - # print("result2: ", result2[0]) - print("result1 == result2: ", result1[0] == result2[0]) +# fn main() raises: +# n = 2**14 +# var a = BUInt(String("987_654_321" * 3 * n)) +# var b = BUInt(String("3_141_592" * n)) +# # var a = BigUInt( +# # "123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789000000" +# # ) +# # var b = BigUInt("678678678678000000") + +# var result1 = List[BigUInt]() +# t0 = time.perf_counter_ns() +# for _ in range(1): +# q = divide_burnikel_ziegler(a, b) +# result1.append(q) +# print("time taken: ", time.perf_counter_ns() - t0, " ns") + +# var result2 = List[BigUInt]() +# t0 = time.perf_counter_ns() +# for _ in range(1): +# p = a // b +# result2.append(p) +# print("time taken: ", time.perf_counter_ns() - t0, " ns") + +# # print("result1: ", result1[0]) +# # print("result2: ", result2[0]) +# print("result1 == result2: ", result1[0] == result2[0]) From a1d5885b84aef4f2ad8cc52c0a8dc96d05739d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Wed, 9 Jul 2025 11:05:23 +0200 Subject: [PATCH 6/7] Integrate functions of BZ algorithm into arithmetics module --- .../bench_biguint_divide_complexity.mojo | 12 +- .../bench_biguint_truncate_divide_bz.mojo | 8 +- src/decimojo/biguint/arithmetics.mojo | 394 ++++++++++++++++-- src/decimojo/biguint/division.mojo | 311 -------------- 4 files changed, 382 insertions(+), 343 deletions(-) delete mode 100644 src/decimojo/biguint/division.mojo diff --git a/benches/biguint/bench_biguint_divide_complexity.mojo b/benches/biguint/bench_biguint_divide_complexity.mojo index cb270fb..29bc8bb 100644 --- a/benches/biguint/bench_biguint_divide_complexity.mojo +++ b/benches/biguint/bench_biguint_divide_complexity.mojo @@ -1,6 +1,6 @@ # ===----------------------------------------------------------------------=== # # Benchmark for BigUInt division time complexity analysis -# Testing word sizes from 32 to 65536 words (powers of 2) +# Testing word sizes from 32 to 2**18 words (powers of 2) # ===----------------------------------------------------------------------=== # from time import perf_counter_ns @@ -157,7 +157,7 @@ fn main() raises: ) log_print("", log_file) - # Test sizes: powers of 2 from 32 to 65536 + # Test sizes: powers of 2 from 32 to 2**18 words var test_sizes = List[Int]() test_sizes.append(32) test_sizes.append(64) @@ -171,6 +171,8 @@ fn main() raises: test_sizes.append(16384) test_sizes.append(32768) test_sizes.append(65536) + test_sizes.append(131072) # 2^17 + test_sizes.append(262144) # 2^18 # Test Case 1: Large / Small division (2n / n) log_print("=== TEST CASE 1: LARGE / SMALL DIVISION (2n / n) ===", log_file) @@ -183,7 +185,7 @@ fn main() raises: for i in range(len(test_sizes)): var divisor_size = test_sizes[i] var dividend_size = divisor_size * 2 - if dividend_size <= 65536: # Stay within our limit + if dividend_size <= 2**18: # Stay within our limit var avg_time = benchmark_divide_at_size( dividend_size, divisor_size, 5, log_file ) @@ -206,7 +208,7 @@ fn main() raises: for i in range(len(test_sizes)): var divisor_size = test_sizes[i] var dividend_size = divisor_size * 4 - if dividend_size <= 65536: # Stay within our limit + if dividend_size <= 2**18: # Stay within our limit var avg_time = benchmark_divide_at_size( dividend_size, divisor_size, 5, log_file ) @@ -274,7 +276,7 @@ fn main() raises: for i in range(len(test_sizes)): var divisor_size = test_sizes[i] var dividend_size = divisor_size * 4 - if dividend_size <= 65536: # Only show results within our limit + if dividend_size <= 2**18: # Only show results within our limit var time_taken = very_large_small_results[i] if time_taken > 0.0: # Only show valid results diff --git a/benches/biguint/bench_biguint_truncate_divide_bz.mojo b/benches/biguint/bench_biguint_truncate_divide_bz.mojo index 712b85b..4c590cc 100644 --- a/benches/biguint/bench_biguint_truncate_divide_bz.mojo +++ b/benches/biguint/bench_biguint_truncate_divide_bz.mojo @@ -85,8 +85,10 @@ fn run_benchmark_truncate_divide( # Execute the operations once to verify correctness try: - var mojo_result = decimojo.biguint.division.divide_burnikel_ziegler( - mojo_dividend, mojo_divisor + var mojo_result = ( + decimojo.biguint.arithmetics.floor_divide_burnikel_ziegler( + mojo_dividend, mojo_divisor + ) ) var py_result = py_dividend // py_divisor @@ -97,7 +99,7 @@ fn run_benchmark_truncate_divide( # Benchmark Mojo implementation var t0 = perf_counter_ns() for _ in range(iterations): - _ = decimojo.biguint.division.divide_burnikel_ziegler( + _ = decimojo.biguint.arithmetics.floor_divide_burnikel_ziegler( mojo_dividend, mojo_divisor ) var mojo_time = (perf_counter_ns() - t0) / iterations diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo index 6725cdf..4bae6c9 100644 --- a/src/decimojo/biguint/arithmetics.mojo +++ b/src/decimojo/biguint/arithmetics.mojo @@ -19,6 +19,7 @@ Implements basic arithmetic functions for the BigUInt type. """ from algorithm import vectorize +import math import time import testing @@ -51,7 +52,7 @@ from decimojo.rounding_mode import RoundingMode # multiply_inplace_by_power_of_billion(mut x: BigUInt, n: Int) # # floor_divide(x1: BigUInt, x2: BigUInt) -> BigUInt -# floor_divide_general(x1: BigUInt, x2: BigUInt) -> BigUInt +# floor_divide_school(x1: BigUInt, x2: BigUInt) -> BigUInt # floor_divide_estimate_quotient(x1: BigUInt, x2: BigUInt, j: Int, m: Int) -> UInt64 # floor_divide_inplace_by_single_word(x1: BigUInt, x2: BigUInt) -> None # floor_divide_inplace_by_double_words(x1: BigUInt, x2: BigUInt) -> None @@ -59,7 +60,7 @@ from decimojo.rounding_mode import RoundingMode # floor_divide_by_power_of_ten(x: BigUInt, n: Int) -> BigUInt # # truncate_divide(x1: BigUInt, x2: BigUInt) -> BigUInt -# ceil_divide(x1: BigUInt, x2: BigUInt) -> BigUIntulo(x1: BigUIn# floor_divide_general(x1: BigUInt, x2: BigUInt) -> BigUInt +# ceil_divide(x1: BigUInt, x2: BigUInt) -> BigUIntulo(x1: BigUIn# floor_divide_school(x1: BigUInt, x2: BigUInt) -> BigUInt # # floor_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt # ceil_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt @@ -1191,7 +1192,9 @@ fn multiply_inplace_by_power_of_billion(mut x: BigUInt, n: Int): # ===----------------------------------------------------------------------=== # # Division Algorithms -# floor_divide_general, floor_divide_inplace_by_2 +# floor_divide +# floor_divide_school +# floor_divide_burnikel_ziegler # ===----------------------------------------------------------------------=== # @@ -1212,6 +1215,8 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt: It is equal to truncated division for positive numbers. """ + alias CUTOFF_BURNIKEL_ZIEGLER = 64 + # CASE: x2 is single word if len(x2.words) == 1: # SUB-CASE: Division by zero @@ -1296,29 +1301,41 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt: result.remove_leading_empty_words() return result^ - # CASE: division of very, very large numbers - # Use Newton-Raphson division for large numbers? + # CASE: Division of small numbers + # If the number of words in a or b is small enough, + # we can use the schoolbook division algorithm. + if (len(x1.words) <= CUTOFF_BURNIKEL_ZIEGLER) and ( + len(x2.words) <= CUTOFF_BURNIKEL_ZIEGLER + ): + # I will normalize the divisor to improve quotient estimation + var normalization_factor: Int # Number of digits to shift + # Calculate normalization factor to make leading digit of divisor + # as large as possible + normalization_factor = calculate_normalization_factor(x2.words[-1]) + + if normalization_factor == 0: + # No normalization needed, just use the general division algorithm + return floor_divide_school(x1, x2) + else: + # Normalize the divisor and dividend + var normalized_x1 = multiply_by_power_of_ten( + x1, normalization_factor + ) + var normalized_x2 = multiply_by_power_of_ten( + x2, normalization_factor + ) + return floor_divide_school(normalized_x1, normalized_x2) - # CASE: all other situations - # Normalize divisor to improve quotient estimation - # Calculate normalization factor to make leading digit of divisor - # as large as possible - # I use table lookup to find the normalization factor - var normalization_factor: Int # Number of digits to shift - normalization_factor = calculate_normalization_factor(x2.words[-1]) - - if normalization_factor == 0: - # No normalization needed, just use the general division algorithm - return floor_divide_general(x1, x2) - else: - # Normalize the divisor and dividend - var normalized_x1 = multiply_by_power_of_ten(x1, normalization_factor) - var normalized_x2 = multiply_by_power_of_ten(x2, normalization_factor) - return floor_divide_general(normalized_x1, normalized_x2) + # CASE: division of very, very large numbers + # Use the Burnikel-Ziegler division algorithm + print("Using Burnikel-Ziegler division algorithm for large numbers") + return floor_divide_burnikel_ziegler( + x1, x2, cut_off=CUTOFF_BURNIKEL_ZIEGLER + ) -fn floor_divide_general(dividend: BigUInt, divisor: BigUInt) raises -> BigUInt: - """General division algorithm for BigInt numbers. +fn floor_divide_school(dividend: BigUInt, divisor: BigUInt) raises -> BigUInt: + """General schoolbook division algorithm for BigInt numbers. Args: dividend: The dividend. @@ -1333,7 +1350,7 @@ fn floor_divide_general(dividend: BigUInt, divisor: BigUInt) raises -> BigUInt: if divisor.is_zero(): raise Error( - "`biguint.arithmetics.floor_divide_general()`: Division by zero" + "`biguint.arithmetics.floor_divide_school()`: Division by zero" ) # Initialize result and remainder @@ -1625,6 +1642,335 @@ fn floor_divide_by_power_of_ten(x: BigUInt, n: Int) raises -> BigUInt: return result^ +# FAST RUCURSIVE DIVISION ALGORITHM +# =============================== # +# The following functions implement the Burnikel-Ziegler algorithm. +# +# floor_divide_burnikel_ziegler +# floor_divide_two_by_one +# floor_divide_three_by_two +# floor_divide_three_by_two_uint32 +# floor_divide_four_by_two_uint32 +# +# Yuhao Zhu: +# I tried to write this implementation based on the research report +# "Fast Recursive Division" by Christoph Burnikel and Joachim Ziegler. +# MPI-I-98-1-022, October 1998. +# The paper is mainly based on 2^k-based integers, and therefore, some tricks +# cannot be applied to 10^k-based integers. For example, when normalizing the +# divisor to let its most significant word be at least BASE//2, we cannot simply +# shift the bits until the most significant bit is 1. +# TODO: +# Some optimization needs to be done in future to +# (1) accelerate the normalization process +# (2) avoid unnecessary memory allocations and copies + + +fn floor_divide_burnikel_ziegler( + a: BigUInt, b: BigUInt, cut_off: Int = 16 +) raises -> BigUInt: + """Divides BigUInt using the Burnikel-Ziegler algorithm. + + Args: + a: The dividend. + b: The divisor. + cut_off: The cutoff value for the number of words in the divisor to use + the schoolbook division algorithm. It also determines the size of + the blocks used in the recursive division algorithm. + """ + + var BLOCK_SIZE_OF_WORDS = cut_off + + # STEP 1: + # Normalize the divisor b to n words so that + # (1) it is of the form j*2^k and + # (2) the most significant word is at least 500_000_000. + + var normalized_b = b + var normalized_a = a + var normalization_factor: Int + + if normalized_b.words[-1] == 0: + normalized_b.remove_leading_empty_words() + if normalized_b.words[-1] < 500_000_000: + normalization_factor = ( + decimojo.biguint.arithmetics.calculate_normalization_factor( + normalized_b.words[-1] + ) + ) + else: + normalization_factor = 0 + + # The targeted number of blocks should be the smallest 2^k such that + # 2^k >= number of words in normalized_b ceil divided by BLOCK_SIZE_OF_WORDS. + # k is the depth of the recursion. + # n is the final number of words in the normalized b. + var n_blocks_divisor = math.ceildiv( + len(normalized_b.words), BLOCK_SIZE_OF_WORDS + ) + var depth = Int(math.ceil(math.log2(Float64(n_blocks_divisor)))) + n_blocks_divisor = 2**depth + var n = n_blocks_divisor * BLOCK_SIZE_OF_WORDS + + var n_digits_to_scale_up = ( + n - len(normalized_b.words) + ) * 9 + normalization_factor + + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten( + normalized_b, n_digits_to_scale_up + ) + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten( + normalized_a, n_digits_to_scale_up + ) + + # The normalized_b is now 9 digits, but may still be smaller than 500_000_000. + var gap_ratio = BigUInt.BASE // normalized_b.words[-1] + if gap_ratio > 2: + decimojo.biguint.arithmetics.multiply_inplace_by_uint32( + normalized_b, gap_ratio + ) + decimojo.biguint.arithmetics.multiply_inplace_by_uint32( + normalized_a, gap_ratio + ) + + # STEP 2: Split the normalized a into blocks of size n. + # t is the number of blocks in the dividend. + var t = math.ceildiv(len(normalized_a.words), n) + if len(a.words) == t * n: + # If the number of words in a is already a multiple of n + # We check if the most significant word is >= 500_000_000. + # If it is, we need to add one more block to the dividend. + # This ensures that the most significant word of the dividend + # is smaller than 500_000_000. + if normalized_a.words[-1] >= 500_000_000: + t += 1 + + var z = BigUInt(normalized_a.words[(t - 2) * n : t * n]) + var q = BigUInt() + for i in range(t - 2, -1, -1): + var q_i, r = floor_divide_two_by_one(z, normalized_b, n, cut_off) + # print(z, "//", normalized_b, "=", q_i, "mod", r) + if i == t - 2: + q = q_i + else: + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + q, n + ) + q += q_i + if i > 0: + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + r, n + ) + z = r + BigUInt(normalized_a.words[(i - 1) * n : i * n]) + + return q + + +fn floor_divide_three_by_two( + a2: BigUInt, + a1: BigUInt, + a0: BigUInt, + b1: BigUInt, + b0: BigUInt, + n: Int, + cut_off: Int, +) raises -> Tuple[BigUInt, BigUInt]: + """Divides a 3-word number by a 2-word number. + + Args: + a2: The most significant word of the dividend. + a1: The middle word of the dividend. + a0: The least significant word of the dividend. + b1: The most significant word of the divisor. + b0: The least significant word of the divisor. + n: The number of words in the divisor. + cut_off: The minimum number of words for the recursive division. + + Returns: + A tuple containing the quotient and the remainder as BigUInt. + """ + + var a2a1: BigUInt + if a2.is_zero(): + a2a1 = a1 + else: + a2a1 = a2 + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + a2a1, n + ) + a2a1 += a1 + var q, c = floor_divide_two_by_one(a2a1, b1, n, cut_off) + var d = q * b0 + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n) + var r = c + a0 + + if r < d: + var b = b1 + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(b, n) + b += b0 + q -= BigUInt.ONE + r += b + if r < d: + q -= BigUInt.ONE + r += b + + r -= d + return (q, r) + + +fn floor_divide_two_by_one( + a: BigUInt, b: BigUInt, n: Int, cut_off: Int +) raises -> Tuple[BigUInt, BigUInt]: + """Divides a BigUInt by another BigUInt using a recursive approach. + The divisor has n words and the dividend has 2n words. + + Args: + a: The dividend as a BigUInt. + b: The divisor as a BigUInt. The most significant word must be at least + 500_000_000. + n: The number of words in the divisor. + cut_off: The minimum number of words for the recursive division. + + Returns: + A tuple containing the quotient and the remainder as BigUInt. + + Notes: + + You need to ensure that n is even to continue with the algorithm. + Otherwise, it will use the schoolbook division algorithm. + """ + if (n & 1 == 1) or (n <= cut_off): + var q = floor_divide_school(a, b) + var r = a - q * b + return (q^, r^) + + if b.words[-1] < 500_000_000: + raise Error("b[-1] must be at least 500_000_000") + + # b_modified = b + # decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + # b_modified, n + # ) + # if a.compare(b_modified) >= 0: + # raise Error("a must be less than b * 10^18") + + else: + a0 = BigUInt(a.words[0 : n // 2]) + a1 = BigUInt(a.words[n // 2 : n]) + a2 = BigUInt(a.words[n : n + n // 2]) + a3 = BigUInt(a.words[n + n // 2 : n + n]) + + b0 = BigUInt(b.words[0 : n // 2]) + b1 = BigUInt(b.words[n // 2 : n]) + + q1, r = floor_divide_three_by_two(a3, a2, a1, b1, b0, n // 2, cut_off) + r0 = BigUInt(r.words[0 : n // 2]) + r1 = BigUInt(r.words[n // 2 : n]) + q0, s = floor_divide_three_by_two(r1, r0, a0, b1, b0, n // 2, cut_off) + + q = q1 + decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( + q, n // 2 + ) + q += q0 + + return (q, s) + + +fn floor_divide_three_by_two_uint32( + a2: UInt32, a1: UInt32, a0: UInt32, b1: UInt32, b0: UInt32 +) raises -> Tuple[UInt32, UInt32, UInt32]: + """Divides a 3-word number by a 2-word number. + b1 must be at least 500_000_000. + + Args: + a2: The most significant word of the dividend. + a1: The middle word of the dividend. + a0: The least significant word of the dividend. + b1: The most significant word of the divisor. + b0: The least significant word of the divisor. + + Returns: + A tuple containing + (1) the quotient (as UInt32) + (2) the most significant word of the remainder (as UInt32) + (3) the least significant word of the remainder (as UInt32). + + Notes: + + a = a2 * BASE^2 + a1 * BASE + a0. + b = b1 * BASE + b0. + """ + if b1 < 500_000_000: + raise Error("b1 must be at least 500_000_000") + + var a2a1 = UInt64(a2) * 1_000_000_000 + UInt64(a1) + + var q: UInt64 = UInt64(a2a1) // UInt64(b1) + var c = a2a1 - q * UInt64(b1) + var d: UInt64 = q * UInt64(b0) + var r = UInt64(c * 1_000_000_000) + UInt64(a0) + + if r < UInt64(d): + var b = UInt64(b1) * 1_000_000_000 + UInt64(b0) + q -= 1 + r += b + if r < UInt64(d): + q -= 1 + r += b + + r -= d + var r1: UInt32 = UInt32(r // 1_000_000_000) + var r0: UInt32 = UInt32(r % 1_000_000_000) + + return (UInt32(q), r1, r0) + + +fn floor_divide_four_by_two_uint32( + a3: UInt32, + a2: UInt32, + a1: UInt32, + a0: UInt32, + b1: UInt32, + b0: UInt32, +) raises -> Tuple[UInt32, UInt32, UInt32, UInt32]: + """Divides a 4-word number by a 2-word number. + + Args: + a3: The most significant word of the dividend. + a2: The second most significant word of the dividend. + a1: The second least significant word of the dividend. + a0: The least significant word of the dividend. + b1: The most significant word of the divisor. + b0: The least significant word of the divisor. + + Returns: + A tuple containing + (1) the most significant word of the quotient (as UInt32) + (2) the least significant word of the quotient (as UInt32) + (3) the most significant word of the remainder (as UInt32) + (4) the least significant word of the remainder (as UInt32). + """ + + if b1 < 500_000_000: + raise Error("b1 must be at least 500_000_000") + if a3 > b1: + raise Error("a must be less than b * 10^18") + elif a3 == b1: + if a2 > b0: + raise Error("a must be less than b * 10^18") + elif a2 == b0: + if a1 > 0: + raise Error("a must be less than b * 10^18") + elif a1 == 0: + if a0 >= 0: + raise Error("a must be less than b * 10^18") + + var q1, r1, r0 = floor_divide_three_by_two_uint32(a3, a2, a1, b1, b0) + var q0, s1, s0 = floor_divide_three_by_two_uint32(r1, r0, a0, b1, b0) + return (q1, q0, s1, s0) + + @always_inline fn truncate_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt: """Returns the quotient of two BigUInt numbers, truncating toward zero. diff --git a/src/decimojo/biguint/division.mojo b/src/decimojo/biguint/division.mojo deleted file mode 100644 index 31f9f5e..0000000 --- a/src/decimojo/biguint/division.mojo +++ /dev/null @@ -1,311 +0,0 @@ -from decimojo.prelude import * -import time -import math - - -fn divide_three_words_by_two( - a2: UInt32, a1: UInt32, a0: UInt32, b1: UInt32, b0: UInt32 -) raises -> Tuple[UInt32, UInt32, UInt32]: - """Divides a 3-word number by a 2-word number. - b1 must be at least 500_000_000. - - Args: - a2: The most significant word of the dividend. - a1: The middle word of the dividend. - a0: The least significant word of the dividend. - b1: The most significant word of the divisor. - b0: The least significant word of the divisor. - - Returns: - A tuple containing - (1) the quotient (as UInt32) - (2) the most significant word of the remainder (as UInt32) - (3) the least significant word of the remainder (as UInt32). - - Notes: - a = a2 * BASE^2 + a1 * BASE + a0. - b = b1 * BASE + b0. - """ - if b1 < 500_000_000: - raise Error("b1 must be at least 500_000_000") - - var a2a1 = UInt64(a2) * 1_000_000_000 + UInt64(a1) - - var q: UInt64 = UInt64(a2a1) // UInt64(b1) - var c = a2a1 - q * UInt64(b1) - var d: UInt64 = q * UInt64(b0) - var r = UInt64(c * 1_000_000_000) + UInt64(a0) - - if r < UInt64(d): - var b = UInt64(b1) * 1_000_000_000 + UInt64(b0) - q -= 1 - r += b - if r < UInt64(d): - q -= 1 - r += b - - r -= d - var r1: UInt32 = UInt32(r // 1_000_000_000) - var r0: UInt32 = UInt32(r % 1_000_000_000) - - return (UInt32(q), r1, r0) - - -fn divide_four_words_by_two( - a3: UInt32, - a2: UInt32, - a1: UInt32, - a0: UInt32, - b1: UInt32, - b0: UInt32, -) raises -> Tuple[UInt32, UInt32, UInt32, UInt32]: - """Divides a 4-word number by a 2-word number. - - Args: - a3: The most significant word of the dividend. - a2: The second most significant word of the dividend. - a1: The second least significant word of the dividend. - a0: The least significant word of the dividend. - b1: The most significant word of the divisor. - b0: The least significant word of the divisor. - - Returns: - A tuple containing - (1) the most significant word of the quotient (as UInt32) - (2) the least significant word of the quotient (as UInt32) - (3) the most significant word of the remainder (as UInt32) - (4) the least significant word of the remainder (as UInt32). - """ - - if b1 < 500_000_000: - raise Error("b1 must be at least 500_000_000") - if a3 > b1: - raise Error("a must be less than b * 10^18") - elif a3 == b1: - if a2 > b0: - raise Error("a must be less than b * 10^18") - elif a2 == b0: - if a1 > 0: - raise Error("a must be less than b * 10^18") - elif a1 == 0: - if a0 >= 0: - raise Error("a must be less than b * 10^18") - - var q1, r1, r0 = divide_three_words_by_two(a3, a2, a1, b1, b0) - var q0, s1, s0 = divide_three_words_by_two(r1, r0, a0, b1, b0) - return (q1, q0, s1, s0) - - -fn divide_three_by_two( - a2: BigUInt, - a1: BigUInt, - a0: BigUInt, - b1: BigUInt, - b0: BigUInt, - n: Int, - cut_off: Int, -) raises -> Tuple[BigUInt, BigUInt]: - var a2a1: BigUInt - if a2.is_zero(): - a2a1 = a1 - else: - a2a1 = a2 - decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( - a2a1, n - ) - a2a1 += a1 - var q, c = divide_two_by_one(a2a1, b1, n, cut_off) - var d = q * b0 - decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(c, n) - var r = c + a0 - - if r < d: - var b = b1 - decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion(b, n) - b += b0 - q -= BigUInt.ONE - r += b - if r < d: - q -= BigUInt.ONE - r += b - - r -= d - return (q, r) - - -fn divide_two_by_one( - a: BigUInt, b: BigUInt, n: Int, cut_off: Int -) raises -> Tuple[BigUInt, BigUInt]: - """Divides a BigUInt by another BigUInt using a recursive approach. - The divisor has n words and the dividend has 2n words. - - Args: - a: The dividend as a BigUInt. - b: The divisor as a BigUInt. - n: The number of words in the divisor. - cut_off: The minimum number of words for the recursive division. - """ - if (n & 1) != 0 or n <= cut_off: - return (a // b, a % b) - - if b.words[-1] < 500_000_000: - raise Error("b[-1] must be at least 500_000_000") - - # b_modified = b - # decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( - # b_modified, n - # ) - # if a.compare(b_modified) >= 0: - # raise Error("a must be less than b * 10^18") - - else: - a0 = BigUInt(a.words[0 : n // 2]) - a1 = BigUInt(a.words[n // 2 : n]) - a2 = BigUInt(a.words[n : n + n // 2]) - a3 = BigUInt(a.words[n + n // 2 : n + n]) - - b0 = BigUInt(b.words[0 : n // 2]) - b1 = BigUInt(b.words[n // 2 : n]) - - q1, r = divide_three_by_two(a3, a2, a1, b1, b0, n // 2, cut_off) - r0 = BigUInt(r.words[0 : n // 2]) - r1 = BigUInt(r.words[n // 2 : n]) - q0, s = divide_three_by_two(r1, r0, a0, b1, b0, n // 2, cut_off) - - q = q1 - decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( - q, n // 2 - ) - q += q0 - - return (q, s) - - -fn divide_burnikel_ziegler( - a: BigUInt, b: BigUInt, cut_off: Int = 16 -) raises -> BigUInt: - """Divides BigUInt using the Burnikel-Ziegler algorithm.""" - - # Yuhao Zhu: - # This implementation is based on the research report - # "Fast Recursive Division" by Christoph Burnikel and Joachim Ziegler. - # MPI-I-98-1-022, October 1998. - - var BLOCK_SIZE_OF_WORDS = cut_off - - if (len(a.words) <= cut_off) or (len(b.words) <= cut_off): - # If the number of words in a or b is less than or equal to 64, - # we can use the schoolbook division algorithm. - return a // b - - # STEP 1: - # Normalize the divisor b to n words so that - # (1) it is of the form j*2^k and - # (2) the most significant word is at least 500_000_000. - - var normalized_b = b - var normalized_a = a - var normalization_factor: Int - - if normalized_b.words[-1] == 0: - normalized_b.remove_leading_empty_words() - if normalized_b.words[-1] < 500_000_000: - normalization_factor = ( - decimojo.biguint.arithmetics.calculate_normalization_factor( - normalized_b.words[-1] - ) - ) - else: - normalization_factor = 0 - - # The targeted number of blocks should be the smallest 2^k such that - # 2^k >= number of words in normalized_b ceil divided by BLOCK_SIZE_OF_WORDS. - # k is the depth of the recursion. - # n is the final number of words in the normalized b. - var n_blocks_divisor = math.ceildiv( - len(normalized_b.words), BLOCK_SIZE_OF_WORDS - ) - var depth = Int(math.ceil(math.log2(Float64(n_blocks_divisor)))) - n_blocks_divisor = 2**depth - var n = n_blocks_divisor * BLOCK_SIZE_OF_WORDS - - var n_digits_to_scale_up = ( - n - len(normalized_b.words) - ) * 9 + normalization_factor - - decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten( - normalized_b, n_digits_to_scale_up - ) - decimojo.biguint.arithmetics.multiply_inplace_by_power_of_ten( - normalized_a, n_digits_to_scale_up - ) - - # The normalized_b is now 9 digits, but may still be smaller than 500_000_000. - var gap_ratio = BUInt.BASE // normalized_b.words[-1] - if gap_ratio > 2: - decimojo.biguint.arithmetics.multiply_inplace_by_uint32( - normalized_b, gap_ratio - ) - decimojo.biguint.arithmetics.multiply_inplace_by_uint32( - normalized_a, gap_ratio - ) - - # STEP 2: Split the normalized a into blocks of size n. - # t is the number of blocks in the dividend. - var t = math.ceildiv(len(normalized_a.words), n) - if len(a.words) == t * n: - # If the number of words in a is already a multiple of n - # We check if the most significant word is >= 500_000_000. - # If it is, we need to add one more block to the dividend. - # This ensures that the most significant word of the dividend - # is smaller than 500_000_000. - if normalized_a.words[-1] >= 500_000_000: - t += 1 - - var z = BigUInt(normalized_a.words[(t - 2) * n : t * n]) - var q = BigUInt() - for i in range(t - 2, -1, -1): - var q_i, r = divide_two_by_one(z, normalized_b, n, cut_off) - # print(z, "//", normalized_b, "=", q_i, "mod", r) - if i == t - 2: - q = q_i - else: - decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( - q, n - ) - q += q_i - if i > 0: - decimojo.biguint.arithmetics.multiply_inplace_by_power_of_billion( - r, n - ) - z = r + BigUInt(normalized_a.words[(i - 1) * n : i * n]) - - return q - - -# fn main() raises: -# n = 2**14 -# var a = BUInt(String("987_654_321" * 3 * n)) -# var b = BUInt(String("3_141_592" * n)) -# # var a = BigUInt( -# # "123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789000000" -# # ) -# # var b = BigUInt("678678678678000000") - -# var result1 = List[BigUInt]() -# t0 = time.perf_counter_ns() -# for _ in range(1): -# q = divide_burnikel_ziegler(a, b) -# result1.append(q) -# print("time taken: ", time.perf_counter_ns() - t0, " ns") - -# var result2 = List[BigUInt]() -# t0 = time.perf_counter_ns() -# for _ in range(1): -# p = a // b -# result2.append(p) -# print("time taken: ", time.perf_counter_ns() - t0, " ns") - -# # print("result1: ", result1[0]) -# # print("result2: ", result2[0]) -# print("result1 == result2: ", result1[0] == result2[0]) From 78aa2e6fe0cf76fca5e3e57d050b160cdea7b846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Wed, 9 Jul 2025 14:35:30 +0200 Subject: [PATCH 7/7] Remove bench file for bz --- .../bench_biguint_truncate_divide.mojo | 40 +- .../bench_biguint_truncate_divide_bz.mojo | 557 ------------------ src/decimojo/biguint/arithmetics.mojo | 2 +- 3 files changed, 36 insertions(+), 563 deletions(-) delete mode 100644 benches/biguint/bench_biguint_truncate_divide_bz.mojo diff --git a/benches/biguint/bench_biguint_truncate_divide.mojo b/benches/biguint/bench_biguint_truncate_divide.mojo index 39f7724..a097038 100644 --- a/benches/biguint/bench_biguint_truncate_divide.mojo +++ b/benches/biguint/bench_biguint_truncate_divide.mojo @@ -23,7 +23,7 @@ fn open_log_file() raises -> PythonObject: var python = Python.import_module("builtins") var datetime = Python.import_module("datetime") var pysys = Python.import_module("sys") - pysys.set_int_max_str_digits(1000000) + pysys.set_int_max_str_digits(10000000) # Create logs directory if it doesn't exist var log_dir = "./logs" @@ -88,6 +88,15 @@ fn run_benchmark_truncate_divide( var mojo_result = mojo_dividend // mojo_divisor var py_result = py_dividend // py_divisor + if String(mojo_result) != String(py_result): + log_print( + "Error: Mojo and Python results do not match!", + log_file, + ) + log_print("Mojo result: " + String(mojo_result), log_file) + log_print("Python result: " + String(py_result), log_file) + return # Skip this benchmark case if results don't match + # Display results for verification log_print("Mojo result: " + String(mojo_result), log_file) log_print("Python result: " + String(py_result), log_file) @@ -155,6 +164,7 @@ fn main() raises: log_print("Could not retrieve system information", log_file) var iterations = 100 + var iterations_large_numbers = 10 # Define benchmark cases (all positive numbers for BigUInt) log_print( @@ -492,20 +502,40 @@ fn main() raises: # Case 31: Division of large numbers run_benchmark_truncate_divide( - "Division of large numbers (1000 words vs 100 digits)", + "Division of large numbers (5000 words vs words digits)", "316227766_016824890_583648059_893174009_579947593" * 1000, "141421356_237309504_880168872_420969807_856967187" * 100, - 3, + iterations_large_numbers, log_file, speedup_factors, ) # Case 32: Division of large numbers run_benchmark_truncate_divide( - "Division of large numbers (10000 words vs 1234 digits)", + "Division of large numbers (50000 words vs 6170 words)", "316227766_016824890_583648059_893174009_579947593" * 10000, "141421356_237309504_880168872_420969807_856967187" * 1234, - 3, + iterations_large_numbers, + log_file, + speedup_factors, + ) + + # Case 33: Division of large numbers + run_benchmark_truncate_divide( + "Division of large numbers (2**16 words vs 2**12 digits)", + "123456789" * 2**16, + "987654321" * 2**12, + iterations_large_numbers, + log_file, + speedup_factors, + ) + + # Case 34: Division of large numbers + run_benchmark_truncate_divide( + "Division of large numbers (2**18 words vs 2**14 digits)", + "123456789" * 2**18, + "987654321" * 2**14, + iterations_large_numbers, log_file, speedup_factors, ) diff --git a/benches/biguint/bench_biguint_truncate_divide_bz.mojo b/benches/biguint/bench_biguint_truncate_divide_bz.mojo deleted file mode 100644 index 4c590cc..0000000 --- a/benches/biguint/bench_biguint_truncate_divide_bz.mojo +++ /dev/null @@ -1,557 +0,0 @@ -""" -Comprehensive benchmarks for BigUInt truncate_divide operation. -Compares performance against Python's built-in int division with 20 diverse test cases. -BigUInt is an unsigned integer type, so all test cases use positive numbers only. -""" - -from decimojo.biguint.biguint import BigUInt -import decimojo.biguint.arithmetics -from python import Python, PythonObject -from time import perf_counter_ns -import time -import os -from collections import List - - -fn open_log_file() raises -> PythonObject: - """ - Creates and opens a log file with a timestamp in the filename. - - Returns: - A file object opened for writing. - """ - var python = Python.import_module("builtins") - var datetime = Python.import_module("datetime") - var pysys = Python.import_module("sys") - pysys.set_int_max_str_digits(1000000) - - # Create logs directory if it doesn't exist - var log_dir = "./logs" - if not os.path.exists(log_dir): - os.makedirs(log_dir) - - # Generate a timestamp for the filename - var timestamp = String(datetime.datetime.now().isoformat()) - var log_filename = ( - log_dir + "/benchmark_biguint_truncate_divide_" + timestamp + ".log" - ) - - print("Saving benchmark results to:", log_filename) - return python.open(log_filename, "w") - - -fn log_print(msg: String, log_file: PythonObject) raises: - """ - Prints a message to both the console and the log file. - - Args: - msg: The message to print. - log_file: The file object to write to. - """ - print(msg) - log_file.write(msg + "\n") - log_file.flush() # Ensure the message is written immediately - - -fn run_benchmark_truncate_divide( - name: String, - dividend: String, - divisor: String, - iterations: Int, - log_file: PythonObject, - mut speedup_factors: List[Float64], -) raises: - """ - Run a benchmark comparing Mojo BigUInt truncate_divide with Python int division. - - Args: - name: Name of the benchmark case. - dividend: String representation of the dividend. - divisor: String representation of the divisor. - iterations: Number of iterations to run. - log_file: File object for logging results. - speedup_factors: Mojo List to store speedup factors for averaging. - """ - log_print("\nBenchmark: " + name, log_file) - log_print("Dividend: " + dividend, log_file) - log_print("Divisor: " + divisor, log_file) - - # Set up Mojo and Python values - var mojo_dividend = BigUInt(dividend) - var mojo_divisor = BigUInt(divisor) - var py = Python.import_module("builtins") - var py_dividend = py.int(dividend) - var py_divisor = py.int(divisor) - - # Execute the operations once to verify correctness - try: - var mojo_result = ( - decimojo.biguint.arithmetics.floor_divide_burnikel_ziegler( - mojo_dividend, mojo_divisor - ) - ) - var py_result = py_dividend // py_divisor - - # Display results for verification - log_print("Mojo result: " + String(mojo_result), log_file) - log_print("Python result: " + String(py_result), log_file) - - # Benchmark Mojo implementation - var t0 = perf_counter_ns() - for _ in range(iterations): - _ = decimojo.biguint.arithmetics.floor_divide_burnikel_ziegler( - mojo_dividend, mojo_divisor - ) - var mojo_time = (perf_counter_ns() - t0) / iterations - if mojo_time == 0: - mojo_time = 1 # Prevent division by zero - - # Benchmark Python implementation - t0 = perf_counter_ns() - for _ in range(iterations): - _ = py_dividend // py_divisor - var python_time = (perf_counter_ns() - t0) / iterations - - # Calculate speedup factor - var speedup = python_time / mojo_time - speedup_factors.append(Float64(speedup)) - - # Print results with speedup comparison - log_print( - "Mojo division: " + String(mojo_time) + " ns per iteration", - log_file, - ) - log_print( - "Python division: " + String(python_time) + " ns per iteration", - log_file, - ) - log_print("Speedup factor: " + String(speedup), log_file) - except e: - log_print("Error occurred during benchmark: " + String(e), log_file) - log_print("Skipping this benchmark case", log_file) - - -fn main() raises: - # Open log file - var log_file = open_log_file() - var datetime = Python.import_module("datetime") - - # Create a Mojo List to store speedup factors for averaging later - var speedup_factors = List[Float64]() - - # Display benchmark header with system information - log_print("=== DeciMojo BigUInt Truncate Division Benchmark ===", log_file) - log_print("Time: " + String(datetime.datetime.now().isoformat()), log_file) - - # Try to get system info - try: - var platform = Python.import_module("platform") - log_print( - "System: " - + String(platform.system()) - + " " - + String(platform.release()), - log_file, - ) - log_print("Processor: " + String(platform.processor()), log_file) - log_print( - "Python version: " + String(platform.python_version()), log_file - ) - except: - log_print("Could not retrieve system information", log_file) - - var iterations = 100 - - # Define benchmark cases (all positive numbers for BigUInt) - log_print( - "\nRunning truncate division benchmarks with " - + String(iterations) - + " iterations each", - log_file, - ) - - # Case 1: Simple division with no remainder - run_benchmark_truncate_divide( - "Simple division, no remainder", - "100", - "10", - iterations, - log_file, - speedup_factors, - ) - - # Case 2: Division with remainder - run_benchmark_truncate_divide( - "Division with remainder", - "10", - "3", - iterations, - log_file, - speedup_factors, - ) - - # Case 3: Division of small numbers - run_benchmark_truncate_divide( - "Division of small numbers", - "7", - "2", - iterations, - log_file, - speedup_factors, - ) - - # Case 4: Division resulting in zero - run_benchmark_truncate_divide( - "Division resulting in zero", - "5", - "10", - iterations, - log_file, - speedup_factors, - ) - - # Case 5: Division by one - run_benchmark_truncate_divide( - "Division by one", - "12345", - "1", - iterations, - log_file, - speedup_factors, - ) - - # Case 6: Zero dividend - run_benchmark_truncate_divide( - "Zero dividend", - "0", - "5", - iterations, - log_file, - speedup_factors, - ) - - # Case 7: Large number division - run_benchmark_truncate_divide( - "Large number division", - "9999999999", # 10 billion - "333", - iterations, - log_file, - speedup_factors, - ) - - # Case 8: Very large number division - run_benchmark_truncate_divide( - "Very large number division", - "1" + "0" * 50, # 10^50 - "7", - iterations, - log_file, - speedup_factors, - ) - - # Case 9: Division of large numbers with exact result - run_benchmark_truncate_divide( - "Division of large numbers with exact result", - "1" + "0" * 30, # 10^30 - "1" + "0" * 10, # 10^10 - iterations, - log_file, - speedup_factors, - ) - - # Case 10: Division by large number - run_benchmark_truncate_divide( - "Division by large number", - "12345", - "9" * 20, # 20 nines - iterations, - log_file, - speedup_factors, - ) - - # Case 11: Fibonacci number division - run_benchmark_truncate_divide( - "Fibonacci number division", - "6765", # Fib(20) - "4181", # Fib(19) - iterations, - log_file, - speedup_factors, - ) - - # Case 12: Prime number division - run_benchmark_truncate_divide( - "Prime number division", - "2147483647", # Mersenne prime (2^31 - 1) - "997", # Prime - iterations, - log_file, - speedup_factors, - ) - - # Case 13: Division of numbers near UInt64 limit - run_benchmark_truncate_divide( - "Division near UInt64 limit", - "18446744073709551615", # UInt64.MAX - "2", - iterations, - log_file, - speedup_factors, - ) - - # Case 14: Division with divisor just below dividend - run_benchmark_truncate_divide( - "Division with divisor just below dividend", - "1000", - "999", - iterations, - log_file, - speedup_factors, - ) - - # Case 15: Division with exact powers of 10 - run_benchmark_truncate_divide( - "Division with exact powers of 10", - "1" + "0" * 20, # 10^20 - "1" + "0" * 5, # 10^5 - iterations, - log_file, - speedup_factors, - ) - - # Case 16: Division of repeated digits - run_benchmark_truncate_divide( - "Division of repeated digits", - "9" * 30, # 30 nines - "9" * 15, # 15 nines - iterations, - log_file, - speedup_factors, - ) - - # Case 17: Division with extremely large dividend and small divisor - run_benchmark_truncate_divide( - "Extreme large dividend and small divisor", - "9" * 100, # 100 nines - "3", - iterations, - log_file, - speedup_factors, - ) - - # Case 18: Division with powers of 2 - run_benchmark_truncate_divide( - "Division with powers of 2", - "1" + "0" * 50, # 10^50 - "256", # 2^8 - iterations, - log_file, - speedup_factors, - ) - - # Case 19: Division yielding a single-digit result - run_benchmark_truncate_divide( - "Division yielding a single-digit result", - "123456789", - "123456780", - iterations, - log_file, - speedup_factors, - ) - - # Case 20: Division with random large numbers - run_benchmark_truncate_divide( - "Division with random large numbers", - "8675309123456789098765432112345678909876543211234567", - "12345678901234567890", - iterations, - log_file, - speedup_factors, - ) - - # Case 21: Division with around 50 digits and with divisor just below dividend - run_benchmark_truncate_divide( - "Division with around 50 digits divisor just below dividend", - "12345" * 10, - "6789" * 12, - iterations, - log_file, - speedup_factors, - ) - - # Case 22: Division of very large repeated digits - run_benchmark_truncate_divide( - "Division of repeated digits", - "990132857498314692374162398217" * 10, # 30 * 10 = 300 digits - "85172390413429847239" * 10, # 20 * 10 = 200 digits - iterations, - log_file, - speedup_factors, - ) - - # Case 23: Division of large numbers - run_benchmark_truncate_divide( - "Division of large numbers (270 digits vs 135 digits)", - "123456789" * 30, - "987654321" * 15, - iterations, - log_file, - speedup_factors, - ) - - # Case 24: Division of very large numbers - run_benchmark_truncate_divide( - "Division of very large numbers (2250 digits vs 900 digits)", - "123456789" * 250, - "987654321" * 100, - iterations, - log_file, - speedup_factors, - ) - - # Case 25: Division of very, very large numbers - # x1 is more than 400 words long (>= 10^3600) - # x2 is more than 200 words long (>= 10^1800) - run_benchmark_truncate_divide( - "Division of very, very large numbers (3600 digits vs 1800 digits)", - "123456789" * 400, - "987654321" * 200, - iterations, - log_file, - speedup_factors, - ) - - # Case 26: Division of large numbers - run_benchmark_truncate_divide( - "Division of very large numbers (256 digits vs 128 digits)", - "1234567890193287491287508917213097405916874098123705160923812345678901932874912875089172130974059168740981237051609238749875089170984701759832708497029875019837409871085709813749870897510749875089170984701759832708497029875019837409871085709813749870897510", - "68740981237051609238123456789019328749128750891721309740591687409812370516092387498750879548759387959978279541709847017598327084", - iterations, - log_file, - speedup_factors, - ) - - # Case 27: Division of numbers with around 200 and 100 digits - run_benchmark_truncate_divide( - "Division of large numbers (200 digits vs 100 digits)", - ( - "314159265358979323846264338327950288419716939937510" - "582097494459230781640628620899862803482534211706798214808651" - "328230664709384460955058223172535940812848111745028410270193" - "852110555964462294895493038196" - ), - ( - "271828182845904523536028747135266249775724709369995" - "95749669676277240766303535475945713821785251664274" - ), - iterations, - log_file, - speedup_factors, - ) - - # Case 28: Division of numbers with around 200 and 100 digits - run_benchmark_truncate_divide( - "Division of large numbers (200 digits vs 100 digits)", - ( - "314159265358979323846264338327950288419716939937510" - "582097494459230781640628620899862803482534211706798214808651" - "328230664709384460955058223172535940812848111745028410270193" - "852110555964462294895493038196" - ), - ( - "141421356237309504880168872420969807856967187537694" - "80731766797379907324784621070388503875343276400719" - ), - iterations, - log_file, - speedup_factors, - ) - - # Case 29: Division of numbers with around 150 and 50 digits - run_benchmark_truncate_divide( - "Division of large numbers (150 digits vs 50 digits)", - ( - "314159265358979323846264338327950288419716939937510" - "582097494459230781640628620899862803482534211706798214808651" - "3282306647093844609550582231725359408128" - ), - "141421356237309504880168872420969807856967187537694", - iterations, - log_file, - speedup_factors, - ) - - # Case 30: Division of numbers with around 150 and 50 digits - run_benchmark_truncate_divide( - "Division of large numbers (150 digits vs 50 digits)", - ( - "316227766016824890583648059893174009579947593530458" - "382628078540989121552735792899961040720792717368862335475063" - "5167610057579407944886251958020310186466" - ), - "141421356237309504880168872420969807856967187537694", - iterations, - log_file, - speedup_factors, - ) - - # Case 31: Division of large numbers - run_benchmark_truncate_divide( - "Division of large numbers (1000 words vs 100 digits)", - "316227766_016824890_583648059_893174009_579947593" * 1000, - "141421356_237309504_880168872_420969807_856967187" * 100, - 3, - log_file, - speedup_factors, - ) - - # Case 32: Division of large numbers - run_benchmark_truncate_divide( - "Division of large numbers (10000 words vs 1234 digits)", - "316227766_016824890_583648059_893174009_579947593" * 10000, - "141421356_237309504_880168872_420969807_856967187" * 1234, - 3, - log_file, - speedup_factors, - ) - - # Calculate average speedup factor (ignoring any cases that might have failed) - if len(speedup_factors) > 0: - var sum_speedup: Float64 = 0.0 - for i in range(len(speedup_factors)): - sum_speedup += speedup_factors[i] - var average_speedup = sum_speedup / Float64(len(speedup_factors)) - - # Display summary - log_print( - "\n=== BigUInt Truncate Division Benchmark Summary ===", log_file - ) - log_print( - "Benchmarked: " - + String(len(speedup_factors)) - + " different division cases", - log_file, - ) - log_print( - "Each case ran: " + String(iterations) + " iterations", log_file - ) - log_print( - "Average speedup: " + String(average_speedup) + "×", log_file - ) - - # List all speedup factors - log_print("\nIndividual speedup factors:", log_file) - for i in range(len(speedup_factors)): - log_print( - String("Case {}: {}×").format( - i + 1, round(speedup_factors[i], 2) - ), - log_file, - ) - else: - log_print("\nNo valid benchmark cases were completed", log_file) - - # Close the log file - log_file.close() - print("Benchmark completed. Log file closed.") diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo index 4bae6c9..84d38f5 100644 --- a/src/decimojo/biguint/arithmetics.mojo +++ b/src/decimojo/biguint/arithmetics.mojo @@ -1328,7 +1328,7 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt: # CASE: division of very, very large numbers # Use the Burnikel-Ziegler division algorithm - print("Using Burnikel-Ziegler division algorithm for large numbers") + # print("Using Burnikel-Ziegler division algorithm for large numbers") return floor_divide_burnikel_ziegler( x1, x2, cut_off=CUTOFF_BURNIKEL_ZIEGLER )