diff --git a/benches/biguint/bench.mojo b/benches/biguint/bench.mojo index 5c0f08b..56cbb3b 100644 --- a/benches/biguint/bench.mojo +++ b/benches/biguint/bench.mojo @@ -1,4 +1,5 @@ from bench_biguint_add import main as bench_add +from bench_biguint_subtraction import main as bench_subtraction from bench_biguint_multiply import main as bench_multiply from bench_biguint_truncate_divide import main as bench_truncate_divide from bench_biguint_from_string import main as bench_from_string @@ -11,6 +12,7 @@ fn main() raises: This is the BigUInt Benchmarks ========================================= add: Add +sub: Subtract mul: Multiply div: Truncate divide (//) fromstr: From string @@ -22,6 +24,8 @@ q: Exit var command = input("Type name of bench you want to run: ") if command == "add": bench_add() + elif command == "sub": + bench_subtraction() elif command == "mul": bench_multiply() elif command == "div": diff --git a/benches/biguint/bench_biguint_subtraction.mojo b/benches/biguint/bench_biguint_subtraction.mojo new file mode 100644 index 0000000..7125cdd --- /dev/null +++ b/benches/biguint/bench_biguint_subtraction.mojo @@ -0,0 +1,296 @@ +""" +Comprehensive benchmarks for BigUInt subtraction. +Compares performance against Python's built-in int with diverse test cases. +""" + +from decimojo.biguint.biguint import BigUInt +import decimojo.biguint.arithmetics +from python import Python, PythonObject +from time import perf_counter_ns +import time +import os +from collections import List + + +fn open_log_file() raises -> PythonObject: + """ + Creates and opens a log file with a timestamp in the filename. + + Returns: + A file object opened for writing. + """ + var python = Python.import_module("builtins") + var datetime = Python.import_module("datetime") + var pysys = Python.import_module("sys") + pysys.set_int_max_str_digits(1000000) + + # Create logs directory if it doesn't exist + var log_dir = "./logs" + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Generate a timestamp for the filename + var timestamp = String(datetime.datetime.now().isoformat()) + var log_filename = ( + log_dir + "/benchmark_biguint_subtraction_" + timestamp + ".log" + ) + + print("Saving benchmark results to:", log_filename) + return python.open(log_filename, "w") + + +fn log_print(msg: String, log_file: PythonObject) raises: + """ + Prints a message to both the console and the log file. + + Args: + msg: The message to print. + log_file: The file object to write to. + """ + print(msg) + log_file.write(msg + "\n") + log_file.flush() # Ensure the message is written immediately + + +fn run_benchmark_subtraction( + name: String, + value1: String, + value2: String, + iterations: Int, + log_file: PythonObject, + mut speedup_factors: List[Float64], +) raises: + """ + Run a benchmark comparing Mojo BigUInt subtraction with Python int subtraction. + + Args: + name: Name of the benchmark case. + value1: String representation of first operand. + value2: String representation of second operand. + iterations: Number of iterations to run. + log_file: File object for logging results. + speedup_factors: Mojo List to store speedup factors for averaging. + """ + log_print("\nBenchmark: " + name, log_file) + log_print("First operand: " + value1, log_file) + log_print("Second operand: " + value2, log_file) + + # Set up Mojo and Python values + var mojo_value1 = BigUInt(value1) + var mojo_value2 = BigUInt(value2) + var py = Python.import_module("builtins") + var py_value1 = py.int(value1) + var py_value2 = py.int(value2) + + # Execute the operations once to verify correctness + var mojo_result = mojo_value1 - mojo_value2 + var py_result = py_value1 - py_value2 + + # Display results for verification + log_print("Mojo result: " + String(mojo_result), log_file) + log_print("Python result: " + String(py_result), log_file) + + # Benchmark Mojo implementation + var t0 = perf_counter_ns() + for _ in range(iterations): + _ = mojo_value1 - mojo_value2 + var mojo_time = (perf_counter_ns() - t0) / iterations + if mojo_time == 0: + mojo_time = 1 # Prevent division by zero + + # Benchmark Python implementation + t0 = perf_counter_ns() + for _ in range(iterations): + _ = py_value1 - py_value2 + var python_time = (perf_counter_ns() - t0) / iterations + + # Calculate speedup factor + var speedup = python_time / mojo_time + speedup_factors.append(Float64(speedup)) + + # Print results with speedup comparison + log_print( + "Mojo subtraction: " + String(mojo_time) + " ns per iteration", + log_file, + ) + log_print( + "Python subtraction: " + String(python_time) + " ns per iteration", + log_file, + ) + log_print("Speedup factor: " + String(speedup), log_file) + + +fn main() raises: + # Open log file + var log_file = open_log_file() + var datetime = Python.import_module("datetime") + + # Create a Mojo List to store speedup factors for averaging later + var speedup_factors = List[Float64]() + + # Display benchmark header with system information + log_print("=== DeciMojo BigUInt Subtraction Benchmark ===", log_file) + log_print("Time: " + String(datetime.datetime.now().isoformat()), log_file) + + # Try to get system info + try: + var platform = Python.import_module("platform") + log_print( + "System: " + + String(platform.system()) + + " " + + String(platform.release()), + log_file, + ) + log_print("Processor: " + String(platform.processor()), log_file) + log_print( + "Python version: " + String(platform.python_version()), log_file + ) + except: + log_print("Could not retrieve system information", log_file) + + var iterations = 1000 + + # Define benchmark cases + log_print( + "\nRunning subtraction benchmarks with " + + String(iterations) + + " iterations each", + log_file, + ) + + # Case 26: Subtraction with 2 words - 1 word + run_benchmark_subtraction( + "Subtraction with 2 words - 1 word", + "123456789" * 2, + "987654321", + iterations, + log_file, + speedup_factors, + ) + + # Case 27: Subtraction with 4 words - 2 words + run_benchmark_subtraction( + "Subtraction with 4 words - 2 words", + "123456789" * 4, + "987654321" * 2, + iterations, + log_file, + speedup_factors, + ) + + # Case 28: Subtraction with 8 words - 4 words + run_benchmark_subtraction( + "Subtraction with 8 words - 4 words", + "123456789" * 8, + "987654321" * 4, + iterations, + log_file, + speedup_factors, + ) + + # Case 29: Subtraction with 16 words - 8 words + run_benchmark_subtraction( + "Subtraction with 16 words - 8 words", + "123456789" * 16, + "987654321" * 8, + iterations, + log_file, + speedup_factors, + ) + + # Case 30: Subtraction with 32 words - 16 words + run_benchmark_subtraction( + "Subtraction with 32 words - 16 words", + "123456789" * 32, + "987654321" * 16, + iterations, + log_file, + speedup_factors, + ) + + # Case 31: Subtraction with 64 words - 32 words + run_benchmark_subtraction( + "Subtraction with 64 words - 32 words", + "123456789" * 64, + "987654321" * 32, + iterations, + log_file, + speedup_factors, + ) + + # Case 32: Subtraction with 256 words - 128 words + run_benchmark_subtraction( + "Subtraction with 256 words - 128 words", + "123456789" * 256, + "987654321" * 128, + iterations, + log_file, + speedup_factors, + ) + + # Case 33: Subtraction with 1024 words - 512 words + run_benchmark_subtraction( + "Subtraction with 1024 words - 512 words", + "123456789" * 1024, + "987654321" * 512, + iterations, + log_file, + speedup_factors, + ) + + # Case 34: Subtraction with 4096 words - 2048 words + run_benchmark_subtraction( + "Subtraction with 4096 words - 2048 words", + "123456789" * 4096, + "987654321" * 2048, + iterations, + log_file, + speedup_factors, + ) + + # Case 35: Subtraction with 16384 words - 8192 words + run_benchmark_subtraction( + "Subtraction with 16384 words - 8192 words", + "123456789" * 16384, + "987654321" * 8192, + iterations, + log_file, + speedup_factors, + ) + + # Case 36: Subtraction with 32768 words - 16384 words + run_benchmark_subtraction( + "Subtraction with 32768 words - 16384 words", + "123456789" * 32768, + "987654321" * 16384, + iterations, + log_file, + speedup_factors, + ) + + # Calculate average speedup factor + var sum_speedup: Float64 = 0.0 + for i in range(len(speedup_factors)): + sum_speedup += speedup_factors[i] + var average_speedup = sum_speedup / Float64(len(speedup_factors)) + + # Display summary + log_print("\n=== BigUInt Subtraction Benchmark Summary ===", log_file) + log_print("Benchmarked: different subtraction cases", log_file) + log_print( + "Each case ran: " + String(iterations) + " iterations", log_file + ) + log_print("Average speedup: " + String(average_speedup) + "×", log_file) + + # List all speedup factors + log_print("\nIndividual speedup factors:", log_file) + for i in range(len(speedup_factors)): + log_print( + String("Case {}: {}×").format(i + 1, round(speedup_factors[i], 2)), + log_file, + ) + + # Close the log file + log_file.close() + print("Benchmark completed. Log file closed.") diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo index 6f78929..be3dbdb 100644 --- a/src/decimojo/biguint/arithmetics.mojo +++ b/src/decimojo/biguint/arithmetics.mojo @@ -33,6 +33,8 @@ from decimojo.rounding_mode import RoundingMode # absolute(x: BigUInt) -> BigUInt # # add(x1: BigUInt, x2: BigUInt) -> BigUInt +# add_simd(x: BigUInt, y: BigUInt) -> BigUInt +# add_slices(x: BigUInt, y: BigUInt, start_x: Int, end_x: Int, start_y: Int, end_y: Int) -> BigUInt # add_inplace(x1: BigUInt, x2: BigUInt) # add_inplace_by_uint32(x: BigUInt, y: UInt32) -> None # @@ -44,6 +46,7 @@ from decimojo.rounding_mode import RoundingMode # multiply(x1: BigUInt, x2: BigUInt) -> BigUInt # multiply_slices(x: BigUInt, y: BigUInt, start_x: Int, end_x: Int, start_y: Int, end_y: Int) -> BigUInt # multiply_karatsuba(x: BigUInt, y: BigUInt, start_x: Int, end_x: Int, start_y: Int, end_y: Int, cutoff_number_of_words: Int) -> BigUInt +# multiply_inplace_by_uint32(x: BigUInt, y: UInt32) -> None # multiply_by_power_of_ten(x: BigUInt, n: Int) -> BigUInt # multiply_inplace_by_power_of_billion(mut x: BigUInt, n: Int) # @@ -62,6 +65,7 @@ from decimojo.rounding_mode import RoundingMode # ceil_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt # divmod(x1: BigUInt, x2: BigUInt) -> Tuple[BigUInt, BigUInt] # +# normalize_carries(x: BigUInt) -> None # power_of_10(n: Int) -> BigUInt # ===----------------------------------------------------------------------=== # @@ -360,6 +364,10 @@ fn add_inplace(mut x: BigUInt, y: BigUInt) -> None: Args: x: The first unsigned integer operand. y: The second unsigned integer operand. + + Notes: + + This function uses SIMD operations to add the words of the two BigUInt. """ # Short circuit cases @@ -417,6 +425,30 @@ fn add_inplace_by_uint32(mut x: BigUInt, y: UInt32) -> None: fn subtract(x: BigUInt, y: BigUInt) raises -> BigUInt: """Returns the difference of two unsigned integers. + Args: + x: The first unsigned integer (minuend). + y: The second unsigned integer (subtrahend). + + Raises: + Error: If y is greater than x, resulting in an underflow. + + Returns: + The result of subtracting y from x. + """ + # I will use the SIMD approach for subtraction + # This speeds up the subtraction by 1.25x for large numbers. + return subtract_simd(x, y) + + # Yuhao ZHU: + # Below is a school method for subtraction. + # You go from the least significant word to the most significant word. + # + # return subtract_school(x, y) + + +fn subtract_school(x: BigUInt, y: BigUInt) raises -> BigUInt: + """Returns the difference of two unsigned integers using the school method. + Args: x: The first unsigned integer (minuend). y: The second unsigned integer (subtrahend). @@ -478,6 +510,81 @@ fn subtract(x: BigUInt, y: BigUInt) raises -> BigUInt: return result^ +fn subtract_simd(x: BigUInt, y: BigUInt) raises -> BigUInt: + """Returns the difference of two unsigned integers using SIMD operations. + + Args: + x: The first unsigned integer (minuend). + y: The second unsigned integer (subtrahend). + + Raises: + Error: If y is greater than x, resulting in an underflow. + + Returns: + The result of subtracting y from x. + + Notes: + + I will make use of SIMD operations to subtract the words in parallel. + This will first subtract the words in parallel and then handle the borrows. + Note that there will be potential overflow in the subtraction, + but I will take advantage of that. + """ + # If the subtrahend is zero, return the minuend + # Yuhao ZHU: + # This step is important because y can be of zero words and is longer than x. + # This will makes the subtraction beyond the boundary of the result number, + # whose length is equal to the length of x. + # Note that our subtraction is via SIMD, so it is directly worked on unsafe + # pointers. + if y.is_zero(): + return x + + # We need to determine which number has the larger magnitude + var comparison_result = x.compare(y) + if comparison_result == 0: + # |x| = |y| + return BigUInt() # Return zero + if comparison_result < 0: + raise Error("biguint.arithmetics.subtract(): Underflow due to x < y") + + # Now it is safe to subtract the smaller number from the larger one + # The result will have no more words than the first number + var words = List[UInt32](unsafe_uninit_length=len(x.words)) + + # Yuhao ZHU: + # We will make use of SIMD operations to subtract the words in parallel. + # This will first subtract the words in parallel and then handle the borrows. + # Note that there will be potential overflow in the subtraction, + # but we will take advantage of that. + @parameter + fn vector_subtract[simd_width: Int](i: Int): + words.data.store[width=simd_width]( + i, + x.words.data.load[width=simd_width](i) + - y.words.data.load[width=simd_width](i), + ) + + vectorize[vector_subtract, BigUInt.VECTOR_WIDTH](len(y.words)) + + @parameter + fn vector_copy_rest[simd_width: Int](i: Int): + words.data.store[width=simd_width]( + len(y.words) + i, + x.words.data.load[width=simd_width](len(y.words) + i), + ) + + vectorize[vector_copy_rest, BigUInt.VECTOR_WIDTH]( + len(x.words) - len(y.words) + ) + + var result = BigUInt(words=words^) + normalize_borrows(result) + result.remove_leading_empty_words() + + return result^ + + fn subtract_inplace(mut x: BigUInt, y: BigUInt) raises -> None: """Subtracts y from x in place.""" @@ -497,35 +604,59 @@ fn subtract_inplace(mut x: BigUInt, y: BigUInt) raises -> None: # Now it is safe to subtract the smaller number from the larger one # Note that len(x.words) >= len(y.words) here - var borrow: UInt32 = 0 # Can either be 0 or 1 - - for i in range(len(y.words)): - if x.words[i] < borrow + y.words[i]: - x.words[i] += BigUInt.BASE - x.words[i] -= borrow + y.words[i] - borrow = 1 # Set borrow for the next word - else: - x.words[i] -= borrow + y.words[i] - borrow = 0 # No borrow for the next word + # Use SIMD operations to subtract the words in parallel. + @parameter + fn vector_subtract[simd_width: Int](i: Int): + x.words.data.store[width=simd_width]( + i, + x.words.data.load[width=simd_width](i) + - y.words.data.load[width=simd_width](i), + ) - # If x has more words than y, we need to handle the remaining words + vectorize[vector_subtract, BigUInt.VECTOR_WIDTH](len(y.words)) - if borrow == 0: - # If there is no borrow, we can stop early - x.remove_leading_empty_words() - return + # Normalize borrows after subtraction + normalize_borrows(x) + x.remove_leading_empty_words() - else: - # At this stage, borrow can only be 0 or 1 - for i in range(len(y.words), len(x.words)): - if x.words[i] >= borrow: - x.words[i] -= borrow - break # No more borrow, we can stop early - else: # x.words[i] == 0, borrow == 1 - x.words[i] = BigUInt.BASE - borrow + return - x.remove_leading_empty_words() - return + # Yuhao ZHU: + # Below is a school method for subtraction. + # You go from the least significant word to the most significant word. + # I keep it here for reference. + # In case the behavior of the underflow of the SIMD subtraction changes + # in the future, we can fall back to this method. + + # var borrow: UInt32 = 0 # Can either be 0 or 1 + + # for i in range(len(y.words)): + # if x.words[i] < borrow + y.words[i]: + # x.words[i] += BigUInt.BASE + # x.words[i] -= borrow + y.words[i] + # borrow = 1 # Set borrow for the next word + # else: + # x.words[i] -= borrow + y.words[i] + # borrow = 0 # No borrow for the next word + + # # If x has more words than y, we need to handle the remaining words + + # if borrow == 0: + # # If there is no borrow, we can stop early + # x.remove_leading_empty_words() + # return + + # else: + # # At this stage, borrow can only be 0 or 1 + # for i in range(len(y.words), len(x.words)): + # if x.words[i] >= borrow: + # x.words[i] -= borrow + # break # No more borrow, we can stop early + # else: # x.words[i] == 0, borrow == 1 + # x.words[i] = BigUInt.BASE - borrow + + # x.remove_leading_empty_words() + # return fn subtract_inplace_no_check(mut x: BigUInt, y: BigUInt) -> None: @@ -543,38 +674,23 @@ fn subtract_inplace_no_check(mut x: BigUInt, y: BigUInt) -> None: return # Underflow checks are skipped here, so we assume x >= y + # Note that len(x.words) >= len(y.words) under this assumption - # Now it is safe to subtract the smaller number from the larger one - # Note that len(x.words) >= len(y.words) here - var borrow: UInt32 = 0 # Can either be 0 or 1 - - for i in range(len(y.words)): - if x.words[i] < borrow + y.words[i]: - x.words[i] += BigUInt.BASE - x.words[i] -= borrow + y.words[i] - borrow = 1 # Set borrow for the next word - else: - x.words[i] -= borrow + y.words[i] - borrow = 0 # No borrow for the next word + @parameter + fn vector_subtract[simd_width: Int](i: Int): + x.words.data.store[width=simd_width]( + i, + x.words.data.load[width=simd_width](i) + - y.words.data.load[width=simd_width](i), + ) - # If x has more words than y, we need to handle the remaining words + vectorize[vector_subtract, BigUInt.VECTOR_WIDTH](len(y.words)) - if borrow == 0: - # If there is no borrow, we can stop early - x.remove_leading_empty_words() - return + # Normalize borrows after subtraction + normalize_borrows(x) + x.remove_leading_empty_words() - else: - # At this stage, borrow can only be 0 or 1 - for i in range(len(y.words), len(x.words)): - if x.words[i] >= borrow: - x.words[i] -= borrow - break # No more borrow, we can stop early - else: # x.words[i] == 0, borrow == 1 - x.words[i] = BigUInt.BASE - borrow - - x.remove_leading_empty_words() - return + return # ===----------------------------------------------------------------------=== # @@ -725,8 +841,8 @@ fn multiply_slices( # The lower 9 digits (base 10^9) go into the current word # The upper digits become the carry for the next position - words[i + j] = UInt32(product % BigUInt.BASE) - carry = product // BigUInt.BASE + words[i + j] = UInt32(product % UInt64(BigUInt.BASE)) + carry = product // UInt64(BigUInt.BASE) # If there is a carry left, add it to the next position if carry > 0: @@ -1703,11 +1819,11 @@ fn normalize_carries(mut x: BigUInt): # Yuhao ZHU: # By construction, the words of x are in the range [0, BASE*2). - # Thus, the crray can only be 0 or 1. + # Thus, the carry can only be 0 or 1. var carry: UInt32 = 0 for ref word in x.words: if carry == 0: - if word < BigUInt.BASE: + if word <= BigUInt.BASE_MAX: pass # carry = 0 else: word -= BigUInt.BASE @@ -1725,6 +1841,46 @@ fn normalize_carries(mut x: BigUInt): return +fn normalize_borrows(mut x: BigUInt): + """Normalizes the values of words into valid range by borrowing. + The caller should ensure that the final result is non-negative. + The initial values of the words should be in the range: + [0, BASE-1] or [3294967297, 4294967295], in other words, + [UInt32.MAX - 999_999_998, ..., UInt32.MAX, 0, ..., BASE-1]. + + Notes: + + If we subtract two BigUInt numbers word-by-word, we may end up with + a situation where some words are **underflowed**. We can take advantage of + the overflowed values of the words to normalize the borrows, + ensuring that all words are within the valid range. + """ + + alias NEG_BASE_MAX = UInt32(3294967297) # UInt32(0) - BigUInt.BASE_MAX + + # Yuhao ZHU: + # By construction, the words of x are in the range [-BASE_MAX, BASE_MAX]. + # Thus, the borrow can only be 0 or 1. + var borrow: UInt32 = 0 + for ref word in x.words: + if borrow == 0: + if word <= BigUInt.BASE_MAX: # 0 <= word <= 999_999_999 + pass # borrow = 0 + else: # word >= 3294967297, overflowed value + word += BigUInt.BASE + borrow = 1 + else: # borrow == 1 + if (word >= 1) and ( + word <= BigUInt.BASE_MAX + ): # 1 <= word <= 999_999_999 + word -= 1 + borrow = 0 + else: # word >= 3294967297 or word == 0, overflowed value + word = (word + BigUInt.BASE) - 1 + # borrow = 1 + return + + fn power_of_10(n: Int) raises -> BigUInt: """Calculates 10^n efficiently.""" if n < 0: