diff --git a/README.md b/README.md index f91ccc7..ad50663 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ DeciMojo provides an arbitrary-precision decimal and integer mathematics library The core types are: - A base-10 arbitrary-precision signed integer type (`BigInt`) and a base-10 arbitrary-precision unsigned integer type (`BigUInt`) supporting unlimited digits[^integer]. It features comprehensive arithmetic operations, comparison functions, and supports extremely large integer calculations efficiently. -- An arbitrary-precision decimal implementation `BigDecimal` allowing for calculations with unlimited digits and decimal places[^arbitrary]. +- An arbitrary-precision decimal implementation `BigDecimal` allowing for calculations with unlimited digits and decimal places[^arbitrary]. It provides a complete set of arithmetic operations, comparisons, and mathematical functions like logarithms, exponentiation, roots, trigonometric functions, etc. It also supports rounding modes and conversions to/from built-in types. - A 128-bit fixed-point decimal implementation (`Decimal`) supporting up to 29 significant digits with a maximum of 28 decimal places[^fixed]. It features a complete set of mathematical functions including logarithms, exponentiation, roots, etc. This repository includes [TOMLMojo](https://github.com/forfudan/decimojo/tree/main/src/tomlmojo), a lightweight TOML parser in pure Mojo. It parses configuration files and test data, supporting basic types, arrays, and nested tables. While created for DeciMojo's testing framework, it offers general-purpose structured data parsing with a clean, simple API. diff --git a/benches/biguint/bench.mojo b/benches/biguint/bench.mojo index 654d147..5c0f08b 100644 --- a/benches/biguint/bench.mojo +++ b/benches/biguint/bench.mojo @@ -12,7 +12,7 @@ This is the BigUInt Benchmarks ========================================= add: Add mul: Multiply -trunc: Truncate divide (//) +div: Truncate divide (//) fromstr: From string all: Run all benchmarks q: Exit @@ -24,7 +24,7 @@ q: Exit bench_add() elif command == "mul": bench_multiply() - elif command == "trunc": + elif command == "div": bench_truncate_divide() elif command == "fromstr": bench_from_string() diff --git a/benches/biguint/bench_biguint_divide_complexity.mojo b/benches/biguint/bench_biguint_divide_complexity.mojo new file mode 100644 index 0000000..cb270fb --- /dev/null +++ b/benches/biguint/bench_biguint_divide_complexity.mojo @@ -0,0 +1,378 @@ +# ===----------------------------------------------------------------------=== # +# Benchmark for BigUInt division time complexity analysis +# Testing word sizes from 32 to 65536 words (powers of 2) +# ===----------------------------------------------------------------------=== # + +from time import perf_counter_ns +from decimojo import BigUInt +from decimojo.biguint.arithmetics import floor_divide +from python import Python, PythonObject +import os + + +fn create_log_file() raises -> PythonObject: + """Creates and opens a log file with timestamp.""" + var python = Python.import_module("builtins") + var datetime = Python.import_module("datetime") + + # Create logs directory if it doesn't exist + var log_dir = "./logs" + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Generate timestamp for filename + var timestamp = String(datetime.datetime.now().isoformat()) + var log_filename = ( + log_dir + "/benchmark_divide_complexity_" + timestamp + ".log" + ) + + print("Saving benchmark results to:", log_filename) + return python.open(log_filename, "w") + + +fn log_print(msg: String, log_file: PythonObject) raises: + """Prints message to both console and log file.""" + print(msg) + log_file.write(msg + "\n") + log_file.flush() + + +fn create_test_biguint(num_words: Int) -> BigUInt: + """Creates a BigUInt with the specified number of words filled with test values. + """ + var words = List[UInt32](capacity=num_words) + + # Fill with predictable values (avoid randomness for consistent testing) + for i in range(num_words): + if i == num_words - 1: + # Ensure the most significant word is non-zero + words.append(UInt32(100_000_000 + (i % 800_000_000))) + else: + words.append(UInt32(123_456_789 + (i % 876_543_210))) + + return BigUInt(words=words^) + + +fn create_smaller_divisor( + dividend_words: Int, divisor_ratio: Float64 +) -> BigUInt: + """Creates a divisor that's smaller than the dividend by the given ratio.""" + var divisor_words = max(1, Int(Float64(dividend_words) / divisor_ratio)) + return create_test_biguint(divisor_words) + + +fn benchmark_divide_at_size( + dividend_words: Int, + divisor_words: Int, + iterations: Int, + log_file: PythonObject, +) raises -> Float64: + """Benchmarks division for specific word sizes.""" + var msg = ( + "Testing " + + String(dividend_words) + + " / " + + String(divisor_words) + + " words..." + ) + log_print(msg, log_file) + + # Create test BigUInt numbers + var dividend = create_test_biguint(dividend_words) + var divisor = create_test_biguint(divisor_words) + + var total_time: Float64 = 0.0 + + # Perform multiple iterations to get average time + for i in range(iterations): + var start_time = perf_counter_ns() + var _result = floor_divide(dividend, divisor) + var end_time = perf_counter_ns() + + var elapsed = ( + Float64(end_time - start_time) / 1_000_000_000.0 + ) # Convert to seconds + total_time += elapsed + + # Print intermediate results + var iter_msg = ( + " Iteration " + String(i + 1) + ": " + String(elapsed) + " seconds" + ) + log_print(iter_msg, log_file) + + var average_time = total_time / Float64(iterations) + var avg_msg = ( + " Average time for " + + String(dividend_words) + + " / " + + String(divisor_words) + + " words: " + + String(average_time) + + " seconds" + ) + log_print(avg_msg, log_file) + return average_time + + +fn main() raises: + """Main benchmark function testing division complexity.""" + # Create log file + var log_file = create_log_file() + var datetime = Python.import_module("datetime") + + # Display benchmark header with system information + log_print( + "=== DeciMojo BigUInt Division Time Complexity Benchmark ===", + log_file, + ) + log_print("Time: " + String(datetime.datetime.now().isoformat()), log_file) + + # Get system information + try: + var platform = Python.import_module("platform") + log_print( + "System: " + + String(platform.system()) + + " " + + String(platform.release()), + log_file, + ) + log_print("Processor: " + String(platform.processor()), log_file) + log_print( + "Python version: " + String(platform.python_version()), log_file + ) + log_print("Machine: " + String(platform.machine()), log_file) + log_print("Node: " + String(platform.node()), log_file) + except: + log_print("Could not retrieve system information", log_file) + + log_print("", log_file) + log_print( + "Testing division with various dividend and divisor sizes", log_file + ) + log_print("Each test uses 5 iterations for averaging", log_file) + log_print( + "WARNING: Larger sizes (>100K words) may take significant time!", + log_file, + ) + log_print("", log_file) + + # Test sizes: powers of 2 from 32 to 65536 + var test_sizes = List[Int]() + test_sizes.append(32) + test_sizes.append(64) + test_sizes.append(128) + test_sizes.append(256) + test_sizes.append(512) + test_sizes.append(1024) + test_sizes.append(2048) + test_sizes.append(4096) + test_sizes.append(8192) + test_sizes.append(16384) + test_sizes.append(32768) + test_sizes.append(65536) + + # Test Case 1: Large / Small division (2n / n) + log_print("=== TEST CASE 1: LARGE / SMALL DIVISION (2n / n) ===", log_file) + log_print( + "Testing division where dividend is twice the size of divisor", log_file + ) + log_print("", log_file) + + var large_small_results = List[Float64]() + for i in range(len(test_sizes)): + var divisor_size = test_sizes[i] + var dividend_size = divisor_size * 2 + if dividend_size <= 65536: # Stay within our limit + var avg_time = benchmark_divide_at_size( + dividend_size, divisor_size, 5, log_file + ) + large_small_results.append(avg_time) + else: + large_small_results.append(0.0) # Placeholder + log_print("", log_file) + + # Test Case 2: Very Large / Small division (4n / n) + log_print( + "=== TEST CASE 2: VERY LARGE / SMALL DIVISION (4n / n) ===", log_file + ) + log_print( + "Testing division where dividend is four times the size of divisor", + log_file, + ) + log_print("", log_file) + + var very_large_small_results = List[Float64]() + for i in range(len(test_sizes)): + var divisor_size = test_sizes[i] + var dividend_size = divisor_size * 4 + if dividend_size <= 65536: # Stay within our limit + var avg_time = benchmark_divide_at_size( + dividend_size, divisor_size, 5, log_file + ) + very_large_small_results.append(avg_time) + else: + very_large_small_results.append(0.0) # Placeholder + log_print("", log_file) + + # Print summary tables + log_print( + "=== SUMMARY TABLE: LARGE / SMALL DIVISION (2n / n) ===", log_file + ) + log_print( + "Divisor\t\tDividend\t\tTime (s)\t\tRatio to Previous", + log_file, + ) + log_print( + "--------------------------------------------------------------------------------", + log_file, + ) + + for i in range(len(test_sizes)): + var divisor_size = test_sizes[i] + var dividend_size = divisor_size * 2 + var time_taken = large_small_results[i] + + if time_taken > 0.0: # Only show valid results + if i > 0 and large_small_results[i - 1] > 0.0: + var prev_time = large_small_results[i - 1] + var actual_ratio = time_taken / prev_time + var result_line = ( + String(divisor_size) + + "\t\t" + + String(dividend_size) + + "\t\t" + + String(time_taken) + + "\t\t" + + String(actual_ratio) + ) + log_print(result_line, log_file) + else: + var result_line = ( + String(divisor_size) + + "\t\t" + + String(dividend_size) + + "\t\t" + + String(time_taken) + + "\t\tN/A" + ) + log_print(result_line, log_file) + + log_print("", log_file) + log_print( + "=== SUMMARY TABLE: VERY LARGE / SMALL DIVISION (4n / n) ===", log_file + ) + log_print( + "Divisor\t\tDividend\t\tTime (s)\t\tRatio to Previous", + log_file, + ) + log_print( + "--------------------------------------------------------------------------------", + log_file, + ) + + for i in range(len(test_sizes)): + var divisor_size = test_sizes[i] + var dividend_size = divisor_size * 4 + if dividend_size <= 65536: # Only show results within our limit + var time_taken = very_large_small_results[i] + + if time_taken > 0.0: # Only show valid results + if i > 0 and very_large_small_results[i - 1] > 0.0: + var prev_time = very_large_small_results[i - 1] + var actual_ratio = time_taken / prev_time + var result_line = ( + String(divisor_size) + + "\t\t" + + String(dividend_size) + + "\t\t" + + String(time_taken) + + "\t\t" + + String(actual_ratio) + ) + log_print(result_line, log_file) + else: + var result_line = ( + String(divisor_size) + + "\t\t" + + String(dividend_size) + + "\t\t" + + String(time_taken) + + "\t\tN/A" + ) + log_print(result_line, log_file) + + log_print("", log_file) + log_print("=== ANALYSIS ===", log_file) + log_print("Expected behavior for division algorithms:", log_file) + log_print("- Single word divisor: O(n) where n is dividend size", log_file) + log_print("- Double word divisor: O(n) where n is dividend size", log_file) + log_print( + "- General division: O(n²) where n is max(dividend, divisor) size", + log_file, + ) + log_print( + ( + "- Newton-Raphson (if implemented): O(M(n)) where M(n) is" + " multiplication cost" + ), + log_file, + ) + log_print("", log_file) + log_print("Division algorithm selection in the code:", log_file) + log_print( + "- Divisor = 1 word: Uses optimized single-word division", log_file + ) + log_print( + "- Divisor = 2 words: Uses optimized double-word division", log_file + ) + log_print("- Divisor > 2 words: Uses general division algorithm", log_file) + log_print("", log_file) + + # Analysis of division vs multiplication + log_print("=== DIVISION VS MULTIPLICATION ANALYSIS ===", log_file) + log_print( + "Division is generally more expensive than multiplication:", log_file + ) + log_print( + "- Division requires trial and error to find quotient digits", log_file + ) + log_print( + "- Multiple multiplications and subtractions per quotient digit", + log_file, + ) + log_print( + "- Normalization overhead for better quotient estimation", log_file + ) + log_print("", log_file) + log_print("Expected performance characteristics:", log_file) + log_print( + "- Single/double word divisors: Fast, linear in dividend size", log_file + ) + log_print("- Multi-word divisors: Slower, quadratic complexity", log_file) + log_print( + "- Very large numbers: Memory effects become significant", log_file + ) + log_print("", log_file) + + # Analysis of large / small division results + log_print("=== LARGE / SMALL DIVISION ANALYSIS ===", log_file) + log_print("Performance characteristics observed:", log_file) + log_print("- Excellent O(n²) scaling in 2n / n test case", log_file) + log_print( + "- Ratios consistently around 4.0 show proper quadratic complexity", + log_file, + ) + log_print("- Division algorithm is well-implemented", log_file) + log_print("", log_file) + log_print("4n / n vs 2n / n comparison:", log_file) + log_print( + "- Both test cases show similar quadratic scaling behavior", log_file + ) + log_print("- 4n / n takes roughly twice as long as 2n / n", log_file) + log_print("- This confirms the O(n²) complexity is correct", log_file) + log_print("", log_file) + + # Close log file + log_file.close() + print("Division benchmark completed. Log file closed.") diff --git a/benches/biguint/bench_biguint_truncate_divide.mojo b/benches/biguint/bench_biguint_truncate_divide.mojo index a0f20b9..39f7724 100644 --- a/benches/biguint/bench_biguint_truncate_divide.mojo +++ b/benches/biguint/bench_biguint_truncate_divide.mojo @@ -22,6 +22,8 @@ fn open_log_file() raises -> PythonObject: """ var python = Python.import_module("builtins") var datetime = Python.import_module("datetime") + var pysys = Python.import_module("sys") + pysys.set_int_max_str_digits(1000000) # Create logs directory if it doesn't exist var log_dir = "./logs" @@ -488,6 +490,26 @@ fn main() raises: speedup_factors, ) + # Case 31: Division of large numbers + run_benchmark_truncate_divide( + "Division of large numbers (1000 words vs 100 digits)", + "316227766_016824890_583648059_893174009_579947593" * 1000, + "141421356_237309504_880168872_420969807_856967187" * 100, + 3, + log_file, + speedup_factors, + ) + + # Case 32: Division of large numbers + run_benchmark_truncate_divide( + "Division of large numbers (10000 words vs 1234 digits)", + "316227766_016824890_583648059_893174009_579947593" * 10000, + "141421356_237309504_880168872_420969807_856967187" * 1234, + 3, + log_file, + speedup_factors, + ) + # Calculate average speedup factor (ignoring any cases that might have failed) if len(speedup_factors) > 0: var sum_speedup: Float64 = 0.0 diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo index 1cf098a..b630825 100644 --- a/src/decimojo/biguint/arithmetics.mojo +++ b/src/decimojo/biguint/arithmetics.mojo @@ -36,24 +36,27 @@ from decimojo.rounding_mode import RoundingMode # add_inplace_by_1(x: BigUInt) -> None # # subtract(x1: BigUInt, x2: BigUInt) -> BigUInt +# subtract_inplace(x1: BigUInt, x2: BigUInt) -> None # # multiply(x1: BigUInt, x2: BigUInt) -> BigUInt +# multiply_slices(x: BigUInt, y: BigUInt, start_x: Int, end_x: Int, start_y: Int, end_y: Int) -> BigUInt +# multiply_karatsuba(x: BigUInt, y: BigUInt, start_x: Int, end_x: Int, start_y: Int, end_y: Int, cutoff_number_of_words: Int) -> BigUInt # scale_up_by_power_of_10(x: BigUInt, n: Int) -> BigUInt +# scale_up_by_power_of_billion(mut x: BigUInt, n: Int) # # floor_divide(x1: BigUInt, x2: BigUInt) -> BigUInt -# floor_divide_partition(x1: BigUInt, x2: BigUInt) -> BigUInt +# floor_divide_general(x1: BigUInt, x2: BigUInt) -> BigUInt # floor_divide_inplace_by_single_word(x1: BigUInt, x2: BigUInt) -> None # floor_divide_inplace_by_double_words(x1: BigUInt, x2: BigUInt) -> None # floor_divide_inplace_by_2(x: BigUInt) -> Nonet, x2: BigUInt) -> BigUInt -# truncate_mod# truncate_divide(x1: BigUInt, x2: BigUInt) -> BigUInt +# truncate_divide(x1: BigUInt, x2: BigUInt) -> BigUInt # floor_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt # ceil_divide(x1: BigUInt, x2: BigUInt) -> BigUIntulo(x1: BigUIn# floor_divide_general(x1: BigUInt, x2: BigUInt) -> BigUInt # ceil_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt # divmod(x1: BigUInt, x2: BigUInt) -> Tuple[BigUInt, BigUInt] # scale_down_by_power_of_10(x: BigUInt, n: Int) -> BigUInt # -# estimate_quotient(x1: BigUInt, x2: BigUInt, j: Int, m: Int) -> UInt64 -# shift_words_left(x: BigUInt, j: Int) -> BigUInt +# floor_divide_estimate_quotient(x1: BigUInt, x2: BigUInt, j: Int, m: Int) -> UInt64 # power_of_10(n: Int) -> BigUInt # ===----------------------------------------------------------------------=== # @@ -298,16 +301,14 @@ fn subtract(x1: BigUInt, x2: BigUInt) raises -> BigUInt: The result of subtracting x2 from x1. """ # If the subtrahend is zero, return the minuend - if len(x2.words) == 1 and x2.words[0] == 0: + if x2.is_zero(): return x1 # We need to determine which number has the larger magnitude var comparison_result = x1.compare(x2) - if comparison_result == 0: # |x1| = |x2| return BigUInt() # Return zero - if comparison_result < 0: raise Error("biguint.arithmetics.subtract(): Underflow due to x1 < x2") @@ -338,6 +339,47 @@ fn subtract(x1: BigUInt, x2: BigUInt) raises -> BigUInt: return result^ +fn subtract_inplace(mut x: BigUInt, y: BigUInt) raises -> None: + """Subtracts y from x in place.""" + + # If the subtrahend is zero, return the minuend + if y.is_zero(): + return + + # We need to determine which number has the larger magnitude + var comparison_result = x.compare(y) + if comparison_result == 0: + x.words.resize(unsafe_uninit_length=1) + x.words[0] = UInt32(0) # Result is zero + elif comparison_result < 0: + raise Error( + "biguint.arithmetics.subtract_inplace(): Underflow due to x < y" + ) + + # Now it is safe to subtract the smaller number from the larger one + var borrow: Int32 = 0 + var ith: Int = 0 + var difference: Int32 # Int32 is sufficient for the difference + + while ith < len(x.words): + # Subtract the borrow + difference = Int32(x.words[ith]) - borrow + # Subtract smaller's word if available + if ith < len(y.words): + difference -= Int32(y.words[ith]) + # Handle borrowing if needed + if difference < Int32(0): + difference += Int32(1_000_000_000) + borrow = Int32(1) + else: + borrow = Int32(0) + x.words[ith] = UInt32(difference) + ith += 1 + + x.remove_leading_empty_words() + return + + # ===----------------------------------------------------------------------=== # # Multiplication algorithms # ===----------------------------------------------------------------------=== # @@ -364,35 +406,43 @@ fn multiply(x: BigUInt, y: BigUInt) -> BigUInt: alias CUTOFF_KARATSUBA: Int = 64 """The cutoff number of words for using Karatsuba multiplication.""" - # SPECIAL CASE: One of the operands is zero or one + # SPECIAL CASES + # If x or y is a single-word number + # We can use `multiply_by_uint32` because this is only one loop + # No need to split the long number into two parts if len(x.words) == 1: - if x.words[0] == 0: + var x_word = x.words[0] + if x_word == 0: return BigUInt(UInt32(0)) - if x.words[0] == 1: + elif x_word == 1: return y + else: + var result = y + multiply_by_uint32(result, x_word) + return result^ + if len(y.words) == 1: - if y.words[0] == 0: + var y_word = y.words[0] + if y_word == 0: return BigUInt(UInt32(0)) - if y.words[0] == 1: + if y_word == 1: return x + else: + var result = x + multiply_by_uint32(result, y_word) + return result^ - # CASE 1: - # If one number is only one-word long - # we can use school multiplication because this is only one loop - # No need to split the long number into two parts - if len(x.words) == 1 or len(y.words) == 1: - return multiply_slices(x, y, 0, len(x.words), 0, len(y.words)) - - # CASE 2: + # CASE 1 # The allocation cost is too high for small numbers to use Karatsuba # Use school multiplication for small numbers - var max_words = max(len(x.words), len(y.words)) if max_words <= CUTOFF_KARATSUBA: # return multiply_slices (x, y) return multiply_slices(x, y, 0, len(x.words), 0, len(y.words)) # multiply_slices can also takes in x, y, and indices + # CASE 2 + # Use Karatsuba multiplication for larger numbers else: return multiply_karatsuba( x, y, 0, len(x.words), 0, len(y.words), CUTOFF_KARATSUBA @@ -423,15 +473,25 @@ fn multiply_slices( # CASE: One of the operands is zero or one if n_words_x_slice == 1: - if x.words[start_x] == 0: + var x_word = x.words[start_x] + if x_word == 0: return BigUInt(UInt32(0)) - if x.words[start_x] == 1: + elif x_word == 1: return BigUInt(words=y.words[start_y:end_y]) + else: + var result = BigUInt(words=y.words[start_y:end_y]) + multiply_by_uint32(result, x_word) + return result^ if n_words_y_slice == 1: - if y.words[start_y] == 0: + var y_word = y.words[start_y] + if y_word == 0: return BigUInt(UInt32(0)) - if y.words[start_y] == 1: + elif y_word == 1: return BigUInt(words=x.words[start_x:end_x]) + else: + var result = BigUInt(words=x.words[start_x:end_x]) + multiply_by_uint32(result, y_word) + return result^ # The max number of words in the result is the sum of the words in the operands var max_result_len = n_words_x_slice + n_words_y_slice @@ -645,6 +705,34 @@ fn multiply_karatsuba( return z2^ +fn multiply_by_uint32(mut x: BigUInt, y: UInt32): + """Multiplies a BigUInt by an UInt32 word in-place. + + Args: + x: The BigUInt value to multiply. + y: The single word to multiply by. + """ + if y == 0: + x.words = List[UInt32](0) + return + + if y == 1: + return + + var y_as_uint64 = UInt64(y) + var product: UInt64 + var carry: UInt64 = 0 + + for i in range(len(x.words)): + product = UInt64(x.words[i]) * y_as_uint64 + carry + x.words[i] = UInt32(product % UInt64(1_000_000_000)) + carry = product // UInt64(1_000_000_000) + + if carry > 0: + x.words.append(UInt32(carry)) + + +# TODO: Make this in-place fn scale_up_by_power_of_10(x: BigUInt, n: Int) -> BigUInt: """Multiplies a BigUInt by 10^n if n > 0, otherwise doing nothing. @@ -769,7 +857,7 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt: # SUB-CASE: Single word // single word if len(x1.words) == 1: - var result = BigUInt(UInt32(x1.words[0] // x2.words[0])) + var result = BigUInt(List[UInt32](x1.words[0] // x2.words[0])) return result^ # SUB-CASE: Divisor is single word and is power of 2 @@ -781,7 +869,7 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt: remainder >>= 1 return result^ - # SUB-CASE: Divisor is single word (<= 10 digits) + # SUB-CASE: Divisor is single word (<= 9 digits) else: var result = x1 floor_divide_inplace_by_single_word(result, x2) @@ -859,142 +947,147 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt: return floor_divide_general(normalized_x1, normalized_x2) -fn floor_divide_general(x1: BigUInt, x2: BigUInt) raises -> BigUInt: +fn floor_divide_general(dividend: BigUInt, divisor: BigUInt) raises -> BigUInt: """General division algorithm for BigInt numbers. Args: - x1: The dividend. - x2: The divisor. + dividend: The dividend. + divisor: The divisor. Returns: - The quotient of x1 // x2. + The quotient of dividend // divisor. Raises: - ValueError: If the divisor is zero. + Error: If the divisor is zero. """ - if x2.is_zero(): - raise Error("Error in `floor_divide_general`: Division by zero") + if divisor.is_zero(): + raise Error( + "`biguint.arithmetics.floor_divide_general()`: Division by zero" + ) # Initialize result and remainder - var result = BigUInt(List[UInt32](capacity=len(x1.words))) - var remainder = x1 - - # Calculate significant digits - var n = len(remainder.words) - var m = len(x2.words) + var result = BigUInt(List[UInt32](capacity=len(dividend.words))) + var remainder = dividend # Shift and initialize - var d = n - m - for _ in range(d + 1): + var n_words_diff = len(remainder.words) - len(divisor.words) + # The quotient will have at most n_words_diff + 1 words + for _ in range(n_words_diff + 1): result.words.append(0) # Main division loop - var j = d - while j >= 0: + var index_of_word = n_words_diff # Start from the most significant word + var trial_product: BigUInt + while index_of_word >= 0: # OPTIMIZATION: Better quotient estimation - var q = estimate_quotient(remainder, x2, j, m) + var quotient = floor_divide_estimate_quotient( + remainder, divisor, index_of_word + ) # Calculate trial product - var trial_product = x2 * BigUInt(UInt32(q)) - var shifted_product = shift_words_left(trial_product, j) + trial_product = divisor + multiply_by_uint32(trial_product, UInt32(quotient)) + scale_up_by_power_of_billion(trial_product, index_of_word) + + # Should need at most 1-2 corrections after the estimation + var correction_attempts = 0 + while ( + (trial_product.compare(remainder) > 0) + and (quotient > 0) + and (correction_attempts < 3) + ): + quotient -= 1 + correction_attempts += 1 + + trial_product = divisor + multiply_by_uint32(trial_product, UInt32(quotient)) + scale_up_by_power_of_billion(trial_product, index_of_word) + + # Store the quotient word + result.words[index_of_word] = UInt32(quotient) + subtract_inplace(remainder, trial_product) + index_of_word -= 1 - # OPTIMIZATION: Binary search for adjustment - if shifted_product.compare(remainder) > 0: - var low: UInt64 = 0 - var high: UInt64 = q - 1 + result.remove_leading_empty_words() + return result^ - while low <= high: - var mid = (low + high) / 2 - # Recalculate with new q - trial_product = x2 * BigUInt(UInt32(mid)) - shifted_product = shift_words_left(trial_product, j) +fn floor_divide_estimate_quotient( + dividend: BigUInt, divisor: BigUInt, index_of_word: Int +) -> UInt64: + """Estimates the quotient digit using 3-by-2 division. - if shifted_product.compare(remainder) <= 0: - q = mid # This works - low = mid + 1 - else: - high = mid - 1 + This function implements a 3-by-2 quotient estimation algorithm, + which divides a 3-word dividend portion by a 2-word divisor to get + an accurate quotient estimate. - # Final recalculation with best q - trial_product = x2 * BigUInt(UInt32(q)) - shifted_product = shift_words_left(trial_product, j) + Args: + dividend: The dividend BigUInt number. + divisor: The divisor BigUInt number. + index_of_word: The current position in the division algorithm. - result.words[j] = UInt32(q) - remainder = subtract(remainder, shifted_product) - j -= 1 + Returns: + An estimated quotient digit (0 to 999_999_999). - result.remove_leading_empty_words() - return result^ + Notes: + The function performs division of a 3-word number by a 2-word number: + Dividend portion: R = r2 * 10^18 + r1 * 10^9 + r0. + Divisor: D = d1 * 10^9 + d0. + Goal: Estimate Q = R // D. + """ -fn floor_divide_partition(x1: BigUInt, x2: BigUInt) raises -> BigUInt: - """Partition division algorithm for BigInt numbers. + # Extract three highest words of relevant dividend portion + var r2 = UInt64(0) + if index_of_word + len(divisor.words) < len(dividend.words): + r2 = UInt64(dividend.words[index_of_word + len(divisor.words)]) - Args: - x1: The dividend. - x2: The divisor. + var r1 = UInt64(0) + if index_of_word + len(divisor.words) - 1 < len(dividend.words): + r1 = UInt64(dividend.words[index_of_word + len(divisor.words) - 1]) - Returns: - The quotient of x1 // x2. + var r0 = UInt64(0) + if index_of_word + len(divisor.words) - 2 < len(dividend.words): + r0 = UInt64(dividend.words[index_of_word + len(divisor.words) - 2]) - Raises: - ValueError: If the divisor is zero. + # Extract two highest words of divisor + var d1 = UInt64(divisor.words[len(divisor.words) - 1]) + var d0 = UInt64(0) + if len(divisor.words) >= 2: + d0 = UInt64(divisor.words[len(divisor.words) - 2]) - Notes: + # Special case: if divisor is single word, fall back to 2-by-1 division + if len(divisor.words) == 1: + if r2 == d1: + return 999_999_999 + return min((r2 * UInt64(1_000_000_000) + r1) // d1, UInt64(999_999_999)) - If words of x1 is more than 2 times the words of x2, then partition x1 into - several parts and divide x2 sequentially using general division. + # Special case: if high word of dividend equals high word of divisor + # The quotient is likely to be large, so we use a conservative estimate + if r2 == d1: + return UInt64(999_999_999) - words of x1: m - words of x2: n - number of partitions: m // n - words of first partition: n + m % n - the remainder is appended to the next partition. - """ + # 3-by-2 division using 128-bit arithmetic + # We need to compute: (r2 * 10^18 + r1 * 10^9 + r0) // (d1 * 10^9 + d0) - if x2.is_zero(): - raise Error("Error in `floor_divide_partition`: Division by zero") + # Convert to 128-bit for high precision calculation + var dividend_high = UInt128(r2) * UInt128(1_000_000_000) + UInt128(r1) + var dividend_low = UInt128(r0) + var divisor_128 = UInt128(d1) * UInt128(1_000_000_000) + UInt128(d0) - # Initialize result and remainder - var number_of_partitions = len(x1.words) // len(x2.words) - var number_of_words_remainder = len(x1.words) % len(x2.words) - var number_of_words_dividend: Int - var result = x1 - result.words.resize(len(x1.words) - number_of_words_remainder, UInt32(0)) - var remainder = BigUInt(List[UInt32](capacity=len(x2.words))) - for i in range(len(x1.words) - number_of_words_remainder, len(x1.words)): - remainder.words.append(x1.words[i]) - - for ith in range(number_of_partitions): - number_of_words_dividend = len(x2.words) + number_of_words_remainder - var dividend_list_of_words = List[UInt32]( - capacity=number_of_words_dividend - ) - for i in range(len(x2.words)): - dividend_list_of_words.append( - x1.words[(number_of_partitions - ith - 1) * len(x2.words) + i] - ) - for i in range(number_of_words_remainder): - dividend_list_of_words.append(remainder.words[i]) - - var dividend = BigUInt(dividend_list_of_words) - var quotient = floor_divide_general(dividend, x2) - for i in range(len(x2.words)): - if i < len(quotient.words): - result.words[ - (number_of_partitions - ith - 1) * len(x2.words) + i - ] = quotient.words[i] - else: - result.words[ - (number_of_partitions - ith - 1) * len(x2.words) + i - ] = UInt32(0) - remainder = subtract(dividend, multiply(quotient, x2)) - number_of_words_remainder = len(remainder.words) + # Handle the case where we need to consider the full 3-word dividend + # We compute: (dividend_high * 10^9 + dividend_low) // divisor_128 + var full_dividend = dividend_high * UInt128(1_000_000_000) + dividend_low - result.remove_leading_empty_words() - return result^ + # Perform the division + var quotient_128 = full_dividend // divisor_128 + + # Convert back to UInt64 + var quotient = UInt64(quotient_128) + + # Ensure we don't exceed the maximum value for a single word + return min(quotient, UInt64(999_999_999)) fn floor_divide_inplace_by_single_word( @@ -1310,68 +1403,10 @@ fn scale_down_by_power_of_10(x: BigUInt, n: Int) raises -> BigUInt: # ===----------------------------------------------------------------------=== # # Division Helper Functions -# estimate_quotient, shift_words_left, power_of_10 +# power_of_10 # ===----------------------------------------------------------------------=== # -fn estimate_quotient( - dividend: BigUInt, divisor: BigUInt, j: Int, m: Int -) -> UInt64: - """Gets a better estimate of the quotient digit.""" - # Get three highest words of relevant dividend portion - var r2 = UInt64(0) - if j + m < len(dividend.words): - r2 = UInt64(dividend.words[j + m]) - - var r1 = UInt64(0) - if j + m - 1 < len(dividend.words): - r1 = UInt64(dividend.words[j + m - 1]) - - var r0 = UInt64(0) - if j + m - 2 < len(dividend.words): - r0 = UInt64(dividend.words[j + m - 2]) - - # Get two highest words of divisor - var d1 = UInt64(divisor.words[m - 1]) - var d0 = UInt64(0) - if m >= 2: - d0 = UInt64(divisor.words[m - 2]) - - # If three most significant digits match, quotient would be max - if r2 == d1: - return 999_999_999 - - # Two-word by one-word division - var qhat = (r2 * 1_000_000_000 + r1) // d1 - - # Adjust if estimate is too large (happens less often with better estimate) - while (qhat >= 1_000_000_000) or ( - qhat * d0 > (r2 * 1_000_000_000 + r1 - qhat * d1) * 1_000_000_000 + r0 - ): - qhat -= 1 - - return min(qhat, UInt64(999_999_999)) - - -fn shift_words_left(num: BigUInt, positions: Int) -> BigUInt: - """Shifts a BigUInt left by adding trailing zeros. - Equivalent to multiplying by 10^(9*positions).""" - if num.is_zero(): - return BigUInt() - - var result = BigUInt(List[UInt32](capacity=len(num.words) + positions)) - - # Add zeros for the shift - for _ in range(positions): - result.words.append(0) - - # Add the original number's words - for i in range(len(num.words)): - result.words.append(num.words[i]) - - return result^ - - fn power_of_10(n: Int) raises -> BigUInt: """Calculates 10^n efficiently.""" if n < 0: diff --git a/src/decimojo/biguint/biguint.mojo b/src/decimojo/biguint/biguint.mojo index 172398c..1d33dc5 100644 --- a/src/decimojo/biguint/biguint.mojo +++ b/src/decimojo/biguint/biguint.mojo @@ -902,13 +902,16 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable): @always_inline fn __iadd__(mut self, other: Self): """Adds `other` to `self` in place. - See `add_inplace()` for more information. + See `biguint.arithmetics.add_inplace()` for more information. """ decimojo.biguint.arithmetics.add_inplace(self, other) @always_inline fn __isub__(mut self, other: Self) raises: - self = decimojo.biguint.arithmetics.subtract(self, other) + """Subtracts `other` from `self` in place. + See `biguint.arithmetics.subtract_inplace()` for more information. + """ + decimojo.biguint.arithmetics.subtract_inplace(self, other) @always_inline fn __imul__(mut self, other: Self) raises: @@ -1155,18 +1158,27 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable): for word in self.words: if word != 0: return False - else: - return True + return True @always_inline fn is_one(self) -> Bool: """Returns True if this BigUInt represents one.""" - return len(self.words) == 1 and self.words[0] == 1 + if self.words[0] != 1: + return False + for i in self.words[1:]: + if i != 0: + return False + return True @always_inline fn is_two(self) -> Bool: """Returns True if this BigUInt represents two.""" - return len(self.words) == 1 and self.words[0] == 2 + if len(self.words) != 2: + return False + for i in self.words[1:]: + if i != 0: + return False + return True @always_inline fn is_power_of_10(x: BigUInt) -> Bool: @@ -1176,15 +1188,15 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable): return False var word = x.words[len(x.words) - 1] if ( - (word == 1) - or (word == 10) - or (word == 100) - or (word == 1000) - or (word == 10_000) - or (word == 100_000) - or (word == 1_000_000) - or (word == 10_000_000) - or (word == 100_000_000) + (word == UInt32(1)) + or (word == UInt32(10)) + or (word == UInt32(100)) + or (word == UInt32(1000)) + or (word == UInt32(10_000)) + or (word == UInt32(100_000)) + or (word == UInt32(1_000_000)) + or (word == UInt32(10_000_000)) + or (word == UInt32(100_000_000)) ): return True return False @@ -1393,10 +1405,3 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable): 1, ) return result^ - - @always_inline - fn shift_words_left(self, position: Int) -> Self: - """Shifts the words of the BigUInt to the left by `position` bits. - See `arithmetics.shift_words_left()` for more information. - """ - return decimojo.biguint.arithmetics.shift_words_left(self, position)