diff --git a/benches/bigdecimal/bench_bigdecimal_divide.mojo b/benches/bigdecimal/bench_bigdecimal_divide.mojo index 53a1076..f2e556d 100644 --- a/benches/bigdecimal/bench_bigdecimal_divide.mojo +++ b/benches/bigdecimal/bench_bigdecimal_divide.mojo @@ -12,6 +12,7 @@ from collections import List alias PRECISION = 4096 alias ITERATIONS = 100 +alias ITERATIONS_LARGE_NUMBERS = 3 fn open_log_file() raises -> PythonObject: @@ -758,6 +759,86 @@ fn main() raises: speedup_factors, ) + # Case 57: Division 65536 words / 65536 words + run_benchmark_divide( + "Division 65536 words / 65536 words", + "123456789" * 32768 + "." + "123456789" * 32768, + "987654321" * 32768 + "." + "987654321" * 32768, + iterations, + log_file, + speedup_factors, + ) + + # Case 58: Division 262144 words / 262144 words + run_benchmark_divide( + "Division 262144 words / 262144 words", + "123456789" * 131072 + "." + "123456789" * 131072, + "987654321" * 131072 + "." + "987654321" * 131072, + ITERATIONS_LARGE_NUMBERS, + log_file, + speedup_factors, + ) + + # Case 59: Division 65536 words / 32768 words + run_benchmark_divide( + "Division 65536 words / 32768 words", + "123456789" * 32768 + "." + "123456789" * 32768, + "987654321" * 16384 + "." + "987654321" * 16384, + ITERATIONS_LARGE_NUMBERS, + log_file, + speedup_factors, + ) + + # Case 60: Division 65536 words / 16384 words + run_benchmark_divide( + "Division 65536 words / 16384 words", + "123456789" * 16384 + "." + "123456789" * 16384, + "987654321" * 8192 + "." + "987654321" * 8192, + ITERATIONS_LARGE_NUMBERS, + log_file, + speedup_factors, + ) + + # Case 61: Division 65536 words / 8192 words + run_benchmark_divide( + "Division 65536 words / 8192 words", + "123456789" * 8192 + "." + "123456789" * 8192, + "987654321" * 4096 + "." + "987654321" * 4096, + ITERATIONS_LARGE_NUMBERS, + log_file, + speedup_factors, + ) + + # Case 62: Division 65536 words / 4096 words + run_benchmark_divide( + "Division 65536 words / 4096 words", + "123456789" * 4096 + "." + "123456789" * 4096, + "987654321" * 2048 + "." + "987654321" * 2048, + ITERATIONS_LARGE_NUMBERS, + log_file, + speedup_factors, + ) + + # Case 63: Division 65536 words / 2048 words + run_benchmark_divide( + "Division 65536 words / 2048 words", + "123456789" * 2048 + "." + "123456789" * 2048, + "987654321" * 1024 + "." + "987654321" * 1024, + ITERATIONS_LARGE_NUMBERS, + log_file, + speedup_factors, + ) + + # Case 64: Division 65536 words / 1024 words + run_benchmark_divide( + "Division 65536 words / 1024 words", + "123456789" * 1024 + "." + "123456789" * 1024, + "987654321" * 512 + "." + "987654321" * 512, + ITERATIONS_LARGE_NUMBERS, + log_file, + speedup_factors, + ) + # Calculate average speedup factor (ignoring any cases that might have failed) if len(speedup_factors) > 0: var sum_speedup: Float64 = 0.0 diff --git a/docs/todo.md b/docs/todo.md index 1f66596..3022777 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -5,6 +5,7 @@ This is a to-do list for DeciMojo. - [ ] When Mojo supports global variables, implement a global variable for the `BigDecimal` class to store the precision of the decimal number. This will allow users to set the precision globally, rather than having to set it for each function of the `BigDecimal` class. - [ ] Implement different methods for adding decimojo types with `Int` types so that an implicit conversion is not required. - [ ] Use debug mode to check for unnecessary zero words before all arithmetic operations. This will help ensure that there are no zero words, which can simplify the speed of checking for zero because we only need to check the first word. +- [ ] Check the `floor_divide()` function of `BigUInt`. Currently, the speed of division between imilar-sized numbers are okay, but the speed of 2n-by-n, 4n-by-n, and 8n-by-n divisions decreases unproportionally. This is likely due to the segmentation of the dividend in the Burnikel-Ziegler algorithm. - [x] (#31) The `exp()` function performs slower than Python's counterpart in specific cases. Detailed investigation reveals the bottleneck stems from multiplication operations between decimals with significant fractional components. These operations currently rely on UInt256 arithmetic, which introduces performance overhead. Optimization of the `multiply()` function is required to address these performance bottlenecks, particularly for high-precision decimal multiplication with many digits after the decimal point. - [x] Implement different methods for augmented arithmetic assignments to improve memeory-efficiency and performance. diff --git a/src/decimojo/bigdecimal/arithmetics.mojo b/src/decimojo/bigdecimal/arithmetics.mojo index d7adc54..9f4e315 100644 --- a/src/decimojo/bigdecimal/arithmetics.mojo +++ b/src/decimojo/bigdecimal/arithmetics.mojo @@ -18,8 +18,7 @@ Implements functions for mathematical operations on BigDecimal objects. """ -import time -import testing +import math from decimojo.rounding_mode import RoundingMode import decimojo.utility @@ -184,17 +183,17 @@ fn multiply(x1: BigDecimal, x2: BigDecimal) -> BigDecimal: # TODO: Optimize when divided by power of 10 fn true_divide( - x1: BigDecimal, x2: BigDecimal, precision: Int + x: BigDecimal, y: BigDecimal, precision: Int ) raises -> BigDecimal: """Returns the quotient of two numbers with specified precision. Args: - x1: The first operand (dividend). - x2: The second operand (divisor). + x: The first operand (dividend). + y: The second operand (divisor). precision: The number of significant digits in the result. Returns: - The quotient of x1 and x2, with precision up to `precision` + The quotient of x and y, with precision up to `precision` significant digits. Notes: @@ -207,122 +206,167 @@ fn true_divide( point is calcuated to precision + BUFFER_DIGITS, and the result is rounded to precision according to the specified rules. """ - alias BUFFER_DIGITS = 9 # Buffer digits for rounding - # Check for division by zero - if x2.coefficient.is_zero(): - raise Error("Division by zero") + if y.coefficient.is_zero(): + raise Error("bigdecimal.arithmetics.true_divide(): Division by zero") # Handle dividend of zero - if x1.coefficient.is_zero(): + if x.coefficient.is_zero(): return BigDecimal( coefficient=BigUInt.ZERO, - scale=x1.scale - x2.scale, - sign=x1.sign != x2.sign, + scale=x.scale - y.scale, + sign=x.sign != y.sign, ) - # First estimate the number of significant digits needed in the dividend - # to produce a result with precision significant digits - var x1_digits = x1.coefficient.number_of_digits() - var x2_digits = x2.coefficient.number_of_digits() + # For other cases, we use `true_divide_general()` to handle the division + # Note that this functiona already considers extra buffer digits + return true_divide_general(x, y, precision) - # Check whether the coefficients can already be divided exactly - # If division is exact, return the result immediately - if x1_digits >= x2_digits: - var quotient: BigUInt - var remainder: BigUInt - quotient, remainder = x1.coefficient.divmod(x2.coefficient) - # Calculate the expected result scale - var result_scale = x1.scale - x2.scale - if remainder.is_zero(): - # For exact division, calculate significant digits in result - var num_sig_digits = quotient.number_of_digits() - # If the significant digits are within precision, return as is - if num_sig_digits <= precision: - return BigDecimal( - coefficient=quotient^, - scale=result_scale, - sign=x1.sign != x2.sign, - ) - else: # num_sig_digits > precision - # Otherwise, need to truncate to max precision - var digits_to_remove = num_sig_digits - precision - var quotient = quotient.remove_trailing_digits_with_rounding( - digits_to_remove, - RoundingMode.ROUND_HALF_EVEN, - remove_extra_digit_due_to_rounding=True, - ) - result_scale -= digits_to_remove - return BigDecimal( - coefficient=quotient^, - scale=result_scale, - sign=x1.sign != x2.sign, - ) - - # Calculate how many additional digits we need in the dividend - # We want: (x1_digits + additional) - x2_digits ≈ precision - var additional_digits = precision + BUFFER_DIGITS - (x1_digits - x2_digits) - additional_digits = max(0, additional_digits) - # Scale up the dividend to ensure sufficient precision - var scaled_x1 = x1.coefficient - if additional_digits > 0: - scaled_x1.multiply_inplace_by_power_of_ten(additional_digits) +fn true_divide_fast( + x: BigDecimal, y: BigDecimal, minimum_precision: Int +) raises -> BigDecimal: + """Returns the quotient of two numbers. - # Perform division - var quotient: BigUInt - var remainder: BigUInt - quotient, remainder = scaled_x1.divmod(x2.coefficient) - var result_scale = additional_digits + x1.scale - x2.scale + Args: + x: The first operand (dividend). + y: The second operand (divisor). + minimum_precision: The minimum number of significant digits in the + result. Should be greater than 0. - # Check if division is exact - var is_exact = remainder.is_zero() + Returns: + The quotient of x and y with at least `minimum_precision` + significant digits. - # Check total digits in the result - var result_digits = quotient.number_of_digits() + Notes: - # If the division is exact - # we may need to remove the extra trailing zeros. - # TODO: Think about the behavior, whether division should always return the - # `precision` even if the result scale is less than precision. - # Example: 10 / 4 = 2.50000000000000000000000000000 - # If exact division, remove trailing zeros - if is_exact: - var num_trailing_zeros = quotient.number_of_trailing_zeros() - if num_trailing_zeros > 0: - quotient = quotient.floor_divide_by_power_of_ten(num_trailing_zeros) - result_scale -= num_trailing_zeros - # Recalculate digits after removing trailing zeros - result_digits = quotient.number_of_digits() - - # Otherwise, the division is not exact or have too many digits - # round to precision - # If we have too many significant digits, reduce to precision - # Extract the digits to be rounded - # Example: 2 digits to remove - # divisor = 100 - # half_divisor = 50 - # rounding_digits = 123456 % 100 = 56 - # result_coefficient = 123456 // 100 = 1234 - # If rounding_digits > half_divisor, round up - # If rounding_digits == half_divisor, round up if the last digit of - # result_coefficient is odd - # If rounding_digits < half_divisor, round down - if result_digits > precision: - var digits_to_remove = result_digits - precision - quotient = quotient.remove_trailing_digits_with_rounding( - digits_to_remove, - RoundingMode.ROUND_HALF_EVEN, - remove_extra_digit_due_to_rounding=True, + This function conduct a quick division that: + (1) does not round the result to the specified precision. + (2) does not check the exact division nor remove extra trailing zeros. + """ + + # Yuhao Zhu: + # x = a * 10*(-m) + # y = b * 10*(-n) + # Let s = extra digits to ensure precision + # x / y = x * 10^s / y / 10^s = (a * 10^s // b) * 10*(-(m + s - n)) + # We need to ensure that a * 10^s // b has more significant digits than p. + # A quicker way is to add whole empty words to the dividend. + # Let n_diff = len(a.words) - len(b.words). + # We add ceil(precision // 9) + 1 - n_diff empty words to the dividend. + # This ensures that we always have at least 9 extra digits in the dividend. + + debug_assert[assert_mode="none"]( + minimum_precision > 0, + "Minimum precision should be greater than 0", + ) + + var diff_n_words = len(x.coefficient.words) - len(y.coefficient.words) + var extra_words = math.ceildiv(minimum_precision, 9) + 2 - diff_n_words + var extra_digits = extra_words * 9 + + var coef_x: BigUInt + if extra_words > 0: + coef_x = decimojo.biguint.arithmetics.multiply_by_power_of_billion( + x.coefficient, extra_words ) - # Adjust the scale accordingly - result_scale -= digits_to_remove + elif extra_words < 0: + # TODO: Replace this with `floor_divide_by_power_of_billion()` + coef_x = decimojo.biguint.arithmetics.floor_divide_by_power_of_ten( + x.coefficient, -extra_words * 9 + ) + else: + coef_x = x.coefficient + var coef = coef_x // y.coefficient + var scale = x.scale + extra_digits - y.scale return BigDecimal( - coefficient=quotient^, - scale=result_scale, - sign=x1.sign != x2.sign, + coefficient=coef^, + scale=scale, + sign=x.sign != y.sign, + ) + + +fn true_divide_general( + x: BigDecimal, y: BigDecimal, precision: Int +) raises -> BigDecimal: + """Returns the quotient of two numbers with the specified precision. + + Args: + x: The first operand (dividend). + y: The second operand (divisor). + precision: The minimum number of significant digits in the + result. Should be greater than 0. + + Returns: + The quotient of x and y with the specified precision. + + Notes: + + This function conduct a division that: + (1) rounds the result to the specified precision, + (2) checks the exact division and remove extra trailing zeros. + """ + + # Yuhao Zhu: + # x = a * 10*(-m) + # y = b * 10*(-n) + # Let s = extra digits to ensure precision + # x / y = x * 10^s / y / 10^s = (a * 10^s // b) * 10*(-(m + s - n)) + # We need to ensure that a * 10^s // b has more significant digits than p. + # A quicker way is to add whole empty words to the dividend. + # Let n_diff = len(a.words) - len(b.words). + # We add ceil(precision // 9) + 1 + max(-n_diff, 0) words to the dividend. + # This ensures that we always have at least 9 extra digits in the dividend. + # We take max(-n_diff, 0) because we need to check the exact division. + + debug_assert[assert_mode="none"]( + precision > 0, + "Precision should be greater than 0", + ) + + var diff_n_words = len(x.coefficient.words) - len(y.coefficient.words) + var extra_words = math.ceildiv(precision, 9) + 2 + if diff_n_words < 0: + extra_words -= diff_n_words # diff_n_words is negative, so we add words + var extra_digits = extra_words * 9 + + var coef_x: BigUInt + if extra_words > 0: + coef_x = decimojo.biguint.arithmetics.multiply_by_power_of_billion( + x.coefficient, extra_words + ) + else: + coef_x = x.coefficient + + var coef = coef_x // y.coefficient + if coef * y.coefficient == coef_x: + # The division is exact, so we need to remove the extra trailing zeros + # so that the final scale is at least (x.scale - y.scale). + # If x.scale - y.scale < 0, we can safely remove all trailing zeros. + # Otherwise, we can remove at most extra digits added. + var num_digits_to_remove = min( + extra_digits, coef.number_of_trailing_zeros() + ) + # TODO: Make a in-place version of this + coef = decimojo.biguint.arithmetics.floor_divide_by_power_of_ten( + coef, num_digits_to_remove + ) + extra_digits -= num_digits_to_remove + + var scale = x.scale + extra_digits - y.scale + var result = BigDecimal( + coefficient=coef^, + scale=scale, + sign=x.sign != y.sign, + ) + result.round_to_precision( + precision, + RoundingMode.ROUND_HALF_EVEN, + remove_extra_digit_due_to_rounding=True, + fill_zeros_to_precision=False, ) + return result^ fn true_divide_inexact(