diff --git a/README.md b/README.md
index f91ccc7..ad50663 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ DeciMojo provides an arbitrary-precision decimal and integer mathematics library
 The core types are:
 
 - A base-10 arbitrary-precision signed integer type (`BigInt`) and a base-10 arbitrary-precision unsigned integer type (`BigUInt`) supporting unlimited digits[^integer]. It features comprehensive arithmetic operations, comparison functions, and supports extremely large integer calculations efficiently.
-- An arbitrary-precision decimal implementation `BigDecimal` allowing for calculations with unlimited digits and decimal places[^arbitrary].
+- An arbitrary-precision decimal implementation `BigDecimal` allowing for calculations with unlimited digits and decimal places[^arbitrary]. It provides a complete set of arithmetic operations, comparisons, and mathematical functions like logarithms, exponentiation, roots, trigonometric functions, etc. It also supports rounding modes and conversions to/from built-in types.
 - A 128-bit fixed-point decimal implementation (`Decimal`) supporting up to 29 significant digits with a maximum of 28 decimal places[^fixed]. It features a complete set of mathematical functions including logarithms, exponentiation, roots, etc.
 
 This repository includes [TOMLMojo](https://github.com/forfudan/decimojo/tree/main/src/tomlmojo), a lightweight TOML parser in pure Mojo. It parses configuration files and test data, supporting basic types, arrays, and nested tables. While created for DeciMojo's testing framework, it offers general-purpose structured data parsing with a clean, simple API.
diff --git a/benches/biguint/bench.mojo b/benches/biguint/bench.mojo
index 654d147..5c0f08b 100644
--- a/benches/biguint/bench.mojo
+++ b/benches/biguint/bench.mojo
@@ -12,7 +12,7 @@ This is the BigUInt Benchmarks
 =========================================
 add:         Add
 mul:         Multiply
-trunc:       Truncate divide (//)
+div:         Truncate divide (//)
 fromstr:     From string
 all:         Run all benchmarks
 q:           Exit
@@ -24,7 +24,7 @@ q:           Exit
         bench_add()
     elif command == "mul":
         bench_multiply()
-    elif command == "trunc":
+    elif command == "div":
         bench_truncate_divide()
     elif command == "fromstr":
         bench_from_string()
diff --git a/benches/biguint/bench_biguint_divide_complexity.mojo b/benches/biguint/bench_biguint_divide_complexity.mojo
new file mode 100644
index 0000000..cb270fb
--- /dev/null
+++ b/benches/biguint/bench_biguint_divide_complexity.mojo
@@ -0,0 +1,378 @@
+# ===----------------------------------------------------------------------=== #
+# Benchmark for BigUInt division time complexity analysis
+# Testing word sizes from 32 to 65536 words (powers of 2)
+# ===----------------------------------------------------------------------=== #
+
+from time import perf_counter_ns
+from decimojo import BigUInt
+from decimojo.biguint.arithmetics import floor_divide
+from python import Python, PythonObject
+import os
+
+
+fn create_log_file() raises -> PythonObject:
+    """Creates and opens a log file with timestamp."""
+    var python = Python.import_module("builtins")
+    var datetime = Python.import_module("datetime")
+
+    # Create logs directory if it doesn't exist
+    var log_dir = "./logs"
+    if not os.path.exists(log_dir):
+        os.makedirs(log_dir)
+
+    # Generate timestamp for filename
+    var timestamp = String(datetime.datetime.now().isoformat())
+    var log_filename = (
+        log_dir + "/benchmark_divide_complexity_" + timestamp + ".log"
+    )
+
+    print("Saving benchmark results to:", log_filename)
+    return python.open(log_filename, "w")
+
+
+fn log_print(msg: String, log_file: PythonObject) raises:
+    """Prints message to both console and log file."""
+    print(msg)
+    log_file.write(msg + "\n")
+    log_file.flush()
+
+
+fn create_test_biguint(num_words: Int) -> BigUInt:
+    """Creates a BigUInt with the specified number of words filled with test values.
+    """
+    var words = List[UInt32](capacity=num_words)
+
+    # Fill with predictable values (avoid randomness for consistent testing)
+    for i in range(num_words):
+        if i == num_words - 1:
+            # Ensure the most significant word is non-zero
+            words.append(UInt32(100_000_000 + (i % 800_000_000)))
+        else:
+            words.append(UInt32(123_456_789 + (i % 876_543_210)))
+
+    return BigUInt(words=words^)
+
+
+fn create_smaller_divisor(
+    dividend_words: Int, divisor_ratio: Float64
+) -> BigUInt:
+    """Creates a divisor that's smaller than the dividend by the given ratio."""
+    var divisor_words = max(1, Int(Float64(dividend_words) / divisor_ratio))
+    return create_test_biguint(divisor_words)
+
+
+fn benchmark_divide_at_size(
+    dividend_words: Int,
+    divisor_words: Int,
+    iterations: Int,
+    log_file: PythonObject,
+) raises -> Float64:
+    """Benchmarks division for specific word sizes."""
+    var msg = (
+        "Testing "
+        + String(dividend_words)
+        + " / "
+        + String(divisor_words)
+        + " words..."
+    )
+    log_print(msg, log_file)
+
+    # Create test BigUInt numbers
+    var dividend = create_test_biguint(dividend_words)
+    var divisor = create_test_biguint(divisor_words)
+
+    var total_time: Float64 = 0.0
+
+    # Perform multiple iterations to get average time
+    for i in range(iterations):
+        var start_time = perf_counter_ns()
+        var _result = floor_divide(dividend, divisor)
+        var end_time = perf_counter_ns()
+
+        var elapsed = (
+            Float64(end_time - start_time) / 1_000_000_000.0
+        )  # Convert to seconds
+        total_time += elapsed
+
+        # Print intermediate results
+        var iter_msg = (
+            "  Iteration " + String(i + 1) + ": " + String(elapsed) + " seconds"
+        )
+        log_print(iter_msg, log_file)
+
+    var average_time = total_time / Float64(iterations)
+    var avg_msg = (
+        "  Average time for "
+        + String(dividend_words)
+        + " / "
+        + String(divisor_words)
+        + " words: "
+        + String(average_time)
+        + " seconds"
+    )
+    log_print(avg_msg, log_file)
+    return average_time
+
+
+fn main() raises:
+    """Main benchmark function testing division complexity."""
+    # Create log file
+    var log_file = create_log_file()
+    var datetime = Python.import_module("datetime")
+
+    # Display benchmark header with system information
+    log_print(
+        "=== DeciMojo BigUInt Division Time Complexity Benchmark ===",
+        log_file,
+    )
+    log_print("Time: " + String(datetime.datetime.now().isoformat()), log_file)
+
+    # Get system information
+    try:
+        var platform = Python.import_module("platform")
+        log_print(
+            "System: "
+            + String(platform.system())
+            + " "
+            + String(platform.release()),
+            log_file,
+        )
+        log_print("Processor: " + String(platform.processor()), log_file)
+        log_print(
+            "Python version: " + String(platform.python_version()), log_file
+        )
+        log_print("Machine: " + String(platform.machine()), log_file)
+        log_print("Node: " + String(platform.node()), log_file)
+    except:
+        log_print("Could not retrieve system information", log_file)
+
+    log_print("", log_file)
+    log_print(
+        "Testing division with various dividend and divisor sizes", log_file
+    )
+    log_print("Each test uses 5 iterations for averaging", log_file)
+    log_print(
+        "WARNING: Larger sizes (>100K words) may take significant time!",
+        log_file,
+    )
+    log_print("", log_file)
+
+    # Test sizes: powers of 2 from 32 to 65536
+    var test_sizes = List[Int]()
+    test_sizes.append(32)
+    test_sizes.append(64)
+    test_sizes.append(128)
+    test_sizes.append(256)
+    test_sizes.append(512)
+    test_sizes.append(1024)
+    test_sizes.append(2048)
+    test_sizes.append(4096)
+    test_sizes.append(8192)
+    test_sizes.append(16384)
+    test_sizes.append(32768)
+    test_sizes.append(65536)
+
+    # Test Case 1: Large / Small division (2n / n)
+    log_print("=== TEST CASE 1: LARGE / SMALL DIVISION (2n / n) ===", log_file)
+    log_print(
+        "Testing division where dividend is twice the size of divisor", log_file
+    )
+    log_print("", log_file)
+
+    var large_small_results = List[Float64]()
+    for i in range(len(test_sizes)):
+        var divisor_size = test_sizes[i]
+        var dividend_size = divisor_size * 2
+        if dividend_size <= 65536:  # Stay within our limit
+            var avg_time = benchmark_divide_at_size(
+                dividend_size, divisor_size, 5, log_file
+            )
+            large_small_results.append(avg_time)
+        else:
+            large_small_results.append(0.0)  # Placeholder
+        log_print("", log_file)
+
+    # Test Case 2: Very Large / Small division (4n / n)
+    log_print(
+        "=== TEST CASE 2: VERY LARGE / SMALL DIVISION (4n / n) ===", log_file
+    )
+    log_print(
+        "Testing division where dividend is four times the size of divisor",
+        log_file,
+    )
+    log_print("", log_file)
+
+    var very_large_small_results = List[Float64]()
+    for i in range(len(test_sizes)):
+        var divisor_size = test_sizes[i]
+        var dividend_size = divisor_size * 4
+        if dividend_size <= 65536:  # Stay within our limit
+            var avg_time = benchmark_divide_at_size(
+                dividend_size, divisor_size, 5, log_file
+            )
+            very_large_small_results.append(avg_time)
+        else:
+            very_large_small_results.append(0.0)  # Placeholder
+        log_print("", log_file)
+
+    # Print summary tables
+    log_print(
+        "=== SUMMARY TABLE: LARGE / SMALL DIVISION (2n / n) ===", log_file
+    )
+    log_print(
+        "Divisor\t\tDividend\t\tTime (s)\t\tRatio to Previous",
+        log_file,
+    )
+    log_print(
+        "--------------------------------------------------------------------------------",
+        log_file,
+    )
+
+    for i in range(len(test_sizes)):
+        var divisor_size = test_sizes[i]
+        var dividend_size = divisor_size * 2
+        var time_taken = large_small_results[i]
+
+        if time_taken > 0.0:  # Only show valid results
+            if i > 0 and large_small_results[i - 1] > 0.0:
+                var prev_time = large_small_results[i - 1]
+                var actual_ratio = time_taken / prev_time
+                var result_line = (
+                    String(divisor_size)
+                    + "\t\t"
+                    + String(dividend_size)
+                    + "\t\t"
+                    + String(time_taken)
+                    + "\t\t"
+                    + String(actual_ratio)
+                )
+                log_print(result_line, log_file)
+            else:
+                var result_line = (
+                    String(divisor_size)
+                    + "\t\t"
+                    + String(dividend_size)
+                    + "\t\t"
+                    + String(time_taken)
+                    + "\t\tN/A"
+                )
+                log_print(result_line, log_file)
+
+    log_print("", log_file)
+    log_print(
+        "=== SUMMARY TABLE: VERY LARGE / SMALL DIVISION (4n / n) ===", log_file
+    )
+    log_print(
+        "Divisor\t\tDividend\t\tTime (s)\t\tRatio to Previous",
+        log_file,
+    )
+    log_print(
+        "--------------------------------------------------------------------------------",
+        log_file,
+    )
+
+    for i in range(len(test_sizes)):
+        var divisor_size = test_sizes[i]
+        var dividend_size = divisor_size * 4
+        if dividend_size <= 65536:  # Only show results within our limit
+            var time_taken = very_large_small_results[i]
+
+            if time_taken > 0.0:  # Only show valid results
+                if i > 0 and very_large_small_results[i - 1] > 0.0:
+                    var prev_time = very_large_small_results[i - 1]
+                    var actual_ratio = time_taken / prev_time
+                    var result_line = (
+                        String(divisor_size)
+                        + "\t\t"
+                        + String(dividend_size)
+                        + "\t\t"
+                        + String(time_taken)
+                        + "\t\t"
+                        + String(actual_ratio)
+                    )
+                    log_print(result_line, log_file)
+                else:
+                    var result_line = (
+                        String(divisor_size)
+                        + "\t\t"
+                        + String(dividend_size)
+                        + "\t\t"
+                        + String(time_taken)
+                        + "\t\tN/A"
+                    )
+                    log_print(result_line, log_file)
+
+    log_print("", log_file)
+    log_print("=== ANALYSIS ===", log_file)
+    log_print("Expected behavior for division algorithms:", log_file)
+    log_print("- Single word divisor: O(n) where n is dividend size", log_file)
+    log_print("- Double word divisor: O(n) where n is dividend size", log_file)
+    log_print(
+        "- General division: O(n²) where n is max(dividend, divisor) size",
+        log_file,
+    )
+    log_print(
+        (
+            "- Newton-Raphson (if implemented): O(M(n)) where M(n) is"
+            " multiplication cost"
+        ),
+        log_file,
+    )
+    log_print("", log_file)
+    log_print("Division algorithm selection in the code:", log_file)
+    log_print(
+        "- Divisor = 1 word: Uses optimized single-word division", log_file
+    )
+    log_print(
+        "- Divisor = 2 words: Uses optimized double-word division", log_file
+    )
+    log_print("- Divisor > 2 words: Uses general division algorithm", log_file)
+    log_print("", log_file)
+
+    # Analysis of division vs multiplication
+    log_print("=== DIVISION VS MULTIPLICATION ANALYSIS ===", log_file)
+    log_print(
+        "Division is generally more expensive than multiplication:", log_file
+    )
+    log_print(
+        "- Division requires trial and error to find quotient digits", log_file
+    )
+    log_print(
+        "- Multiple multiplications and subtractions per quotient digit",
+        log_file,
+    )
+    log_print(
+        "- Normalization overhead for better quotient estimation", log_file
+    )
+    log_print("", log_file)
+    log_print("Expected performance characteristics:", log_file)
+    log_print(
+        "- Single/double word divisors: Fast, linear in dividend size", log_file
+    )
+    log_print("- Multi-word divisors: Slower, quadratic complexity", log_file)
+    log_print(
+        "- Very large numbers: Memory effects become significant", log_file
+    )
+    log_print("", log_file)
+
+    # Analysis of large / small division results
+    log_print("=== LARGE / SMALL DIVISION ANALYSIS ===", log_file)
+    log_print("Performance characteristics observed:", log_file)
+    log_print("- Excellent O(n²) scaling in 2n / n test case", log_file)
+    log_print(
+        "- Ratios consistently around 4.0 show proper quadratic complexity",
+        log_file,
+    )
+    log_print("- Division algorithm is well-implemented", log_file)
+    log_print("", log_file)
+    log_print("4n / n vs 2n / n comparison:", log_file)
+    log_print(
+        "- Both test cases show similar quadratic scaling behavior", log_file
+    )
+    log_print("- 4n / n takes roughly twice as long as 2n / n", log_file)
+    log_print("- This confirms the O(n²) complexity is correct", log_file)
+    log_print("", log_file)
+
+    # Close log file
+    log_file.close()
+    print("Division benchmark completed. Log file closed.")
diff --git a/benches/biguint/bench_biguint_truncate_divide.mojo b/benches/biguint/bench_biguint_truncate_divide.mojo
index a0f20b9..39f7724 100644
--- a/benches/biguint/bench_biguint_truncate_divide.mojo
+++ b/benches/biguint/bench_biguint_truncate_divide.mojo
@@ -22,6 +22,8 @@ fn open_log_file() raises -> PythonObject:
     """
     var python = Python.import_module("builtins")
     var datetime = Python.import_module("datetime")
+    var pysys = Python.import_module("sys")
+    pysys.set_int_max_str_digits(1000000)
 
     # Create logs directory if it doesn't exist
     var log_dir = "./logs"
@@ -488,6 +490,26 @@ fn main() raises:
         speedup_factors,
     )
 
+    # Case 31: Division of large numbers
+    run_benchmark_truncate_divide(
+        "Division of large numbers (1000 words vs 100 digits)",
+        "316227766_016824890_583648059_893174009_579947593" * 1000,
+        "141421356_237309504_880168872_420969807_856967187" * 100,
+        3,
+        log_file,
+        speedup_factors,
+    )
+
+    # Case 32: Division of large numbers
+    run_benchmark_truncate_divide(
+        "Division of large numbers (10000 words vs 1234 digits)",
+        "316227766_016824890_583648059_893174009_579947593" * 10000,
+        "141421356_237309504_880168872_420969807_856967187" * 1234,
+        3,
+        log_file,
+        speedup_factors,
+    )
+
     # Calculate average speedup factor (ignoring any cases that might have failed)
     if len(speedup_factors) > 0:
         var sum_speedup: Float64 = 0.0
diff --git a/src/decimojo/biguint/arithmetics.mojo b/src/decimojo/biguint/arithmetics.mojo
index 1cf098a..b630825 100644
--- a/src/decimojo/biguint/arithmetics.mojo
+++ b/src/decimojo/biguint/arithmetics.mojo
@@ -36,24 +36,27 @@ from decimojo.rounding_mode import RoundingMode
 # add_inplace_by_1(x: BigUInt) -> None
 #
 # subtract(x1: BigUInt, x2: BigUInt) -> BigUInt
+# subtract_inplace(x1: BigUInt, x2: BigUInt) -> None
 #
 # multiply(x1: BigUInt, x2: BigUInt) -> BigUInt
+# multiply_slices(x: BigUInt, y: BigUInt, start_x: Int, end_x: Int, start_y: Int, end_y: Int) -> BigUInt
+# multiply_karatsuba(x: BigUInt, y: BigUInt, start_x: Int, end_x: Int, start_y: Int, end_y: Int, cutoff_number_of_words: Int) -> BigUInt
 # scale_up_by_power_of_10(x: BigUInt, n: Int) -> BigUInt
+# scale_up_by_power_of_billion(mut x: BigUInt, n: Int)
 #
 # floor_divide(x1: BigUInt, x2: BigUInt) -> BigUInt
-# floor_divide_partition(x1: BigUInt, x2: BigUInt) -> BigUInt
+# floor_divide_general(x1: BigUInt, x2: BigUInt) -> BigUInt
 # floor_divide_inplace_by_single_word(x1: BigUInt, x2: BigUInt) -> None
 # floor_divide_inplace_by_double_words(x1: BigUInt, x2: BigUInt) -> None
 # floor_divide_inplace_by_2(x: BigUInt) -> Nonet, x2: BigUInt) -> BigUInt
-# truncate_mod# truncate_divide(x1: BigUInt, x2: BigUInt) -> BigUInt
+# truncate_divide(x1: BigUInt, x2: BigUInt) -> BigUInt
 # floor_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt
 # ceil_divide(x1: BigUInt, x2: BigUInt) -> BigUIntulo(x1: BigUIn# floor_divide_general(x1: BigUInt, x2: BigUInt) -> BigUInt
 # ceil_modulo(x1: BigUInt, x2: BigUInt) -> BigUInt
 # divmod(x1: BigUInt, x2: BigUInt) -> Tuple[BigUInt, BigUInt]
 # scale_down_by_power_of_10(x: BigUInt, n: Int) -> BigUInt
 #
-# estimate_quotient(x1: BigUInt, x2: BigUInt, j: Int, m: Int) -> UInt64
-# shift_words_left(x: BigUInt, j: Int) -> BigUInt
+# floor_divide_estimate_quotient(x1: BigUInt, x2: BigUInt, j: Int, m: Int) -> UInt64
 # power_of_10(n: Int) -> BigUInt
 # ===----------------------------------------------------------------------=== #
 
@@ -298,16 +301,14 @@ fn subtract(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
         The result of subtracting x2 from x1.
     """
     # If the subtrahend is zero, return the minuend
-    if len(x2.words) == 1 and x2.words[0] == 0:
+    if x2.is_zero():
         return x1
 
     # We need to determine which number has the larger magnitude
     var comparison_result = x1.compare(x2)
-
     if comparison_result == 0:
         # |x1| = |x2|
         return BigUInt()  # Return zero
-
     if comparison_result < 0:
         raise Error("biguint.arithmetics.subtract(): Underflow due to x1 < x2")
 
@@ -338,6 +339,47 @@ fn subtract(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
     return result^
 
 
+fn subtract_inplace(mut x: BigUInt, y: BigUInt) raises -> None:
+    """Subtracts y from x in place."""
+
+    # If the subtrahend is zero, return the minuend
+    if y.is_zero():
+        return
+
+    # We need to determine which number has the larger magnitude
+    var comparison_result = x.compare(y)
+    if comparison_result == 0:
+        x.words.resize(unsafe_uninit_length=1)
+        x.words[0] = UInt32(0)  # Result is zero
+    elif comparison_result < 0:
+        raise Error(
+            "biguint.arithmetics.subtract_inplace(): Underflow due to x < y"
+        )
+
+    # Now it is safe to subtract the smaller number from the larger one
+    var borrow: Int32 = 0
+    var ith: Int = 0
+    var difference: Int32  # Int32 is sufficient for the difference
+
+    while ith < len(x.words):
+        # Subtract the borrow
+        difference = Int32(x.words[ith]) - borrow
+        # Subtract smaller's word if available
+        if ith < len(y.words):
+            difference -= Int32(y.words[ith])
+        # Handle borrowing if needed
+        if difference < Int32(0):
+            difference += Int32(1_000_000_000)
+            borrow = Int32(1)
+        else:
+            borrow = Int32(0)
+        x.words[ith] = UInt32(difference)
+        ith += 1
+
+    x.remove_leading_empty_words()
+    return
+
+
 # ===----------------------------------------------------------------------=== #
 # Multiplication algorithms
 # ===----------------------------------------------------------------------=== #
@@ -364,35 +406,43 @@ fn multiply(x: BigUInt, y: BigUInt) -> BigUInt:
     alias CUTOFF_KARATSUBA: Int = 64
     """The cutoff number of words for using Karatsuba multiplication."""
 
-    # SPECIAL CASE: One of the operands is zero or one
+    # SPECIAL CASES
+    # If x or y is a single-word number
+    # We can use `multiply_by_uint32` because this is only one loop
+    # No need to split the long number into two parts
     if len(x.words) == 1:
-        if x.words[0] == 0:
+        var x_word = x.words[0]
+        if x_word == 0:
             return BigUInt(UInt32(0))
-        if x.words[0] == 1:
+        elif x_word == 1:
             return y
+        else:
+            var result = y
+            multiply_by_uint32(result, x_word)
+            return result^
+
     if len(y.words) == 1:
-        if y.words[0] == 0:
+        var y_word = y.words[0]
+        if y_word == 0:
             return BigUInt(UInt32(0))
-        if y.words[0] == 1:
+        if y_word == 1:
             return x
+        else:
+            var result = x
+            multiply_by_uint32(result, y_word)
+            return result^
 
-    # CASE 1:
-    # If one number is only one-word long
-    # we can use school multiplication because this is only one loop
-    # No need to split the long number into two parts
-    if len(x.words) == 1 or len(y.words) == 1:
-        return multiply_slices(x, y, 0, len(x.words), 0, len(y.words))
-
-    # CASE 2:
+    # CASE 1
     # The allocation cost is too high for small numbers to use Karatsuba
     # Use school multiplication for small numbers
-
     var max_words = max(len(x.words), len(y.words))
     if max_words <= CUTOFF_KARATSUBA:
         # return multiply_slices (x, y)
         return multiply_slices(x, y, 0, len(x.words), 0, len(y.words))
         # multiply_slices can also takes in x, y, and indices
 
+    # CASE 2
+    # Use Karatsuba multiplication for larger numbers
     else:
         return multiply_karatsuba(
             x, y, 0, len(x.words), 0, len(y.words), CUTOFF_KARATSUBA
@@ -423,15 +473,25 @@ fn multiply_slices(
 
     # CASE: One of the operands is zero or one
     if n_words_x_slice == 1:
-        if x.words[start_x] == 0:
+        var x_word = x.words[start_x]
+        if x_word == 0:
             return BigUInt(UInt32(0))
-        if x.words[start_x] == 1:
+        elif x_word == 1:
             return BigUInt(words=y.words[start_y:end_y])
+        else:
+            var result = BigUInt(words=y.words[start_y:end_y])
+            multiply_by_uint32(result, x_word)
+            return result^
     if n_words_y_slice == 1:
-        if y.words[start_y] == 0:
+        var y_word = y.words[start_y]
+        if y_word == 0:
             return BigUInt(UInt32(0))
-        if y.words[start_y] == 1:
+        elif y_word == 1:
             return BigUInt(words=x.words[start_x:end_x])
+        else:
+            var result = BigUInt(words=x.words[start_x:end_x])
+            multiply_by_uint32(result, y_word)
+            return result^
 
     # The max number of words in the result is the sum of the words in the operands
     var max_result_len = n_words_x_slice + n_words_y_slice
@@ -645,6 +705,34 @@ fn multiply_karatsuba(
         return z2^
 
 
+fn multiply_by_uint32(mut x: BigUInt, y: UInt32):
+    """Multiplies a BigUInt by an UInt32 word in-place.
+
+    Args:
+        x: The BigUInt value to multiply.
+        y: The single word to multiply by.
+    """
+    if y == 0:
+        x.words = List[UInt32](0)
+        return
+
+    if y == 1:
+        return
+
+    var y_as_uint64 = UInt64(y)
+    var product: UInt64
+    var carry: UInt64 = 0
+
+    for i in range(len(x.words)):
+        product = UInt64(x.words[i]) * y_as_uint64 + carry
+        x.words[i] = UInt32(product % UInt64(1_000_000_000))
+        carry = product // UInt64(1_000_000_000)
+
+    if carry > 0:
+        x.words.append(UInt32(carry))
+
+
+# TODO: Make this in-place
 fn scale_up_by_power_of_10(x: BigUInt, n: Int) -> BigUInt:
     """Multiplies a BigUInt by 10^n if n > 0, otherwise doing nothing.
 
@@ -769,7 +857,7 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
 
         # SUB-CASE: Single word // single word
         if len(x1.words) == 1:
-            var result = BigUInt(UInt32(x1.words[0] // x2.words[0]))
+            var result = BigUInt(List[UInt32](x1.words[0] // x2.words[0]))
             return result^
 
         # SUB-CASE: Divisor is single word and is power of 2
@@ -781,7 +869,7 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
                 remainder >>= 1
             return result^
 
-        # SUB-CASE: Divisor is single word (<= 10 digits)
+        # SUB-CASE: Divisor is single word (<= 9 digits)
         else:
             var result = x1
             floor_divide_inplace_by_single_word(result, x2)
@@ -859,142 +947,147 @@ fn floor_divide(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
     return floor_divide_general(normalized_x1, normalized_x2)
 
 
-fn floor_divide_general(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
+fn floor_divide_general(dividend: BigUInt, divisor: BigUInt) raises -> BigUInt:
     """General division algorithm for BigInt numbers.
 
     Args:
-        x1: The dividend.
-        x2: The divisor.
+        dividend: The dividend.
+        divisor: The divisor.
 
     Returns:
-        The quotient of x1 // x2.
+        The quotient of dividend // divisor.
 
     Raises:
-        ValueError: If the divisor is zero.
+        Error: If the divisor is zero.
     """
 
-    if x2.is_zero():
-        raise Error("Error in `floor_divide_general`: Division by zero")
+    if divisor.is_zero():
+        raise Error(
+            "`biguint.arithmetics.floor_divide_general()`: Division by zero"
+        )
 
     # Initialize result and remainder
-    var result = BigUInt(List[UInt32](capacity=len(x1.words)))
-    var remainder = x1
-
-    # Calculate significant digits
-    var n = len(remainder.words)
-    var m = len(x2.words)
+    var result = BigUInt(List[UInt32](capacity=len(dividend.words)))
+    var remainder = dividend
 
     # Shift and initialize
-    var d = n - m
-    for _ in range(d + 1):
+    var n_words_diff = len(remainder.words) - len(divisor.words)
+    # The quotient will have at most n_words_diff + 1 words
+    for _ in range(n_words_diff + 1):
         result.words.append(0)
 
     # Main division loop
-    var j = d
-    while j >= 0:
+    var index_of_word = n_words_diff  # Start from the most significant word
+    var trial_product: BigUInt
+    while index_of_word >= 0:
         # OPTIMIZATION: Better quotient estimation
-        var q = estimate_quotient(remainder, x2, j, m)
+        var quotient = floor_divide_estimate_quotient(
+            remainder, divisor, index_of_word
+        )
 
         # Calculate trial product
-        var trial_product = x2 * BigUInt(UInt32(q))
-        var shifted_product = shift_words_left(trial_product, j)
+        trial_product = divisor
+        multiply_by_uint32(trial_product, UInt32(quotient))
+        scale_up_by_power_of_billion(trial_product, index_of_word)
+
+        # Should need at most 1-2 corrections after the estimation
+        var correction_attempts = 0
+        while (
+            (trial_product.compare(remainder) > 0)
+            and (quotient > 0)
+            and (correction_attempts < 3)
+        ):
+            quotient -= 1
+            correction_attempts += 1
+
+            trial_product = divisor
+            multiply_by_uint32(trial_product, UInt32(quotient))
+            scale_up_by_power_of_billion(trial_product, index_of_word)
+
+        # Store the quotient word
+        result.words[index_of_word] = UInt32(quotient)
+        subtract_inplace(remainder, trial_product)
+        index_of_word -= 1
 
-        # OPTIMIZATION: Binary search for adjustment
-        if shifted_product.compare(remainder) > 0:
-            var low: UInt64 = 0
-            var high: UInt64 = q - 1
+    result.remove_leading_empty_words()
+    return result^
 
-            while low <= high:
-                var mid = (low + high) / 2
 
-                # Recalculate with new q
-                trial_product = x2 * BigUInt(UInt32(mid))
-                shifted_product = shift_words_left(trial_product, j)
+fn floor_divide_estimate_quotient(
+    dividend: BigUInt, divisor: BigUInt, index_of_word: Int
+) -> UInt64:
+    """Estimates the quotient digit using 3-by-2 division.
 
-                if shifted_product.compare(remainder) <= 0:
-                    q = mid  # This works
-                    low = mid + 1
-                else:
-                    high = mid - 1
+    This function implements a 3-by-2 quotient estimation algorithm,
+    which divides a 3-word dividend portion by a 2-word divisor to get
+    an accurate quotient estimate.
 
-            # Final recalculation with best q
-            trial_product = x2 * BigUInt(UInt32(q))
-            shifted_product = shift_words_left(trial_product, j)
+    Args:
+        dividend: The dividend BigUInt number.
+        divisor: The divisor BigUInt number.
+        index_of_word: The current position in the division algorithm.
 
-        result.words[j] = UInt32(q)
-        remainder = subtract(remainder, shifted_product)
-        j -= 1
+    Returns:
+        An estimated quotient digit (0 to 999_999_999).
 
-    result.remove_leading_empty_words()
-    return result^
+    Notes:
 
+    The function performs division of a 3-word number by a 2-word number:
+    Dividend portion: R = r2 * 10^18 + r1 * 10^9 + r0.
+    Divisor: D = d1 * 10^9 + d0.
+    Goal: Estimate Q = R // D.
+    """
 
-fn floor_divide_partition(x1: BigUInt, x2: BigUInt) raises -> BigUInt:
-    """Partition division algorithm for BigInt numbers.
+    # Extract three highest words of relevant dividend portion
+    var r2 = UInt64(0)
+    if index_of_word + len(divisor.words) < len(dividend.words):
+        r2 = UInt64(dividend.words[index_of_word + len(divisor.words)])
 
-    Args:
-        x1: The dividend.
-        x2: The divisor.
+    var r1 = UInt64(0)
+    if index_of_word + len(divisor.words) - 1 < len(dividend.words):
+        r1 = UInt64(dividend.words[index_of_word + len(divisor.words) - 1])
 
-    Returns:
-        The quotient of x1 // x2.
+    var r0 = UInt64(0)
+    if index_of_word + len(divisor.words) - 2 < len(dividend.words):
+        r0 = UInt64(dividend.words[index_of_word + len(divisor.words) - 2])
 
-    Raises:
-        ValueError: If the divisor is zero.
+    # Extract two highest words of divisor
+    var d1 = UInt64(divisor.words[len(divisor.words) - 1])
+    var d0 = UInt64(0)
+    if len(divisor.words) >= 2:
+        d0 = UInt64(divisor.words[len(divisor.words) - 2])
 
-    Notes:
+    # Special case: if divisor is single word, fall back to 2-by-1 division
+    if len(divisor.words) == 1:
+        if r2 == d1:
+            return 999_999_999
+        return min((r2 * UInt64(1_000_000_000) + r1) // d1, UInt64(999_999_999))
 
-    If words of x1 is more than 2 times the words of x2, then partition x1 into
-    several parts and divide x2 sequentially using general division.
+    # Special case: if high word of dividend equals high word of divisor
+    # The quotient is likely to be large, so we use a conservative estimate
+    if r2 == d1:
+        return UInt64(999_999_999)
 
-    words of x1: m
-    words of x2: n
-    number of partitions: m // n
-    words of first partition: n + m % n
-    the remainder is appended to the next partition.
-    """
+    # 3-by-2 division using 128-bit arithmetic
+    # We need to compute: (r2 * 10^18 + r1 * 10^9 + r0) // (d1 * 10^9 + d0)
 
-    if x2.is_zero():
-        raise Error("Error in `floor_divide_partition`: Division by zero")
+    # Convert to 128-bit for high precision calculation
+    var dividend_high = UInt128(r2) * UInt128(1_000_000_000) + UInt128(r1)
+    var dividend_low = UInt128(r0)
+    var divisor_128 = UInt128(d1) * UInt128(1_000_000_000) + UInt128(d0)
 
-    # Initialize result and remainder
-    var number_of_partitions = len(x1.words) // len(x2.words)
-    var number_of_words_remainder = len(x1.words) % len(x2.words)
-    var number_of_words_dividend: Int
-    var result = x1
-    result.words.resize(len(x1.words) - number_of_words_remainder, UInt32(0))
-    var remainder = BigUInt(List[UInt32](capacity=len(x2.words)))
-    for i in range(len(x1.words) - number_of_words_remainder, len(x1.words)):
-        remainder.words.append(x1.words[i])
-
-    for ith in range(number_of_partitions):
-        number_of_words_dividend = len(x2.words) + number_of_words_remainder
-        var dividend_list_of_words = List[UInt32](
-            capacity=number_of_words_dividend
-        )
-        for i in range(len(x2.words)):
-            dividend_list_of_words.append(
-                x1.words[(number_of_partitions - ith - 1) * len(x2.words) + i]
-            )
-        for i in range(number_of_words_remainder):
-            dividend_list_of_words.append(remainder.words[i])
-
-        var dividend = BigUInt(dividend_list_of_words)
-        var quotient = floor_divide_general(dividend, x2)
-        for i in range(len(x2.words)):
-            if i < len(quotient.words):
-                result.words[
-                    (number_of_partitions - ith - 1) * len(x2.words) + i
-                ] = quotient.words[i]
-            else:
-                result.words[
-                    (number_of_partitions - ith - 1) * len(x2.words) + i
-                ] = UInt32(0)
-        remainder = subtract(dividend, multiply(quotient, x2))
-        number_of_words_remainder = len(remainder.words)
+    # Handle the case where we need to consider the full 3-word dividend
+    # We compute: (dividend_high * 10^9 + dividend_low) // divisor_128
+    var full_dividend = dividend_high * UInt128(1_000_000_000) + dividend_low
 
-    result.remove_leading_empty_words()
-    return result^
+    # Perform the division
+    var quotient_128 = full_dividend // divisor_128
+
+    # Convert back to UInt64
+    var quotient = UInt64(quotient_128)
+
+    # Ensure we don't exceed the maximum value for a single word
+    return min(quotient, UInt64(999_999_999))
 
 
 fn floor_divide_inplace_by_single_word(
@@ -1310,68 +1403,10 @@ fn scale_down_by_power_of_10(x: BigUInt, n: Int) raises -> BigUInt:
 
 # ===----------------------------------------------------------------------=== #
 # Division Helper Functions
-# estimate_quotient, shift_words_left, power_of_10
+# power_of_10
 # ===----------------------------------------------------------------------=== #
 
 
-fn estimate_quotient(
-    dividend: BigUInt, divisor: BigUInt, j: Int, m: Int
-) -> UInt64:
-    """Gets a better estimate of the quotient digit."""
-    # Get three highest words of relevant dividend portion
-    var r2 = UInt64(0)
-    if j + m < len(dividend.words):
-        r2 = UInt64(dividend.words[j + m])
-
-    var r1 = UInt64(0)
-    if j + m - 1 < len(dividend.words):
-        r1 = UInt64(dividend.words[j + m - 1])
-
-    var r0 = UInt64(0)
-    if j + m - 2 < len(dividend.words):
-        r0 = UInt64(dividend.words[j + m - 2])
-
-    # Get two highest words of divisor
-    var d1 = UInt64(divisor.words[m - 1])
-    var d0 = UInt64(0)
-    if m >= 2:
-        d0 = UInt64(divisor.words[m - 2])
-
-    # If three most significant digits match, quotient would be max
-    if r2 == d1:
-        return 999_999_999
-
-    # Two-word by one-word division
-    var qhat = (r2 * 1_000_000_000 + r1) // d1
-
-    # Adjust if estimate is too large (happens less often with better estimate)
-    while (qhat >= 1_000_000_000) or (
-        qhat * d0 > (r2 * 1_000_000_000 + r1 - qhat * d1) * 1_000_000_000 + r0
-    ):
-        qhat -= 1
-
-    return min(qhat, UInt64(999_999_999))
-
-
-fn shift_words_left(num: BigUInt, positions: Int) -> BigUInt:
-    """Shifts a BigUInt left by adding trailing zeros.
-    Equivalent to multiplying by 10^(9*positions)."""
-    if num.is_zero():
-        return BigUInt()
-
-    var result = BigUInt(List[UInt32](capacity=len(num.words) + positions))
-
-    # Add zeros for the shift
-    for _ in range(positions):
-        result.words.append(0)
-
-    # Add the original number's words
-    for i in range(len(num.words)):
-        result.words.append(num.words[i])
-
-    return result^
-
-
 fn power_of_10(n: Int) raises -> BigUInt:
     """Calculates 10^n efficiently."""
     if n < 0:
diff --git a/src/decimojo/biguint/biguint.mojo b/src/decimojo/biguint/biguint.mojo
index 172398c..1d33dc5 100644
--- a/src/decimojo/biguint/biguint.mojo
+++ b/src/decimojo/biguint/biguint.mojo
@@ -902,13 +902,16 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable):
     @always_inline
     fn __iadd__(mut self, other: Self):
         """Adds `other` to `self` in place.
-        See `add_inplace()` for more information.
+        See `biguint.arithmetics.add_inplace()` for more information.
         """
         decimojo.biguint.arithmetics.add_inplace(self, other)
 
     @always_inline
     fn __isub__(mut self, other: Self) raises:
-        self = decimojo.biguint.arithmetics.subtract(self, other)
+        """Subtracts `other` from `self` in place.
+        See `biguint.arithmetics.subtract_inplace()` for more information.
+        """
+        decimojo.biguint.arithmetics.subtract_inplace(self, other)
 
     @always_inline
     fn __imul__(mut self, other: Self) raises:
@@ -1155,18 +1158,27 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable):
         for word in self.words:
             if word != 0:
                 return False
-        else:
-            return True
+        return True
 
     @always_inline
     fn is_one(self) -> Bool:
         """Returns True if this BigUInt represents one."""
-        return len(self.words) == 1 and self.words[0] == 1
+        if self.words[0] != 1:
+            return False
+        for i in self.words[1:]:
+            if i != 0:
+                return False
+        return True
 
     @always_inline
     fn is_two(self) -> Bool:
         """Returns True if this BigUInt represents two."""
-        return len(self.words) == 1 and self.words[0] == 2
+        if len(self.words) != 2:
+            return False
+        for i in self.words[1:]:
+            if i != 0:
+                return False
+        return True
 
     @always_inline
     fn is_power_of_10(x: BigUInt) -> Bool:
@@ -1176,15 +1188,15 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable):
                 return False
         var word = x.words[len(x.words) - 1]
         if (
-            (word == 1)
-            or (word == 10)
-            or (word == 100)
-            or (word == 1000)
-            or (word == 10_000)
-            or (word == 100_000)
-            or (word == 1_000_000)
-            or (word == 10_000_000)
-            or (word == 100_000_000)
+            (word == UInt32(1))
+            or (word == UInt32(10))
+            or (word == UInt32(100))
+            or (word == UInt32(1000))
+            or (word == UInt32(10_000))
+            or (word == UInt32(100_000))
+            or (word == UInt32(1_000_000))
+            or (word == UInt32(10_000_000))
+            or (word == UInt32(100_000_000))
         ):
             return True
         return False
@@ -1393,10 +1405,3 @@ struct BigUInt(Absable, IntableRaising, Stringable, Writable):
                         1,
                     )
         return result^
-
-    @always_inline
-    fn shift_words_left(self, position: Int) -> Self:
-        """Shifts the words of the BigUInt to the left by `position` bits.
-        See `arithmetics.shift_words_left()` for more information.
-        """
-        return decimojo.biguint.arithmetics.shift_words_left(self, position)