aws · Oct 22, 2025 · Oct 20, 2025 · Oct 21, 2025
diff --git a/src/aws_durable_execution_sdk_python/concurrency.py b/src/aws_durable_execution_sdk_python/concurrency.py
@@ -7,6 +7,7 @@
 import threading
 import time
 from abc import ABC, abstractmethod
+from collections import Counter
 from concurrent.futures import Future, ThreadPoolExecutor
 from dataclasses import dataclass
 from enum import Enum
@@ -98,16 +99,69 @@ class BatchResult(Generic[R], BatchResultProtocol[R]):  # noqa: PYI059
     completion_reason: CompletionReason
 
     @classmethod
-    def from_dict(cls, data: dict) -> BatchResult[R]:
+    def from_dict(
+        cls, data: dict, completion_config: CompletionConfig | None = None
+    ) -> BatchResult[R]:
+        """Deserialize data; a missing completionReason is inferred from the items."""
         batch_items: list[BatchItem[R]] = [
             BatchItem.from_dict(item) for item in data["all"]
         ]
-        # TODO: is this valid? assuming completion reason is ALL_COMPLETED?
-        completion_reason = CompletionReason(
-            data.get("completionReason", "ALL_COMPLETED")
-        )
+
+        completion_reason_value = data.get("completionReason")
+        if completion_reason_value is None:
+            # No stored reason: infer it from the batch item statuses and completion
+            # config, as the TypeScript implementation does when replaying.
+            result = cls.from_items(batch_items, completion_config)
+            logger.warning(
+                "Missing completionReason in BatchResult deserialization, "
+                "inferred '%s' from batch item statuses. "
+                "This may indicate incomplete serialization data.",
+                result.completion_reason.value,
+            )
+            return result
+
+        completion_reason = CompletionReason(completion_reason_value)
         return cls(batch_items, completion_reason)
 
+    @classmethod
+    def from_items(
+        cls,
+        items: list[BatchItem[R]],
+        completion_config: CompletionConfig | None = None,
+    ) -> BatchResult[R]:
+        """
+        Build a BatchResult from items, inferring its completion reason.
+
+        The reason is inferred from the item statuses and, when a completion
+        config is given, its min_successful threshold (same rules as the
+        TypeScript implementation):
+        - No item is still STARTED: ALL_COMPLETED
+        - Some items are STARTED and the SUCCEEDED count has reached
+          min_successful: MIN_SUCCESSFUL_REACHED
+        - Otherwise: FAILURE_TOLERANCE_EXCEEDED
+
+        An empty items list has no STARTED item, so it yields ALL_COMPLETED.
+
+        Returns:
+            A BatchResult holding items and the inferred completion reason.
+        """
+        # Counter yields 0 for a status that never occurs, so no defaults needed.
+        counts = Counter(item.status for item in items)
+
+        if counts[BatchItemStatus.STARTED] == 0:
+            completion_reason = CompletionReason.ALL_COMPLETED
+        elif (
+            completion_config
+            and (min_successful := completion_config.min_successful) is not None
+            and counts[BatchItemStatus.SUCCEEDED] >= min_successful
+        ):
+            completion_reason = CompletionReason.MIN_SUCCESSFUL_REACHED
+        else:
+            # Items are still STARTED and no min_successful threshold was met.
+            completion_reason = CompletionReason.FAILURE_TOLERANCE_EXCEEDED
+
+        return cls(items, completion_reason)
+
     def to_dict(self) -> dict:
         return {
             "all": [item.to_dict() for item in self.all],
@@ -163,19 +217,15 @@ def get_errors(self) -> list[ErrorObject]:
 
     @property
     def success_count(self) -> int:
-        return len(
-            [item for item in self.all if item.status is BatchItemStatus.SUCCEEDED]
-        )
+        return sum(item.status is BatchItemStatus.SUCCEEDED for item in self.all)
 
     @property
     def failure_count(self) -> int:
-        return len([item for item in self.all if item.status is BatchItemStatus.FAILED])
+        return sum(item.status is BatchItemStatus.FAILED for item in self.all)
 
     @property
     def started_count(self) -> int:
-        return len(
-            [item for item in self.all if item.status is BatchItemStatus.STARTED]
-        )
+        return sum(item.status is BatchItemStatus.STARTED for item in self.all)
 
     @property
     def total_count(self) -> int:
@@ -336,25 +386,63 @@ def fail_task(self) -> None:
         with self._lock:
             self.failure_count += 1
 
-    def should_complete(self) -> bool:
-        """Check if execution should complete."""
+    def should_continue(self) -> bool:
+        """
+        Return True while the failure tolerance still allows starting new tasks.
+        Matches TypeScript shouldContinue() logic.
+        """
         with self._lock:
-            # Success condition
-            if self.success_count >= self.min_successful:
-                return True
+            # No failure tolerance configured: any failure stops new tasks
+            if (
+                self.tolerated_failure_count is None
+                and self.tolerated_failure_percentage is None
+            ):
+                return self.failure_count == 0
 
-            # Failure conditions
-            if self._is_failure_condition_reached(
-                tolerated_count=self.tolerated_failure_count,
-                tolerated_percentage=self.tolerated_failure_percentage,
-                failure_count=self.failure_count,
+            # Count tolerance: stop once failures exceed the tolerated count
+            if (
+                self.tolerated_failure_count is not None
+                and self.failure_count > self.tolerated_failure_count
             ):
-                return True
+                return False
 
-            # Impossible to succeed condition
-            # TODO: should this keep running? TS doesn't currently handle this either.
-            remaining_tasks = self.total_tasks - self.success_count - self.failure_count
-            return self.success_count + remaining_tasks < self.min_successful
+            # Percentage tolerance, relative to total_tasks (skipped when that is 0)
+            if self.tolerated_failure_percentage is not None and self.total_tasks > 0:
+                failure_percentage = (self.failure_count / self.total_tasks) * 100
+                if failure_percentage > self.tolerated_failure_percentage:
+                    return False
+
+            return True
+
+    def is_complete(self) -> bool:
+        """
+        Check whether the completion criteria are met.
+        Matches TypeScript isComplete() logic.
+        """
+        with self._lock:
+            # Reaching min_successful completes execution, finished or not.
+            if self.success_count >= self.min_successful:
+                return True
+
+            # Below the threshold, unfinished tasks mean we are not complete.
+            if self.success_count + self.failure_count != self.total_tasks:
+                return False
+
+            # All tasks finished below the threshold: complete only when no failure
+            # tolerance is configured or nothing failed. NOTE(review): otherwise this
+            # is False with no task left to run; confirm callers still terminate.
+            no_tolerance_configured = (
+                self.tolerated_failure_count is None
+                and self.tolerated_failure_percentage is None
+            )
+            return no_tolerance_configured or self.failure_count == 0
+
+    def should_complete(self) -> bool:
+        """
+        Return True when is_complete() holds or should_continue() is False.
+        Each helper takes self._lock on its own, so the pair is not one locked read.
+        """
+        return self.is_complete() or not self.should_continue()
 
     def is_all_completed(self) -> bool:
         """True if all tasks completed successfully."""
@@ -640,40 +728,46 @@ def _on_task_complete(
                 self._completion_event.set()
 
     def _create_result(self) -> BatchResult[ResultType]:
-        """Build the final BatchResult."""
-        batch_items: list[BatchItem[ResultType]] = []
-        completed_branches: list[ExecutableWithState] = []
-        failed_branches: list[ExecutableWithState] = []
+        """
+        Build the final BatchResult.
 
+        By the time this runs, the parent context has terminated for whatever reason, so
+        every branch is in one of three states: completed, failed, or started (pending,
+        running, or suspended). Each branch is tagged with the status observed at that
+        point; a status matched by no case below produces no item.
+
+        Completion-reason inference is left to BatchResult.from_items, keeping it in one place.
+        """
+        batch_items: list[BatchItem[ResultType]] = []
         for executable in self.executables_with_state:
-            if executable.status is BranchStatus.COMPLETED:
-                completed_branches.append(executable)
-                batch_items.append(
-                    BatchItem(
-                        executable.index, BatchItemStatus.SUCCEEDED, executable.result
+            match executable.status:
+                case BranchStatus.COMPLETED:
+                    batch_items.append(
+                        BatchItem(
+                            executable.index,
+                            BatchItemStatus.SUCCEEDED,
+                            executable.result,
+                        )
                     )
-                )
-            elif executable.status is BranchStatus.FAILED:
-                failed_branches.append(executable)
-                batch_items.append(
-                    BatchItem(
-                        executable.index,
-                        BatchItemStatus.FAILED,
-                        error=ErrorObject.from_exception(executable.error),
+                case BranchStatus.FAILED:
+                    batch_items.append(
+                        BatchItem(
+                            executable.index,
+                            BatchItemStatus.FAILED,
+                            error=ErrorObject.from_exception(executable.error),
+                        )
+                    )
+                case (
+                    BranchStatus.PENDING
+                    | BranchStatus.RUNNING
+                    | BranchStatus.SUSPENDED
+                    | BranchStatus.SUSPENDED_WITH_TIMEOUT
+                ):
+                    batch_items.append(
+                        BatchItem(executable.index, BatchItemStatus.STARTED)
                     )
-                )
-
-        completion_reason: CompletionReason = (
-            CompletionReason.ALL_COMPLETED
-            if self.counters.is_all_completed()
-            else (
-                CompletionReason.MIN_SUCCESSFUL_REACHED
-                if self.counters.is_min_successful_reached()
-                else CompletionReason.FAILURE_TOLERANCE_EXCEEDED
-            )
-        )
 
-        return BatchResult(batch_items, completion_reason)
+        return BatchResult.from_items(batch_items, self.completion_config)
 
     def _execute_item_in_child_context(
         self,