@@ -1045,7 +1045,7 @@ def test_durable_execution_checkpoint_error_in_background_thread():
10451045 # Make the background checkpoint thread fail immediately
10461046 def failing_checkpoint (* args , ** kwargs ):
10471047 msg = "Background checkpoint failed"
1048- raise CheckpointError (msg )
1048+ raise CheckpointError (msg , error_kind = "Execution" )
10491049
10501050 @durable_execution
10511051 def test_handler (event : Any , context : DurableContext ) -> dict :
@@ -1088,7 +1088,7 @@ def test_handler(event: Any, context: DurableContext) -> dict:
10881088# endregion durable_execution
10891089
10901090
1091- def test_durable_execution_checkpoint_error_stops_background ():
1091+ def test_durable_execution_checkpoint_execution_error_stops_background ():
10921092 """Test that CheckpointError handler stops background checkpointing.
10931093
10941094 When user code raises CheckpointError, the handler should stop the background
@@ -1100,7 +1100,7 @@ def test_durable_execution_checkpoint_error_stops_background():
11001100 def test_handler (event : Any , context : DurableContext ) -> dict :
11011101 # Directly raise CheckpointError to simulate checkpoint failure
11021102 msg = "Checkpoint system failed"
1103- raise CheckpointError (msg )
1103+ raise CheckpointError (msg , "Execution" )
11041104
11051105 operation = Operation (
11061106 operation_id = "exec1" ,
@@ -1140,6 +1140,148 @@ def slow_background():
11401140 test_handler (invocation_input , lambda_context )
11411141
11421142
def test_durable_execution_checkpoint_invocation_error_stops_background():
    """Test that an Invocation-kind CheckpointError from user code returns FAILED.

    When user code raises ``CheckpointError`` with ``error_kind="Invocation"``,
    the decorated handler is expected to return a FAILED response whose error
    type is ``CheckpointError`` instead of letting the exception propagate.
    The background checkpoint loop is patched to sleep so that it is still
    running when the user code fails.
    """
    mock_client = Mock(spec=DurableServiceClient)

    @durable_execution
    def test_handler(event: Any, context: DurableContext) -> dict:
        # Directly raise CheckpointError to simulate checkpoint failure.
        # error_kind is passed by keyword, matching the sibling tests.
        msg = "Checkpoint system failed"
        raise CheckpointError(msg, error_kind="Invocation")

    operation = Operation(
        operation_id="exec1",
        operation_type=OperationType.EXECUTION,
        status=OperationStatus.STARTED,
        execution_details=ExecutionDetails(input_payload="{}"),
    )

    initial_state = InitialExecutionState(operations=[operation], next_marker="")

    invocation_input = DurableExecutionInvocationInputWithClient(
        durable_execution_arn="arn:test:execution",
        checkpoint_token="token123",  # noqa: S106
        initial_execution_state=initial_state,
        is_local_runner=False,
        service_client=mock_client,
    )

    lambda_context = Mock()
    lambda_context.aws_request_id = "test-request"
    lambda_context.client_context = None
    lambda_context.identity = None
    lambda_context._epoch_deadline_time_in_ms = 1000000  # noqa: SLF001
    lambda_context.invoked_function_arn = None
    lambda_context.tenant_id = None

    # Make background thread sleep so user code completes first
    def slow_background():
        time.sleep(1)

    # Mock checkpoint_batches_forever to sleep (simulates background thread running)
    with patch(
        "aws_durable_execution_sdk_python.state.ExecutionState.checkpoint_batches_forever",
        side_effect=slow_background,
    ):
        response = test_handler(invocation_input, lambda_context)

    # An Invocation-kind error is reported in the response, not raised.
    assert response["Status"] == InvocationStatus.FAILED.value
    assert response["Error"]["ErrorType"] == "CheckpointError"
1194+
1195+
def test_durable_execution_background_thread_execution_error_retries():
    """Execution-kind CheckpointError from the checkpoint call is re-raised.

    The service client's ``checkpoint`` method is made to fail with an
    ``error_kind="Execution"`` error, and the decorated handler is expected
    to let that ``CheckpointError`` propagate to the caller.
    """

    def raise_execution_error(*args, **kwargs):
        msg = "Background checkpoint failed"
        raise CheckpointError(msg, error_kind="Execution")

    # Every checkpoint attempt made through the client fails.
    service_client = Mock(spec=DurableServiceClient)
    service_client.checkpoint.side_effect = raise_execution_error

    @durable_execution
    def test_handler(event: Any, context: DurableContext) -> dict:
        context.step(lambda ctx: "step_result")
        return {"result": "success"}

    execution_op = Operation(
        operation_id="exec1",
        operation_type=OperationType.EXECUTION,
        status=OperationStatus.STARTED,
        execution_details=ExecutionDetails(input_payload="{}"),
    )

    invocation_input = DurableExecutionInvocationInputWithClient(
        durable_execution_arn="arn:test:execution",
        checkpoint_token="token123",  # noqa: S106
        initial_execution_state=InitialExecutionState(
            operations=[execution_op], next_marker=""
        ),
        is_local_runner=False,
        service_client=service_client,
    )

    # Stand-in Lambda context; attributes are configured via Mock kwargs.
    lambda_context = Mock(
        aws_request_id="test-request",
        client_context=None,
        identity=None,
        _epoch_deadline_time_in_ms=1000000,
        invoked_function_arn=None,
        tenant_id=None,
    )

    with pytest.raises(CheckpointError, match="Background checkpoint failed"):
        test_handler(invocation_input, lambda_context)
1238+
1239+
def test_durable_execution_background_thread_invocation_error_returns_failed():
    """Invocation-kind CheckpointError from the checkpoint call yields FAILED.

    The service client's ``checkpoint`` method is made to fail with an
    ``error_kind="Invocation"`` error, and the decorated handler is expected
    to return a FAILED response whose error type is ``CheckpointError``.
    """

    def raise_invocation_error(*args, **kwargs):
        msg = "Background checkpoint failed"
        raise CheckpointError(msg, error_kind="Invocation")

    # Every checkpoint attempt made through the client fails.
    service_client = Mock(spec=DurableServiceClient)
    service_client.checkpoint.side_effect = raise_invocation_error

    @durable_execution
    def test_handler(event: Any, context: DurableContext) -> dict:
        context.step(lambda ctx: "step_result")
        return {"result": "success"}

    execution_op = Operation(
        operation_id="exec1",
        operation_type=OperationType.EXECUTION,
        status=OperationStatus.STARTED,
        execution_details=ExecutionDetails(input_payload="{}"),
    )

    invocation_input = DurableExecutionInvocationInputWithClient(
        durable_execution_arn="arn:test:execution",
        checkpoint_token="token123",  # noqa: S106
        initial_execution_state=InitialExecutionState(
            operations=[execution_op], next_marker=""
        ),
        is_local_runner=False,
        service_client=service_client,
    )

    # Stand-in Lambda context; attributes are configured via Mock kwargs.
    lambda_context = Mock(
        aws_request_id="test-request",
        client_context=None,
        identity=None,
        _epoch_deadline_time_in_ms=1000000,
        invoked_function_arn=None,
        tenant_id=None,
    )

    result = test_handler(invocation_input, lambda_context)

    assert result["Status"] == InvocationStatus.FAILED.value
    assert result["Error"]["ErrorType"] == "CheckpointError"
1283+
1284+
11431285def test_durable_handler_background_thread_failure_on_succeed_checkpoint ():
11441286 """Test durable_handler handles background thread failure on SUCCEED checkpoint.
11451287
@@ -1468,6 +1610,7 @@ def test_durable_execution_logs_checkpoint_error_extras_from_background_thread()
14681610 def failing_checkpoint (* args , ** kwargs ):
14691611 raise CheckpointError ( # noqa TRY003
14701612 "Checkpoint failed" , # noqa EM101
1613+ error_kind = "Execution" ,
14711614 error = error_obj ,
14721615 response_metadata = metadata_obj , # EM101
14731616 )
@@ -1589,6 +1732,7 @@ def test_durable_execution_logs_checkpoint_error_extras_from_user_code():
15891732 def test_handler (event : Any , context : DurableContext ) -> dict :
15901733 raise CheckpointError ( # noqa TRY003
15911734 "User checkpoint error" , # noqa EM101
1735+ error_kind = "Execution" ,
15921736 error = error_obj ,
15931737 response_metadata = metadata_obj , # EM101
15941738 )
0 commit comments