11from __future__ import annotations
22
3+ import contextlib
34import json
45import logging
56from concurrent .futures import ThreadPoolExecutor
67from dataclasses import dataclass
78from enum import Enum
8- from typing import TYPE_CHECKING , Any
9+ from typing import TYPE_CHECKING , Any , Tuple
910
1011from aws_durable_execution_sdk_python .context import DurableContext , ExecutionState
1112from aws_durable_execution_sdk_python .exceptions import (
@@ -191,6 +192,7 @@ def create_succeeded(cls, result: str) -> DurableExecutionInvocationOutput:
191192# endregion Invocation models
192193
193194
195+
194196def durable_execution (
195197 func : Callable [[Any , DurableContext ], Any ],
196198) -> Callable [[Any , LambdaContext ], Any ]:
@@ -250,9 +252,12 @@ def wrapper(event: Any, context: LambdaContext) -> MutableMapping[str, Any]:
250252 )
251253
252254 # Use ThreadPoolExecutor for concurrent execution of user code and background checkpoint processing
253- with ThreadPoolExecutor (
254- max_workers = 2 , thread_name_prefix = "dex-handler"
255- ) as executor :
255+ with (
256+ ThreadPoolExecutor (
257+ max_workers = 2 , thread_name_prefix = "dex-handler"
258+ ) as executor ,
259+ contextlib .closing (execution_state ) as execution_state ,
260+ ):
256261 # Thread 1: Run background checkpoint processing
257262 executor .submit (execution_state .checkpoint_batches_forever )
258263
@@ -296,18 +301,12 @@ def wrapper(event: Any, context: LambdaContext) -> MutableMapping[str, Any]:
296301 # Must ensure the result is persisted before returning to Lambda.
297302 # Large results exceed Lambda response limits and must be stored durably
298303 # before the execution completes.
299- execution_state .create_checkpoint_sync (success_operation )
300-
301- # Stop background checkpointing thread
302- execution_state .stop_checkpointing ()
304+ execution_state .create_checkpoint (success_operation , is_sync = True )
303305
304306 return DurableExecutionInvocationOutput .create_succeeded (
305307 result = ""
306308 ).to_dict ()
307309
308- # Stop background checkpointing thread
309- execution_state .stop_checkpointing ()
310-
311310 return DurableExecutionInvocationOutput .create_succeeded (
312311 result = serialized_result
313312 ).to_dict ()
@@ -322,33 +321,28 @@ def wrapper(event: Any, context: LambdaContext) -> MutableMapping[str, Any]:
322321 )
323322 else :
324323 logger .exception ("Checkpoint processing failed" )
325- execution_state .stop_checkpointing ()
326324 # Raise the original exception
327325 raise bg_error .source_exception from bg_error
328326
329327 except SuspendExecution :
329331 # User code suspended - report PENDING; execution_state cleanup is left to the enclosing `with` block
331329 logger .debug ("Suspending execution..." )
332- execution_state .stop_checkpointing ()
333330 return DurableExecutionInvocationOutput (
334331 status = InvocationStatus .PENDING
335332 ).to_dict ()
336333
337334 except CheckpointError as e :
337334 # Checkpoint system is broken - log and re-raise so the invocation terminates immediately
339- execution_state .stop_checkpointing ()
340336 logger .exception (
341337 "Checkpoint system failed" ,
342338 extra = e .build_logger_extras (),
343339 )
344340 raise # Terminate Lambda immediately
345341 except InvocationError :
346- execution_state .stop_checkpointing ()
347342 logger .exception ("Invocation error. Must terminate." )
348343 # Throw the error to trigger Lambda retry
349344 raise
350345 except ExecutionError as e :
351- execution_state .stop_checkpointing ()
352346 logger .exception ("Execution error. Must terminate without retry." )
353347 return DurableExecutionInvocationOutput (
354348 status = InvocationStatus .FAILED ,
@@ -357,15 +351,37 @@ def wrapper(event: Any, context: LambdaContext) -> MutableMapping[str, Any]:
357351 except Exception as e :
358352 # all user-space errors go here
359353 logger .exception ("Execution failed" )
360- failed_operation = OperationUpdate .create_execution_fail (
361- error = ErrorObject .from_exception (e )
362- )
363- # TODO: can optimize, if not too large can just return response rather than checkpoint
364- execution_state .create_checkpoint_sync (failed_operation )
365354
366- execution_state . stop_checkpointing ()
367- return DurableExecutionInvocationOutput (
368- status = InvocationStatus . FAILED
355+ result = DurableExecutionInvocationOutput (
356+ status = InvocationStatus . FAILED ,
357+ error = ErrorObject . from_exception ( e )
369358 ).to_dict ()
370359
360+ serialized_result = json .dumps (result )
361+
362+ if (
363+ serialized_result
364+ and len (serialized_result ) > LAMBDA_RESPONSE_SIZE_LIMIT
365+ ):
366+ logger .debug (
367+ "Response size (%s bytes) exceeds Lambda limit (%s) bytes). Checkpointing result." ,
368+ len (serialized_result ),
369+ LAMBDA_RESPONSE_SIZE_LIMIT ,
370+ )
371+ failed_operation = OperationUpdate .create_execution_fail (
372+ error = ErrorObject .from_exception (e )
373+ )
374+
375+ # Checkpoint the large failure result synchronously (blocking call).
376+ # Must ensure the result is persisted before returning to Lambda.
377+ # Large results exceed Lambda response limits and must be stored durably
378+ # before the execution completes.
379+ execution_state .create_checkpoint_sync (failed_operation )
380+
381+ return DurableExecutionInvocationOutput (
382+ status = InvocationStatus .FAILED
383+ ).to_dict ()
384+
385+ return result
386+
371387 return wrapper
0 commit comments