@@ -81,7 +81,7 @@ func (l *L2OutputSubmitter) ProcessWitnessgenRequests() error {
81
81
82
82
// Retry a proof request. Sets the status of a proof to FAILED and retries the proof based on the optional proof status response.
83
83
// If an error response is received:
84
- // - Range Proof: Split in two if the block range is > 1. Retry the same request if range is 1 block.
84
+ // - Range Proof: Split in two if the block range is > 1 AND (the proof is unexecutable OR a request for the same block range has failed before). Retry the same request if range is 1 block.
85
85
// - Agg Proof: Retry the same request.
86
86
func (l * L2OutputSubmitter ) RetryRequest (req * ent.ProofRequest , status ProofStatusResponse ) error {
87
87
err := l .db .UpdateProofStatus (req .ID , proofrequest .StatusFAILED )
@@ -90,11 +90,30 @@ func (l *L2OutputSubmitter) RetryRequest(req *ent.ProofRequest, status ProofStat
90
90
return err
91
91
}
92
92
93
- // If there's an execution error AND the request is a SPAN proof AND the block range is > 1, split the request into two requests.
94
- // This is likely caused by an SP1 OOM due to a large block range with many transactions.
95
- // TODO: This solution can be removed once the embedded allocator is used, because then the programs
96
- // will never OOM.
97
- if req .Type == proofrequest .TypeSPAN && status .ExecutionStatus == SP1ExecutionStatusUnexecutable && req .EndBlock - req .StartBlock > 1 {
93
+ unexecutable := status .ExecutionStatus == SP1ExecutionStatusUnexecutable
94
+ spanProof := req .Type == proofrequest .TypeSPAN
95
+ multiBlockRange := req .EndBlock - req .StartBlock > 1
96
+
97
+ // Get the failed requests with the same type and block range.
98
+ prevFailedReq , err := l .db .GetProofRequestsWithBlockRangeAndStatus (req .Type , req .StartBlock , req .EndBlock , proofrequest .StatusFAILED )
99
+ if err != nil {
100
+ l .Log .Error ("failed to check for previous failures" , "err" , err )
101
+ return err
102
+ }
103
+
104
+ // Check if there is another proof (besides the one marked as failed above) with the same block range that also failed.
105
+ severalFailedRequests := len (prevFailedReq ) > 1
106
+
107
+ // If (there's an execution error OR there are several failed requests) AND the request is a SPAN proof AND the block range is > 1,
108
+ // split the request into two requests.
109
+ //
110
+ // If the embedded allocator is enabled, the proof will never be unexecutable. Instead, the issue is because there's a limit on the number
111
+ // of shards in V4. This will be fixed in V5 when the cycle limit is removed.
112
+ //
113
+ // If the embedded allocator is not enabled, the trigger for unexecutable is the SP1 OOM.
114
+ //
115
+ // The reason why we only split with multiple failed requests is to avoid transient errors causing unnecessary splits.
116
+ if spanProof && (unexecutable || severalFailedRequests ) && multiBlockRange {
98
117
// Split the request into two requests.
99
118
midBlock := (req .StartBlock + req .EndBlock ) / 2
100
119
err = l .db .NewEntry (req .Type , req .StartBlock , midBlock )
0 commit comments