diff --git a/resources_servers/math_with_code/app.py b/resources_servers/math_with_code/app.py
index 02e9b7dc4..c0f8b2245 100644
--- a/resources_servers/math_with_code/app.py
+++ b/resources_servers/math_with_code/app.py
@@ -206,39 +206,48 @@ async def execute_python(self, request: Request, body: ExecutePythonRequest) ->
             )
 
     async def end_session(self, request: Request) -> ExecutePythonResponse:
-        sid = request.session[SESSION_ID_KEY]
-        if sid in self._sessions:
-            self._sessions[sid].close()
-            del self._sessions[sid]
+        # Tear down whatever this caller's session holds, then acknowledge.
+        self._cleanup_session(request.session[SESSION_ID_KEY])
         return ExecutePythonResponse(success=True, stdout="", stderr="")
 
-    async def verify(self, body: PythonMathVerifyRequest) -> PythonMathVerifyResponse:
-        expected = body.expected_result
-
-        # Extract actual answer from final assistant message
-        actual = None
-        for output in reversed(body.response.output):
-            if output.type == "message" and output.role == "assistant":
-                text_content = ""
-                for content in output.content:
-                    if content.type == "output_text":
-                        text_content += content.text
-
-                # Extract boxed answer
-                match = re.search(r"\\boxed\{([^}]+)\}", text_content)
-                if match:
-                    actual = match.group(1).strip()
-                    break
-
-        accuracy = str(actual) == str(expected)
-        reward = 1.0 if accuracy else 0.0
-
-        return PythonMathVerifyResponse(
-            **body.model_dump(),
-            reward=reward,
-            extracted_answer=actual,
-            accuracy=accuracy,
-        )
+    async def verify(self, request: Request, body: PythonMathVerifyRequest) -> PythonMathVerifyResponse:
+        """Grade the final boxed answer and always clean up the caller's session afterwards."""
+        session_id = request.session[SESSION_ID_KEY]
+
+        try:
+            expected = body.expected_result
+            extracted_answer = None
+            # Newest output first: the first assistant message with a boxed answer wins.
+            # NOTE: the pattern stops at the first "}", so nested braces are cut short.
+            for item in reversed(body.response.output):
+                if not (item.type == "message" and item.role == "assistant"):
+                    continue
+                message_text = "".join(
+                    part.text for part in item.content if part.type == "output_text"
+                )
+                boxed = re.search(r"\\boxed\{([^}]+)\}", message_text)
+                if boxed is not None:
+                    extracted_answer = boxed.group(1).strip()
+                    break
+
+            # Both sides are stringified, so grading is exact text equality.
+            is_correct = str(extracted_answer) == str(expected)
+            return PythonMathVerifyResponse(
+                **body.model_dump(),
+                reward=1.0 if is_correct else 0.0,
+                extracted_answer=extracted_answer,
+                accuracy=is_correct,
+            )
+        finally:
+            # Runs on success and on error alike, so the session is cleaned
+            # up no matter how grading ends.
+            self._cleanup_session(session_id)
+
+    def _cleanup_session(self, session_id: str) -> None:
+        """Close and forget the session's subprocess; unknown ids are a no-op."""
+        session = self._sessions.pop(session_id, None)  # pop first so a failed close() leaves no stale entry
+        if session is not None:
+            session.close()
 
 
 def _get_last_expr_value(code: str, globals_dict: dict, locals_dict: dict):