Commit 71f6d22

expose evaluation
1 parent 8a01be8 commit 71f6d22

File tree

3 files changed: +79 −8 lines changed

server/fastapi_server.py

Lines changed: 59 additions & 0 deletions
@@ -44,6 +44,8 @@
     create_investor_agent_toolkit,
     create_analytics_agent_toolkit,
 )
+from subnet.subnet_methods import subnet_evaluation
+from subnet.api_types import QuantQuery, QuantResponse
 from langchain_openai import ChatOpenAI
 from server.invitecode import InviteCodeManager
 from server.activity_tracker import ActivityTracker
@@ -390,6 +392,63 @@ async def get_activity_stats(
         )
         raise HTTPException(status_code=500, detail="Internal server error")
 
+@app.post("/api/subnet/evaluate")
+async def evaluate_subnet_response(
+    request: Request,
+    user: FirebaseIDTokenData = Depends(get_current_user),
+):
+    """
+    Evaluate a subnet miner response using the subnet evaluation model.
+
+    Expected request body:
+    {
+        "quant_query": {
+            "query": "string",
+            "userID": "string",
+            "metadata": {}
+        },
+        "quant_response": {
+            "response": "string",
+            "signature": "bytes",
+            "proofs": [],
+            "metadata": {}
+        }
+    }
+
+    Returns:
+    {
+        "score": float  // Score between 0 and 1
+    }
+    """
+    try:
+        request_data = await request.json()
+
+        # Validate required fields
+        if "quant_query" not in request_data or "quant_response" not in request_data:
+            raise HTTPException(
+                status_code=400,
+                detail="Both quant_query and quant_response are required"
+            )
+
+        # Parse the request data into QuantQuery and QuantResponse objects
+        quant_query = QuantQuery(**request_data["quant_query"])
+        quant_response = QuantResponse(**request_data["quant_response"])
+
+        # Call the subnet evaluation function
+        score = await asyncio.to_thread(
+            subnet_evaluation, quant_query, quant_response
+        )
+
+        return {"score": score}
+
+    except ValidationError as e:
+        logging.error(f"Validation error in subnet evaluation: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        logging.error(f"Error in subnet evaluation: {e}")
+        logging.error(f"Traceback: {traceback.format_exc()}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
 # @app.post("/api/sentient/assist")
 async def sentient_assist(
     request: Request,
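
For reference, here is how a client might call the new endpoint. This is a hypothetical sketch: the host/port and the bearer-token Authorization header are assumptions inferred from the `get_current_user` dependency, and the wire encoding of the `signature` bytes is not shown in the diff, so an empty string stands in for it.

```python
# Hypothetical client call for POST /api/subnet/evaluate.
# The base URL and auth header are assumptions, not confirmed by this diff;
# the payload shape follows the endpoint's docstring.
import requests

payload = {
    "quant_query": {
        "query": "Summarize the miner's market analysis.",
        "userID": "user-123",
        "metadata": {},
    },
    "quant_response": {
        "response": "The analysis covers revenue drivers and risk factors...",
        "signature": "",  # wire encoding of the signature bytes is not shown in the diff
        "proofs": [],
        "metadata": {},
    },
}

resp = requests.post(
    "http://localhost:8000/api/subnet/evaluate",
    json=payload,
    headers={"Authorization": "Bearer <firebase-id-token>"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"score": 0.7}
```

Note that the handler offloads `subnet_evaluation`, a blocking model call, to a worker thread via `asyncio.to_thread`, so a slow evaluation does not stall the server's event loop.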

subnet/evaluation_prompt.txt

Lines changed: 4 additions & 4 deletions
@@ -39,10 +39,10 @@ Strategic Consideration: Did the analysis consider broader strategic implication
 
 Final Scoring Calculation:
 
-Score each of the 5 main criteria on a scale of 1 to 10.
-Calculate the final score as the sum of the scores.
+Score each of the 5 main criteria on a scale of 0 to 10.
+Calculate the final score as the sum of the scores (maximum possible score: 50).
 
-Explain your scoring and evaluation method and return the final score as a JSON like: ```json{"score":100}```
+Explain your scoring and evaluation method and return the final score as a JSON like: ```json{"score":35}```
 
 =======
 
@@ -56,4 +56,4 @@ Agent answer:
 
 ========
 
-Remember to output the final score as ```json{"score":100}```.
+Remember to output the final score as ```json{"score":35}```.
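
The updated `{"score":35}` example is consistent with the revised scale: five criteria scored 0-10 each cap the sum at 50, which `subnet_methods.py` then divides by 50. A quick sketch of the arithmetic, with invented criterion scores:

```python
# Illustrative arithmetic only: the five criterion scores are invented.
criterion_scores = [7, 8, 6, 9, 5]   # five criteria, each scored 0-10
raw_score = sum(criterion_scores)    # 35, matching the prompt's example output
normalized = raw_score / 50          # 0.7, the value the API ultimately returns
print(raw_score, normalized)         # -> 35 0.7
```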

subnet/subnet_methods.py

Lines changed: 16 additions & 4 deletions
@@ -50,13 +50,16 @@ def make_request(input_data: Dict[str, Any], endpoint: str) -> requests.Response
 def subnet_evaluation(quant_query: QuantQuery, quant_response: QuantResponse) -> float:
     """
     Evaluate the subnet miner query based on the provided QuantQuery and QuantResponse, with up to 3 retries on failure.
+
+    The evaluation uses a 5-criteria scoring system where each criterion is scored 0-10,
+    resulting in a maximum possible score of 50. The final score is normalized to 0-1 range.
 
     Args:
         quant_query (QuantQuery): The query object containing the query string and metadata.
         quant_response (QuantResponse): The response object containing the agent's response.
 
     Returns:
-        float: A score representing the evaluation of the query and response.
+        float: A normalized score between 0 and 1 representing the evaluation quality.
     """
     global evaluation_model
     if evaluation_model is None:
@@ -94,9 +97,18 @@ def subnet_evaluation(quant_query: QuantQuery, quant_response: QuantResponse) ->
                 logging.error(f"Could not find JSON in model response: {answer}")
                 return 0.0
             json_str = match.group(1)
-            score = json.loads(json_str)["score"]
-            # Normalize the score to be between 0 and 1
-            return float(score) / 50
+            try:
+                parsed_json = json.loads(json_str)
+                score = parsed_json["score"]
+                # Validate score is within expected range (0-50)
+                if not isinstance(score, (int, float)) or score < 0 or score > 50:
+                    logging.error(f"Invalid score value: {score}. Expected range: 0-50")
+                    return 0.0
+                # Normalize the score to be between 0 and 1
+                return float(score) / 50
+            except (json.JSONDecodeError, KeyError) as e:
+                logging.error(f"Failed to parse score from JSON: {e}. JSON string: {json_str}")
+                return 0.0
         except Exception as e:
             last_exception = e
             logging.error(f"subnet_evaluation attempt {attempt} failed: {e}")
