Commit 71f6d22

expose evaluation
1 parent 8a01be8 commit 71f6d22

File tree

3 files changed: +79 −8 lines changed

server/fastapi_server.py

Lines changed: 59 additions & 0 deletions
@@ -44,6 +44,8 @@
     create_investor_agent_toolkit,
     create_analytics_agent_toolkit,
 )
+from subnet.subnet_methods import subnet_evaluation
+from subnet.api_types import QuantQuery, QuantResponse
 from langchain_openai import ChatOpenAI
 from server.invitecode import InviteCodeManager
 from server.activity_tracker import ActivityTracker
@@ -390,6 +392,63 @@ async def get_activity_stats(
         )
         raise HTTPException(status_code=500, detail="Internal server error")
 
+@app.post("/api/subnet/evaluate")
+async def evaluate_subnet_response(
+    request: Request,
+    user: FirebaseIDTokenData = Depends(get_current_user),
+):
+    """
+    Evaluate a subnet miner response using the subnet evaluation model.
+
+    Expected request body:
+    {
+        "quant_query": {
+            "query": "string",
+            "userID": "string",
+            "metadata": {}
+        },
+        "quant_response": {
+            "response": "string",
+            "signature": "bytes",
+            "proofs": [],
+            "metadata": {}
+        }
+    }
+
+    Returns:
+    {
+        "score": float  // Score between 0 and 1
+    }
+    """
+    try:
+        request_data = await request.json()
+
+        # Validate required fields
+        if "quant_query" not in request_data or "quant_response" not in request_data:
+            raise HTTPException(
+                status_code=400,
+                detail="Both quant_query and quant_response are required"
+            )
+
+        # Parse the request data into QuantQuery and QuantResponse objects
+        quant_query = QuantQuery(**request_data["quant_query"])
+        quant_response = QuantResponse(**request_data["quant_response"])
+
+        # Call the subnet evaluation function
+        score = await asyncio.to_thread(
+            subnet_evaluation, quant_query, quant_response
+        )
+
+        return {"score": score}
+
+    except ValidationError as e:
+        logging.error(f"Validation error in subnet evaluation: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        logging.error(f"Error in subnet evaluation: {e}")
+        logging.error(f"Traceback: {traceback.format_exc()}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
 # @app.post("/api/sentient/assist")
 async def sentient_assist(
     request: Request,
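
For reference, here is how a client might call the new endpoint. This is a hypothetical sketch: the host/port and the bearer-token Authorization header are assumptions inferred from the `get_current_user` dependency, and the wire encoding of the `signature` bytes is not shown in the diff, so an empty string stands in for it.

```python
# Hypothetical client call for POST /api/subnet/evaluate.
# The base URL and auth header are assumptions, not confirmed by this diff;
# the payload shape follows the endpoint's docstring.
import requests

payload = {
    "quant_query": {
        "query": "Summarize the miner's market analysis.",
        "userID": "user-123",
        "metadata": {},
    },
    "quant_response": {
        "response": "The analysis covers revenue drivers and risk factors...",
        "signature": "",  # wire encoding of the signature bytes is not shown in the diff
        "proofs": [],
        "metadata": {},
    },
}

resp = requests.post(
    "http://localhost:8000/api/subnet/evaluate",
    json=payload,
    headers={"Authorization": "Bearer <firebase-id-token>"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"score": 0.7}
```

Note that the handler offloads `subnet_evaluation`, a blocking model call, to a worker thread via `asyncio.to_thread`, so a slow evaluation does not stall the server's event loop.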

subnet/evaluation_prompt.txt

Lines changed: 4 additions & 4 deletions
@@ -39,10 +39,10 @@ Strategic Consideration: Did the analysis consider broader strategic implication
 
 Final Scoring Calculation:
 
-Score each of the 5 main criteria on a scale of 1 to 10.
-Calculate the final score as the sum of the scores.
+Score each of the 5 main criteria on a scale of 0 to 10.
+Calculate the final score as the sum of the scores (maximum possible score: 50).
 
-Explain your scoring and evaluation method and return the final score as a JSON like: ```json{"score":100}```
+Explain your scoring and evaluation method and return the final score as a JSON like: ```json{"score":35}```
 
 =======
 
@@ -56,4 +56,4 @@ Agent answer:
 
 ========
 
-Remember to output the final score as ```json{"score":100}```.
+Remember to output the final score as ```json{"score":35}```.
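
The updated `{"score":35}` example is consistent with the revised scale: five criteria scored 0-10 each cap the sum at 50, which `subnet_methods.py` then divides by 50. A quick sketch of the arithmetic, with invented criterion scores:

```python
# Illustrative arithmetic only: the five criterion scores are invented.
criterion_scores = [7, 8, 6, 9, 5]   # five criteria, each scored 0-10
raw_score = sum(criterion_scores)    # 35, matching the prompt's example output
normalized = raw_score / 50          # 0.7, the value the API ultimately returns
print(raw_score, normalized)         # -> 35 0.7
```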

subnet/subnet_methods.py

Lines changed: 16 additions & 4 deletions
@@ -50,13 +50,16 @@ def make_request(input_data: Dict[str, Any], endpoint: str) -> requests.Response
 def subnet_evaluation(quant_query: QuantQuery, quant_response: QuantResponse) -> float:
     """
     Evaluate the subnet miner query based on the provided QuantQuery and QuantResponse, with up to 3 retries on failure.
+
+    The evaluation uses a 5-criteria scoring system where each criterion is scored 0-10,
+    resulting in a maximum possible score of 50. The final score is normalized to 0-1 range.
 
     Args:
         quant_query (QuantQuery): The query object containing the query string and metadata.
         quant_response (QuantResponse): The response object containing the agent's response.
 
     Returns:
-        float: A score representing the evaluation of the query and response.
+        float: A normalized score between 0 and 1 representing the evaluation quality.
     """
     global evaluation_model
     if evaluation_model is None:
@@ -94,9 +97,18 @@ def subnet_evaluation(quant_query: QuantQuery, quant_response: QuantResponse) ->
                 logging.error(f"Could not find JSON in model response: {answer}")
                 return 0.0
             json_str = match.group(1)
-            score = json.loads(json_str)["score"]
-            # Normalize the score to be between 0 and 1
-            return float(score) / 50
+            try:
+                parsed_json = json.loads(json_str)
+                score = parsed_json["score"]
+                # Validate score is within expected range (0-50)
+                if not isinstance(score, (int, float)) or score < 0 or score > 50:
+                    logging.error(f"Invalid score value: {score}. Expected range: 0-50")
+                    return 0.0
+                # Normalize the score to be between 0 and 1
+                return float(score) / 50
+            except (json.JSONDecodeError, KeyError) as e:
+                logging.error(f"Failed to parse score from JSON: {e}. JSON string: {json_str}")
+                return 0.0
         except Exception as e:
             last_exception = e
             logging.error(f"subnet_evaluation attempt {attempt} failed: {e}")
