diff --git a/evals/evaluation/rag_pilot/RAG_Pilot.png b/evals/evaluation/rag_pilot/RAG_Pilot.png deleted file mode 100644 index 18939ce5..00000000 Binary files a/evals/evaluation/rag_pilot/RAG_Pilot.png and /dev/null differ diff --git a/evals/evaluation/rag_pilot/README.md b/evals/evaluation/rag_pilot/README.md index 6006f6bf..7670f0eb 100644 --- a/evals/evaluation/rag_pilot/README.md +++ b/evals/evaluation/rag_pilot/README.md @@ -26,7 +26,9 @@ docker build --build-arg HTTP_PROXY=$HTTP_PROXY --build-arg HTTP_PROXYS=$HTTP_PR cd ./rag_pilot/docker_image_build docker compose -f build.yaml build # Setup ENV -export ECRAG_SERVICE_HOST_IP=${HOST_IP} # HOST IP of EC-RAG Service, usually current host ip + +# If you want to set HOST_IP in command lines instead of in UI +#export ECRAG_SERVICE_HOST_IP=${HOST_IP} # HOST IP of EC-RAG Service, usually current host ip # If EC-RAG Service port is not default #export ECRAG_SERVICE_PORT=16010 diff --git a/evals/evaluation/rag_pilot/VERSION b/evals/evaluation/rag_pilot/VERSION index 59d3fe04..8a048b90 100644 --- a/evals/evaluation/rag_pilot/VERSION +++ b/evals/evaluation/rag_pilot/VERSION @@ -1 +1 @@ -25.07-dev +25.11-dev diff --git a/evals/evaluation/rag_pilot/api/v1/pilot.py b/evals/evaluation/rag_pilot/api/v1/pilot.py index 31ef8c8b..8c5c01b6 100644 --- a/evals/evaluation/rag_pilot/api/v1/pilot.py +++ b/evals/evaluation/rag_pilot/api/v1/pilot.py @@ -2,75 +2,75 @@ # SPDX-License-Identifier: Apache-2.0 import json +import os import uuid from io import BytesIO, StringIO from typing import List -from api_schema import GroundTruth, RAGStage, ResultOut, RunningStatus -from components.connect_utils import create_pipeline, update_active_pipeline, update_pipeline, upload_files -from components.pilot.base import RAGPipeline, convert_dict_to_pipeline -from components.pilot.ecrag.api_schema import DataIn, PipelineCreateIn -from components.pilot.pilot import pilot, update_rag_pipeline -from components.tuner.tunermgr import tunerMgr +from api_schema import ( + AnnotationOutput, + GroundTruth, + GroundTruthContext, + MatchSettings, + PilotSettings, + RAGStage, + ResultOut, +) +from components.adaptor.ecrag import DataIn +from components.pilot.pilot import pilot from components.utils import load_rag_results_from_csv, load_rag_results_from_gt from fastapi import Body, FastAPI, File, HTTPException, UploadFile -from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.responses import StreamingResponse pilot_app = FastAPI() -@pilot_app.post(path="/v1/pilot/pipeline/active") -async def add_active_pipeline(request: PipelineCreateIn): - ret = create_pipeline(request) - if hasattr(ret, "status_code") and ret.status_code != 200: - raise HTTPException(status_code=ret.status_code, detail=f"Failed to create pipeline: {ret.text}") - - if hasattr(ret, "text"): - try: - ret_dict = json.loads(ret.text) - except json.JSONDecodeError: - raise HTTPException(status_code=500, detail="Invalid JSON in pipeline creation response.") - elif isinstance(ret, dict): - ret_dict = ret - else: - raise HTTPException(status_code=500, detail="Unexpected return type from create_pipeline.") - - pipeline_config = convert_dict_to_pipeline(ret_dict) - pl = RAGPipeline(pipeline_config) - pilot.set_curr_pl(pl) - return "Added" - - -@pilot_app.post(path="/v1/pilot/pipeline/active/import") -async def import_active_pipeline(file: UploadFile = File(...)): +@pilot_app.post(path="/v1/pilot/settings") +async def update_pilot_settings(settings: PilotSettings): try: - content = await file.read() - 
request = json.loads(content) - pipeline_req = PipelineCreateIn(**request) - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Uploaded file is not valid JSON.") - except Exception as e: - raise HTTPException(status_code=400, detail=f"Invalid pipeline request format: {e}") + pilot.set_pilot_settings(settings) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + return pilot.pilot_settings - ret = create_pipeline(pipeline_req) - if hasattr(ret, "status_code") and ret.status_code != 200: - raise HTTPException( - status_code=ret.status_code, detail=f"Failed to create pipeline: {getattr(ret, 'text', '')}" - ) - if hasattr(ret, "text"): - try: - ret_dict = json.loads(ret.text) - except json.JSONDecodeError: - raise HTTPException(status_code=500, detail="Invalid JSON in pipeline creation response.") - elif isinstance(ret, dict): - ret_dict = ret +@pilot_app.get(path="/v1/pilot/settings") +async def get_pilot_settings(): + if pilot.pilot_settings: + return pilot.pilot_settings else: - raise HTTPException(status_code=500, detail="Unexpected return type from create_pipeline.") - - pl = RAGPipeline(convert_dict_to_pipeline(ret_dict)) - pilot.set_curr_pl(pl) - return "Added" + return PilotSettings() + + +# @pilot_app.post(path="/v1/pilot/pipeline/active/import") +# async def import_active_pipeline(file: UploadFile = File(...)): +# try: +# content = await file.read() +# request = json.loads(content) +# pipeline_req = PipelineCreateIn(**request) +# except json.JSONDecodeError: +# raise HTTPException(status_code=400, detail="Uploaded file is not valid JSON.") +# except Exception as e: +# raise HTTPException(status_code=400, detail=f"Invalid pipeline request format: {e}") +# +# ret = create_pipeline(pipeline_req) +# +# if hasattr(ret, "status_code") and ret.status_code != 200: +# raise HTTPException(status_code=ret.status_code, detail=f"Failed to create pipeline: {getattr(ret, 'text', '')}") +# if hasattr(ret, "text"): +# try: +# ret_dict = json.loads(ret.text) +# except json.JSONDecodeError: +# raise HTTPException(status_code=500, detail="Invalid JSON in pipeline creation response.") +# elif isinstance(ret, dict): +# ret_dict = ret +# else: +# raise HTTPException(status_code=500, detail="Unexpected return type from create_pipeline.") +# +# pl = RAGPipeline(convert_dict_to_pipeline(ret_dict)) +# pilot.set_curr_pl(pl) +# return "Added" +# @pilot_app.get(path="/v1/pilot/pipeline/active") @@ -78,75 +78,98 @@ async def get_active_pipeline(): return pilot.get_curr_pl() -@pilot_app.get(path="/v1/pilot/pipeline/active/prompt") -async def get_active_pipeline_prompt(): - return pilot.get_curr_pl().get_prompt() if pilot.get_curr_pl() else None +@pilot_app.get(path="/v1/pilot/pipeline/active/id") +async def get_active_pipeline_id(): + return pilot.get_curr_pl_id() -@pilot_app.get(path="/v1/pilot/pipeline/active/export") -async def export_active_pipeline(): +@pilot_app.post(path="/v1/pilot/pipeline/{id}/active") +async def activate_pipeline_by_id(id: uuid.UUID): try: - pl_dict = pilot.get_curr_pl().export_pipeline().dict() - json_bytes = json.dumps(pl_dict, indent=2).encode("utf-8") - return StreamingResponse( - BytesIO(json_bytes), - media_type="application/json", - headers={"Content-Disposition": "attachment; filename=active_pipeline.json"}, - ) + if pilot.set_curr_pl_by_id(id): + return "Done" + else: + raise HTTPException(status_code=404, detail=f"Error: Pipeline {id} does not exist") + except HTTPException: + raise except Exception as e: - raise 
HTTPException(status_code=500, detail=f"Failed to export pipeline: {e}") + raise HTTPException(status_code=500, detail=f"Error activating pipeline: {e}") -@pilot_app.get(path="/v1/pilot/pipeline/active/id") -async def get_active_pipeline_id(): - return pilot.get_curr_pl_id() +@pilot_app.post(path="/v1/pilot/pipeline/{id}/run") +async def run_pipeline_by_id(id: uuid.UUID): + if pilot.set_curr_pl_by_id(id): + if pilot.run_pipeline(): + return "Done" + else: + return f"Error: Pipeline {id} cannot be executed" + else: + return f"Error: Pipeline {id} does not exist" -@pilot_app.patch(path="/v1/pilot/pipeline/active") -async def update_active_pl(request: PipelineCreateIn): - ret = update_active_pipeline(request) - pl = RAGPipeline(convert_dict_to_pipeline(ret)) - pilot.set_curr_pl(pl) - return "Updated" +@pilot_app.post(path="/v1/pilot/pipeline/{id}/run/blocked") +async def run_pipeline_by_id_blocked(id: uuid.UUID): + if pilot.set_curr_pl_by_id(id): + if pilot.run_pipeline_blocked(): + return "Done" + else: + return f"Error: Pipeline {id} cannot be executed" + else: + return f"Error: Pipeline {id} does not exist" -@pilot_app.post(path="/v1/pilot/pipeline/active/run") -async def run_active_pipeline(): - if pilot.run_pipeline(): - return "Done" +@pilot_app.post(path="/v1/pilot/pipeline/{id}/run/retrieval") +async def run_pipeline_by_id_retrieval(id: uuid.UUID): + if pilot.set_curr_pl_by_id(id): + if pilot.run_pipeline(is_retrieval=True): + return "Done" + else: + return "ERROR: Current pipeline cannot be executed" else: - return "ERROR: Current pipeline cannot execute" + return f"Error: Pipeline {id} does not exist" -@pilot_app.patch(path="/v1/pilot/pipeline/active/top_n/{top_n}") -async def update_active_pl_top_n(top_n: int): - pl_config = pilot.get_curr_pl().export_pipeline() +@pilot_app.get(path="/v1/pilot/pipeline/{id}/results") +async def get_pipeline_by_id_results(id: uuid.UUID): + try: + return pilot.get_results(id) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error retrieving pipeline results: {e}") + - reranker_found = False - for pp in pl_config.postprocessor: - if pp.processor_type == "reranker": - pp.top_n = top_n - reranker_found = True +@pilot_app.get(path="/v1/pilot/pipeline/{id}/results/metrics") +async def get_pipeline_metrics(id: uuid.UUID): + try: + return pilot.get_results_metrics(id) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error retrieving pipeline metrics: {e}") - if not reranker_found: - return {"error": "Reranker not found"}, 404 - ret = update_active_pipeline(pl_config) - pl = RAGPipeline(convert_dict_to_pipeline(ret)) - pl.regenerate_id() - pilot.set_curr_pl(pl) +@pilot_app.patch(path="/v1/pilot/pipeline/{id}/results/metrics") +async def update_pipeline_metrics(id: uuid.UUID, request: list[ResultOut] = Body(...)): + update_results = [] + for result in request: + success = pilot.update_result_metrics(id, result.query_id, result.metadata) + update_results.append({"query_id": result.query_id, "updated": success}) - return {"message": "Updated", "new_top_n": top_n} + return update_results @pilot_app.get(path="/v1/pilot/pipeline/{id}") async def get_pipeline_by_id(id: uuid.UUID): - return pilot.get_pl(id) + return pilot.get_pl(id).to_dict() @pilot_app.get(path="/v1/pilot/pipeline/{id}/prompt") async def get_pipeline_prompt_by_id(id: uuid.UUID): - return pilot.get_pl(id).get_prompt() if pilot.get_pl(id) else None + prompt = pilot.get_pipeline_prompt(id) + 
if not prompt: + raise HTTPException(status_code=404, detail=f"Prompt for pipeline {id} not found") + return prompt @pilot_app.get(path="/v1/pilot/pipeline/{id}/export") @@ -163,45 +186,24 @@ async def export_pipeline_by_id(id: uuid.UUID): raise HTTPException(status_code=500, detail=f"Failed to export pipeline: {e}") -@pilot_app.post(path="/v1/pilot/pipeline/{id}/active") -async def set_active_pipeline_by_id(id: uuid.UUID): - if pilot.set_curr_pl_by_id(id): - return "Done" - else: - return f"Error: Pipeline {id} cannot be set" - - -@pilot_app.post(path="/v1/pilot/pipeline/{id}/run") -async def run_pipeline_by_id(id: uuid.UUID): - if pilot.set_curr_pl_by_id(id): - if pilot.run_pipeline(): - return "Done" - else: - return f"Error: Pipeline {id} cannot execute" - else: - return f"Error: Pipeline {id} does not exist" - - -@pilot_app.post(path="/v1/pilot/pipeline/restore") -async def restore_pipeline(): - success = pilot.restore_curr_pl() +@pilot_app.post(path="/v1/pilot/pipeline/reconcil") +async def reconcil_pipeline(): + success = pilot.reconcil_curr_pl() if success: current_pl = pilot.get_curr_pl() return { - "message": "Pipeline restored successfully", + "message": "Pipeline reconciled successfully", "pipeline_id": str(current_pl.get_id()) if current_pl else None, - "restored_from": "EdgeCraftRAG active pipeline", } else: raise HTTPException( - status_code=404, - detail="Failed to restore pipeline: No active pipeline found in EdgeCraftRAG service or restore operation failed", + status_code=404, detail="Failed to reconcile pipeline: No active pipeline found or reconcile operation failed" ) @pilot_app.post(path="/v1/pilot/files") async def add_files(request: DataIn): - ret = upload_files(request) + ret = pilot.adaptor.upload_files(request) if ret.status_code != 200: raise HTTPException(status_code=ret.status_code, detail=f"Failed to upload files: {ret.text}") @@ -225,29 +227,89 @@ def load_rag_results_from_uploaded_file(uploaded_file: UploadFile, filetype: str @pilot_app.get(path="/v1/pilot/ground_truth") async def get_rag_ground_truth(): - return "Not Implemented" + gt_infos = pilot.get_gt_annotate_infos() or [] + return gt_infos + + +@pilot_app.get(path="/v1/pilot/ground_truth/{query_id}") +async def get_rag_ground_truth_by_id(query_id: int): + gt_infos = pilot.get_gt_annotate_infos() or [] + for gt in gt_infos: + if gt.query_id == query_id: + return gt + raise HTTPException(status_code=404, detail=f"GroundTruth query_id {query_id} not found") + + +@pilot_app.post(path="/v1/pilot/ground_truth/clear_cache") +async def clear_rag_ground_truth_cache(): + pilot.clear_gt_annotate_caches() + pilot.clear_target_query_gt() + return "Cleared ground truth annotation caches" @pilot_app.post(path="/v1/pilot/ground_truth") async def update_rag_ground_truth(gts: List[GroundTruth]): try: - rag_results = load_rag_results_from_gt(gts) + if not gts: + raise ValueError("No ground truth data provided.") + pilot.update_gt_annotate_infos(gts) + rag_results = pilot.process_annotation_batch(gts, clear_cache=False) + suggested_query_ids = pilot.get_suggested_query_ids() - if not rag_results.results: - raise ValueError("No results found.") + if not rag_results or not rag_results.results: + raise ValueError("No RAG results generated from annotations.") - if pilot.update_rag_results_sample(rag_results): + if pilot.update_target_query_gt(rag_results): pilot.clear_rag_result_dict() for stage in RAGStage: - tunerMgr.reset_tuners_by_stage(stage) - return "RAG ground truth updated and database cleared" + 
pilot.tuner_mgr.reset_tuners_by_stage(stage) + return AnnotationOutput(suggested_query_ids=suggested_query_ids) else: - return "Error" + raise HTTPException(status_code=500, detail="Failed to update target query ground truth") + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) except Exception as e: raise HTTPException(status_code=500, detail=f"Internal server error: {e}") +@pilot_app.get(path="/v1/pilot/ground_truth/suggestions/collect") +async def update_gt_wi_suggestion(): + try: + gt_infos = pilot.get_gt_annotate_infos() or [] + if not gt_infos: + return [] + new_gt_map: dict[int, GroundTruth] = {} + for gt in gt_infos: + if not gt.contexts: + continue + for ctx in gt.contexts: + sug_list = getattr(ctx, "suggestions", None) + if not sug_list: + continue + best_item = max( + sug_list, key=lambda s: (s.best_match_score if s.best_match_score is not None else -1.0) + ) + new_text = best_item.best_match_context or best_item.node_context or ctx.text + new_ctx = GroundTruthContext( + filename=ctx.filename, + text=new_text, + context_id=ctx.context_id, + pages=ctx.pages, + section=ctx.section, + suggestions=[], + ) + if gt.query_id not in new_gt_map: + new_gt_map[gt.query_id] = GroundTruth( + query_id=gt.query_id, query=gt.query, contexts=[new_ctx], answer=gt.answer + ) + else: + new_gt_map[gt.query_id].contexts.append(new_ctx) + return list(new_gt_map.values()) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to build GT with suggestions: {e}") + + @pilot_app.post(path="/v1/pilot/ground_truth/file") async def update_rag_ground_truth_file(file: UploadFile = File(...)): filetype = "" @@ -265,10 +327,10 @@ async def update_rag_ground_truth_file(file: UploadFile = File(...)): if not rag_results.results: raise ValueError("No results found in the uploaded file.") - if pilot.update_rag_results_sample(rag_results): + if pilot.update_target_query_gt(rag_results): pilot.clear_rag_result_dict() for stage in RAGStage: - tunerMgr.reset_tuners_by_stage(stage) + pilot.tuner_mgr.reset_tuners_by_stage(stage) return "RAG ground truth file updated and database cleared" else: return "Error" @@ -279,27 +341,38 @@ async def update_rag_ground_truth_file(file: UploadFile = File(...)): raise HTTPException(status_code=500, detail=f"Internal server error: {e}") -@pilot_app.get(path="/v1/pilot/pipeline/{id}/results") -async def get_pipeline_results(id: uuid.UUID): - return pilot.get_results(id) +@pilot_app.post(path="/v1/pilot/save") +async def save_dicts(): + folder = pilot.save_dicts() + return f"All results saved in {folder}" -@pilot_app.get(path="/v1/pilot/pipeline/{id}/results/metrics") -async def get_pipeline_metrics(id: uuid.UUID): - return pilot.get_results_metrics(id) +@pilot_app.get("/v1/pilot/get_available_docs") +async def get_available_docs(): + try: + documents_info = pilot.adaptor.get_available_documents() + return documents_info + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to retrieve available documents: {str(e)}") -@pilot_app.patch(path="/v1/pilot/pipeline/{id}/results/metrics") -async def update_pipeline_metrics(id: uuid.UUID, request: list[ResultOut] = Body(...)): - update_results = [] - for result in request: - success = pilot.update_result_metrics(id, result.query_id, result.metadata) - update_results.append({"query_id": result.query_id, "updated": success}) +@pilot_app.post(path="/v1/pilot/match/setting") +async def update_match_settings(settings: MatchSettings): - return update_results + try: + if 
settings.hit_threshold is not None: + pilot.hit_threshold = settings.hit_threshold -@pilot_app.post(path="/v1/pilot/save") -async def save_dicts(): - folder = pilot.save_dicts() - return f"All results saved in {folder}" + if settings.enable_fuzzy is not None: + pilot.enable_fuzzy = settings.enable_fuzzy + + if settings.confidence_topn is not None: + pilot.confidence_topn = settings.confidence_topn + + return pilot.get_match_settings() + + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to update match settings: {e}") diff --git a/evals/evaluation/rag_pilot/api/v1/tuner.py b/evals/evaluation/rag_pilot/api/v1/tuner.py index 8714bc74..7628d414 100644 --- a/evals/evaluation/rag_pilot/api/v1/tuner.py +++ b/evals/evaluation/rag_pilot/api/v1/tuner.py @@ -2,13 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 import asyncio +from collections import defaultdict from typing import List, Optional -from api_schema import RAGStage, RunningStatus, TunerOut -from components.pilot.base import Metrics +from api_schema import RAGStage, RunningStatus, Tuner, TunerOut, TunerRequest from components.pilot.pilot import pilot -from components.tuner.tunermgr import tunerMgr -from fastapi import FastAPI, Path +from components.pilot.result import Metrics +from fastapi import FastAPI, HTTPException, Path tuner_app = FastAPI() @@ -35,22 +35,85 @@ def get_best_pl_id(pl_id_list: List[int], stage=None): return best_pl_id +@tuner_app.post(path="/v1/tuners/register") +async def register_tuner(reg_tuner_req: TunerRequest): + try: + pilot.tuner_mgr.clear_stage(reg_tuner_req.stage) + stage_tuner_list = [] + tuner_name_list = [] + tuners_dict = {} + for t in reg_tuner_req.tuners: + if t.type is None: + raise HTTPException(status_code=422, detail="Error: Tuner.type not specified.") + if t.params is None: + raise HTTPException(status_code=422, detail="Error: Tuner.params not specified.") + if "name" in t.params and t.params["name"] != "": + stage_tuner_list.append((reg_tuner_req.stage, t.params["name"])) + tuner_name_list.append(t.params["name"]) + tuners_dict[t.params["name"]] = t.dict() + else: + raise HTTPException(status_code=422, detail="Error: Tuner.params.name not specified.") + if pilot.tuner_mgr.init_tuner(stage_tuner_list, tuners_dict): + return f"Tuner {tuner_name_list} registered" + else: + raise HTTPException(status_code=500, detail=f"Error registering tuner: {tuner_name_list}") + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error registering tuner: {e}") + + +@tuner_app.get(path="/v1/tuners") +async def get_tuners() -> List[TunerRequest]: + out = [] + stage_and_tuner_list = pilot.tuner_mgr.get_stage_and_tuner_name_list() + print(stage_and_tuner_list) + for k, v in stage_and_tuner_list.items(): + r = TunerRequest(stage=k, tuners=[Tuner(**pilot.tuner_mgr.get_tuner(t).node.to_dict()) for t in v]) + out.append(r) + return out + + +@tuner_app.get(path="/v1/tuners/{name}") +async def get_tuner_by_name(name: str): + tuner = pilot.tuner_mgr.get_tuner(name) + if tuner is None: + raise HTTPException(status_code=404, detail=f"Tuner {name} not found") + return tuner.node + + +@tuner_app.get(path="/v1/avail_tuners", response_model=List[TunerRequest]) +async def get_available_tuners(): + # TODO: Extend to more storage for available tuner repo + try: + stage_tuner_list, tuner_dict = pilot.tuner_mgr.parse_tuner_config("./configs/tuner.yaml") + + grouped = 
defaultdict(list) + + for stage, tuner_name in stage_tuner_list: + data = tuner_dict.get(tuner_name) + if data is not None: + grouped[stage].append(Tuner(**data)) + + return [TunerRequest(stage=stage, tuners=tuner_list) for stage, tuner_list in grouped.items()] + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error getting available tuners: {e}") + + @tuner_app.get(path="/v1/tuners/stage/{stage}", response_model=List[TunerOut]) async def get_tuners_by_stage(stage: RAGStage = Path(...)): - active_pl = pilot.get_curr_pl() - tunerMgr.update_adaptor(active_pl) - tuner_names = tunerMgr.get_tuners_by_stage(stage) + tuner_names = pilot.tuner_mgr.get_tuners_by_stage(stage) tuners_out = [] for name in tuner_names: - tuners_out.append(tunerMgr.get_tuner_out(name, stage)) + tuners_out.append(pilot.tuner_mgr.get_tuner_out(name, stage)) return tuners_out @tuner_app.get(path="/v1/tuners/stage/{stage}/status") async def get_stage_status(stage: RAGStage = Path(...)): - return {stage: tunerMgr.get_stage_status(stage).value} + return {stage: pilot.tuner_mgr.get_stage_status(stage).value} pipeline_run_lock = asyncio.Lock() @@ -58,51 +121,51 @@ async def get_stage_status(stage: RAGStage = Path(...)): async def run_tuners_in_background(stage: Optional[RAGStage], tuner_names: List[str]): for tuner_name in tuner_names: - status = tunerMgr.get_tuner_status(tuner_name) + status = pilot.tuner_mgr.get_tuner_status(tuner_name) if status is not RunningStatus.NOT_STARTED: print(f"[Tuner {tuner_name}] Skipped, current status {status}.") continue async with pipeline_run_lock: - try: - tunerMgr.set_tuner_status(tuner_name, RunningStatus.IN_PROGRESS) - print(f"[Tuner {tuner_name}] Starting...") - - pl = pilot.get_curr_pl() - tunerMgr.update_adaptor(pl) - - pl_list, params_candidates = tunerMgr.run_tuner(tuner_name, pl) - if tunerMgr.get_tuner_status(tuner_name) is RunningStatus.INACTIVE: - print(f"[Tuner {tuner_name}] is inactive. Skipped") - - for new_pl in pl_list: - pilot.add_rag_pipeline(new_pl) - - for pl, params in zip(pl_list, params_candidates): - print(f"[Tuner {tuner_name}]: Running {pl.get_id()}") - for attr, tunerUpdate in params.items(): + # try: + pilot.tuner_mgr.set_tuner_status(tuner_name, RunningStatus.IN_PROGRESS) + print(f"[Tuner {tuner_name}] Starting...") + + pl_list = pilot.tuner_mgr.run_tuner(tuner_name, pilot.get_curr_pl()) + if pilot.tuner_mgr.get_tuner_status(tuner_name) is RunningStatus.INACTIVE: + print(f"[Tuner {tuner_name}] is inactive. 
Skipped") + + for new_pl in pl_list: + pilot.add_rag_pipeline(new_pl) + + for pl in pl_list: + print(f"[Tuner {tuner_name}]: Running {pl.get_id()}") + for node in pl.nodes: + for module in node.modules: + attr_val = module.attributes[0].params["value"] print( - f"[Tuner {tuner_name}][{pl.get_id()}]: Setting {tunerUpdate.node_type}.{tunerUpdate.module_type}.{attr} to {tunerUpdate.val}" + f"[Tuner {tuner_name}][{pl.get_id()}]: Setting {node.type}.{module.type}.{module.attributes[0].type} to {attr_val}" ) - if stage == RAGStage.RETRIEVAL or stage == RAGStage.POSTPROCESSING: - await asyncio.to_thread(pilot.run_pipeline_blocked, pl, True) - else: - await asyncio.to_thread(pilot.run_pipeline_blocked, pl) + if stage == RAGStage.RETRIEVAL or stage == RAGStage.POSTPROCESSING: + await asyncio.to_thread(pilot.run_pipeline_blocked, pl, True) + else: + await asyncio.to_thread(pilot.run_pipeline_blocked, pl) - actual_stage = stage or tunerMgr.get_tuner_stage(tuner_name) - best_pl = await asyncio.to_thread(pilot.change_best_recall_pl, actual_stage) - best_pl_id = best_pl.get_id() if best_pl else None - tunerMgr.complete_tuner(tuner_name, best_pl_id) - print(f"[Tuner {tuner_name}] Completed. Best pipeline ID: {best_pl_id or 'None'}") + actual_stage = stage or pilot.tuner_mgr.get_tuner_stage(tuner_name) + best_pl = await asyncio.to_thread(pilot.change_best_recall_pl, actual_stage) + best_pl_id = best_pl.get_id() if best_pl else None + pilot.tuner_mgr.complete_tuner(tuner_name, best_pl_id) + print(f"[Tuner {tuner_name}] Completed. Best pipeline ID: {best_pl_id or 'None'}") - except Exception as e: - print(f"[Tuner {tuner_name}] Error while running pipelines: {e}") + # except Exception as e: + # print(f"[Tuner {tuner_name}] Error while running pipelines: {e}") @tuner_app.post(path="/v1/tuners/stage/{stage}/run") async def run_stage_tuner(stage: RAGStage = Path(...)): - tuner_names = tunerMgr.get_tuners_by_stage(stage) - tuner_outs = [tunerMgr.get_tuner_out(tuner_name, stage) for tuner_name in tuner_names] + tuner_names = pilot.tuner_mgr.get_tuners_by_stage(stage) + print(f"run_stage_tuner: {tuner_names}") + tuner_outs = [pilot.tuner_mgr.get_tuner_out(tuner_name, stage) for tuner_name in tuner_names] asyncio.create_task(run_tuners_in_background(stage, tuner_names)) return tuner_outs @@ -110,19 +173,18 @@ async def run_stage_tuner(stage: RAGStage = Path(...)): @tuner_app.post(path="/v1/tuners/stage/{stage}/reset") async def reset_stage_tuner(stage: RAGStage = Path(...)): - tunerMgr.reset_tuners_by_stage(stage) + pilot.tuner_mgr.reset_tuners_by_stage(stage) return "Done" @tuner_app.get(path="/v1/tuners/stage/{stage}/results") async def get_stage_results(stage: RAGStage = Path(...)): - tuner_names = tunerMgr.get_tuners_by_stage(stage) + tuner_names = pilot.tuner_mgr.get_tuners_by_stage(stage) results_dict = {} for tuner_name in tuner_names: - record = tunerMgr.get_tuner_record(tuner_name) + record = pilot.tuner_mgr.get_tuner_record(tuner_name) if record is not None: - all_pipeline_ids = list(record.all_pipeline_ids) - for pl_id in all_pipeline_ids: + for pl_id in record.all_pipeline_ids: results_dict[pl_id] = pilot.get_results(pl_id) return results_dict @@ -130,10 +192,10 @@ async def get_stage_results(stage: RAGStage = Path(...)): @tuner_app.get(path="/v1/tuners/stage/{stage}/results/metrics") async def get_stage_results_metrics(stage: RAGStage = Path(...)): - tuner_names = tunerMgr.get_tuners_by_stage(stage) + tuner_names = pilot.tuner_mgr.get_tuners_by_stage(stage) metrics_dict = {} for tuner_name in 
tuner_names: - record = tunerMgr.get_tuner_record(tuner_name) + record = pilot.tuner_mgr.get_tuner_record(tuner_name) if record is not None: all_pipeline_ids = list(record.all_pipeline_ids) for pl_id in all_pipeline_ids: @@ -142,109 +204,100 @@ async def get_stage_results_metrics(stage: RAGStage = Path(...)): return metrics_dict +# TODO: Remove best_pl_id append logic @tuner_app.get(path="/v1/tuners/stage/{stage}/pipelines") async def get_stage_pipelines(stage: RAGStage = Path(...)): - tuner_names = tunerMgr.get_tuners_by_stage(stage) + tuner_names = pilot.tuner_mgr.get_tuners_by_stage(stage) pipeline_list = [] for tuner_name in tuner_names: - record = tunerMgr.get_tuner_record(tuner_name) + record = pilot.tuner_mgr.get_tuner_record(tuner_name) if record is not None and record.best_pipeline_id is None: pl_id_list = list(record.all_pipeline_ids) if record.base_pipeline_id not in pl_id_list: pl_id_list.append(record.base_pipeline_id) best_pl_id = get_best_pl_id(record.all_pipeline_ids, stage) record.best_pipeline_id = best_pl_id - pipeline_list.append(tunerMgr.get_tuner_update_outs_by_name(tuner_name)) + pipeline_list.append(pilot.tuner_mgr.get_tuner_update_outs_by_name(tuner_name)) return pipeline_list +# TODO: Remove best_pl_id append logic @tuner_app.get(path="/v1/tuners/stage/{stage}/pipelines/best/id") async def get_stage_pipelines_best(stage: RAGStage = Path(...)): - tuner_names = tunerMgr.get_tuners_by_stage(stage) + tuner_names = pilot.tuner_mgr.get_tuners_by_stage(stage) pl_id_list = [] for tuner_name in tuner_names: - record = tunerMgr.get_tuner_record(tuner_name) + record = pilot.tuner_mgr.get_tuner_record(tuner_name) if record is not None: pl_id_list.extend(list(record.all_pipeline_ids)) if record.base_pipeline_id not in pl_id_list: pl_id_list.append(record.base_pipeline_id) pl_id_list = list(set(pl_id_list)) best_pl_id = get_best_pl_id(pl_id_list, stage) + if best_pl_id is None: + return record.best_pipeline_id return best_pl_id @tuner_app.get(path="/v1/tuners/{tuner_name}/pipelines/best") async def get_pipeline_best(tuner_name): - record = tunerMgr.get_tuner_record(tuner_name) - if record is not None and record.best_pipeline_id is None: - stage = tunerMgr.get_tuner_stage(tuner_name) - pl_id_list = list(record.all_pipeline_ids) - if record.base_pipeline_id not in pl_id_list: - pl_id_list.append(record.base_pipeline_id) - best_pl_id = get_best_pl_id(pl_id_list, stage) - record.best_pipeline_id = best_pl_id + best_pl = pilot.tuner_mgr.get_pipeline_best(tuner_name) + if not best_pl: + raise HTTPException(status_code=404, detail=f"Error: No best pipeline found for tuner {tuner_name}") - return tunerMgr.get_pipeline_best(tuner_name) + return best_pl @tuner_app.get(path="/v1/tuners/{tuner_name}/pipelines/base") async def get_pipeline_base(tuner_name): - return tunerMgr.get_pipeline_base(tuner_name) + base_pl = pilot.tuner_mgr.get_pipeline_base(tuner_name) + if not base_pl: + raise HTTPException(status_code=404, detail=f"Error: No base pipeline found for tuner {tuner_name}") + + return base_pl @tuner_app.post(path="/v1/tuners/{tuner_name}/run") async def run_tuner(tuner_name: str): - stage = tunerMgr.get_tuner_stage(tuner_name) + stage = pilot.tuner_mgr.get_tuner_stage(tuner_name) asyncio.create_task(run_tuners_in_background(stage, [tuner_name])) - tunerOut = tunerMgr.get_tuner_out(tuner_name) + tunerOut = pilot.tuner_mgr.get_tuner_out(tuner_name) return tunerOut @tuner_app.post(path="/v1/tuners/{tuner_name}/reset") async def reset_tuner(tuner_name): - tunerMgr.set_tuner_status(tuner_name, RunningStatus.NOT_STARTED) + pilot.tuner_mgr.set_tuner_status(tuner_name, RunningStatus.NOT_STARTED) return "Done" @tuner_app.get(path="/v1/tuners/{tuner_name}") async def get_tuner(tuner_name): - record = tunerMgr.get_tuner_record(tuner_name) - if record is not None and record.best_pipeline_id is None: - stage = tunerMgr.get_tuner_stage(tuner_name) - pl_id_list = list(record.all_pipeline_ids) - if record.base_pipeline_id not in pl_id_list: - pl_id_list.append(record.base_pipeline_id) - best_pl_id = get_best_pl_id(record.all_pipeline_ids, stage) - record.best_pipeline_id = best_pl_id - return tunerMgr.get_tuner_update_outs_by_name(tuner_name) + tuner = pilot.tuner_mgr.get_tuner(tuner_name) + if not tuner: + raise HTTPException(status_code=404, detail=f"Error: Tuner {tuner_name} not found") + return tuner @tuner_app.get(path="/v1/tuners/{tuner_name}/status") async def get_stage_status_by_tuner_name(tuner_name): - status = tunerMgr.get_tuner_status(tuner_name) - return status.value if status else f"Invalid tuner {tuner_name}" + status = pilot.tuner_mgr.get_tuner_status(tuner_name) + if not status: + raise HTTPException(status_code=404, detail=f"Error: Invalid tuner {tuner_name}") + return status.value @tuner_app.get(path="/v1/tuners/{tuner_name}/pipelines") async def get_tuner_pipelines(tuner_name): - record = tunerMgr.get_tuner_record(tuner_name) - if record is not None and record.best_pipeline_id is None: - stage = tunerMgr.get_tuner_stage(tuner_name) - pl_id_list = list(record.all_pipeline_ids) - if record.base_pipeline_id not in pl_id_list: - pl_id_list.append(record.base_pipeline_id) - best_pl_id = get_best_pl_id(record.all_pipeline_ids, stage) - record.best_pipeline_id = best_pl_id - return tunerMgr.get_tuner_update_outs_by_name(tuner_name) + return pilot.tuner_mgr.get_tuner_update_outs_by_name(tuner_name) @tuner_app.get(path="/v1/tuners/{tuner_name}/results") async def get_tuner_results(tuner_name): - record = tunerMgr.get_tuner_record(tuner_name) + record = pilot.tuner_mgr.get_tuner_record(tuner_name) results_dict = {} if record is not None: - all_pipeline_ids = list(record.all_pipeline_ids) - for pl_id in all_pipeline_ids: + for pl_id in record.all_pipeline_ids: results_dict[pl_id] = pilot.get_results(pl_id) return results_dict @@ -252,11 +305,10 @@ async def get_tuner_results(tuner_name): @tuner_app.get(path="/v1/tuners/{tuner_name}/results/metrics") async def get_tuner_results_metrics(tuner_name): - record = tunerMgr.get_tuner_record(tuner_name) + record = pilot.tuner_mgr.get_tuner_record(tuner_name) metrics_dict = {} if record is not None: - all_pipeline_ids = list(record.all_pipeline_ids) - for pl_id in all_pipeline_ids: + for pl_id in record.all_pipeline_ids: metrics_dict[pl_id] = pilot.get_results_metrics(pl_id) return metrics_dict diff --git a/evals/evaluation/rag_pilot/api_schema.py b/evals/evaluation/rag_pilot/api_schema.py index 26a86a06..28190ebe 100644 --- a/evals/evaluation/rag_pilot/api_schema.py +++ b/evals/evaluation/rag_pilot/api_schema.py @@ -3,9 +3,9 @@ import uuid from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union -from pydantic import BaseModel +from pydantic import BaseModel, Field class RunningStatus(str, Enum): @@ -21,6 +21,28 @@ class RAGStage(str, Enum): GENERATION = "generation" +class TunerAttribute(BaseModel): + type: str + params: Dict + + +class TunerModule(BaseModel): + type: str + params: Optional[Dict] = Field(default_factory=dict) + attributes: 
Optional[List[TunerAttribute]] = Field(default_factory=list) + + +class Tuner(BaseModel): + type: str + params: Optional[Dict] = Field(default_factory=dict) + modules: Optional[List[TunerModule]] = Field(default_factory=list) + + +class TunerRequest(BaseModel): + stage: RAGStage + tuners: List[Tuner] = Field(default_factory=list) + + class TunerOut(BaseModel): stage: str name: str @@ -58,9 +80,22 @@ class ResultsOut(BaseModel): results: Optional[List[ResultOut]] = None +class GroundTruthContextSuggestion(BaseModel): + node_id: Optional[str] = None + node_page_label: Optional[str] = None + node_context: Optional[str] = None + confidence_score: Optional[float] = None + best_match_score: Optional[float] = None + best_match_context: Optional[str] = None + + class GroundTruthContext(BaseModel): filename: str text: str + context_id: int + pages: Optional[List[str]] = None + section: Optional[str] = None + suggestions: Optional[List[GroundTruthContextSuggestion]] = None class GroundTruth(BaseModel): @@ -68,3 +103,19 @@ class GroundTruth(BaseModel): query: str contexts: List[GroundTruthContext] answer: Optional[str] = None + + +class MatchSettings(BaseModel): + hit_threshold: Optional[float] = None + enable_fuzzy: Optional[bool] = None + confidence_topn: Optional[int] = None + + +class AnnotationOutput(BaseModel): + suggested_query_ids: list[int] + + +class PilotSettings(BaseModel): + # {ECRAG_SERVICE_HOST_IP}:{ECRAG_SERVICE_PORT} + target_endpoint: Optional[str] = None + target_type: Optional[str] = "ecrag" diff --git a/evals/evaluation/rag_pilot/components/adaptor/__init__.py b/evals/evaluation/rag_pilot/components/adaptor/__init__.py new file mode 100644 index 00000000..4057dc01 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/adaptor/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/evals/evaluation/rag_pilot/components/adaptor/adaptor.py b/evals/evaluation/rag_pilot/components/adaptor/adaptor.py new file mode 100644 index 00000000..6bcc3822 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/adaptor/adaptor.py @@ -0,0 +1,29 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from abc import ABC, abstractmethod + +import yaml + + +class AdaptorBase(ABC): + + def __init__(self, spec_file: str): + self.spec = {} + self.server_addr = "" + + if spec_file: + # Load a complete pipeline spec from the YAML file. + with open(spec_file, "r") as f: + for doc in yaml.safe_load_all(f): + self.spec.update(doc) + if not self.spec: + raise ValueError("No recognized nodes found in the YAML file") + + @abstractmethod + def get_active_pipeline(self): + pass + + @abstractmethod + def apply_pipeline(self): + pass diff --git a/evals/evaluation/rag_pilot/components/adaptor/connector.py b/evals/evaluation/rag_pilot/components/adaptor/connector.py new file mode 100644 index 00000000..515b1149 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/adaptor/connector.py @@ -0,0 +1,4 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# TODO: Add a general connector class for REST API diff --git a/evals/evaluation/rag_pilot/components/adaptor/ecrag.py b/evals/evaluation/rag_pilot/components/adaptor/ecrag.py new file mode 100644 index 00000000..e5d75eb2 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/adaptor/ecrag.py @@ -0,0 +1,506 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +import uuid +from enum import Enum 
+from typing import Any, List, Optional +from urllib.parse import quote + +import requests +from components.adaptor.adaptor import AdaptorBase +from components.pilot.base import Attribute, Module, Node +from components.pilot.pipeline import Pipeline +from llama_index.core.schema import TextNode +from pydantic import BaseModel + +ECRAG_SERVICE_HOST_IP = os.getenv("ECRAG_SERVICE_HOST_IP", "127.0.0.1") +ECRAG_SERVICE_PORT = int(os.getenv("ECRAG_SERVICE_PORT", 16010)) +server_addr = f"http://{ECRAG_SERVICE_HOST_IP}:{ECRAG_SERVICE_PORT}" + + +class CompType(str, Enum): + + DEFAULT = "default" + MODEL = "model" + PIPELINE = "pipeline" + NODEPARSER = "node_parser" + INDEXER = "indexer" + RETRIEVER = "retriever" + POSTPROCESSOR = "postprocessor" + GENERATOR = "generator" + FILE = "file" + + +class ModelType(str, Enum): + + EMBEDDING = "embedding" + RERANKER = "reranker" + LLM = "llm" + VLLM = "vllm" + + +class FileType(str, Enum): + TEXT = "text" + VISUAL = "visual" + AURAL = "aural" + VIRTUAL = "virtual" + OTHER = "other" + + +class NodeParserType(str, Enum): + + SIMPLE = "simple" + HIERARCHY = "hierarchical" + SENTENCEWINDOW = "sentencewindow" + UNSTRUCTURED = "unstructured" + + +class IndexerType(str, Enum): + + FAISS_VECTOR = "faiss_vector" + DEFAULT_VECTOR = "vector" + MILVUS_VECTOR = "milvus_vector" + + +class RetrieverType(str, Enum): + + VECTORSIMILARITY = "vectorsimilarity" + AUTOMERGE = "auto_merge" + BM25 = "bm25" + + +class PostProcessorType(str, Enum): + + RERANKER = "reranker" + METADATAREPLACE = "metadata_replace" + + +class GeneratorType(str, Enum): + + CHATQNA = "chatqna" + + +class InferenceType(str, Enum): + + LOCAL = "local" + VLLM = "vllm" + + +class CallbackType(str, Enum): + + DATAPREP = "dataprep" + RETRIEVE = "retrieve" + PIPELINE = "pipeline" + + +class ModelIn(BaseModel): + model_type: Optional[str] = "LLM" + model_id: Optional[str] + model_path: Optional[str] = "./" + weight: Optional[str] = "INT4" + device: Optional[str] = "cpu" + + +class NodeParserIn(BaseModel): + chunk_size: Optional[int] = None + chunk_overlap: Optional[int] = None + chunk_sizes: Optional[list] = None + parser_type: str + window_size: Optional[int] = 3 + + +class IndexerIn(BaseModel): + indexer_type: str + embedding_model: Optional[ModelIn] = None + embedding_url: Optional[str] = None + vector_url: Optional[str] = None + + +class RetrieverIn(BaseModel): + retriever_type: str + retrieve_topk: Optional[int] = 3 + + +class PostProcessorIn(BaseModel): + processor_type: str + reranker_model: Optional[ModelIn] = None + top_n: Optional[int] = 5 + + +class GeneratorIn(BaseModel): + prompt_path: Optional[str] = None + prompt_content: Optional[str] = None + model: Optional[ModelIn] = None + inference_type: Optional[str] = "local" + vllm_endpoint: Optional[str] = None + + +class PipelineCreateIn(BaseModel): + name: Optional[str] = None + node_parser: Optional[NodeParserIn] = None + indexer: Optional[IndexerIn] = None + retriever: Optional[RetrieverIn] = None + postprocessor: Optional[list[PostProcessorIn]] = None + generator: Optional[GeneratorIn] = None + active: Optional[bool] = False + + +class DataIn(BaseModel): + text: Optional[str] = None + local_path: Optional[str] = None + + +class FilesIn(BaseModel): + local_paths: Optional[list[str]] = None + + +class RagOut(BaseModel): + query: str + contexts: Optional[dict[str, Any]] = None + response: str + + +class PromptIn(BaseModel): + prompt: Optional[str] = None + + +class KnowledgeBaseCreateIn(BaseModel): + name: str + description: Optional[str] = None + 
active: Optional[bool] = None + comp_type: Optional[str] = "knowledge" + comp_subtype: Optional[str] = "origin_kb" + experience_active: Optional[bool] = None + + +class ExperienceIn(BaseModel): + question: str + content: list[str] = None + + +class MilvusConnectRequest(BaseModel): + vector_url: str + + +class ECRAGAdaptor(AdaptorBase): + def __init__(self, config_file="configs/ecrag.yaml"): + super().__init__(config_file) + self.server_addr = server_addr + + def set_server_addr(self, cur_server_addr: str): + self.server_addr = cur_server_addr + + def test(self): + path = "/v1/settings/pipelines" + res = requests.get(f"{self.server_addr}{path}", proxies={"http": None}) + if res.status_code == 200: + return True + return False + + def get_active_pipeline(self) -> Pipeline: + pl = self.get_active_pipeline_ecrag() + if pl: + return self.convert_ecrag_schema_to_pipeline(pl) + else: + return None + + def get_active_pipeline_ecrag(self) -> dict: + path = "/v1/settings/pipelines" + res = requests.get(f"{self.server_addr}{path}", proxies={"http": None}) + if res.status_code == 200: + for pl in res.json(): + if pl["status"]["active"]: + active_pl = self.get_pipeline_ecrag(pl["name"]) + # active_pl["generator"]["prompt_content"] = self.get_prompt() + return active_pl + return None + + def get_pipeline_ecrag(self, name) -> dict: + path = f"/v1/settings/pipelines/{name}/json" + res = requests.get(f"{self.server_addr}{path}", proxies={"http": None}) + if res.status_code == 200: + return json.loads(res.json()) + return None + + def apply_pipeline(self, tgt_pl: Pipeline): + ecrag_pl = self.get_active_pipeline_ecrag() + if tgt_pl is not None and len(tgt_pl.nodes) > 0: + ecrag_pl = self.update_ecrag_pipeline_conf(ecrag_pl, tgt_pl) + self.update_active_pipeline(ecrag_pl) + + def get_ragqna(self, query): + new_req = {"messages": query, "stream": True} + path = "/v1/ragqna" + res = requests.post(f"{self.server_addr}{path}", json=new_req, proxies={"http": None}) + if res.status_code == 200: + return RagOut(**res.json()) + else: + return None + + def get_retrieval(self, query): + new_req = {"messages": query} + path = "/v1/retrieval" + res = requests.post(f"{self.server_addr}{path}", json=new_req, proxies={"http": None}) + if res.status_code == 200: + return RagOut(**res.json()) + else: + return None + + def update_active_pipeline(self, pipeline): + pipeline["active"] = False + res = self.update_pipeline(pipeline) + if res.status_code == 200: + pipeline["active"] = True + res = self.update_pipeline(pipeline) + if res.status_code == 200: + return res.json() + else: + return None + + def create_pipeline(self, pipeline_conf): + path = "/v1/settings/pipelines" + return requests.post(f"{self.server_addr}{path}", json=pipeline_conf, proxies={"http": None}) + + def update_pipeline(self, pipeline_conf): + path = "/v1/settings/pipelines" + pl_name = pipeline_conf["name"] + return requests.patch(f"{self.server_addr}{path}/{pl_name}", json=pipeline_conf, proxies={"http": None}) + + def upload_files(self, file_conf): + path = "/v1/data" + return requests.post(f"{self.server_addr}{path}", json=file_conf.dict(), proxies={"http": None}) + + def get_prompt(self): + path = "/v1/chatqna/prompt" + res = requests.get(f"{self.server_addr}{path}", proxies={"http": None}) + + if res.status_code == 200: + return res.json() + else: + error_detail = res.text if hasattr(res, "text") else "Unknown error" + print(f"Failed to get prompt: {error_detail}") + return False + + def get_default_prompt(self): + path = "/v1/chatqna/prompt/default" + 
res = requests.get(f"{self.server_addr}{path}", proxies={"http": None}) + + if res.status_code == 200: + return res.json() + else: + error_detail = res.text if hasattr(res, "text") else "Unknown error" + print(f"Failed to get default prompt: {error_detail}") + return False + + def reindex_data(self): + path = "/v1/data" + res = requests.post(f"{self.server_addr}{path}/reindex", proxies={"http": None}) + return res.status_code == 200 + + def update_ecrag_pipeline_conf(self, ecrag_pl: dict, target_pl: Pipeline) -> dict: + print(f"[Pilot Adaptor] EC-RAG Configuration before tuning {ecrag_pl}") + # target_pl.nodes is a List of Node object + # self.spec is a Dict of Node dict?? + # TODO: consider align the types + # Matching node type + for pl_node in target_pl.nodes: + for n_k, ecrag_node in self.spec.items(): + # Found node + if pl_node.type == ecrag_node["type"]: + # Matching node type + for pl_m in pl_node.modules: + for ecrag_module in ecrag_node["modules"]: + # Found module ecrag_pl[n_k] + if pl_m.type == ecrag_module["type"]: + ecrag_pl = update_ecrag_module_dispatch(ecrag_pl, pl_m, n_k, ecrag_module) + + print(f"[Pilot Adaptor] EC-RAG Configuration after tuning {ecrag_pl}") + return ecrag_pl + + # TODO: Finish the conversion + def convert_ecrag_schema_to_pipeline(self, ecrag_pl: dict, uid: uuid.UUID = None) -> Pipeline: + pl = Pipeline(uid) + pl.type = "RAG" + node_type = ["node_parser", "indexer", "retriever", "postprocessor", "generator"] + if ecrag_pl: + for n_type in node_type: + node = convert_ecrag_schema_to_node(n_type, Node(type=n_type), ecrag_pl[n_type]) + pl.nodes.append(node) + + return pl + + def get_document_chunks(self, file_name: str) -> List[TextNode]: + encoded_file_name = quote(file_name, safe="") + # Use the actual EdgeCraftRAG API endpoint with properly encoded filename + path = f"/v1/data/{encoded_file_name}/nodes" + res = requests.get(f"{self.server_addr}{path}", proxies={"http": None}) + if res.status_code == 200: + chunks_data = res.json() + text_nodes = [] + print(f"[Pilot Adaptor] Received {len(chunks_data)} chunks of data for file: '{file_name}'") + for i, chunk_data in enumerate(chunks_data): + try: + text_node = TextNode(**chunk_data) + text_nodes.append(text_node) + except Exception as e: + print(f"[Pilot Adaptor] ❌ Error creating TextNode from chunk_data[{i}]: {e}") + continue + + print(f"[Pilot Adaptor] Successfully created {len(text_nodes)} TextNode objects") + return text_nodes + else: + print(f"[Pilot Adaptor] Failed to get document chunks for {file_name}: HTTP {res.status_code}") + return [] + + def get_available_documents(self): + path = "/v1/data/documents" + res = requests.get(f"{self.server_addr}{path}", proxies={"http": None}) + if res.status_code == 200: + return res.json() + else: + print(f"Failed to get available documents: HTTP {res.status_code}") + return {"total_documents": 0, "documents": []} + + +# +# Implementation of convert ecrag config to pipeline node object +# (ECRAG->Pipeline) +# +def convert_ecrag_schema_to_node(n_type, node, ecrag_comp): + if n_type == "node_parser": + if ecrag_comp["parser_type"] == NodeParserType.SIMPLE: + node.modules.append( + Module( + type="direct", + params={}, + attributes=[ + Attribute(type="chunk_size", params={"value": ecrag_comp["chunk_size"]}), + Attribute(type="chunk_overlap", params={"value": ecrag_comp["chunk_overlap"]}), + ], + ) + ) + elif n_type == "indexer": + node.modules.append( + Module( + type="embedding_model", + params={}, + attributes=[ + Attribute(type="model_name", params={"value": 
ecrag_comp["embedding_model"]["model_id"]}), + ], + ) + ) + elif n_type == "retriever": + node.modules.append( + Module( + type="vectorsimilarity", + params={}, + attributes=[ + Attribute(type="top_k", params={"value": ecrag_comp["retrieve_topk"]}), + ], + ) + ) + elif n_type == "postprocessor": + node.modules.append( + Module( + type="reranker", + params={}, + attributes=[ + Attribute( + type="top_n", + # TODO: Consider reranker not the only postprocessor + params={"value": ecrag_comp[0]["top_n"]}, + ), + Attribute( + type="model_name", + # TODO: Consider reranker not the only postprocessor + params={"value": ecrag_comp[0]["reranker_model"]["model_id"]}, + ), + ], + ) + ) + elif n_type == "generator": + node.modules.append( + Module( + type="prompt", + params={}, + attributes=[ + Attribute(type="content", params={"value": ecrag_comp["prompt_content"]}), + ], + ) + ) + + return node + + +# +# Apply new pipeline config to update ECRAG schema attributes +# Pipeline->ECRAG +# +def update_ecrag_module_dispatch(ecrag_pl: dict, target_module: Module, spec_node_name: str, spec_module: dict) -> dict: + # Match attribute type + for tgt_attr in target_module.attributes: + for spec_attr in spec_module["attributes"]: + if tgt_attr.type == spec_attr["type"]: + # Update module values + match spec_module["type"]: + case "direct": + ecrag_pl = update_ecrag_direct(ecrag_pl, tgt_attr, spec_node_name) + case "embedding_model": + ecrag_pl = update_ecrag_embedding_model(ecrag_pl, tgt_attr, spec_node_name, spec_module["type"]) + case "vectorsimilarity": + ecrag_pl = update_ecrag_vectorsimilarity(ecrag_pl, tgt_attr, spec_node_name) + case "reranker": + ecrag_pl = update_ecrag_reranker(ecrag_pl, tgt_attr, spec_node_name) + case "prompt": + ecrag_pl = update_ecrag_prompt(ecrag_pl, tgt_attr, spec_node_name) + case _: + pass + return ecrag_pl + + +# +# Implementation of updating ECRAG pipeline attributes +# (Pipeline->ECRAG schema) +# +def update_ecrag_direct(ecrag_pl: dict, target_attr: Attribute, spec_node_name: str) -> dict: + node_parser = ecrag_pl[spec_node_name] + if target_attr.type == "chunk_size": + node_parser["chunk_size"] = target_attr.params["value"] + if target_attr.type == "chunk_overlap": + node_parser["chunk_overlap"] = target_attr.params["value"] + return ecrag_pl + + +def update_ecrag_embedding_model( + ecrag_pl: dict, target_attr: Attribute, spec_node_name: str, spec_module_name: str +) -> dict: + embedding_model = ecrag_pl[spec_node_name][spec_module_name] + if target_attr.type == "model_name": + embedding_model["model_id"] = target_attr.params["value"] + embedding_model["model_path"] = "./models/" + target_attr.params["value"] + return ecrag_pl + + +def update_ecrag_vectorsimilarity(ecrag_pl: dict, target_attr: Attribute, spec_node_name: str) -> dict: + retriever = ecrag_pl[spec_node_name] + if target_attr.type == "top_k": + retriever["retrieve_topk"] = target_attr.params["value"] + return ecrag_pl + + +def update_ecrag_reranker(ecrag_pl: dict, target_attr: Attribute, spec_node_name: str) -> dict: + postprocessors = ecrag_pl[spec_node_name] + for p in postprocessors: + if p["processor_type"] == PostProcessorType.RERANKER: + if target_attr.type == "top_n": + p["top_n"] = target_attr.params["value"] + return ecrag_pl + + +def update_ecrag_prompt(ecrag_pl: dict, target_attr: Attribute, spec_node_name: str) -> dict: + generator = ecrag_pl[spec_node_name] + if target_attr.type == "content": + generator["prompt_content"] = target_attr.params["value"] + return ecrag_pl diff --git 
a/evals/evaluation/rag_pilot/components/annotation/__init__.py b/evals/evaluation/rag_pilot/components/annotation/__init__.py new file mode 100644 index 00000000..4057dc01 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/annotation/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/evals/evaluation/rag_pilot/components/annotation/annotator.py b/evals/evaluation/rag_pilot/components/annotation/annotator.py new file mode 100644 index 00000000..2b38a689 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/annotation/annotator.py @@ -0,0 +1,84 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import logging +import time +import uuid +from typing import Dict, List, Optional + +from llama_index.core.schema import TextNode + +from .matcher import Matcher, default_matcher +from .schemas import AnnotationRequest, AnnotationResponse, GTMatchResult, QueryGTMatchResults, SuggestionItem + +logger = logging.getLogger(__name__) + + +class Annotator: + + def __init__(self, matcher: Optional[Matcher] = None): + self.matcher = matcher or default_matcher + self._node_cache: Dict[str, List[TextNode]] = {} + self.matched_results: Dict[int, QueryGTMatchResults] = {} + + def annotate(self, request: AnnotationRequest) -> AnnotationResponse: + try: + available_nodes = self._get_available_nodes(request.gt_file_name) + match_result = self.matcher.match_gt_chunks(request, available_nodes) + self._merge_match_result(request.query_id, request.query, match_result) + return AnnotationResponse( + success=match_result.matched_chunk is not None, + message="Annotation completed", + suggestion_items=match_result.suggestion_items, + ) + + except Exception as e: + logger.error(f"[Annotator] Error in annotation process: {e}") + + empty_result = GTMatchResult( + context_id=request.context_id, + context_text=request.gt_text_content, + matched_chunk=None, + suggestion_items=[], + ) + + self._merge_match_result(request.query_id, request.query, empty_result) + + return AnnotationResponse(success=False, message=f"Annotation failed: {str(e)}", suggestion_items=[]) + + def _merge_match_result(self, query_id: int, query: str, new_result: GTMatchResult): + ctx_id = new_result.context_id + container = self.matched_results.get(query_id) + if container is None: + container = QueryGTMatchResults(query_id=query_id, query=query, context_map={}) + self.matched_results[query_id] = container + logger.info(f"[Annotator] Created QueryGTMatchResults for query_id {query_id}") + + is_replace = ctx_id in container.context_map + container.context_map[ctx_id] = new_result + action = "Replaced" if is_replace else "Added" + logger.info(f"[Annotator] {action} match result query_id={query_id} context_id={ctx_id}") + + def _get_available_nodes(self, file_name: str) -> List[TextNode]: + from components.pilot.pilot import pilot + + # Return cached nodes if present + if file_name in self._node_cache: + return self._node_cache[file_name] + + # Get TextNode objects directly from EdgeCraftRAG (now returns List[TextNode]) + nodes = pilot.adaptor.get_document_chunks(file_name) + # Cache the results + self._node_cache[file_name] = nodes + return nodes + + def clear_caches(self): + self.matched_results.clear() + self._node_cache.clear() + + def get_all_match_results(self) -> Dict[int, QueryGTMatchResults]: + return self.matched_results.copy() + + +annotator = Annotator(default_matcher) diff --git a/evals/evaluation/rag_pilot/components/annotation/matcher.py b/evals/evaluation/rag_pilot/components/annotation/matcher.py new file mode 100644 index 00000000..8b6a7eeb --- /dev/null +++ b/evals/evaluation/rag_pilot/components/annotation/matcher.py @@ -0,0 +1,452 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +import re +from difflib import SequenceMatcher +from typing import Any, Dict, List, Optional, Tuple + +from llama_index.core.schema import TextNode + +from .schemas import AnnotationRequest, GTMatchResult, SuggestionItem + +logger = logging.getLogger(__name__) + + +class Matcher: + + def __init__(self, similarity_threshold: float = 0.8, enable_fuzzy: bool = False, confidence_topn: int = 5): + self.similarity_threshold = similarity_threshold + self.enable_fuzzy = enable_fuzzy + self.confidence_topn = confidence_topn + + def match_gt_chunks(self, request: AnnotationRequest, available_nodes: List[TextNode]) -> GTMatchResult: + try: + exact_matches: List[TextNode] = [] + partial_matches: List[TextNode] = [] + self.update_settings( + similarity_threshold=request.similarity_threshold, + enable_fuzzy=request.enable_fuzzy, + confidence_topn=request.confidence_topn, + ) + # Apply all filtering logic in Matcher (currently only page filtering) + relevant_nodes = self._filter_nodes_for_matching(available_nodes, request) + # Note: once the duplicate-node issue in ecrag is fixed, deduplication here is no longer needed + relevant_nodes = self._deduplicate_chunks(relevant_nodes) + # Perform text matching + node_confidences: Dict[str, float] = {} + node_lookup: Dict[str, TextNode] = {} + for node in relevant_nodes: + match_type, confidence = self.match_texts(node.text, request.gt_text_content) + node_id = node.node_id + node_confidences[node_id] = confidence + node_lookup[node_id] = node + + if match_type == "exact": + exact_matches.append(node) + elif match_type == "partial": + partial_matches.append(node) + + match_res = exact_matches + partial_matches + best_chunk = None + best_conf = -1.0 + # Select the best matched chunk based on confidence score + for node in match_res: + nid = node.node_id + conf = node_confidences.get(nid, 0.0) + if conf > best_conf: + best_conf = conf + best_chunk = node + + # Only create suggestion_items if no matched chunk found + suggestion_items = [] + if best_chunk is None: + sorted_conf = sorted(node_confidences.items(), key=lambda x: x[1], reverse=True) + top_conf = sorted_conf[: self.confidence_topn] + for nid, score in top_conf: + n = node_lookup.get(nid) + if not n: + continue + seg, seg_score = self._extract_best_match_segment(n.text, request.gt_text_content) + suggestion_items.append( + SuggestionItem( + node_id=nid, + node_page_label=n.metadata.get("page_label", ""), + node_context=n.text, + confidence_score=score, + best_match_context=seg, + best_match_score=seg_score, + ) + ) + logger.info( + f"[Matcher] Matching completed for query_id: {request.query_id}, context_id: {request.context_id}" + ) + + return GTMatchResult( + context_id=request.context_id, + context_text=request.gt_text_content, + matched_chunk=best_chunk, + suggestion_items=suggestion_items, + ) + + except Exception as e: + logger.error(f"[Matcher] Error matching GT chunks: {e}") + return GTMatchResult( + context_id=request.context_id, + context_text=request.gt_text_content, + matched_chunk=None, + suggestion_items=[], + ) + + def _filter_nodes_for_matching(self, nodes: List[TextNode], request: AnnotationRequest) -> List[TextNode]: + relevant_nodes 
= nodes + # Filter by pages if specified + if request.gt_pages: + relevant_nodes = self._filter_nodes_by_pages(relevant_nodes, request.gt_pages) + + # Future: Add more filtering criteria here + # if request.gt_section: + # relevant_nodes = self._filter_nodes_by_section(relevant_nodes, request.gt_section) + # if request.date_range: + # relevant_nodes = self._filter_nodes_by_date(relevant_nodes, request.date_range) + + return relevant_nodes + + def _filter_nodes_by_pages(self, nodes: List[TextNode], target_pages: List[str]) -> List[TextNode]: + if not target_pages: + return nodes + + # Check if any nodes have page_label information + has_page_info = any(node.metadata.get("page_label") for node in nodes) + + # If no nodes have page information, return all nodes (cannot filter) + if not has_page_info: + logger.warning(f"[Matcher] No page_label information found in nodes, returning all {len(nodes)} nodes") + return nodes + + relevant_nodes = [] + for node in nodes: + page_label = node.metadata.get("page_label", "") + if page_label and page_label in target_pages: + relevant_nodes.append(node) + # If we have page info but no matches, return empty list (strict filtering) + # If we found matches, return them + return relevant_nodes + + def _is_filename_match(self, node_filename: str, target_filename: str) -> bool: + if not node_filename or not target_filename: + return False + + # 1. Exact match + if node_filename.lower() == target_filename.lower(): + return True + + # 2. Extract base filename (remove path and extension) for matching + node_base = self._extract_base_filename(node_filename) + target_base = self._extract_base_filename(target_filename) + + if node_base.lower() == target_base.lower(): + return True + + # 3. Partial match - check if target filename is contained in node filename + if target_base.lower() in node_base.lower(): + return True + + # 4. Handle special cases: e.g. "9-TCB Bonder-TCB BKM_v12 1.docx" and "TCB_Bonder_Manual.pdf" + # Normalize filenames: remove version numbers, special characters, etc. + node_normalized = self._normalize_filename(node_base) + target_normalized = self._normalize_filename(target_base) + + if node_normalized in target_normalized or target_normalized in node_normalized: + return True + + # 5. Keyword matching: extract keywords from filenames for matching + node_keywords = self._extract_keywords(node_base) + target_keywords = self._extract_keywords(target_base) + + # If there's sufficient keyword overlap, consider it a match + common_keywords = node_keywords.intersection(target_keywords) + if len(common_keywords) >= 2 or (len(common_keywords) >= 1 and len(node_keywords) <= 2): + return True + + return False + + def _normalize_filename(self, filename: str) -> str: + # Convert to lowercase + normalized = filename.lower() + + # Remove common version patterns: v1, v2, _v12, etc. + normalized = re.sub(r"[_\-\s]*v\d+[._\d]*", "", normalized) + + # Remove numeric suffixes: e.g. "1", "2", etc. 
+ normalized = re.sub(r"[_\-\s]*\d+$", "", normalized) + + # Unify separators: replace - _ spaces with single separator + normalized = re.sub(r"[-_\s]+", "_", normalized) + + # Remove leading and trailing separators + normalized = normalized.strip("_") + + return normalized + + def _extract_keywords(self, filename: str) -> set: + # Split filename + words = re.split(r"[-_\s]+", filename.lower()) + + # Filter out short words, numbers and common meaningless words + meaningful_words = set() + stopwords = {"v", "ver", "version", "doc", "pdf", "docx", "manual", "guide", "bkm"} + + for word in words: + # Keep words longer than 2 characters that are not pure numbers and not in stopwords + if len(word) > 2 and not word.isdigit() and word not in stopwords: + meaningful_words.add(word) + + return meaningful_words + + def _extract_base_filename(self, filename: str) -> str: + import os + + base = os.path.basename(filename) + # Remove extension + base = os.path.splitext(base)[0] + return base + + def match_texts(self, node_text: str, gt_text: str) -> Tuple[str, float]: + # Exact match check + if self._is_exact_match(node_text, gt_text): + return "exact", 1.0 + + # Partial match check + similarity = self._calculate_similarity(node_text, gt_text) + + if similarity >= self.similarity_threshold: + return "partial", similarity + + # If fuzzy matching is enabled, try more lenient matching + if self.enable_fuzzy: + fuzzy_similarity = self._fuzzy_match(node_text, gt_text) + if fuzzy_similarity >= self.similarity_threshold * 0.7: # Lower threshold + return "partial", fuzzy_similarity + + return "none", similarity + + def _is_exact_match(self, text1: str, text2: str) -> bool: + # Normalize text (remove excess whitespace, unify line breaks) + normalized_text1 = self._normalize_text_for_match(text1) + normalized_text2 = self._normalize_text_for_match(text2) + + # 1. Complete match + if normalized_text1 == normalized_text2: + return True + + # 2. Direct containment match + if normalized_text1 in normalized_text2 or normalized_text2 in normalized_text1: + return True + + # 3. More lenient matching: compare after removing all spaces + text1_no_space = re.sub(r"\s+", "", normalized_text1) + text2_no_space = re.sub(r"\s+", "", normalized_text2) + + if text1_no_space in text2_no_space or text2_no_space in text1_no_space: + return True + + # 4. Keyword matching: extract key parts for matching + shorter, longer = ( + (normalized_text1, normalized_text2) + if len(normalized_text1) < len(normalized_text2) + else (normalized_text2, normalized_text1) + ) + + # Break down into keywords for matching + shorter_keywords = self._extract_matching_keywords(shorter) + longer_keywords = self._extract_matching_keywords(longer) + + # If all keywords from shorter text are found in longer text, consider it a match + if shorter_keywords and all(keyword in longer_keywords for keyword in shorter_keywords): + return True + + # 5. For shorter texts, check if it's a subset of longer text (lower threshold) + if len(shorter) < len(longer) * 0.5 and shorter in longer: + return True + + return False + + def _normalize_text_for_match(self, text: str) -> str: + # Remove excess whitespace and line breaks + normalized = re.sub(r"\s+", " ", text.strip()) + + # Remove special Unicode characters like zero-width spaces + normalized = re.sub(r"[\u200b\u200c\u200d\ufeff]", "", normalized) + + return normalized + + def _extract_matching_keywords(self, text: str) -> set: + keywords = set() + + # 1. 
Extract number identifiers (like 14.14) + number_patterns = re.findall(r"\d+\.?\d*", text) + keywords.update(number_patterns) + + # 2. Extract English words + english_words = re.findall(r"[a-zA-Z]+", text.lower()) + keywords.update([word for word in english_words if len(word) > 2]) + + # 3. Extract Chinese words (simple segmentation) + chinese_chars = re.findall(r"[\u4e00-\u9fff]+", text) + for segment in chinese_chars: + if len(segment) >= 2: + # Add entire Chinese segment + keywords.add(segment) + # Also add longer sub-segments + for i in range(len(segment)): + for length in [4, 3, 2]: + if i + length <= len(segment): + keywords.add(segment[i : i + length]) + + # 4. Extract special symbol-separated terms + special_terms = re.findall(r"[a-zA-Z]+/[a-zA-Z]+", text.lower()) + keywords.update(special_terms) + + return keywords + + def _calculate_similarity(self, text1: str, text2: str) -> float: + return SequenceMatcher(None, text1, text2).ratio() + + def _fuzzy_match(self, text1: str, text2: str) -> float: + # Tokenize and calculate overlap + words1 = set(self._tokenize_mixed_language(text1.lower())) + words2 = set(self._tokenize_mixed_language(text2.lower())) + + if not words1 or not words2: + return 0.0 + + intersection = len(words1.intersection(words2)) + union = len(words1.union(words2)) + + # Jaccard similarity + jaccard_sim = intersection / union if union > 0 else 0.0 + + # Consider vocabulary coverage + coverage1 = intersection / len(words1) if words1 else 0.0 + coverage2 = intersection / len(words2) if words2 else 0.0 + + # Combined score: Jaccard similarity + weighted maximum coverage + combined_score = 0.6 * jaccard_sim + 0.4 * max(coverage1, coverage2) + + return combined_score + + def _tokenize_mixed_language(self, text: str) -> List[str]: + tokens = [] + + # 1. Extract English words (including numbers and hyphens) + english_pattern = r"[a-zA-Z]+(?:[-_][a-zA-Z0-9]+)*" + english_words = re.findall(english_pattern, text) + tokens.extend([word.lower() for word in english_words if len(word) > 1]) + + # 2. Extract numbers (if meaningful) + number_pattern = r"\d+\.?\d*" + numbers = re.findall(number_pattern, text) + tokens.extend([num for num in numbers if len(num) > 0]) + + # 3. Simple character-level tokenization for Chinese (can be improved with tools like jieba) + chinese_pattern = r"[\u4e00-\u9fff]+" + chinese_segments = re.findall(chinese_pattern, text) + for segment in chinese_segments: + # For Chinese, can split by characters or use advanced tokenization tools + if len(segment) > 1: + # Tokenize by 2-3 character combinations + for i in range(len(segment)): + for length in [3, 2, 1]: # Prefer 3-character words, then 2-character words + if i + length <= len(segment): + token = segment[i : i + length] + tokens.append(token) + else: + tokens.append(segment) + + # 4. 
Extract special technical terms (containing combinations of letters and numbers)
+        technical_pattern = r"[a-zA-Z]*\d+[a-zA-Z]*|\d+[a-zA-Z]+[0-9]*"
+        technical_terms = re.findall(technical_pattern, text)
+        tokens.extend([term.lower() for term in technical_terms if len(term) > 1])
+
+        # Deduplicate while preserving token order
+        unique_tokens = []
+        seen = set()
+        for token in tokens:
+            if token not in seen and len(token) > 0:
+                unique_tokens.append(token)
+                seen.add(token)
+
+        return unique_tokens
+
+    def _deduplicate_chunks(self, chunks: List[TextNode]) -> List[TextNode]:
+        seen_contents = set()
+        unique_chunks = []
+
+        for chunk in chunks:
+            # Deduplicate on stripped text content only; nodes with identical text are
+            # dropped even when their file or page metadata differs
+            content_signature = chunk.text.strip()
+            if content_signature not in seen_contents:
+                seen_contents.add(content_signature)
+                unique_chunks.append(chunk)
+            else:
+                logger.warning(
+                    f"[Matcher] Duplicate chunk detected and removed: node_id={chunk.node_id}, "
+                    f"file={chunk.metadata.get('file_name', 'unknown')}, "
+                    f"page={chunk.metadata.get('page_label', 'unknown')}, "
+                    f"start_char_ids={chunk.metadata.get('start_char_ids', 'unknown')}, "
+                    f"end_char_ids={chunk.metadata.get('end_char_ids', 'unknown')}"
+                )
+
+        logger.info(f"[Matcher] Deduplication: {len(chunks)} -> {len(unique_chunks)} chunks")
+        return unique_chunks
+
+    def _tokenize(self, text: str) -> List[str]:
+        return self._tokenize_mixed_language(text)
+
+    def update_settings(
+        self,
+        similarity_threshold: Optional[float] = None,
+        enable_fuzzy: Optional[bool] = None,
+        confidence_topn: Optional[int] = None,
+    ):
+        if similarity_threshold is not None:
+            if 0.0 <= similarity_threshold <= 1.0:
+                self.similarity_threshold = similarity_threshold
+            else:
+                raise ValueError("[Matcher] Similarity threshold must be between 0.0 and 1.0")
+        if enable_fuzzy is not None:
+            self.enable_fuzzy = enable_fuzzy
+        if confidence_topn is not None:
+            if confidence_topn > 0:
+                self.confidence_topn = confidence_topn
+            else:
+                raise ValueError("[Matcher] confidence_topn must be a positive integer")
+
+    @staticmethod
+    def _extract_best_match_segment(node_text: str, gt_text: str) -> Tuple[Optional[str], Optional[float]]:
+        if not node_text or not gt_text:
+            return None, None
+        target_len = len(gt_text)
+        if target_len == 0 or len(node_text) < target_len:
+            return None, None
+
+        best_score = -1.0
+        best_segment = None
+
+        # Slide a window of the GT text's length across the node text and keep the
+        # highest-scoring window
+        for start in range(0, len(node_text) - target_len + 1):
+            window = node_text[start : start + target_len]
+            score = SequenceMatcher(None, window, gt_text).ratio()
+            if score > best_score:
+                best_score = score
+                best_segment = window
+            if score >= 1.0:
+                break
+
+        if best_segment is None:
+            return None, None
+        return best_segment, best_score
+
+
+default_matcher = Matcher(similarity_threshold=0.8, enable_fuzzy=False, confidence_topn=5)
diff --git a/evals/evaluation/rag_pilot/components/annotation/schemas.py b/evals/evaluation/rag_pilot/components/annotation/schemas.py
new file mode 100644
index 00000000..ae0202b6
--- /dev/null
+++ b/evals/evaluation/rag_pilot/components/annotation/schemas.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any, Dict, List, Optional
+
+from llama_index.core.schema import TextNode
+from pydantic import BaseModel, Field
+
+
+class SuggestionItem(BaseModel):
+    node_id: Optional[str] = None
+    node_page_label: Optional[str] = None
+    node_context: Optional[str] = None
+    confidence_score:
Optional[float] = None + best_match_score: Optional[float] = None + best_match_context: Optional[str] = None + + +class GTMatchResult(BaseModel): + context_id: int + context_text: str + matched_chunk: Optional[TextNode] = None + suggestion_items: Optional[List[SuggestionItem]] = None + + +class QueryGTMatchResults(BaseModel): + query_id: int + query: str + context_map: Dict[int, GTMatchResult] = Field(default_factory=dict) + + +class AnnotationRequest(BaseModel): + query_id: int + query: str + context_id: int + gt_file_name: str + gt_text_content: str + gt_section: Optional[str] = None + gt_pages: Optional[List[str]] = None + gt_metadata: Optional[Dict[str, Any]] = None + + # Matching parameters + similarity_threshold: float = Field(default=0.8) + enable_fuzzy: bool = Field(default=False) + confidence_topn: int = Field(default=5) + + +class AnnotationResponse(BaseModel): + success: bool + message: str + suggestion_items: Optional[List[SuggestionItem]] = None diff --git a/evals/evaluation/rag_pilot/components/connect_utils.py b/evals/evaluation/rag_pilot/components/connect_utils.py deleted file mode 100644 index 14530ec3..00000000 --- a/evals/evaluation/rag_pilot/components/connect_utils.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os - -import requests -from components.pilot.base import convert_dict_to_pipeline -from components.pilot.ecrag.api_schema import PipelineCreateIn, RagOut - -ECRAG_SERVICE_HOST_IP = os.getenv("ECRAG_SERVICE_HOST_IP", "127.0.0.1") -ECRAG_SERVICE_PORT = int(os.getenv("ECRAG_SERVICE_PORT", 16010)) -server_addr = f"http://{ECRAG_SERVICE_HOST_IP}:{ECRAG_SERVICE_PORT}" - - -def get_active_pipeline() -> PipelineCreateIn: - path = "/v1/settings/pipelines" - res = requests.get(f"{server_addr}{path}", proxies={"http": None}) - if res.status_code == 200: - for pl in res.json(): - if pl["status"]["active"]: - return convert_dict_to_pipeline(pl) - return None - - -def load_prompt(prompt_text): - path = "/v1/chatqna/prompt" - request_data = {"prompt": prompt_text} - res = requests.post(f"{server_addr}{path}", json=request_data, proxies={"http": None}) - - if res.status_code == 200: - print("Successfully set prompt") - return True - else: - error_detail = res.text if hasattr(res, "text") else "Unknown error" - print(f"Failed to set prompt: {error_detail}") - return False - - -def create_pipeline(pipeline_conf): - path = "/v1/settings/pipelines" - return requests.post(f"{server_addr}{path}", json=pipeline_conf.dict(), proxies={"http": None}) - - -def update_pipeline(pipeline_conf): - path = "/v1/settings/pipelines" - return requests.patch( - f"{server_addr}{path}/{pipeline_conf.name}", json=pipeline_conf.dict(), proxies={"http": None} - ) - - -def update_active_pipeline(pipeline): - pipeline.active = False - res = update_pipeline(pipeline) - if res.status_code == 200: - pipeline.active = True - res = update_pipeline(pipeline) - if res.status_code == 200: - return res.json() - else: - return None - - -def upload_files(file_conf): - path = "/v1/data" - return requests.post(f"{server_addr}{path}", json=file_conf.dict(), proxies={"http": None}) - - -def get_ragqna(query): - new_req = {"messages": query, "stream": True} - path = "/v1/ragqna" - res = requests.post(f"{server_addr}{path}", json=new_req, proxies={"http": None}) - if res.status_code == 200: - return RagOut(**res.json()) - else: - return None - - -def get_retrieval(query): - new_req = {"messages": query} - path = "/v1/retrieval" - 
res = requests.post(f"{server_addr}{path}", json=new_req, proxies={"http": None}) - if res.status_code == 200: - return RagOut(**res.json()) - else: - return None - - -def reindex_data(): - path = "/v1/data" - res = requests.post(f"{server_addr}{path}/reindex", proxies={"http": None}) - return res.status_code == 200 - - -def get_ecrag_module_map(ecrag_pl): - ecrag_modules = { - # root - "root": (ecrag_pl, ""), - # node_parser - "node_parser": (ecrag_pl, "node_parser"), - "simple": (ecrag_pl, "node_parser"), - "hierarchical": (ecrag_pl, "node_parser"), - "sentencewindow": (ecrag_pl, "node_parser"), - # indexer - "indexer": (ecrag_pl, "indexer"), - "vector": (ecrag_pl, "indexer"), - "faiss_vector": (ecrag_pl, "indexer"), - # retriever - "retriever": (ecrag_pl, "retriever"), - "vectorsimilarity": (ecrag_pl, "retriever"), - "auto_merge": (ecrag_pl, "retriever"), - "bm25": (ecrag_pl, "retriever"), - # postprocessor - "postprocessor": (ecrag_pl, "postprocessor[0]"), - "reranker": (ecrag_pl, "postprocessor[0]"), - "metadata_replace": (ecrag_pl, "postprocessor[0]"), - # generator - "generator": (ecrag_pl, "generator"), - } - return ecrag_modules - - -COMP_TYPE_MAP = { - "node_parser": "parser_type", - "indexer": "indexer_type", - "retriever": "retriever_type", - "postprocessor": "processor_type", - "generator": "inference_type", -} - - -def load_pipeline_from_json(file_path): - try: - with open(file_path, "r", encoding="utf-8") as file: - data = json.load(file) - return convert_dict_to_pipeline(data) - except FileNotFoundError: - print(f"The file '{file_path}' was not found.") - except json.JSONDecodeError: - print(f"Error decoding JSON in the file '{file_path}'.") - except Exception as e: - print(f"An unexpected error occurred: {e}") - - return None diff --git a/evals/evaluation/rag_pilot/components/pilot/__init__.py b/evals/evaluation/rag_pilot/components/pilot/__init__.py new file mode 100644 index 00000000..4057dc01 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/pilot/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/evals/evaluation/rag_pilot/components/pilot/base.py b/evals/evaluation/rag_pilot/components/pilot/base.py index 9ecebbd7..751a7803 100644 --- a/evals/evaluation/rag_pilot/components/pilot/base.py +++ b/evals/evaluation/rag_pilot/components/pilot/base.py @@ -1,214 +1,172 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import copy -import csv +from __future__ import annotations + import hashlib import json import re -import uuid +from copy import deepcopy from difflib import SequenceMatcher from enum import Enum -from pathlib import Path -from typing import Dict, List, Optional, Union - -import numpy as np -from components.pilot.ecrag.api_schema import ( - GeneratorIn, - IndexerIn, - ModelIn, - NodeParserIn, - PipelineCreateIn, - PostProcessorIn, - RetrieverIn, -) -from pydantic import BaseModel, Field, model_serializer - - -class Metrics(str, Enum): - RETRIEVAL_RECALL = "retrieval_recall_rate" - POSTPROCESSING_RECALL = "postprocessing_recall_rate" - ANSWER_RELEVANCY = "answer_relevancy" - - -def convert_dict_to_pipeline(pl: dict) -> PipelineCreateIn: - def initialize_component(cls, data, extra=None, key_map=None, nested_fields=None): - if not data: - return None +from typing import Any, Callable, Dict, List, Optional, Union - extra = extra or {} - key_map = key_map or {} - nested_fields = nested_fields or {} - - processed_data = {} - for k, v in data.items(): - 
mapped_key = key_map.get(k, k) - if mapped_key in nested_fields: - processed_data[mapped_key] = initialize_component(nested_fields[mapped_key], v) - else: - processed_data[mapped_key] = v - if cls == ModelIn: - processed_data["model_type"] = data.get("type", processed_data.get("model_type")) - - processed_data.update(extra) - return cls(**processed_data) - - return PipelineCreateIn( - idx=pl.get("idx"), - name=pl.get("name"), - node_parser=initialize_component(NodeParserIn, pl.get("node_parser"), key_map={"idx": "idx"}), - indexer=initialize_component( - IndexerIn, - pl.get("indexer"), - key_map={"model": "embedding_model", "idx": "idx"}, - nested_fields={"embedding_model": ModelIn}, - ), - retriever=initialize_component(RetrieverIn, pl.get("retriever"), key_map={"idx": "idx"}), - postprocessor=[ - initialize_component( - PostProcessorIn, - pp, - extra={"processor_type": pp.get("processor_type")}, - key_map={"model": "reranker_model", "idx": "idx"}, - nested_fields={"reranker_model": ModelIn}, - ) - for pp in pl.get("postprocessor", []) - ], - generator=initialize_component( - GeneratorIn, pl.get("generator"), key_map={"idx": "idx"}, nested_fields={"model": ModelIn} - ), - active=pl.get("status", {}).get("active", False), - ) +import yaml +from pydantic import BaseModel, Field, field_validator -def generate_json_id(config, length=8) -> int: - if "active" in config: - del config["active"] - if "name" in config: - del config["name"] - config_str = json.dumps(config, sort_keys=True) - unique_id = hashlib.sha256(config_str.encode()).hexdigest() - return int(unique_id[:length], 16) +def dynamically_find_function(key: str, target_dict: Dict) -> Callable: + if key in target_dict: + instance, attr_expression = target_dict[key] + if "[" in attr_expression and "]" in attr_expression: + attr_name, index = attr_expression[:-1].split("[") + index = int(index) + func = getattr(instance, attr_name) + if isinstance(func, list) and 0 <= index < len(func): + func = func[index] + else: + raise ValueError(f"Attribute '{attr_name}' is not a list or index {index} is out of bounds") + elif attr_expression == "": + func = instance + else: + func = getattr(instance, attr_expression) + return func + else: + print(f"Input module or node '{key}' is not supported.") -class RAGPipeline(BaseModel): - pl: PipelineCreateIn - id: int = Field(default=0) - _backup: Dict = {} +def get_support_modules(type_name: str, module_map: Dict[str, Callable]) -> Optional[Callable]: + support_modules = module_map + return dynamically_find_function(type_name, support_modules) - def __init__(self, pl): - super().__init__(pl=pl) - self._replace_model_with_id() - # self.id = generate_json_id(self.pl.dict()) - self.id = uuid.uuid4() - def _replace_model_with_id(self): - self._backup = {} +class ModuleBase(BaseModel): + type: str + params: Dict[str, Any] = Field(default_factory=dict) + func: Optional[Callable] = None + is_active: bool = False - def extract_model_id(model): - if model: - if model.model_type not in self._backup: - self._backup[model.model_type] = [] - self._backup[model.model_type].append(model) - return model.model_id + @classmethod + def from_dict(cls, component_dict: Dict) -> "ModuleBase": + _component_dict = deepcopy(component_dict) + type_ = _component_dict.pop("type") + params = _component_dict + return cls(type=type_, params=params) + + def update_func(self, module_map: Dict[str, Callable]): + self.func = get_support_modules(self.type, module_map) + if self.func is None: + print(f"{self.__class__.__name__} type 
{self.type} is not supported.") + + def get_params(self, attr: str): + return self.params.get(attr) + + def get_status(self) -> bool: + return self.is_active + + def get_value(self, attr: str): + if self.func is None: + print(f"{self.__class__.__name__} type {self.type} is not supported.") return None + return getattr(self.func, attr, None) - if self.pl.indexer and self.pl.indexer.embedding_model: - self.pl.indexer.embedding_model = extract_model_id(self.pl.indexer.embedding_model) - - if self.pl.postprocessor: - for proc in self.pl.postprocessor: - if proc.reranker_model: - proc.reranker_model = extract_model_id(proc.reranker_model) - - if self.pl.generator and self.pl.generator.model: - self.pl.generator.model = extract_model_id(self.pl.generator.model) - - def _restore_model_instances(self): - if not self._backup: - self._backup = {} + def set_value(self, attr: str, value: Any): + if self.func is None: + print(f"{self.__class__.__name__} type {self.type} is not supported.") + else: + setattr(self.func, attr, value) - def restore_model(model_id, model_type, is_generator=False): - if model_type in self._backup: - for existing_model in self._backup[model_type]: - if existing_model.model_id == model_id: - return existing_model - weight = self._backup[model_type][0].weight - device = self._backup[model_type][0].device - else: - weight = "INT4" if is_generator else "" - device = "auto" +class Attribute(ModuleBase): + @classmethod + def from_dict(cls, attr_dict: Dict) -> "Attribute": + _attr_dict = deepcopy(attr_dict) + type_ = _attr_dict.pop("type") + params = _attr_dict.pop("params", {}) + return cls(type=type_, params=params) - model_path = f"./models/{model_id}" - if is_generator: - model_path += f"/{weight}_compressed_weights" + def to_dict(self) -> Dict[str, Any]: + return {"type": self.type, "params": self.params} - return ModelIn( - model_type=model_type, model_id=model_id, model_path=model_path, weight=weight, device=device - ) - if self.pl.indexer and isinstance(self.pl.indexer.embedding_model, str): - self.pl.indexer.embedding_model = restore_model(self.pl.indexer.embedding_model, "embedding") +class Module(ModuleBase): + attributes: List[Attribute] = Field(default_factory=list) - if self.pl.postprocessor: - for proc in self.pl.postprocessor: - if isinstance(proc.reranker_model, str): - proc.reranker_model = restore_model(proc.reranker_model, "reranker") + @field_validator("attributes", mode="before") + @classmethod + def validate_attributes(cls, v): + if v is None: + return [] + if not isinstance(v, list): + raise TypeError("attributes must be a list") + return [a if isinstance(a, Attribute) else Attribute.model_validate(a) for a in v] - if self.pl.generator and isinstance(self.pl.generator.model, str): - self.pl.generator.model = restore_model(self.pl.generator.model, "llm", is_generator=True) + @classmethod + def from_dict(cls, module_dict: Dict) -> "Module": + _module_dict = deepcopy(module_dict) + type_ = _module_dict.pop("type") + params = _module_dict.pop("params", {}) + attributes_list = _module_dict.pop("attributes", []) + attributes = [] + if attributes_list is not None: + attributes = [Attribute.from_dict(value) for value in attributes_list] + return cls(type=type_, params=params, attributes=attributes) - self._backup = None + def to_dict(self) -> Dict[str, Any]: + return {"type": self.type, "params": self.params, "attributes": [a.to_dict() for a in self.attributes]} - def get_prompt(self) -> Optional[str]: - generator = self.pl.generator - if not generator: - return None 
- if generator.prompt_content: - return generator.prompt_content - # if generator.prompt_path: - # try: - # with open(generator.prompt_path, 'r', encoding='utf-8') as f: - # return f.read() - # except FileNotFoundError: - # raise FileNotFoundError(f"Prompt file not found at path: {generator.prompt_path}") - # except Exception as e: - # raise RuntimeError(f"Error reading prompt from {generator.prompt_path}: {e}") - return None - def export_pipeline(self): - self._restore_model_instances() - exported_pl = copy.deepcopy(self.pl) - self._replace_model_with_id() - return exported_pl +class Node(ModuleBase): + modules: List[Module] = Field(default_factory=list) - @model_serializer(mode="plain") - def ser_model(self): - return {"id": self.id, **self.pl.model_dump()} + @field_validator("modules", mode="before") + @classmethod + def validate_modules(cls, v): + if v is None: + return [] + if not isinstance(v, list): + raise TypeError("modules must be a list of Module") + return [m if isinstance(m, Module) else Module.model_validate(m) for m in v] - def copy(self): - return copy.deepcopy(self) + @classmethod + def from_dict(cls, node_dict: Dict) -> "Node": + type_ = node_dict.get("type") + params = node_dict.get("params", {}) + modules_list = node_dict.get("modules", []) + modules = [] + if modules_list is not None: + modules = [Module.from_dict(m) for m in modules_list] + return cls(type=type_, params=params, modules=modules) - def get_id(self): - return self.id + @classmethod + def from_yaml(cls, yaml_path: str) -> "Node": + with open(yaml_path, "r") as f: + node_dict = yaml.safe_load(f) + return cls.from_dict(node_dict) + + def get_params(self, attr: str): + if attr in self.params: + return self.params[attr] + elif attr.endswith("type"): + return [m.type for m in self.modules] + return None - def regenerate_id(self): - # self.id = generate_json_id(self.pl.dict()) - self.id = uuid.uuid4() + def to_dict(self) -> Dict[str, Any]: + return {"type": self.type, "params": self.params, "modules": [m.to_dict() for m in self.modules]} - def activate_pl(self): - self.pl.active = True + def to_yaml(self, yaml_path: str) -> None: + with open(yaml_path, "w") as f: + yaml.dump(self.to_dict(), f, sort_keys=False) - def deactivate_pl(self): - self.pl.active = False - def save_to_json(self, save_path="pipeline.json"): - if self.pl: - pipeline_dict = self.export_pipeline().dict() - with open(save_path, "w") as json_file: - json.dump(pipeline_dict, json_file, indent=4) - # print(f'RAG pipeline is successfully exported to "{save_path}"') +def generate_json_id(config, length=8) -> int: + if "active" in config: + del config["active"] + if "name" in config: + del config["name"] + config_str = json.dumps(config, sort_keys=True) + unique_id = hashlib.sha256(config_str.encode()).hexdigest() + return int(unique_id[:length], 16) class ContextType(str, Enum): @@ -217,13 +175,30 @@ class ContextType(str, Enum): POSTPROCESSING = "postprocessing" +class GTType(str, Enum): + TRADITIONAL = "traditional" + ANNOTATION = "annotation" + + class ContextItem(BaseModel): context_idx: Optional[int] = None + node_id: Optional[str] = None file_name: Optional[str] = None text: str = "" metadata: Optional[Dict[str, Union[float, int, list]]] = {} +class ContextGT(BaseModel): + context_idx: Optional[int] = None + file_name: Optional[str] = None + node_id: Optional[str] = None # Changed from int to str to match NodeInfo + node_text: str = "" + text: str = "" + metadata: Optional[Dict[str, Union[float, int, list]]] = {} + page_label: Optional[str] 
= None + gt_type: GTType = GTType.TRADITIONAL + + def normalize_text(text): """Removes whitespace and English/Chinese punctuation from text for fair comparison.""" return re.sub(r"[ \u3000\n\t,。!?;:“”‘’\"',.;!?()\[\]{}<>《》|]+", "", text) @@ -255,263 +230,3 @@ def fuzzy_contains(needle, haystack, threshold): if score >= threshold: return True return False - - -class RAGResult(BaseModel): - metadata: Optional[Dict[str, Union[float, int, list]]] = {} - query_id: Optional[int] = None - query: str - ground_truth: Optional[str] = None - response: Optional[str] = None - - gt_contexts: Optional[List[ContextItem]] = None - retrieval_contexts: Optional[List[ContextItem]] = None - postprocessing_contexts: Optional[List[ContextItem]] = None - - finished: bool = False - - def __init__(self, **data): - super().__init__(**data) - - def __post_init__(self): - for context_type in ContextType: - self.init_context_idx(context_type) - - def copy(self): - return copy.deepcopy(self) - - def update_metrics(self, metrics: Dict[str, Union[float, int]]): - if not metrics: - return - if self.metadata is None: - self.metadata = {} - for key, value in metrics.items(): - if isinstance(value, (float, int)): - self.metadata[key] = value - - def init_context_idx(self, context_type): - context_list_name = f"{context_type.value}_contexts" - context_list = getattr(self, context_list_name, None) - if context_list is not None: - for idx, context in enumerate(context_list): - context.context_idx = idx - - def add_context(self, context_type: ContextType, context: ContextItem): - context_list_name = f"{context_type.value}_contexts" - context_list = getattr(self, context_list_name, None) - if context_list is None: - context_list = [] - setattr(self, context_list_name, context_list) - context.context_idx = len(context_list) - context_list.append(context) - - def update_metadata_hits(self, threshold=1): - if self.gt_contexts: - for context_type in [ContextType.RETRIEVAL, ContextType.POSTPROCESSING]: - context_list_name = f"{context_type.value}_contexts" - context_list = getattr(self, context_list_name, None) - if context_list is None: - continue - for context in context_list: - self.context_matches_gt(self.gt_contexts, context, context_type, threshold) - - for context_type in [ContextType.RETRIEVAL, ContextType.POSTPROCESSING]: - count = 0 - for gt_context in self.gt_contexts: - if gt_context.metadata.get(context_type, None): - count += 1 - self.metadata[context_type] = count - - def set_response(self, response: str): - self.response = response - # if self.ground_truth: - # self.metadata = self.cal_metric(self.query, self.ground_truth, response) - - @classmethod - def check_parts_in_text(cls, gt_context, text, threshold): - if threshold < 1: - return fuzzy_contains(gt_context, text, threshold) - else: - parts = gt_context.split() - return all(part in text for part in parts) - - @classmethod - def context_matches_gt( - cls, gt_contexts: List[ContextItem], candidate_context: ContextItem, context_type: ContextType, threshold - ): - for gt in gt_contexts: - if ( - candidate_context.file_name and gt.file_name and gt.file_name in candidate_context.file_name - ) or gt.file_name == "": - if candidate_context.text in gt.text or cls.check_parts_in_text( - gt.text, candidate_context.text, threshold - ): - gt.metadata = gt.metadata or {} - retrieved_file_name_list = gt.metadata.get(context_type, []) - retrieved_file_name_list.append(candidate_context.context_idx) - gt.metadata[context_type] = retrieved_file_name_list - - 
candidate_context.metadata = candidate_context.metadata or {} - candidate_context.metadata["hit"] = gt.context_idx - return True - return False - - @classmethod - def cal_metric(cls, query: str, ground_truth: str, response: str) -> Dict[str, float]: - # Placeholder: Use actual metric calculations as needed. - accuracy = float(ground_truth in response) - return {"accuracy": accuracy} - - -class RAGResults(BaseModel): - metadata: Optional[Dict[str, Union[float, int]]] = None - results: List[RAGResult] = [] - finished: bool = False - - def add_result(self, result): - # if result.query_id has appear in self.results, then update the result,else append the results - updated_existing = False - if result.query_id is not None: - for idx, r in enumerate(self.results): - if r.query_id == result.query_id: - self.results[idx] = result - updated_existing = True - break - if not updated_existing: - self.results.append(result) - self.cal_metadata() - - def cal_recall(self): - recall_rates = {} - for context_type in [ContextType.RETRIEVAL, ContextType.POSTPROCESSING]: - hit_count = 0 - gt_count = 0 - for result in self.results: - gt_count += len(result.gt_contexts) if result.gt_contexts else 0 - hit_count += result.metadata.get(context_type, 0) if result.metadata else 0 - - recall_rate = hit_count / gt_count if gt_count > 0 else np.nan - if context_type is ContextType.RETRIEVAL: - recall_rates[Metrics.RETRIEVAL_RECALL.value] = recall_rate - elif context_type is ContextType.POSTPROCESSING: - recall_rates[Metrics.POSTPROCESSING_RECALL.value] = recall_rate - self.metadata = self.metadata or {} - self.metadata.update(recall_rates) - - def cal_metadata(self): - self.cal_recall() - - rate_sums = {} - rate_counts = {} - - for result in self.results: - if not result.metadata: - continue - for key, value in result.metadata.items(): - if isinstance(value, (int, float)) and key in {metric.value for metric in Metrics}: - if key not in rate_sums: - rate_sums[key] = 0.0 - rate_counts[key] = 0 - rate_sums[key] += value - rate_counts[key] += 1 - - self.metadata = self.metadata or {} - for key, total in rate_sums.items(): - avg = total / rate_counts[key] if rate_counts[key] > 0 else np.nan - self.metadata[f"{key}"] = avg - - def get_metrics(self): - return self.metadata or {} - - def get_metric(self, metric: Metrics, default=float("-inf")): - return (self.metadata or {}).get(metric.value, default) - - def update_result_metrics(self, query_id: int, metrics: Dict[str, Union[float, int]]): - updated = False - for result in self.results: - if result.query_id == query_id: - result.update_metrics(metrics) - updated = True - break - - if updated: - self.cal_metadata() - return updated - - def check_metadata(self): - if not self.metadata: - print("No metadata found.") - return - for key, value in self.metadata.items(): - print(f"{key}: {value}") - - def save_to_json(self, file_path: str): - cleaned_metadata = {str(k): v for k, v in (self.metadata or {}).items()} - rag_results_dict = { - **self.dict(exclude={"metadata"}), - "metadata": cleaned_metadata, - } - - with open(file_path, "w", encoding="utf-8") as f: - json.dump(rag_results_dict, f, ensure_ascii=False, indent=4) - - def save_to_csv(self, output_dir: str): - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - # --- CSV 1: Contexts --- - contexts_csv = output_dir / "rag_contexts.csv" - with contexts_csv.open("w", newline="", encoding="utf-8-sig") as f: - fieldnames = ["query_id", "context_type", "context_idx", "file_name", "text"] - 
metadata_keys = set() - for result in self.results: - for context_type in ContextType: - context_list = getattr(result, f"{context_type.value}_contexts") or [] - for ctx in context_list: - if ctx.metadata: - metadata_keys.update(ctx.metadata.keys()) - fieldnames.extend(metadata_keys) - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - - for result in self.results: - for context_type in ContextType: - context_list = getattr(result, f"{context_type.value}_contexts") or [] - for ctx in context_list: - row = { - "query_id": result.query_id, - "context_type": f"{context_type.value}_contexts", - "context_idx": ctx.context_idx, - "file_name": ctx.file_name, - "text": ctx.text, - } - if ctx.metadata: - for key in metadata_keys: - row[key] = ctx.metadata.get(key, "") - writer.writerow(row) - - # --- CSV 2: Summary --- - summary_csv = output_dir / "rag_summary.csv" - with summary_csv.open("w", newline="", encoding="utf-8-sig") as f: - fieldnames = ["query_id", "query", "ground_truth", "response", "gt_count"] - metadata_keys = set() - for result in self.results: - if result.metadata: - metadata_keys.update(result.metadata.keys()) - fieldnames.extend(metadata_keys) - - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - - for result in self.results: - row = { - "query_id": result.query_id, - "query": result.query, - "ground_truth": result.ground_truth, - "response": result.response, - "gt_count": len(result.gt_contexts), - } - if result.metadata: - for key in metadata_keys: - row[key] = result.metadata.get(key, "") - writer.writerow(row) diff --git a/evals/evaluation/rag_pilot/components/pilot/connector.py b/evals/evaluation/rag_pilot/components/pilot/connector.py deleted file mode 100644 index c2094746..00000000 --- a/evals/evaluation/rag_pilot/components/pilot/connector.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -import requests -from components.pilot.base import convert_dict_to_pipeline -from components.pilot.ecrag.api_schema import PipelineCreateIn, RagOut - -ECRAG_SERVICE_HOST_IP = os.getenv("ECRAG_SERVICE_HOST_IP", "127.0.0.1") -ECRAG_SERVICE_PORT = int(os.getenv("ECRAG_SERVICE_PORT", 16010)) -server_addr = f"http://{ECRAG_SERVICE_HOST_IP}:{ECRAG_SERVICE_PORT}" - - -def get_active_pipeline() -> PipelineCreateIn: - path = "/v1/settings/pipelines" - res = requests.get(f"{server_addr}{path}", proxies={"http": None}) - if res.status_code == 200: - for pl in res.json(): - if pl["status"]["active"]: - return convert_dict_to_pipeline(pl) - return None - - -def update_pipeline(pipeline_conf): - path = "/v1/settings/pipelines" - return requests.patch( - f"{server_addr}{path}/{pipeline_conf.name}", json=pipeline_conf.dict(), proxies={"http": None} - ) - - -def get_ragqna(query): - new_req = {"messages": query} - path = "/v1/ragqna" - res = requests.post(f"{server_addr}{path}", json=new_req, proxies={"http": None}) - if res.status_code == 200: - return RagOut(**res.json()) - else: - return None - - -def reindex_data(): - path = "/v1/data" - res = requests.post(f"{server_addr}{path}/reindex", proxies={"http": None}) - return res.status_code == 200 - - -def update_active_pipeline(pipeline): - pipeline.active = False - res = update_pipeline(pipeline) - if res.status_code == 200: - pipeline.active = True - res = update_pipeline(pipeline) - return res.status_code == 200 - - -def get_ecrag_module_map(ecrag_pl): - ecrag_modules = { - # root - "root": (ecrag_pl, ""), - # node_parser 
- "node_parser": (ecrag_pl, "node_parser"), - "simple": (ecrag_pl, "node_parser"), - "hierarchical": (ecrag_pl, "node_parser"), - "sentencewindow": (ecrag_pl, "node_parser"), - # indexer - "indexer": (ecrag_pl, "indexer"), - "vector": (ecrag_pl, "indexer"), - "faiss_vector": (ecrag_pl, "indexer"), - # retriever - "retriever": (ecrag_pl, "retriever"), - "vectorsimilarity": (ecrag_pl, "retriever"), - "auto_merge": (ecrag_pl, "retriever"), - "bm25": (ecrag_pl, "retriever"), - # postprocessor - "postprocessor": (ecrag_pl, "postprocessor[0]"), - "reranker": (ecrag_pl, "postprocessor[0]"), - "metadata_replace": (ecrag_pl, "postprocessor[0]"), - # generator - "generator": (ecrag_pl, "generator"), - } - return ecrag_modules - - -COMP_TYPE_MAP = { - "node_parser": "parser_type", - "indexer": "indexer_type", - "retriever": "retriever_type", - "postprocessor": "processor_type", - "generator": "inference_type", -} diff --git a/evals/evaluation/rag_pilot/components/pilot/ecrag/api_schema.py b/evals/evaluation/rag_pilot/components/pilot/ecrag/api_schema.py deleted file mode 100644 index d7ae1c84..00000000 --- a/evals/evaluation/rag_pilot/components/pilot/ecrag/api_schema.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from typing import Any, Optional - -from pydantic import BaseModel - - -class ModelIn(BaseModel): - model_type: Optional[str] = "LLM" - model_id: Optional[str] - model_path: Optional[str] = "./" - weight: Optional[str] = "INT4" - device: Optional[str] = "cpu" - - -class NodeParserIn(BaseModel): - chunk_size: Optional[int] = None - chunk_overlap: Optional[int] = None - chunk_sizes: Optional[list] = None - parser_type: str - window_size: Optional[int] = 3 - - -class IndexerIn(BaseModel): - indexer_type: str - embedding_model: Optional[ModelIn] = None - vector_uri: Optional[str] = None - - -class RetrieverIn(BaseModel): - retriever_type: str - retrieve_topk: Optional[int] = 3 - - -class PostProcessorIn(BaseModel): - processor_type: str - reranker_model: Optional[ModelIn] = None - top_n: Optional[int] = 5 - - -class GeneratorIn(BaseModel): - prompt_path: Optional[str] = None - prompt_content: Optional[str] = None - model: Optional[ModelIn] = None - inference_type: Optional[str] = "local" - vllm_endpoint: Optional[str] = None - - -class PipelineCreateIn(BaseModel): - name: Optional[str] = None - node_parser: Optional[NodeParserIn] = None - indexer: Optional[IndexerIn] = None - retriever: Optional[RetrieverIn] = None - postprocessor: Optional[list[PostProcessorIn]] = None - generator: Optional[GeneratorIn] = None - active: Optional[bool] = False - - -class DataIn(BaseModel): - text: Optional[str] = None - local_path: Optional[str] = None - - -class FilesIn(BaseModel): - local_paths: Optional[list[str]] = None - - -class RagOut(BaseModel): - query: str - contexts: Optional[dict[str, Any]] = None - response: str - - -class PromptIn(BaseModel): - prompt: Optional[str] = None - - -class KnowledgeBaseCreateIn(BaseModel): - name: str - description: Optional[str] = None - active: Optional[bool] = None - - -class MilvusConnectRequest(BaseModel): - vector_uri: str diff --git a/evals/evaluation/rag_pilot/components/pilot/ecrag/base.py b/evals/evaluation/rag_pilot/components/pilot/ecrag/base.py deleted file mode 100644 index db1dc414..00000000 --- a/evals/evaluation/rag_pilot/components/pilot/ecrag/base.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import abc -import 
uuid -from enum import Enum -from typing import Any, Callable, List, Optional - -from pydantic import BaseModel, ConfigDict, Field, model_serializer - - -class CompType(str, Enum): - - DEFAULT = "default" - MODEL = "model" - PIPELINE = "pipeline" - NODEPARSER = "node_parser" - INDEXER = "indexer" - RETRIEVER = "retriever" - POSTPROCESSOR = "postprocessor" - GENERATOR = "generator" - FILE = "file" - - -class ModelType(str, Enum): - - EMBEDDING = "embedding" - RERANKER = "reranker" - LLM = "llm" - VLLM = "vllm" - - -class FileType(str, Enum): - TEXT = "text" - VISUAL = "visual" - AURAL = "aural" - VIRTUAL = "virtual" - OTHER = "other" - - -class NodeParserType(str, Enum): - - SIMPLE = "simple" - HIERARCHY = "hierarchical" - SENTENCEWINDOW = "sentencewindow" - UNSTRUCTURED = "unstructured" - - -class IndexerType(str, Enum): - - FAISS_VECTOR = "faiss_vector" - DEFAULT_VECTOR = "vector" - MILVUS_VECTOR = "milvus_vector" - - -class RetrieverType(str, Enum): - - VECTORSIMILARITY = "vectorsimilarity" - AUTOMERGE = "auto_merge" - BM25 = "bm25" - - -class PostProcessorType(str, Enum): - - RERANKER = "reranker" - METADATAREPLACE = "metadata_replace" - - -class GeneratorType(str, Enum): - - CHATQNA = "chatqna" - - -class InferenceType(str, Enum): - - LOCAL = "local" - VLLM = "vllm" - - -class CallbackType(str, Enum): - - DATAPREP = "dataprep" - RETRIEVE = "retrieve" - PIPELINE = "pipeline" - - -class BaseComponent(BaseModel): - - model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True) - - idx: str = Field(default_factory=lambda: str(uuid.uuid4())) - name: Optional[str] = Field(default="") - comp_type: str = Field(default="") - comp_subtype: Optional[str] = Field(default="") - - @model_serializer - def ser_model(self): - set = { - "idx": self.idx, - "name": self.name, - "comp_type": self.comp_type, - "comp_subtype": self.comp_subtype, - } - return set - - @abc.abstractmethod - def run(self, **kwargs) -> Any: - pass - - -class BaseMgr: - - def __init__(self): - self.components = {} - - def add(self, comp: BaseComponent): - self.components[comp.idx] = comp - - def get(self, idx: str) -> BaseComponent: - if idx in self.components: - return self.components[idx] - else: - return None - - def remove(self, idx): - # remove the reference count - # after reference count == 0, object memory can be freed with Garbage Collector - del self.components[idx] diff --git a/evals/evaluation/rag_pilot/components/pilot/pilot.py b/evals/evaluation/rag_pilot/components/pilot/pilot.py index 082037f2..087eabc8 100644 --- a/evals/evaluation/rag_pilot/components/pilot/pilot.py +++ b/evals/evaluation/rag_pilot/components/pilot/pilot.py @@ -5,86 +5,216 @@ import os import threading from datetime import datetime +from typing import List, Optional, Tuple -from api_schema import RAGStage -from components.connect_utils import get_active_pipeline, get_ragqna, get_retrieval, load_prompt, update_active_pipeline -from components.pilot.base import ContextItem, ContextType, Metrics, RAGPipeline, RAGResults - - -def update_rag_pipeline(rag_pipeline: RAGPipeline): - ecrag_pipeline = rag_pipeline.export_pipeline() - ret = update_active_pipeline(ecrag_pipeline) - prompt = rag_pipeline.get_prompt() - if prompt: - load_prompt(prompt) - # TODO load_prompt() error check - return ret - - -def get_rag_results(results_out: RAGResults, results_in: RAGResults, hit_threshold, is_retrieval=False): - if results_in is None: - return None - - # Update each rag_result in rag_results and add to new instance - for result in results_in.results: - 
query = result.query - ragqna = None - if is_retrieval: - ragqna = get_retrieval(query) - else: - ragqna = get_ragqna(query) - if ragqna is None: - continue - - # Create a new result object to avoid modifying the input - new_result = result.copy() - for key, nodes in ragqna.contexts.items(): - for node in nodes: - node_node = node.get("node", {}) - metadata = node_node.get("metadata", {}) - possible_file_keys = ["file_name", "filename"] - file_name = next( - (metadata[key] for key in possible_file_keys if key in metadata), - None, - ) - text = node_node.get("text", "") - context_item = ContextItem(file_name=file_name, text=text) - if key == "retriever": - new_result.add_context(ContextType.RETRIEVAL, context_item) - else: - new_result.add_context(ContextType.POSTPROCESSING, context_item) - - new_result.update_metadata_hits(hit_threshold) - new_result.set_response(ragqna.response) - new_result.finished = True - results_out.add_result(new_result) - - results_out.finished = True +from api_schema import GroundTruth, GroundTruthContextSuggestion, MatchSettings, PilotSettings, RAGStage +from components.adaptor.adaptor import AdaptorBase +from components.annotation.annotator import annotator +from components.annotation.schemas import AnnotationRequest +from components.pilot.base import ContextGT, ContextItem, ContextType +from components.pilot.pipeline import Pipeline +from components.pilot.result import Metrics, RAGResults +from components.tuner.tunermgr import TunerMgr +from components.utils import load_rag_results_from_gt_match_results class Pilot: - rag_pipeline_dict: dict[int, RAGPipeline] = {} + rag_pipeline_dict: dict[int, Pipeline] = {} rag_results_dict: dict[int, RAGResults] = {} curr_pl_id: int = None - rag_results_sample: RAGResults = None + target_query_gt: RAGResults = None hit_threshold: float - + enable_fuzzy: bool = False + confidence_topn: int = 5 + gt_annotate_infos: List[GroundTruth] = None + use_annotation: bool = False + pilot_settings: PilotSettings = None base_folder: str _run_lock = threading.Lock() _run_done_event = threading.Event() - def __init__(self, rag_results_sample=None, hit_threshold=1): - self.rag_results_sample = rag_results_sample + def __init__(self, target_query_gt=None, hit_threshold=0.8): + self.target_query_gt = target_query_gt self.hit_threshold = hit_threshold timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") self.base_folder = os.path.join(os.getcwd(), f"rag_pilot_{timestamp}") + self.tuner_mgr = TunerMgr() + + def add_adaptor(self, adaptor: AdaptorBase): + self.adaptor = adaptor + + def update_target_query_gt(self, target_query_gt): + self.target_query_gt = target_query_gt + return True + + def clear_target_query_gt(self): + self.target_query_gt = None + + def set_gt_annotate_infos(self, gt_annotate_infos): + self.gt_annotate_infos = gt_annotate_infos + self.use_annotation = True + print(f"[Pilot] GT annotation info set with {len(gt_annotate_infos)} queries") + return True + + def set_pilot_settings(self, settings: PilotSettings): + endpoint = getattr(settings, "target_endpoint", None) + if not endpoint or not isinstance(endpoint, str): + raise ValueError("target_endpoint must be a non-empty string.") + + parts = endpoint.split(":", 1) + host = parts[0].strip() + port = parts[1].strip() if len(parts) > 1 and parts[1].strip() else "16010" - def update_rag_results_sample(self, rag_results_sample): - self.rag_results_sample = rag_results_sample + if not host: + raise ValueError("target_endpoint must include a valid host.") + if not port.isdigit(): + 
raise ValueError("port must be a number.") + + normalized = f"{host}:{port}" + self.adaptor.set_server_addr(f"http://{normalized}") + self.pilot_settings = PilotSettings(target_endpoint=normalized) return True + def get_gt_annotate_infos(self): + return self.gt_annotate_infos + + def clear_gt_annotate_caches(self): + self.gt_annotate_infos = None + annotator.clear_caches() + + def update_gt_annotate_infos(self, gt_annotate_infos): + if self.gt_annotate_infos is None: + self.set_gt_annotate_infos(gt_annotate_infos) + return True + + existing_by_query = {gt.query_id: gt for gt in self.gt_annotate_infos} + for incoming in gt_annotate_infos: + qid = incoming.query_id + if qid not in existing_by_query: + self.gt_annotate_infos.append(incoming) + continue + + existing = existing_by_query[qid] + # Map existing contexts by context_id for O(1) replace + existing_ctx_index = {ctx.context_id: idx for idx, ctx in enumerate(existing.contexts)} + for ctx in incoming.contexts: + if ctx.context_id in existing_ctx_index: + idx = existing_ctx_index[ctx.context_id] + existing.contexts[idx] = ctx + else: + existing.contexts.append(ctx) + if incoming.answer is not None: + existing.answer = incoming.answer + + print("[Pilot] Updated GT annotation infos in pilot.") + return True + + def get_suggested_query_ids(self) -> List[int]: + if not self.gt_annotate_infos: + return [] + suggested_ids = [ + gt.query_id + for gt in self.gt_annotate_infos + if gt.contexts and any(getattr(ctx, "suggestions", None) for ctx in gt.contexts) + ] + return suggested_ids + + def process_annotation_batch( + self, new_gt_annotate_infos: List[GroundTruth], clear_cache: bool = False + ) -> RAGResults: + if clear_cache: + annotator.clear_caches() + + for gt_data in new_gt_annotate_infos: + query_id = gt_data.query_id + query = gt_data.query + contexts = gt_data.contexts + + if not query_id or not query or not contexts: + raise ValueError("Missing required fields in gt_data") + + if not isinstance(contexts, list) or not contexts: + raise ValueError(f"GT contexts must be a non-empty list for query_id {query_id}") + + for context in contexts: + # Validate context fields + if not context.filename or not context.text: + raise ValueError( + f"Missing required field 'filename' or 'text' in GT context for query_id {query_id}" + ) + + # Create annotation request + annotation_request = AnnotationRequest( + query_id=query_id, + query=query, + context_id=context.context_id, + gt_file_name=context.filename, + gt_text_content=context.text, + gt_section=context.section, + gt_pages=context.pages, + gt_metadata=getattr(context, "metadata", None), + similarity_threshold=self.hit_threshold, + enable_fuzzy=self.enable_fuzzy, + confidence_topn=self.confidence_topn, + ) + + # Process annotation + annotation_response = annotator.annotate(annotation_request) + success = annotation_response.success + # Locate corresponding context entry once per annotation + target_ctx_entry = None + for gt_entry in self.gt_annotate_infos: + if gt_entry.query_id == query_id: + for ctx_entry in gt_entry.contexts: + if ctx_entry.context_id == context.context_id: + target_ctx_entry = ctx_entry + break + break + + if success: + # Clear any previous suggestions if annotation succeeded + if target_ctx_entry is not None: + target_ctx_entry.suggestions = [] + else: + suggestions = annotation_response.suggestion_items + if suggestions and target_ctx_entry is not None: + target_ctx_entry.suggestions = [ + GroundTruthContextSuggestion(**s.model_dump(exclude_unset=True)) for s in suggestions 
+ ] + + # Get all matched results and convert to RAG results + all_matched_results = annotator.get_all_match_results() + rag_results = None + if all_matched_results: + gt_match_results_list = list(all_matched_results.values()) + rag_results = load_rag_results_from_gt_match_results(gt_match_results_list) + return rag_results + + def re_annotate_gt_results(self, rag_results: RAGResults): + print("[Pilot] Re-annotate RAG results using stored GT annotation info after parameter changes") + if not self.use_annotation or not self.gt_annotate_infos: + print("[Pilot] No annotation info available, skipping re-annotation") + return rag_results + + try: + print(f"[Pilot] Starting re-annotation for {len(self.gt_annotate_infos)} queries") + + # Use the shared annotation processing method + annotated_rag_results = self.process_annotation_batch(self.gt_annotate_infos, clear_cache=True) + + if annotated_rag_results and annotated_rag_results.results: + print("[Pilot] Re-annotation completed.") + return annotated_rag_results + else: + print("[Pilot] No results from re-annotation, returning original") + return rag_results + + except Exception as e: + print(f"[Pilot] Error during re-annotation: {e}") + return rag_results + def add_rag_pipeline(self, rag_pipeline): id = rag_pipeline.get_id() self.rag_pipeline_dict[id] = rag_pipeline @@ -92,13 +222,13 @@ def add_rag_pipeline(self, rag_pipeline): self.curr_pl_id = id def set_curr_pl_by_id(self, pl_id): + if self.curr_pl_id == pl_id: + return True if pl_id in self.rag_pipeline_dict: + self.curr_pl_id = pl_id curr_rag_pl = self.rag_pipeline_dict[pl_id] - if update_rag_pipeline(curr_rag_pl) is not None: - self.curr_pl_id = pl_id - return True - else: - return False + self.adaptor.apply_pipeline(curr_rag_pl) + return True else: return False @@ -108,7 +238,7 @@ def set_curr_pl(self, rag_pipeline): self.rag_pipeline_dict[id] = rag_pipeline return self.set_curr_pl_by_id(id) - def add_rag_results(self, pl_id): + def get_rag_results(self, pl_id): if pl_id not in self.rag_results_dict: # Create a new instance of RAGResults self.rag_results_dict[pl_id] = RAGResults() @@ -118,11 +248,37 @@ def add_rag_results(self, pl_id): def clear_rag_result_dict(self): self.rag_results_dict = {} - def _execute_pipeline(self, pipeline, is_retrieval=False): - if not self.rag_results_sample: - print("[ERROR] RAG result sample file is not initiated") - return False + # TODO: need to refine + def create_result(self, target, ragqna): + new_result = target.copy() + for key, nodes in ragqna.contexts.items(): + for node in nodes: + node_node = node.get("node", {}) + node_id = node_node.get("id_") + metadata = node_node.get("metadata", {}) + possible_file_keys = ["file_name", "filename", "docnm_kwd"] + file_name = next( + (metadata[key] for key in possible_file_keys if key in metadata), + None, + ) + text = node_node.get("text", "") + # TODO: need to fix + # Support KBadmin node structure + if text == "": + if "text_resource" in node_node: + text = node_node["text_resource"]["text"] + context_item = ContextItem(file_name=file_name, text=text, node_id=node_id) + if key == "retriever": + new_result.add_context(ContextType.RETRIEVAL, context_item) + else: + new_result.add_context(ContextType.POSTPROCESSING, context_item) + new_result.update_metadata_hits(self.hit_threshold) + new_result.set_response(ragqna.response) + new_result.finished = True + return new_result + + def _execute_pipeline(self, pipeline, is_retrieval=False): print("[Pilot] Trying to acquire run lock (non-blocking)...") if not 
self._run_lock.acquire(blocking=False): print("[Pilot] Pipeline is already running. Skipping execution.") @@ -133,9 +289,33 @@ def _execute_pipeline(self, pipeline, is_retrieval=False): print("[Pilot] Acquired run lock.") if self.set_curr_pl(pipeline): - rag_results = self.add_rag_results(self.curr_pl_id) - - get_rag_results(rag_results, self.rag_results_sample, self.hit_threshold, is_retrieval) + print(f"[Pilot] Configuring pipeline id: {pipeline.id}") + rag_results = self.get_rag_results(self.curr_pl_id) + + # Re-annotate if we have stored GT annotation info and this is a parameter change + if self.use_annotation and self.gt_annotate_infos: + print("[Pilot] Re-annotating RAG results after parameter changes...") + annotated_gt_results = self.re_annotate_gt_results(rag_results) + if annotated_gt_results: + # Update pilot.rag_results_sample with newly annotated results + self.update_target_query_gt(annotated_gt_results) + print("[Pilot] Updated rag_results_sample with re-annotated data") + + for target in self.target_query_gt.results: + query = target.query + ragqna = None + # TODO: Generalize the operations + if is_retrieval: + ragqna = self.adaptor.get_retrieval(query) + else: + ragqna = self.adaptor.get_ragqna(query) + if ragqna is None: + continue + + new_result = self.create_result(target, ragqna) + rag_results.add_result(new_result) + + rag_results.finished = True return True return False @@ -145,7 +325,14 @@ def _execute_pipeline(self, pipeline, is_retrieval=False): self._run_done_event.set() def run_pipeline(self, rag_pipeline=None, is_retrieval=False): + if not self.target_query_gt: + print("[ERROR] RAG result sample file is not initiated") + return False + pipeline = rag_pipeline or self.get_curr_pl() + if not pipeline: + print("[ERROR] Pipeline not activated") + return False thread = threading.Thread( target=self._execute_pipeline, args=(pipeline, is_retrieval), @@ -158,13 +345,16 @@ def run_pipeline(self, rag_pipeline=None, is_retrieval=False): return "Pipeline {thread.ident} is running" def run_pipeline_blocked(self, rag_pipeline=None, is_retrieval=False): - if not self.rag_results_sample: - print("[Pilot] Skipping pipeline run — rag_results_sample not set.") + if not self.target_query_gt: + print("[Pilot] Skipping pipeline run — target_query_gt not set.") return False thread_id = threading.get_ident() thread_name = threading.current_thread().name pipeline = rag_pipeline or self.get_curr_pl() + if not pipeline: + print("[ERROR] Pipeline not activated") + return False print(f"[Pilot][{thread_name}:{thread_id}] Waiting for current pipeline run to complete (if any)...") while not self._execute_pipeline(pipeline, is_retrieval): @@ -173,28 +363,29 @@ def run_pipeline_blocked(self, rag_pipeline=None, is_retrieval=False): return True def get_curr_pl(self): - if self.curr_pl_id: - return self.rag_pipeline_dict[self.curr_pl_id] - else: - pl_raw = get_active_pipeline() - if pl_raw: - active_pl = RAGPipeline(pl_raw) - active_pl.regenerate_id() - pilot.add_rag_pipeline(active_pl) - return self.rag_pipeline_dict[self.curr_pl_id] + if not self.curr_pl_id: + active_pl = self.adaptor.get_active_pipeline() + if active_pl: + self.curr_pl_id = active_pl.get_id() + self.add_rag_pipeline(active_pl) else: return None - - def restore_curr_pl(self): - pilot.curr_pl_id = None - pl_raw = get_active_pipeline() - if pl_raw: - active_pl = RAGPipeline(pl_raw) - active_pl.regenerate_id() - pilot.add_rag_pipeline(active_pl) - return self.rag_pipeline_dict[self.curr_pl_id] - else: - return None + if 
self.curr_pl_id not in self.rag_pipeline_dict: + self.add_rag_pipeline(active_pl) + return self.rag_pipeline_dict[self.curr_pl_id] + + def reconcil_curr_pl(self): + active_pl = self.adaptor.get_active_pipeline() + print(active_pl.to_dict()) + self.add_rag_pipeline(active_pl) + self.curr_pl_id = active_pl.get_id() + print(self.curr_pl_id) + return self.rag_pipeline_dict[self.curr_pl_id] + + def get_match_settings(self): + return MatchSettings( + hit_threshold=self.hit_threshold, enable_fuzzy=self.enable_fuzzy, confidence_topn=self.confidence_topn + ) def get_curr_pl_id(self): if self.curr_pl_id: @@ -258,7 +449,14 @@ def change_best_recall_pl(self, stage: RAGStage = None): self.set_curr_pl_by_id(best_pl_id) print(f"Stage {stage}: Pipeline is set to {self.get_curr_pl_id()}") - self.get_curr_results().check_metadata() + # self.get_curr_results().check_metadata() + # Check and update metadata consistency + curr_results = self.get_curr_results() + if curr_results is not None: + curr_results.check_metadata() + else: + print(f"Warning: No current results found for pipeline {self.get_curr_pl_id()}") + return self.get_curr_pl() def save_dicts(self): @@ -327,13 +525,20 @@ def export_config_and_metadata_csv(self, save_path: str): for row in rows: writer.writerow(row) + def get_pipeline_prompt(self, pl_id: int) -> str: + pipeline = self.get_pl(pl_id).to_dict() + if not pipeline: + return "" + generator = pipeline.get("generator", {}) + prompt = "" + if isinstance(generator, dict): + prompt_obj = generator.get("prompt", {}) + if isinstance(prompt_obj, dict): + prompt = prompt_obj.get("content", "") + if prompt: + return prompt + prompt = self.adaptor.get_default_prompt() + return prompt -pilot = Pilot() - -def init_active_pipeline(): - pl_raw = get_active_pipeline() - if pl_raw: - active_pl = RAGPipeline(pl_raw) - active_pl.regenerate_id() - pilot.add_rag_pipeline(active_pl) +pilot = Pilot() diff --git a/evals/evaluation/rag_pilot/components/pilot/pipeline.py b/evals/evaluation/rag_pilot/components/pilot/pipeline.py new file mode 100644 index 00000000..7a906a35 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/pilot/pipeline.py @@ -0,0 +1,68 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import uuid +from abc import ABC +from typing import Any, Dict, List + +import yaml +from components.pilot.base import Node + + +class Pipeline(ABC): + + def __init__(self, ptype: str = "", Nodes: List[Node] = [], uid: uuid.UUID = None): + self.type: str = ptype + self.nodes: List[Node] = Nodes + if uid: + self.id = uid + else: + self.id = uuid.uuid4() + + def get_id(self): + return self.id + + def regenerate_id(self): + self.id = uuid.uuid4() + + def to_dict(self) -> Dict[str, Any]: + """Convert RAGPipeline to dictionary representation.""" + pipeline = {} + pipeline["type"] = self.type + pipeline["id"] = self.id + for n in self.nodes: + pipeline[n.type] = {} + for m in n.modules: + pipeline[n.type][m.type] = {} + for a in m.attributes: + if "value" in a.params: + pipeline[n.type][m.type][a.type] = a.params["value"] + + return pipeline + + +class RAGPipelineTemplate(Pipeline): + def __init__(self, config_file="configs/RAGPipeline.yaml"): + ragnodes = [] + if config_file: + """Load a complete pipeline from YAML file.""" + with open(config_file, "r") as file: + config = yaml.safe_load(file) + + nodes_config = config.get("nodes", []) + if not nodes_config: + raise ValueError("No nodes found in the YAML file") + for n in nodes_config: + ragnodes.append(Node.from_dict(n)) + + 
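+                # Expected YAML shape (mirrors configs/RAGPipeline.yaml added in
+                # this patch): a top-level "nodes" list whose entries carry
+                # "type", optional "params", and a "modules" list of
+                # {type, attributes} dicts.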
else: + # Fallback to default rag nodes + ragnodes = [ + Node(type="node_parser"), + Node(type="indexer"), + Node(type="retriever"), + Node(type="postprocessor"), + Node(type="generator"), + ] + + super().__init__("RAG", ragnodes) diff --git a/evals/evaluation/rag_pilot/components/pilot/result.py b/evals/evaluation/rag_pilot/components/pilot/result.py new file mode 100644 index 00000000..f46008b7 --- /dev/null +++ b/evals/evaluation/rag_pilot/components/pilot/result.py @@ -0,0 +1,366 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import copy +import csv +import hashlib +import json +import re +import uuid +from difflib import SequenceMatcher +from enum import Enum +from pathlib import Path +from typing import Dict, List, Optional, Union + +import numpy as np +from components.pilot.base import ContextGT, ContextItem, ContextType, GTType, fuzzy_contains +from pydantic import BaseModel, Field, model_serializer + +# Import matcher for advanced text matching +try: + from components.annotation.matcher import default_matcher +except ImportError: + # Fallback in case matcher is not available + default_matcher = None + + +class Metrics(str, Enum): + RETRIEVAL_RECALL = "retrieval_recall_rate" + POSTPROCESSING_RECALL = "postprocessing_recall_rate" + ANSWER_RELEVANCY = "answer_relevancy" + + +class RAGResult(BaseModel): + metadata: Optional[Dict[str, Union[float, int, list]]] = {} + query_id: Optional[int] = None + query: str + ground_truth: Optional[str] = None + response: Optional[str] = None + + gt_contexts: Optional[List[ContextGT]] = None + retrieval_contexts: Optional[List[ContextItem]] = None + postprocessing_contexts: Optional[List[ContextItem]] = None + + finished: bool = False + + def __init__(self, **data): + super().__init__(**data) + + def __post_init__(self): + for context_type in ContextType: + self.init_context_idx(context_type) + + def copy(self): + return copy.deepcopy(self) + + def update_metrics(self, metrics: Dict[str, Union[float, int]]): + if not metrics: + return + if self.metadata is None: + self.metadata = {} + for key, value in metrics.items(): + if isinstance(value, (float, int)): + self.metadata[key] = value + + def init_context_idx(self, context_type): + context_list_name = f"{context_type.value}_contexts" + context_list = getattr(self, context_list_name, None) + if context_list is not None: + for idx, context in enumerate(context_list): + context.context_idx = idx + + def add_context(self, context_type: ContextType, context: ContextItem): + context_list_name = f"{context_type.value}_contexts" + context_list = getattr(self, context_list_name, None) + if context_list is None: + context_list = [] + setattr(self, context_list_name, context_list) + context.context_idx = len(context_list) + context_list.append(context) + + def update_metadata_hits(self, threshold=1, enable_fuzzy=False, confidence_topn=5): + if self.gt_contexts: + for context_type in [ContextType.RETRIEVAL, ContextType.POSTPROCESSING]: + context_list_name = f"{context_type.value}_contexts" + context_list = getattr(self, context_list_name, None) + if context_list is None: + continue + for context in context_list: + self.context_matches_gt( + self.gt_contexts, context, context_type, threshold, enable_fuzzy, confidence_topn + ) + + for context_type in [ContextType.RETRIEVAL, ContextType.POSTPROCESSING]: + count = 0 + for gt_context in self.gt_contexts: + if gt_context.metadata.get(context_type, None): + count += 1 + self.metadata[context_type] = count + + def 
set_response(self, response: str):
+        self.response = response
+        # if self.ground_truth:
+        #     self.metadata = self.cal_metric(self.query, self.ground_truth, response)
+
+    def append_gt_contexts(
+        self,
+        new_gts: List[ContextGT],
+    ) -> dict:
+        if self.gt_contexts is None:
+            self.gt_contexts = []
+
+        existing_node_ids = {c.node_id for c in self.gt_contexts if c.node_id}
+        added = 0
+        skipped_ids = []
+        for gt in new_gts:
+            if gt.node_id and gt.node_id in existing_node_ids:
+                skipped_ids.append(gt.node_id)
+                continue
+
+            self.gt_contexts.append(gt)
+            added += 1
+            if gt.node_id:
+                existing_node_ids.add(gt.node_id)
+
+        self.init_context_idx(ContextType.GT)
+        return {"added": added, "skipped_duplicates": set(skipped_ids), "total_gt_contexts": len(self.gt_contexts)}
+
+    @classmethod
+    def check_parts_in_text(cls, gt_context, text, threshold):
+        if threshold < 1:
+            return fuzzy_contains(gt_context, text, threshold)
+        else:
+            parts = gt_context.split()
+            return all(part in text for part in parts)
+
+    @classmethod
+    def check_annotation_gt_match(cls, gt: ContextGT, candidate_context: ContextItem):
+        # Primary matching: node_id comparison (most accurate)
+        if candidate_context.node_id and gt.node_id and candidate_context.node_id == gt.node_id:
+            return True
+        # Secondary matching: exact context text comparison
+        candidate_content = candidate_context.text.strip()
+        gt_content = gt.node_text.strip()
+        return candidate_content == gt_content
+
+    @classmethod
+    def check_traditional_gt_match(
+        cls,
+        gt: ContextGT,
+        candidate_context: ContextItem,
+        threshold: float = 1.0,
+        enable_fuzzy=False,
+        confidence_topn=5,
+    ) -> bool:
+        # First check file name matching if both are available
+        file_match = True
+        if candidate_context.file_name and gt.file_name and gt.file_name != "":
+            file_match = gt.file_name in candidate_context.file_name
+
+        if not file_match:
+            return False
+        if default_matcher is None:
+            # Matcher module unavailable (see the import fallback above);
+            # degrade to the plain containment check instead of raising
+            return cls.check_parts_in_text(gt.text, candidate_context.text, threshold)
+        default_matcher.update_settings(
+            similarity_threshold=threshold, enable_fuzzy=enable_fuzzy, confidence_topn=confidence_topn
+        )
+        match_type, confidence = default_matcher.match_texts(candidate_context.text, gt.text)
+        # Consider exact matches and high-confidence partial matches
+        if match_type == "exact":
+            return True
+        elif match_type == "partial":
+            return True
+        return False
+
+    @classmethod
+    def context_matches_gt(
+        cls,
+        gt_contexts: List[ContextGT],
+        candidate_context: ContextItem,
+        context_type: ContextType,
+        threshold=1,
+        enable_fuzzy=False,
+        confidence_topn=5,
+    ):
+        hit_indices = []
+        for gt in gt_contexts:
+            matched = False
+            if gt.gt_type == GTType.ANNOTATION:
+                matched = cls.check_annotation_gt_match(gt, candidate_context)
+            else:
+                matched = cls.check_traditional_gt_match(
+                    gt, candidate_context, threshold, enable_fuzzy, confidence_topn
+                )
+
+            if matched:
+                gt.metadata = gt.metadata or {}
+                retrieved_list = gt.metadata.get(context_type, [])
+                retrieved_list.append(candidate_context.context_idx)
+                gt.metadata[context_type] = retrieved_list
+                hit_indices.append(gt.context_idx)
+
+        if hit_indices:
+            candidate_context.metadata = candidate_context.metadata or {}
+            prev_hit = candidate_context.metadata.get("hit", [])
+            if not isinstance(prev_hit, list):
+                prev_hit = [prev_hit]
+            candidate_context.metadata["hit"] = list(set(prev_hit + hit_indices))
+            return True
+        return False
+
+    @classmethod
+    def cal_metric(cls, query: str, ground_truth: str, response: str) -> Dict[str, float]:
+        # Placeholder: Use actual metric calculations as needed.
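+        # A fuller metric here is a sketch-level TODO: e.g. token-level F1
+        # between ground_truth and response, or an embedding-based
+        # answer_relevancy score matching the Metrics enum above.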
+        accuracy = float(ground_truth in response)
+        return {"accuracy": accuracy}
+
+
+class RAGResults(BaseModel):
+    metadata: Optional[Dict[str, Union[float, int]]] = None
+    results: List[RAGResult] = []
+    finished: bool = False
+
+    def add_result(self, result):
+        # If result.query_id already appears in self.results, update that
+        # result in place; otherwise append it
+        updated_existing = False
+        if result.query_id is not None:
+            for idx, r in enumerate(self.results):
+                if r.query_id == result.query_id:
+                    self.results[idx] = result
+                    updated_existing = True
+                    break
+        if not updated_existing:
+            self.results.append(result)
+        self.cal_metadata()
+
+    def cal_recall(self):
+        # Micro-averaged recall: total hits over total GT contexts across all queries
+        recall_rates = {}
+        for context_type in [ContextType.RETRIEVAL, ContextType.POSTPROCESSING]:
+            hit_count = 0
+            gt_count = 0
+            for result in self.results:
+                gt_count += len(result.gt_contexts) if result.gt_contexts else 0
+                hit_count += result.metadata.get(context_type, 0) if result.metadata else 0
+
+            recall_rate = hit_count / gt_count if gt_count > 0 else np.nan
+            if context_type is ContextType.RETRIEVAL:
+                recall_rates[Metrics.RETRIEVAL_RECALL.value] = recall_rate
+            elif context_type is ContextType.POSTPROCESSING:
+                recall_rates[Metrics.POSTPROCESSING_RECALL.value] = recall_rate
+        self.metadata = self.metadata or {}
+        self.metadata.update(recall_rates)
+
+    def cal_metadata(self):
+        self.cal_recall()
+
+        rate_sums = {}
+        rate_counts = {}
+
+        for result in self.results:
+            if not result.metadata:
+                continue
+            for key, value in result.metadata.items():
+                if isinstance(value, (int, float)) and key in {metric.value for metric in Metrics}:
+                    if key not in rate_sums:
+                        rate_sums[key] = 0.0
+                        rate_counts[key] = 0
+                    rate_sums[key] += value
+                    rate_counts[key] += 1
+
+        self.metadata = self.metadata or {}
+        for key, total in rate_sums.items():
+            avg = total / rate_counts[key] if rate_counts[key] > 0 else np.nan
+            self.metadata[key] = avg
+
+    def get_metrics(self):
+        return self.metadata or {}
+
+    def get_metric(self, metric: Metrics, default=float("-inf")):
+        return (self.metadata or {}).get(metric.value, default)
+
+    def update_result_metrics(self, query_id: int, metrics: Dict[str, Union[float, int]]):
+        updated = False
+        for result in self.results:
+            if result.query_id == query_id:
+                result.update_metrics(metrics)
+                updated = True
+                break
+
+        if updated:
+            self.cal_metadata()
+        return updated
+
+    def check_metadata(self):
+        if not self.metadata:
+            print("No metadata found.")
+            return
+        for key, value in self.metadata.items():
+            print(f"{key}: {value}")
+
+    def save_to_json(self, file_path: str):
+        cleaned_metadata = {str(k): v for k, v in (self.metadata or {}).items()}
+        rag_results_dict = {
+            **self.dict(exclude={"metadata"}),
+            "metadata": cleaned_metadata,
+        }
+
+        with open(file_path, "w", encoding="utf-8") as f:
+            json.dump(rag_results_dict, f, ensure_ascii=False, indent=4)
+
+    def save_to_csv(self, output_dir: str):
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # --- CSV 1: Contexts ---
+        contexts_csv = output_dir / "rag_contexts.csv"
+        with contexts_csv.open("w", newline="", encoding="utf-8-sig") as f:
+            fieldnames = ["query_id", "context_type", "context_idx", "file_name", "text"]
+            metadata_keys = set()
+            for result in self.results:
+                for context_type in ContextType:
+                    context_list = getattr(result, f"{context_type.value}_contexts") or []
+                    for ctx in context_list:
+                        if ctx.metadata:
+                            metadata_keys.update(ctx.metadata.keys())
+            fieldnames.extend(metadata_keys)
+            writer = csv.DictWriter(f,
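+                # DictWriter fixes its column set at construction time, which
+                # is why every metadata key is collected in the pass above
+                # before any row is written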
fieldnames=fieldnames) + writer.writeheader() + + for result in self.results: + for context_type in ContextType: + context_list = getattr(result, f"{context_type.value}_contexts") or [] + for ctx in context_list: + row = { + "query_id": result.query_id, + "context_type": f"{context_type.value}_contexts", + "context_idx": ctx.context_idx, + "file_name": ctx.file_name, + "text": ctx.text, + } + if ctx.metadata: + for key in metadata_keys: + row[key] = ctx.metadata.get(key, "") + writer.writerow(row) + + # --- CSV 2: Summary --- + summary_csv = output_dir / "rag_summary.csv" + with summary_csv.open("w", newline="", encoding="utf-8-sig") as f: + fieldnames = ["query_id", "query", "ground_truth", "response", "gt_count"] + metadata_keys = set() + for result in self.results: + if result.metadata: + metadata_keys.update(result.metadata.keys()) + fieldnames.extend(metadata_keys) + + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + + for result in self.results: + row = { + "query_id": result.query_id, + "query": result.query, + "ground_truth": result.ground_truth, + "response": result.response, + "gt_count": len(result.gt_contexts), + } + if result.metadata: + for key in metadata_keys: + row[key] = result.metadata.get(key, "") + writer.writerow(row) diff --git a/evals/evaluation/rag_pilot/components/tuner/adaptor.py b/evals/evaluation/rag_pilot/components/tuner/adaptor.py deleted file mode 100644 index 9e2fc323..00000000 --- a/evals/evaluation/rag_pilot/components/tuner/adaptor.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import ast -from copy import deepcopy -from dataclasses import dataclass -from typing import Any, Callable, Dict, Optional - -from components.connect_utils import COMP_TYPE_MAP, get_ecrag_module_map -from components.pilot.base import RAGPipeline - - -def get_support_modules(module_name: str, module_map) -> Callable: - support_modules = module_map - return dynamically_find_function(module_name, support_modules) - - -def dynamically_find_function(key: str, target_dict: Dict) -> Callable: - if key in target_dict: - instance, attr_expression = target_dict[key] - if "[" in attr_expression and "]" in attr_expression: - attr_name, index = attr_expression[:-1].split("[") - index = int(index) - func = getattr(instance, attr_name) - if isinstance(func, list) and 0 <= index < len(func): - func = func[index] - else: - raise ValueError(f"Attribute '{attr_name}' is not a list or index {index} is out of bounds") - elif attr_expression == "": - func = instance - else: - func = getattr(instance, attr_expression) - return func - else: - print(f"Input module or node '{key}' is not supported.") - - -def convert_tuple(value): - if isinstance(value, str): - try: - evaluated = ast.literal_eval(value) - if isinstance(evaluated, tuple): - if len(evaluated) == 2: - return Range(*evaluated) - else: - return evaluated - except (SyntaxError, ValueError): - pass - return value - - -class Range: - def __init__(self, min_value: int, max_value: int): - self.min = min_value - self.max = max_value - - -class ModuleBase: - def __init__(self, type: str, params: Dict[str, Any]): - self.type: str = type - self.params: Dict[str, Any] = params - self.func: Optional[Callable] = None - self.is_active = False - - @classmethod - def from_dict(cls, component_dict: Dict) -> "ModuleBase": - _component_dict = deepcopy(component_dict) - type = _component_dict.pop("type") - params = _component_dict - return cls(type, params) - - def 
update_func(self, module_map): - self.func = get_support_modules(self.type, module_map) - if self.func is None: - print(f"{self.__class__.__name__} type {self.type} is not supported.") - - def get_params(self, attr): - return self.params[attr] if attr in self.params else None - - def get_status(self): - return self.is_active - - def get_value(self, attr): - if self.func is None: - print(f"{self.__class__.__name__} type {self.type} is not supported.") - else: - return getattr(self.func, attr, None) - - def set_value(self, attr, value): - if self.func is None: - print(f"{self.__class__.__name__} type {self.type} is not supported.") - else: - setattr(self.func, attr, value) - - -@dataclass -class Module(ModuleBase): - type: str - params: Dict[str, Any] - func: Optional[Callable] - - def __init__(self, type, params): - super().__init__(type, params) - - -@dataclass -class Node(ModuleBase): - type: str - params: Dict[str, Any] - modules: Dict[str, Module] - func: Optional[Callable] - - def __init__(self, type, params, modules): - super().__init__(type, params) - self.modules = modules - - @classmethod - def from_dict(cls, node_dict: Dict) -> "Node": - _node_dict = deepcopy(node_dict) - type = _node_dict.pop("type") - modules_dict = _node_dict.pop("modules") - modules = {key: Module.from_dict(value) for key, value in modules_dict.items()} - params = _node_dict - return cls(type, params, modules) - - def get_params(self, attr): - if attr in self.params: - return self.params[attr] - # Make sure attr ends with "type" when tuning node's modules - elif attr.endswith("type"): - return list(self.modules.keys()) - else: - return None - - def set_value(self, attr, value): - if self.func is None: - print(f"{self.__class__.__name__} type {self.type} is not supported.") - else: - setattr(self.func, attr, value) - if value in self.modules: - module = self.modules[value] - for param in module.params: - val = module.get_params(param) - if val: - module.set_value(param, val) - - -class Adaptor: - - def __init__(self, yaml_data: str): - self.nodes = self.parse_nodes(yaml_data) - self.root_func: Optional[Callable] = None - - self.rag_pipeline: Optional[RAGPipeline] = None - - def parse_nodes(self, yaml_data): - parsed_nodes = {} - for node in yaml_data.get("nodes", []): - node_type = node.get("node") - modules_dict = { - mod.get("module_type"): Module( - type=mod.get("module_type", ""), - params={k: convert_tuple(v) for k, v in mod.items() if k not in ["module_type"]}, - ) - for mod in node.get("modules", []) - if mod.get("module_type") - } - node_params = {k: convert_tuple(v) for k, v in node.items() if k not in ["node", "node_type", "modules"]} - cur_node = Node(type=node_type, params=node_params, modules=modules_dict) - if node_type in parsed_nodes: - parsed_nodes[node_type].append(cur_node) - else: - parsed_nodes[node_type] = [cur_node] - return parsed_nodes - - def get_node(self, node_type, idx=0): - nodes = self.nodes[node_type] if node_type in self.nodes else None - return nodes[idx] if nodes and idx < len(nodes) else None - - def get_modules_from_node(self, node_type, idx=0): - node = self.get_node(node_type, idx) - return node.modules if node else None - - def get_module(self, node_type, module_type, idx=0): - if module_type is None: - return self.get_node(node_type, idx) - else: - modules = self.get_modules_from_node(node_type, idx) - return modules[module_type] if modules and module_type in modules else None - - def update_all_module_functions_tmp(self, rag_pipeline, node_type_map=COMP_TYPE_MAP): - 
module_map = get_ecrag_module_map(rag_pipeline.pl) - self.root_func = get_support_modules("root", module_map) - - for node_list in self.nodes.values(): - for node in node_list: - node.update_func(module_map) - node.is_active = False - for module in node.modules.values(): - module.update_func(module_map) - module.is_active = False - - self.activate_modules_based_on_type(node_type_map) - - def update_all_module_functions(self, rag_pipeline, node_type_map=COMP_TYPE_MAP): - self.update_all_module_functions_tmp(rag_pipeline, node_type_map) - self.rag_pipeline = rag_pipeline - - def activate_modules_based_on_type(self, node_type_map): - if not self.root_func: - return - - for node_list in self.nodes.values(): - for node in node_list: - node_type = node.type - if not getattr(self.root_func, node_type, None): - continue - node.is_active = True - active_module_type = getattr(node.func, node_type_map[node_type], None) - if active_module_type and active_module_type in node.modules: - node.modules[active_module_type].is_active = True - - def get_rag_pipelines_candidates(self, params_candidates): - rag_pls = [] - for params_candidate in params_candidates: - rag_pl = self.rag_pipeline.copy() - self.update_all_module_functions_tmp(rag_pl) - for attr, tunerUpdate in params_candidate.items(): - module = self.get_module(tunerUpdate.node_type, tunerUpdate.module_type) - module.set_value(attr, tunerUpdate.val) - rag_pl.regenerate_id() - rag_pls.append(rag_pl) - self.update_all_module_functions(self.rag_pipeline) - return rag_pls, params_candidates diff --git a/evals/evaluation/rag_pilot/components/tuner/base.py b/evals/evaluation/rag_pilot/components/tuner/base.py index b03e03ff..db8a2295 100644 --- a/evals/evaluation/rag_pilot/components/tuner/base.py +++ b/evals/evaluation/rag_pilot/components/tuner/base.py @@ -1,9 +1,15 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import copy +import itertools +from collections import defaultdict from enum import Enum, auto -from typing import Callable, List, Optional, Tuple, Union +from typing import List, Optional, Union +from api_schema import RunningStatus, TunerUpdateOut +from components.pilot.base import Attribute, Module, Node +from components.pilot.pipeline import Pipeline from pydantic import BaseModel, validator @@ -58,6 +64,7 @@ class SuggestionType(Enum): STEPWISE_GROUPED = auto() STEPWISE = auto() GRID_SEARCH = auto() + NONE = auto() class Suggestion(UserInput): @@ -76,15 +83,15 @@ class DirectionType(Enum): class Target(BaseModel): node_type: str - module_type: Optional[str] = None - attribute: str + module_type: str + attribute_type: str orig_val: Optional[Union[int, float, str]] = None new_vals: List[Union[int, float, str]] = None suggestion: Suggestion = None def as_string(self) -> str: module = f"{self.module_type}." if self.module_type else "" - return f"{self.node_type}.{module}{self.attribute}" + return f"{self.node_type}.{module}{self.attribute_type}" class TargetUpdate(BaseModel): @@ -92,3 +99,163 @@ class TargetUpdate(BaseModel): module_type: Optional[str] = None attribute: str val: Optional[Union[int, float, str]] = None + + +def input_parser(upper_limit: int = None): + if upper_limit: + user_input = input(f"(1 - {upper_limit}): ") + else: + user_input = input("Provide a number: ") + upper_limit = 10000 + + if user_input.isdigit() and 1 <= int(user_input) <= upper_limit: + return True, int(user_input) + else: + print(f"Invalid input. 
Please enter a number between 1 and {upper_limit}.")
+        return False, None
+
+
+class Tuner:
+
+    name: str
+
+    def __init__(self, tuner_dict: dict):
+        if tuner_dict.get("params", {}).get("name"):
+            self.name = tuner_dict["params"]["name"]
+        else:
+            self.name = tuner_dict["type"]
+        self.node = Node.from_dict(tuner_dict)
+        targets = {}
+        for m in self.node.modules:
+            for attr in m.attributes:
+                target = Target(
+                    node_type=self.node.type,
+                    module_type=m.type,
+                    attribute_type=attr.type,
+                    new_vals=attr.params["values"],
+                )
+                targets[attr.type] = target
+
+        # Each target aims at one attribute; the targets dict is keyed
+        # by attribute type
+        self.targets = targets
+        self._status = RunningStatus.NOT_STARTED
+        self.tunerUpdateOuts = []
+
+    def set_status(self, status: RunningStatus):
+        self._status = status
+
+    def get_status(self):
+        return self._status
+
+    def set_status_completed(self):
+        self._status = RunningStatus.COMPLETED
+
+    def reset(self):
+        self._status = RunningStatus.NOT_STARTED
+        self.tunerUpdateOuts = []
+
+    # Convert targets to pipeline candidates
+    def run(self, pl_template):
+        attribute_candidates = []
+        for k, tgt in self.targets.items():
+            tgt_node = get_node_from_pipeline(pl_template, tgt.node_type)
+            if not tgt_node:
+                print(f"Tuner node {tgt.node_type} is not applicable to the pipeline")
+                continue
+            tgt_module = get_module_from_node(tgt_node, tgt.module_type)
+            if not tgt_module:
+                print(f"Tuner module {tgt.module_type} is not applicable to the pipeline")
+                continue
+            tgt_attr = get_attr_from_module(tgt_module, tgt.attribute_type)
+            if not tgt_attr:
+                print(f"Tuner attribute {tgt.attribute_type} is not applicable to the pipeline")
+                continue
+            for v in tgt.new_vals:
+                attribute_candidates.append((tgt_node.type, tgt_module.type, tgt_attr.type, v))
+        if len(attribute_candidates) == 0:
+            return []
+
+        print(f"[Tuner {self.name}] attribute candidates: {attribute_candidates}")
+        node_suggestions = generate_node_suggestion(attribute_candidates)
+        print(f"[Tuner {self.name}] node suggestions: {node_suggestions}")
+        suggestion_list = []
+        # Single-node pipeline changes only
+        # TODO: Could extend to a full pipeline suggestion
+        for n in node_suggestions:
+            # Reconstruct a pipeline for this suggestion
+            tgt_pl = copy.deepcopy(pl_template)
+            tgt_pl.regenerate_id()
+            # Replace nodes of the same type; filter rather than removing
+            # from the list while iterating over it
+            tgt_pl.nodes = [existing for existing in tgt_pl.nodes if existing.type != n.type]
+            tgt_pl.nodes.append(n)
+            suggestion_list.append(tgt_pl)
+
+            # Generate TunerUpdateOuts
+            targets = {}
+            for module in n.modules:
+                for attr in module.attributes:
+                    parts = [n.type, module.type, attr.type]
+                    target_key = ".".join(parts)  # e.g., "postprocessor.reranker.top_n"
+                    targets[target_key] = attr.params["value"]
+            self.tunerUpdateOuts.append(
+                TunerUpdateOut(
+                    tuner_name=self.name,
+                    base_pipeline_id=pl_template.id,
+                    pipeline_id=tgt_pl.id,
+                    targets=targets,
+                )
+            )
+        return suggestion_list
+
+
+# attr_candidate = (node_type, module_type, attribute_type, attr_value);
+# attribute values are expanded into per-node combinations
+def generate_node_suggestion(attr_candidates: list):
+    grouped = defaultdict(list)
+    for n, m, a, v in attr_candidates:
+        grouped[(n, m, a)].append(v)
+
+    # group by (n, m) → map attribute type → values
+    nm_grouped = defaultdict(lambda: defaultdict(list))
+    for (n, m, a), v_list in grouped.items():
+        nm_grouped[(n, m)][a] = v_list
+
+    results = []
+    for (n, m), a_to_vs in nm_grouped.items():
+        # Build cartesian product of choices for each attribute type
+        choice_lists = [[Attribute(type=a, params={"value": v}) for v in vs] for a, vs in a_to_vs.items()]
+
+        for combo in itertools.product(*choice_lists):
+            module = Module(type=m,
attributes=list(combo)) + node = Node(type=n, modules=[module]) + results.append(node) + return results + + +# TODO: Move to Node implementation +def get_node_from_pipeline(pl: Pipeline, node_type: str): + for n in pl.nodes: + if n.type == node_type: + return n + return None + + +def get_module_from_node(node: Node, module_type: str): + for m in node.modules: + if m.type == module_type: + return m + return None + + +def get_attr_from_module(mod: Module, attribute_type: str): + for a in mod.attributes: + if a.type == attribute_type: + return a + return None diff --git a/evals/evaluation/rag_pilot/components/tuner/tuner.py b/evals/evaluation/rag_pilot/components/tuner/tuner.py deleted file mode 100644 index daa5aafc..00000000 --- a/evals/evaluation/rag_pilot/components/tuner/tuner.py +++ /dev/null @@ -1,595 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import glob -import os -from abc import ABC, abstractmethod -from itertools import product -from typing import Dict, Optional - -from api_schema import RunningStatus -from components.tuner.adaptor import Adaptor -from components.tuner.base import ContentType, Feedback, Question, Suggestion, SuggestionType, Target, TargetUpdate - - -def input_parser(upper_limit: int = None): - if upper_limit: - user_input = input(f"(1 - {upper_limit}): ") - else: - user_input = input("Provide a number: ") - upper_limit = 10000 - - if user_input.isdigit() and 1 <= int(user_input) <= upper_limit: - return True, int(user_input) - else: - print(f"Invalid input. Please enter a number between 1 and {upper_limit}.") - return False, None - - -def display_ragqna(ragqna): - print("\nRAG Query\n" "---------\n" f"{ragqna.query}\n\n" "RAG Response\n" "------------\n" f"{ragqna.response}\n") - - if ragqna.contexts: - for index, context in enumerate(ragqna.contexts): - cleaned_context = context.replace("\n", " ") - print(f"RAG Context {index}\n" "-------------------\n" f"{cleaned_context}\n") - else: - print("RAG Contexts\n" "------------\n" "None\n") - - -def display_list(list): - for index, value in enumerate(list): - print(f"{index}: {value}") - - -class Tuner(ABC): - - def __init__(self, question: Question, adaptor: Adaptor, targets: Dict[str, Target]): - self.name = self.__class__.__name__ - self.question = question - self.adaptor = adaptor - self.targets = targets - self._status = RunningStatus.NOT_STARTED - - def check_active(self): - for target in self.targets.values(): - target_obj = self.adaptor.get_module(target.node_type, target.module_type) - if not target_obj.get_status(): - return False - return True - - def get_status(self): - return self._status - - def set_status(self, status: RunningStatus): - self._status = status - - def set_status_completed(self): - self._status = RunningStatus.COMPLETED - - def set_param( - self, - param_name, - suggestion_type: SuggestionType, - new_vals: Optional[int] = None, - step: Optional[int] = None, - lower_limit: Optional[int] = None, - count: Optional[int] = 1, - ): - target_obj = None - if param_name in self.targets: - target = self.targets[param_name] - target_obj = self.adaptor.get_module(target.node_type, target.module_type) - - if not target_obj: - print(f"[!] Target not found: node={target.node_type}, module={target.module_type}") - return - - if not target_obj.get_status(): - print(f"[!] 
Skipping inactive component: node={target.node_type}, module={target.module_type}") - return - - target.orig_val = target_obj.get_value(target.attribute) - - match suggestion_type: - case SuggestionType.STEPWISE_GROUPED | SuggestionType.GRID_SEARCH | SuggestionType.STEPWISE: - if new_vals: - target.new_vals = new_vals - else: - if step is None: - raise ValueError("Step must be provided for stepwise tuning.") - if lower_limit: - start = lower_limit - else: - start = target.orig_val - - if count: - target.new_vals = [start + i * step for i in range(count)] - else: - target.new_vals = [start + step] - - target.suggestion = Suggestion( - hint=f"{target.attribute}'s current value: {target.orig_val}\n" f"Setting it to {target.new_vals}", - suggestion_type=suggestion_type, - ) - case SuggestionType.CHOOSE: - target.new_vals = target_obj.get_params(target.attribute) - target.suggestion = Suggestion( - hint=f"{target.attribute}'s current value: {target.orig_val}\n" - f"Please choose a new value from below:", - options=target.new_vals, - suggestion_type=suggestion_type, - ) - case SuggestionType.ITERATE: - target.new_vals = target_obj.get_params(target.attribute) - target.suggestion = Suggestion( - hint=f"{target.attribute}'s current value: {target.orig_val}\n" f"Iterate from available values", - options=target.new_vals, - suggestion_type=suggestion_type, - ) - case SuggestionType.SET: - if new_vals: - target.new_vals = new_vals - hint = f"Change {target.attribute}'s value: {target.orig_val} -> {new_vals}\n" - else: - target.new_vals = None - hint = f"{target.attribute}'s current value: {target.orig_val}\nPlease enter a new value: " - - target.suggestion = Suggestion(hint=hint, options=target.new_vals, suggestion_type=suggestion_type) - - def request_feedback(self, auto=False): - if not self.check_active(): - self.set_status(RunningStatus.INACTIVE) - return False - - print(f"\033[1m\033[93m{self}\033[0m: {self.question}\n") - if not auto: - valid, user_input = input_parser(len(self.question.options)) - if not valid: - return False - auto = False - else: - user_input = -1 - auto = True - - self.user_feedback = Feedback(feedback=user_input, auto=auto) - return self._feedback_to_suggestions() - - @abstractmethod - def _feedback_to_suggestions(self): - pass - - def apply_suggestions(self): - if not self.check_active(): - self.set_status(RunningStatus.INACTIVE) - return None, None - - params_candidates = [] - new_values_dict = {} - - # STEPWISE_GROUPED - grouped_targets = { - a: t - for a, t in self.targets.items() - if t.suggestion and t.suggestion.suggestion_type == SuggestionType.STEPWISE_GROUPED - } - if grouped_targets: - count = min(len(t.new_vals) for t in grouped_targets.values()) - - for idx in range(count): - candidate = {a: t.new_vals[idx] for a, t in grouped_targets.items()} - new_values_dict = { - a: TargetUpdate(node_type=t.node_type, module_type=t.module_type, attribute=a, val=t.new_vals[idx]) - for a, t in grouped_targets.items() - } - params_candidates.append(new_values_dict) - if len(params_candidates) > 0: - return self.adaptor.get_rag_pipelines_candidates(params_candidates) - - # GRID_SEARCH - grid_targets = { - a: t - for a, t in self.targets.items() - if t.suggestion and t.suggestion.suggestion_type == SuggestionType.GRID_SEARCH - } - if grid_targets: - keys, values_list = zip(*((a, t.new_vals) for a, t in grid_targets.items())) - for combination in product(*values_list): - candidate = dict(zip(keys, combination)) - new_values_dict = {} - for a, val in candidate.items(): - 
new_values_dict[a] = TargetUpdate( - node_type=self.targets[a].node_type, - module_type=self.targets[a].module_type, - attribute=a, - val=val, - ) - params_candidates.append(new_values_dict) - if len(params_candidates) > 0: - return self.adaptor.get_rag_pipelines_candidates(params_candidates) - - new_values_dict = {} - for attr, target in self.targets.items(): - suggestion = target.suggestion - if not suggestion or attr in new_values_dict: - continue - - orig_val = target.orig_val - match suggestion.suggestion_type: - case SuggestionType.SET: - print(f"{suggestion}") - if suggestion.options: - new_values_dict[attr] = TargetUpdate( - node_type=target.node_type, - module_type=target.module_type, - attribute=attr, - val=suggestion.options[0].content, - ) - else: - valid, user_input = input_parser() - if valid: - new_values_dict[attr] = TargetUpdate( - node_type=target.node_type, - module_type=target.module_type, - attribute=attr, - val=user_input, - ) - - case SuggestionType.CHOOSE: - print(f"{suggestion}") - new_options = [x for x in suggestion.options if x != orig_val] - valid, user_input = input_parser(len(new_options)) - if valid: - chosed_val = suggestion.options[user_input - 1] - new_values_dict[attr] = TargetUpdate( - node_type=target.node_type, - module_type=target.module_type, - attribute=attr, - val=chosed_val.content, - ) - - case SuggestionType.ITERATE: - print(f"{suggestion}") - for option in suggestion.options: - new_values_dict = {} - new_values_dict[attr] = TargetUpdate( - node_type=target.node_type, - module_type=target.module_type, - attribute=attr, - val=option.content, - ) - params_candidates.append(new_values_dict) - if len(params_candidates) > 0: - return self.adaptor.get_rag_pipelines_candidates(params_candidates) - - case SuggestionType.STEPWISE: - if len(target.new_vals) == 1: - val = target.new_vals[idx] - new_values_dict[attr] = TargetUpdate( - node_type=target.node_type, module_type=target.module_type, attribute=attr, val=val - ) - else: - for idx in range(len(target.new_vals)): - new_values_dict = {} - val = target.new_vals[idx] - new_values_dict[attr] = TargetUpdate( - node_type=target.node_type, module_type=target.module_type, attribute=attr, val=val - ) - params_candidates.append(new_values_dict) - if len(params_candidates) > 0: - return self.adaptor.get_rag_pipelines_candidates(params_candidates) - - case _: - print(f"ERROR: Unknown suggestion type '{suggestion.suggestion_type}'.") - - params_candidates.append(new_values_dict) - return self.adaptor.get_rag_pipelines_candidates(params_candidates) - - def run(self, pl): - self.adaptor.update_all_module_functions(pl) - if self.request_feedback(auto=True): - pl_list, params_candidates = self.apply_suggestions() - else: - pl_list, params_candidates = None, None - return pl_list, params_candidates - - def __str__(self): - return f"{self.__class__.__name__}" - - -class EmbeddingTuner(Tuner): - def __init__(self, adaptor: Adaptor): - # question - question = Question( - hint="Do you want to tune embedding model", - options=["Yes, iterate it from available options", "No, skip this tuner"], - ) - - targets = {} - # targets - attribute = "embedding_model" - target = Target( - node_type="indexer", - attribute=attribute, - ) - targets[attribute] = target - - super().__init__(question, adaptor, targets) - - def _feedback_to_suggestions(self): - assert isinstance(self.user_feedback, Feedback) - if self.user_feedback.feedback == 1 or self.user_feedback.auto: - self.set_param(param_name="embedding_model", 
suggestion_type=SuggestionType.ITERATE) - return True - else: - return False - - -class NodeParserTuner(Tuner): - - def __init__(self, adaptor: Adaptor): - # question - question = Question( - hint="Do you want to tune node parser", - options=["Yes, iterate it from available options", "No, skip this tuner"], - ) - - targets = {} - # targets - attribute = "parser_type" - target = Target( - node_type="node_parser", - attribute=attribute, - ) - targets[attribute] = target - - super().__init__(question, adaptor, targets) - - def _feedback_to_suggestions(self): - assert isinstance(self.user_feedback, Feedback) - if self.user_feedback.feedback == 1 or self.user_feedback.auto: - self.set_param(param_name="parser_type", suggestion_type=SuggestionType.ITERATE) - return True - else: - return False - - -class SimpleNodeParserChunkTuner(Tuner): - - def __init__(self, adaptor: Adaptor): - # question - question = Question( - hint="Do you want to tune chunk size and chunk overlap", - options=[ - "Yes, iterate the chunk size and chunk overlap based on current values stepwisely", - "Yes, set them to designated values", - "No, skip this tuner", - ], - ) - - targets = {} - # targets - attribute = "chunk_size" - target = Target( - node_type="node_parser", - module_type="simple", - attribute=attribute, - ) - targets[attribute] = target - - attribute = "chunk_overlap" - target = Target( - node_type="node_parser", - module_type="simple", - attribute=attribute, - ) - targets[attribute] = target - - super().__init__(question, adaptor, targets) - - def _feedback_to_suggestions(self): - assert isinstance(self.user_feedback, Feedback) - if self.user_feedback.feedback == 1 or self.user_feedback.auto: - self.set_param(param_name="chunk_size", suggestion_type=SuggestionType.STEPWISE_GROUPED, step=100, count=3) - self.set_param( - param_name="chunk_overlap", suggestion_type=SuggestionType.STEPWISE_GROUPED, step=16, count=3 - ) - return True - elif self.user_feedback.feedback == 2: - self.set_param(param_name="chunk_size", suggestion_type=SuggestionType.SET) - self.set_param(param_name="chunk_overlap", suggestion_type=SuggestionType.SET) - return True - else: - return False - - -class RetrievalTopkTuner(Tuner): - - def __init__(self, adaptor: Adaptor): - # question - question = Question( - hint="Do you want to tune retrieve's topk", - options=[ - "Yes, iterate it based on current values stepwisely", - "Yes, set it to designated value", - "No, skip this tuner", - ], - ) - - targets = {} - # targets - attribute = "retrieve_topk" - target = Target( - node_type="retriever", - attribute=attribute, - ) - targets[attribute] = target - - super().__init__(question, adaptor, targets) - - def _feedback_to_suggestions(self): - assert isinstance(self.user_feedback, Feedback) - if self.user_feedback.feedback == 1 or self.user_feedback.auto: - self.set_param( - param_name="retrieve_topk", suggestion_type=SuggestionType.STEPWISE, step=15, lower_limit=30, count=4 - ) - return True - if self.user_feedback.feedback == 2: - self.set_param( - param_name="retrieve_topk", - suggestion_type=SuggestionType.SET, - ) - return True - else: - return False - - -class RerankerTopnTuner(Tuner): - - def __init__(self, adaptor: Adaptor): - # question - question = Question( - hint="Do you want to tune reranker's top_n", - options=["Yes, iterate it based on current values stepwisely", "No, skip this tuner"], - ) - - targets = {} - # targets - attribute = "top_n" - target = Target( - node_type="postprocessor", - module_type="reranker", - attribute=attribute, 
- ) - targets[attribute] = target - - super().__init__(question, adaptor, targets) - - def _feedback_to_suggestions(self): - assert isinstance(self.user_feedback, Feedback) - if self.user_feedback.feedback == 1 or self.user_feedback.auto: - self.set_param(param_name="top_n", suggestion_type=SuggestionType.STEPWISE, step=2, lower_limit=3, count=2) - return True - else: - return False - - -class RetrievalTopkRerankerTopnTuner(Tuner): - - def __init__(self, adaptor: Adaptor): - # question - question = Question( - hint="Do you want to tune retrieve_topk and reranker's top_n", - options=[ - "Yes, iterate it based on current values stepwisely", - "Yes, set retrieve_topk to [30, 50, 100, 200], top_n to [5, 10]", - "No, skip this tuner", - ], - ) - - targets = {} - # targets - attribute = "retrieve_topk" - target = Target( - node_type="retriever", - attribute=attribute, - ) - targets[attribute] = target - - attribute = "top_n" - target = Target( - node_type="postprocessor", - module_type="reranker", - attribute=attribute, - ) - targets[attribute] = target - - super().__init__(question, adaptor, targets) - - def _feedback_to_suggestions(self): - assert isinstance(self.user_feedback, Feedback) - if self.user_feedback.feedback == 1 or self.user_feedback.auto: - self.set_param( - param_name="retrieve_topk", suggestion_type=SuggestionType.GRID_SEARCH, step=15, lower_limit=30, count=4 - ) - self.set_param( - param_name="top_n", suggestion_type=SuggestionType.GRID_SEARCH, step=5, lower_limit=5, count=2 - ) - return True - if self.user_feedback.feedback == 2: - self.set_param( - param_name="retrieve_topk", suggestion_type=SuggestionType.GRID_SEARCH, new_vals=[30, 50, 100, 200] - ) - self.set_param( - param_name="top_n", suggestion_type=SuggestionType.GRID_SEARCH, step=5, lower_limit=5, count=2 - ) - return True - else: - return False - - -class PromptTuner(Tuner): - - def __init__(self, adaptor: Adaptor): - # question - question = Question( - hint="Do you want to tune the prompt template?", - options=[ - "Yes, iterate all prompts based on prompt candidates", - "Yes, choose from available prompt templates", - "No, skip this tuner", - ], - ) - targets = {} - # targets - attribute = "prompt_path" - target = Target( - node_type="generator", - attribute=attribute, - ) - targets[attribute] = target - - attribute = "prompt_content" - target = Target( - node_type="generator", - attribute=attribute, - ) - targets[attribute] = target - super().__init__(question, adaptor, targets) - - path_target = targets["prompt_path"] - content_target = targets["prompt_content"] - target_obj_path = self.adaptor.get_module(path_target.node_type, path_target.module_type) - target_obj_content = self.adaptor.get_module(content_target.node_type, content_target.module_type) - - prompt_contents = [] - if target_obj_path: - paths = target_obj_path.get_params(path_target.attribute) - all_files = [] - for cur_path in paths: - if os.path.isdir(cur_path): - txt_files = glob.glob(os.path.join(cur_path, "*.txt")) - all_files.extend(txt_files) - elif os.path.isfile(cur_path) and cur_path.endswith(".txt"): - all_files.append(cur_path) - if all_files: - for file_path in all_files: - try: - with open(file_path, "r", encoding="utf-8") as f: - content = f.read() - prompt_contents.append(content) - except Exception as e: - print(f"Warning: Could not read file {file_path}: {e}") - if target_obj_content and prompt_contents: - target_obj_content.params[content_target.attribute].extend(prompt_contents) - - def _feedback_to_suggestions(self): - assert 
isinstance(self.user_feedback, Feedback) - if self.user_feedback.feedback == 1 or self.user_feedback.auto: - self.set_param(param_name="prompt_content", suggestion_type=SuggestionType.ITERATE) - return True - elif self.user_feedback.feedback == 2: - self.set_param(param_name="prompt_content", suggestion_type=SuggestionType.CHOOSE) - return True - else: - return False diff --git a/evals/evaluation/rag_pilot/components/tuner/tunermgr.py b/evals/evaluation/rag_pilot/components/tuner/tunermgr.py index f15421b3..db06cec7 100644 --- a/evals/evaluation/rag_pilot/components/tuner/tunermgr.py +++ b/evals/evaluation/rag_pilot/components/tuner/tunermgr.py @@ -2,21 +2,11 @@ # SPDX-License-Identifier: Apache-2.0 import uuid -from typing import Dict, List, Optional, Type +from typing import Any, Dict, List, Optional, Tuple +import yaml from api_schema import RAGStage, RunningStatus, TunerOut, TunerUpdateOut -from components.tuner.adaptor import Adaptor -from components.tuner.base import TargetUpdate -from components.tuner.tuner import ( - EmbeddingTuner, - NodeParserTuner, - PromptTuner, - RerankerTopnTuner, - RetrievalTopkTuner, - SimpleNodeParserChunkTuner, - Tuner, -) -from components.utils import read_yaml +from components.tuner.base import Tuner from pydantic import BaseModel @@ -24,7 +14,8 @@ class TunerRecord(BaseModel): base_pipeline_id: Optional[uuid.UUID] = None best_pipeline_id: Optional[uuid.UUID] = None all_pipeline_ids: List[uuid.UUID] = [] - targets: List[Dict[str, TargetUpdate]] = [] + # TODO: Change Any type + targets: Dict[uuid.UUID, Any] = {} class TunerMgr: @@ -32,21 +23,27 @@ def __init__(self): self._tuners_by_name: Dict[str, Tuner] = {} self._tuners_by_stage: Dict[RAGStage, List[str]] = {} self._records: Dict[str, TunerRecord] = {} - self.adaptor: Adaptor = None - def init_adaptor(self, rag_module_yaml): - self.adaptor = Adaptor(rag_module_yaml) + def clear_stage(self, stage: RAGStage): + names = self._tuners_by_stage.get(stage, []) + for name in names: + self._tuners_by_name.pop(name, None) + self._records.pop(name, None) + self._tuners_by_stage.pop(stage, None) - def update_adaptor(self, pl): - self.adaptor.update_all_module_functions(pl) - - def register_tuner(self, stage: RAGStage, tuner_cls: Type[Tuner]): - tuner = tuner_cls(self.adaptor) + def _register_tuner(self, stage: RAGStage, tuner_dict: dict): + tuner = Tuner(tuner_dict) name = tuner.name self._tuners_by_name[name] = tuner self._tuners_by_stage.setdefault(stage, []).append(name) self._records[name] = TunerRecord() + def get_stages(self) -> List[RAGStage]: + return self._tuners_by_stage.keys() + + def get_stage_and_tuner_name_list(self) -> Dict[RAGStage, List[str]]: + return self._tuners_by_stage + def get_tuner_stage(self, name: str) -> Optional[RAGStage]: for stage, tuner_names in self._tuners_by_stage.items(): if name in tuner_names: @@ -73,28 +70,8 @@ def get_tuner_out(self, name: str, stage: RAGStage = None) -> TunerOut: return tunerOut def get_tuner_update_outs_by_name(self, name: str) -> TunerUpdateOut: - record = self.get_tuner_record(name) - if record is None: - return [] - tunerUpdateOuts = [] - for pl_id, params in zip(record.all_pipeline_ids, record.targets): - targets = {} - for attr, update in params.items(): - parts = [update.node_type] - if update.module_type: - parts.append(update.module_type) - parts.append(update.attribute) - target_key = ".".join(parts) # e.g., "postprocessor.reranker.top_n" - targets[target_key] = update.val - tunerUpdateOuts.append( - TunerUpdateOut( - tuner_name=name, - 
base_pipeline_id=record.base_pipeline_id, - pipeline_id=pl_id, - targets=targets, - ) - ) - return tunerUpdateOuts + tuner = self._tuners_by_name[name] + return tuner.tunerUpdateOuts def get_stage_status(self, stage): tuner_names = self.get_tuners_by_stage(stage) @@ -125,9 +102,12 @@ def set_tuner_status(self, tuner_name, status): tuner.set_status(status) def reset_tuners_by_stage(self, stage): - tuner_names = tunerMgr.get_tuners_by_stage(stage) + tuner_names = self.get_tuners_by_stage(stage) for tuner_name in tuner_names: - tunerMgr.set_tuner_status(tuner_name, RunningStatus.NOT_STARTED) + tuner = self.get_tuner(tuner_name) + if tuner: + tuner.reset() + self.clear_tuner_record(tuner_name) def complete_tuner(self, tuner_name: str, best_pipeline_id: int = None): tuner = self.get_tuner(tuner_name) @@ -156,15 +136,24 @@ def set_best_pipeline(self, name, pipeline_id): def get_tuner(self, name): return self._tuners_by_name[name] if name in self._records else None + def get_tuners(self): + tuners = [] + for v in self._tuners_by_name.values(): + tuners.append(v) + return tuners + def get_tuner_record(self, name) -> Optional[TunerRecord]: return self._records[name] if name in self._records else None def set_tuner_record(self, name, tunerRecord): self._records[name] = tunerRecord + def clear_tuner_record(self, name): + self._records[name] = TunerRecord() + def run_tuner(self, name: str, pl): tuner = self.get_tuner(name) - pl_list, params_candidates = tuner.run(pl) + pl_list = tuner.run(pl) if tuner.get_status() is not RunningStatus.INACTIVE: tunerRecord = TunerRecord( @@ -172,27 +161,73 @@ def run_tuner(self, name: str, pl): base_pipeline_id=pl.get_id(), best_pipeline_id=None, all_pipeline_ids=[], - targets=[], + targets={}, ) self.set_tuner_record(name, tunerRecord) - for pl, params in zip(pl_list, params_candidates): - tunerRecord.all_pipeline_ids.append(pl.get_id()) - tunerRecord.targets.append(params) - - return pl_list, params_candidates - - -tunerMgr = TunerMgr() - - -def init_tuners(adaptor_yaml="configs/ecrag.yaml"): - tunerMgr.init_adaptor(read_yaml(adaptor_yaml)) - tunerMgr.register_tuner(RAGStage.RETRIEVAL, EmbeddingTuner) - tunerMgr.register_tuner(RAGStage.RETRIEVAL, NodeParserTuner) - tunerMgr.register_tuner(RAGStage.RETRIEVAL, SimpleNodeParserChunkTuner) - tunerMgr.register_tuner(RAGStage.RETRIEVAL, RetrievalTopkTuner) - - tunerMgr.register_tuner(RAGStage.POSTPROCESSING, RerankerTopnTuner) - - tunerMgr.register_tuner(RAGStage.GENERATION, PromptTuner) + for p in pl_list: + tunerRecord.all_pipeline_ids.append(p.get_id()) + tunerRecord.targets[p.get_id()] = p + + return pl_list + + def parse_tuner_config(self, config_path: str) -> Tuple[List[Tuple[str, str]], dict]: + """Parse YAML configuration file and return stage and tuner name pairs. 
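+
+        The file may hold multiple YAML documents (read via safe_load_all):
+        a "stage" mapping of stage names to tuner-name lists, and a "tuner"
+        list of tuner definitions (see configs/tuner.yaml).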
+ + Args: + config_path (str): Path to the YAML configuration file + + Returns: + List[Tuple[str, str]]: List of (stage_name, tuner_name) tuples + dict: {tuner_name:tuner} + """ + config = {} + # Read the YAML file + with open(config_path, "r") as file: + for doc in yaml.safe_load_all(file): + config.update(doc) + + # Collect stage and tuner pairs + stage_tuner_list = [] + tuner_dict = {} + + for stage_name, tuners in config["stage"].items(): + for tuner_name in tuners: + stage_tuner_list.append((stage_name, tuner_name)) + + for tuner in config["tuner"]: + tuner_dict[tuner["params"]["name"]] = tuner + + return stage_tuner_list, tuner_dict + + def init_tuner_from_file(self, config_path: str) -> None: + """Initialize tuners by parsing config file and registering them with tuner manager. + + Args: + config_path (str): Path to the YAML configuration file + """ + # Parse the configuration file + stage_tuner_list, tuner_dict = self.parse_tuner_config(config_path) + self.init_tuner(stage_tuner_list, tuner_dict) + + def init_tuner(self, stage_tuner_list, tuner_dict): + # Register each tuner with the tuner manager + # (stage_name, tuner_name) + for s_t_pair in stage_tuner_list: + + # Map string stage names to enum values + stage_enum = getattr(RAGStage, s_t_pair[0].upper(), None) + tuner_name = s_t_pair[1] + + try: + # Assuming tuners are imported in the current namespace + self._register_tuner(stage_enum, tuner_dict[tuner_name]) + print(f"Registered tuner {s_t_pair[1]} for stage {s_t_pair[0]}") + except KeyError: + print(f"Warning: Could not find tuner definition {s_t_pair[1]}") + return False + except Exception as e: + print(f"Error registering tuner {s_t_pair[1]}: {e}") + return False + + return True diff --git a/evals/evaluation/rag_pilot/components/utils.py b/evals/evaluation/rag_pilot/components/utils.py index 06e06f51..dae64712 100644 --- a/evals/evaluation/rag_pilot/components/utils.py +++ b/evals/evaluation/rag_pilot/components/utils.py @@ -8,7 +8,8 @@ import pandas as pd import yaml from api_schema import GroundTruth -from components.pilot.base import ContextItem, ContextType, RAGResult, RAGResults +from components.pilot.base import ContextGT, ContextType, GTType +from components.pilot.result import RAGResult, RAGResults def load_rag_results_from_csv(file_obj: Union[str, TextIO]): @@ -46,7 +47,9 @@ def load_rag_results_from_csv(file_obj: Union[str, TextIO]): file_name = "" if pd.isna(file_name) else str(file_name) if gt_context: - rag_results_dict[query_id]["gt_contexts"].append(ContextItem(text=gt_context, file_name=file_name)) + rag_results_dict[query_id]["gt_contexts"].append( + ContextGT(gt_type=GTType.TRADITIONAL, text=gt_context, file_name=file_name) + ) rag_results = RAGResults() for query_id, data in rag_results_dict.items(): @@ -76,7 +79,7 @@ def load_rag_results_from_gt(gts: List[GroundTruth]): ground_truth=gt.answer, ) for ctx in gt.contexts: - result.gt_contexts.append(ContextItem(text=ctx.text, file_name=ctx.filename)) + result.gt_contexts.append(ContextGT(gt_type=GTType.TRADITIONAL, text=ctx.text, file_name=ctx.filename)) result.init_context_idx(ContextType.GT) rag_results.add_result(result) @@ -86,6 +89,39 @@ def load_rag_results_from_gt(gts: List[GroundTruth]): raise ValueError(f"Error processing RAG results from GroundTruth: {e}") +def load_rag_results_from_gt_match_results(gt_match_results: List): + try: + rag_results = RAGResults() + for gt_match_result in gt_match_results: + gt_contexts = [] + for context_id, context_match_res in gt_match_result.context_map.items(): + 
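+                # Only contexts with a matched retrieved chunk become GT
+                # entries; unmatched contexts are skipped rather than being
+                # recorded as misses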
chunk = context_match_res.matched_chunk if context_match_res.matched_chunk else None + if not chunk: + continue + gt_context_item = ContextGT( + gt_type=GTType.ANNOTATION, + node_id=chunk.node_id, + node_text=chunk.text, + text=context_match_res.context_text, + file_name=chunk.metadata.get("file_name", "unknown"), + page_label=chunk.metadata.get("page_label", ""), + ) + gt_contexts.append(gt_context_item) + + result = RAGResult( + query_id=gt_match_result.query_id, + query=gt_match_result.query, + gt_contexts=gt_contexts, + ground_truth="", # Can be set later if available + ) + result.init_context_idx(ContextType.GT) + rag_results.add_result(result) + return rag_results + + except Exception as e: + raise ValueError(f"Error processing RAG results from GTMatchResult: {e}") + + def read_yaml(file_path): with open(file_path, "r") as file: yaml_content = file.read() diff --git a/evals/evaluation/rag_pilot/configs/RAGPipeline.yaml b/evals/evaluation/rag_pilot/configs/RAGPipeline.yaml new file mode 100644 index 00000000..7370a3cb --- /dev/null +++ b/evals/evaluation/rag_pilot/configs/RAGPipeline.yaml @@ -0,0 +1,55 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +nodes: + - type: "node_parser" + params: + name: "Node Parser Node" + description: "An example node for demonstration" + version: "1.0.0" + modules: + - type: "direct" + attributes: + - type: "chunk_size" + - type: "chunk_overlap" + - type: "hierarchical" + attributes: + - type: "chunk_size" + + - type: "indexer" + params: + name: "Indexer Node" + description: "An example node for demonstration" + version: "1.0.0" + modules: + - type: "embedding_model" + attributes: + - type: "model_name" + + - type: "retriever" + params: + name: "Retriever Node" + modules: + - type: "vectorsimilarity" + attributes: + - type: "top_k" + + - type: "postprocessor" + params: + name: "Postprocessor Node" + modules: + - type: "reranker" + attributes: + - type: "top_n" + - type: "model_name" + + - type: "generator" + params: + name: "Generator Node" + modules: + - type: "prompt" + attributes: + - type: "content" + - type: "llm_model" + attributes: + - type: "model_name" diff --git a/evals/evaluation/rag_pilot/configs/ecrag.yaml b/evals/evaluation/rag_pilot/configs/ecrag.yaml index a6b5da75..e93a5884 100644 --- a/evals/evaluation/rag_pilot/configs/ecrag.yaml +++ b/evals/evaluation/rag_pilot/configs/ecrag.yaml @@ -1,45 +1,47 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -nodes: - - node: node_parser - modules: - - module_type: simple - chunk_size: 400 - chunk_overlap: 48 - - module_type: hierarchical - chunk_sizes: - - 384 - - 512 - - 640 - - node: indexer - embedding_model: - - BAAI/bge-m3 - - BAAI/bge-large-zh-v1.5 - - BAAI/bge-large-en-v1.5 - modules: - - module_type: vector - - module_type: faiss_vector - - node: retriever - retrieve_topk: 30 - modules: - - module_type: vectorsimilarity - - module_type: auto_merge - - module_type: bm25 - - node: postprocessor - modules: - - module_type: reranker - top_n: 3 - reranker_model: BAAI/bge-reranker-large - - module_type: metadata_replace - - node: generator - prompt_path: - - "./prompt_templates" - prompt_content: - - "<|im_start|>System: You are an AI assistant. Your task is to learn from the following context. Then answer the user's question based on what you learned from the context but not your own knowledge.<|im_end|>\n\n<|im_start|>{context}<|im_end|>\n\n<|im_start|>System: Pay attention to your formatting of response. 
If you need to reference content from context, try to keep the formatting.<|im_end|>\n<|im_start|>System: Try to summarize from the context, do some reasoning before response, then response. Make sure your response is logically sound and self-consistent.<|im_end|>\n\n<|im_start|>{input}" - model: - - Qwen/Qwen2-7B-Instruct - inference_type: - - local - - vllm - prompt: null +node_parser: + type: "node_parser" + modules: + - type: "direct" + attributes: + - type: "chunk_size" + - type: "chunk_overlap" + - type: "hierarchical" + attributes: + - type: "chunk_size" + +--- +indexer: + type: "indexer" + modules: + - type: "embedding_model" + attributes: + - type: "model_name" + +--- +retriever: + type: "retriever" + modules: + - type: "vectorsimilarity" + attributes: + - type: "top_k" + +--- +postprocessor: + type: "postprocessor" + modules: + - type: "reranker" + attributes: + - type: "top_n" + - type: "model_name" + - type: "metadata" + +--- +generator: + type: "generator" + modules: + - type: "prompt" + attributes: + - type: "content" diff --git a/evals/evaluation/rag_pilot/configs/netsec_sample.csv b/evals/evaluation/rag_pilot/configs/netsec_sample.csv deleted file mode 100644 index 63eaed1c..00000000 --- a/evals/evaluation/rag_pilot/configs/netsec_sample.csv +++ /dev/null @@ -1,6 +0,0 @@ -query_id,query,file_name,gt_context,ground_truth -53,故障来源有哪些?,故障处理记录表.txt,故障来源:用户投诉、日志系统、例行维护中发现、其它来源。,故障来源:用户投诉、日志系统、例行维护中发现、其它来源。 -73,故障类别有哪些?,故障处理记录表.txt,故障类别:硬件设备故障、电源故障、传输网故障、数据修改、其它故障。,"故障类别:硬件设备故障、电源故障、传输网故障、数据修改、其它故障。 故障类别:硬件设备故障、电源故障、传输网故障、数据修改、其它故障。" -93,uMAC网元VNFC有哪几种备份方式,index.txt,ZUF-76-04-005 VNFC支持1+1主备冗余,uMAC网元VFNC有3中备份方式: 支持1+1主备冗余,支持N+M负荷分担冗余, 支持1+1互备冗余。 -93,,index.txt,ZUF-76-04-006 VNFC支持N+M负荷分担冗余, -93,,index.txt,ZUF-76-04-008 VNFC支持1+1互备冗余, diff --git a/evals/evaluation/rag_pilot/configs/rag_pipeline_sample.json b/evals/evaluation/rag_pilot/configs/rag_pipeline_sample.json deleted file mode 100644 index 26a8eeec..00000000 --- a/evals/evaluation/rag_pilot/configs/rag_pipeline_sample.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "name": "rag_test_local_llm", - "node_parser": { - "chunk_size": 512, - "chunk_overlap": 64, - "chunk_sizes": null, - "parser_type": "simple", - "window_size": null - }, - "indexer": { - "indexer_type": "faiss_vector", - "embedding_model": { - "model_type": "embedding", - "model_id": "BAAI/bge-small-zh-v1.5", - "model_path": "./models/BAAI/bge-small-zh-v1.5", - "weight": "", - "device": "auto" - } - }, - "retriever": { - "retriever_type": "vectorsimilarity", - "retrieve_topk": 30 - }, - "postprocessor": [ - { - "processor_type": "reranker", - "reranker_model": { - "model_type": "reranker", - "model_id": "BAAI/bge-reranker-large", - "model_path": "./models/BAAI/bge-reranker-large", - "weight": "", - "device": "auto" - }, - "top_n": 2 - } - ], - "generator": { - "prompt_path": null, - "model": { - "model_type": "llm", - "model_id": "Qwen/Qwen2-7B-Instruct", - "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights", - "weight": "INT4", - "device": "auto" - }, - "inference_type": "local" - }, - "active": true -} diff --git a/evals/evaluation/rag_pilot/configs/tuner.yaml b/evals/evaluation/rag_pilot/configs/tuner.yaml new file mode 100644 index 00000000..026fdc00 --- /dev/null +++ b/evals/evaluation/rag_pilot/configs/tuner.yaml @@ -0,0 +1,113 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +stage: + retrieval: + - "ObserverTuner" +# - "EmbeddingModelTuner" + - "SimpleNodeParserTuner" +# - "NodeParserTuner" +# - 
"SimpleNodeParserChunkTuner" +# - "RetrievalTopkTuner" + postprocessing: +# - "ObserverTuner" + - "RerankerTopnTuner" + generation: + - "PromptTuner" + +--- +tuner: + - type: "node_parser" + params: + name: "SimpleNodeParserTuner" + description: "A simple node parser tuner" + version: "1.0.0" + modules: + - type: "direct" + attributes: + - type: "chunk_size" + params: + values: + - 200 + - 300 + # - 400 + # - 500 + - type: "node_parser" + params: + name: "NodeParserTuner" + description: "A general node parser tuner" + version: "1.0.0" + modules: + - type: "direct" + attributes: + - type: "chunk_size" + params: + values: + - 100 + - 200 + - 300 + - type: "chunk_overlap" + params: + values: + - 30 + - 40 + - 50 + - type: "all" + params: + name: "ObserverTuner" + - type: "indexer" + params: + name: "EmbeddingModelTuner" + description: "An Embedding Model tuner" + version: "1.0.0" + modules: + - type: "embedding_model" + attributes: + - type: "model_name" + params: + values: + - BAAI/bge-m3 +# - BAAI/bge-small-en-v1.5 +# - BAAI/bge-large-zh-v1.5 + - BAAI/bge-large-en-v1.5 + - type: "retriever" + params: + name: "RetrievalTopkTuner" + description: "A Retriever tuner" + version: "1.0.0" + modules: + - type: "vectorsimilarity" + attributes: + - type: "top_k" + params: + values: + - 10 + - 50 + - 100 + - type: "postprocessor" + params: + name: "RerankerTopnTuner" + description: "A reranker tuner" + version: "1.0.0" + modules: + - type: "reranker" + attributes: + - type: "top_n" + params: + values: + # - 5 + # - 10 + - 40 + + - type: "generator" + params: + name: "PromptTuner" + description: "A Prompt tuner" + version: "1.0.0" + modules: + - type: "prompt" + attributes: + - type: "content" + params: + values: + - "<|im_start|>System: You are an AI assistant. Your task is to learn from the following context. Then answer the user's question based on what you learned from the context but not your own knowledge.<|im_end|>\n\n<|im_start|>{context}<|im_end|>\n\n<|im_start|>System: Pay attention to your formatting of response. If you need to reference content from context, try to keep the formatting.<|im_end|>\n<|im_start|>System: Try to summarize from the context, do some reasoning before response, then response. Make sure your response is logically sound and self-consistent.<|im_end|>\n\n<|im_start|>{input}" diff --git a/evals/evaluation/rag_pilot/docker_compose/intel/gpu/arc/compose.yaml b/evals/evaluation/rag_pilot/docker_compose/intel/gpu/arc/compose.yaml index f88373f5..c852747d 100644 --- a/evals/evaluation/rag_pilot/docker_compose/intel/gpu/arc/compose.yaml +++ b/evals/evaluation/rag_pilot/docker_compose/intel/gpu/arc/compose.yaml @@ -24,6 +24,8 @@ services: restart: always ports: - ${RAGPILOT_SERVICE_PORT:-16030}:16030 + volumes: + - ${RAGPILOT_CONFIG_DIR:-${PWD}}:/home/user/rag_pilot/configs networks: default: diff --git a/evals/evaluation/rag_pilot/docs/Detail_Guide.md b/evals/evaluation/rag_pilot/docs/Detail_Guide.md index 561a1183..42339b59 100644 --- a/evals/evaluation/rag_pilot/docs/Detail_Guide.md +++ b/evals/evaluation/rag_pilot/docs/Detail_Guide.md @@ -8,9 +8,8 @@ RAG Pilot provides a set of tuners to optimize various parameters in a retrieval | Tuner | Stage | Function | Configuration | |---|---|---|---| -| **EmbeddingTuner** | Retrieval | Tune embedding model and related parameters | Allows selection and configuration of the embedding model used for vectorization, including model name and optional parameters like dimension or backend. 
|
-| **NodeParserTuner** | Retrieval | Tune node parser parameters | General tuner for configuring node parsers, possibly extending to custom strategies or pre-processing logic. |
-| **SimpleNodeParserChunkTuner** | Retrieval | Tune `SentenceSplitter`'s `chunk_size` and `chunk_overlap` | Configures chunking behavior for document parsing by adjusting the size of individual text chunks and their overlap to ensure context retention. |
+| **EmbeddingModelTuner** | Retrieval | Tune embedding model and related parameters | Allows selection and configuration of the embedding model used for vectorization, including model name and optional parameters like dimension or backend. |
+| **NodeParserTuner** | Retrieval | Tune `SentenceSplitter`'s `chunk_size` and `chunk_overlap` | Configures chunking behavior for document parsing by adjusting the size of individual text chunks and their overlap to ensure context retention. |
 | **RetrievalTopkTuner** | Retrieval | Tune `top_k` for retriever | Adjusts how many documents are retrieved before reranking, balancing recall and performance. |
 | **RerankerTopnTuner** | Postprocessing | Tune `top_n` for reranking | Adjusts the number of top-ranked documents returned after reranking, optimizing relevance and conciseness. |
 | **PromptTuner** | Generator | Tune `prompt` for generator | Generates multiple responses using different prompts for users. |
@@ -20,84 +19,149 @@ These tuners help in optimizing document parsing, chunking strategies, reranking

 ## 🚦 How to use RAG Pilot

-To launch RAG Pilot, create the following *required files* before running the command:
+### ▶️ Use RAG Pilot with UI

-### 🔹Input file: QA List File (`your_queries.csv`)
+RAG Pilot provides an interactive UI that guides you through the following stages:
+#### 1. Set EC-RAG endpoint
+Click the gear button to set the EC-RAG endpoint:
+
+#### 2. Ground truth upload
+We provide two ways to upload ground truth:
+`Upload File` and `Create New`.
+The first time you use RAG Pilot, you can start with `Create New`.
+##### 2.1 Create New
+
+ 
+- Available options and meanings:
+  | Item | Usage |
+  |---|---|
+  | **Query** | The query you want to ask. |
+  | **File name** | Name of the file containing the context; select it from the drop-down menu. |
+  | **Context** | The ground truth context related to the query. |
+  | **Section** | The node in the file that contains the context. |
+  | **Pages** | The page number of the context in the file. |
+
+- `Add Context`: Add another context for the same query.
+- `Add Query`: Add information for another query.
+- `Save`: Save the ground truth for a single query.
+- `Batch Save`: Save the ground truth for all queries.
+- Once you click the `Save` or `Batch Save` button, RAG Pilot searches for nodes that match the ground truth you entered. If no node matches, RAG Pilot returns the top few nodes with the highest match scores for you to select from:
+ 
+- After the ground truth is created, you can click the `Download` button to download a ground truth file for later use with `Upload File`.
+
+##### 2.2 Upload File
+After creating ground truth, you can use the downloaded JSON file as the upload file.
+
+#### 3. Response Rating
+After the ground truth is loaded, RAG Pilot generates responses based on the current EC-RAG pipeline.
+- Click `Run` to get rating results.
+- Click `Skip` to skip rating.

-The input CSV file should contain queries and associated ground truth data (optional) used for evaluation or tuning. Each row corresponds to a specific query and context file. The CSV must include the following **columns**:
-
-| Column | Required | Description |
-|--------|----------|-------------|
-| `query_id` | ✅ Yes | Unique identifier for the query. Can be used to group multiple context entries under the same query. |
-| `query` | ✅ Yes (at least one per `query_id`) | The actual query string. If left empty for some rows sharing the same `query_id`, the query from the first row with a non-empty value will be used. |
-| `file_name` | ✅ Yes | The name of the file or document where the context (for retrieval or grounding) is drawn from. |
-| `gt_context` | ✅ Yes | The ground truth context string that should be retrieved or matched against. |
-| `ground_truth` | ❌ Optional | The ideal answer or response for the query, used for optional answer-level evaluation. |
+After clicking `Run`:
+- You can rate each result after the responses are generated.
+- Click numbers on the left to switch between responses of different queries.
+- Click `Next` to move to the next stage.

-#### 📌 CSV File Example
+  
+#### 4. Retrieve Context Tuning
+During this stage, RAG Pilot executes four tuners: `ObserverTuner`, `EmbeddingModelTuner`, `NodeParserTuner`, and `RetrievalTopkTuner`.

-```csv
-query_id,query,file_name,gt_context,ground_truth
-53,故障来源有哪些?,故障处理记录表.txt,故障来源:用户投诉、日志系统、例行维护中发现、其它来源。,故障来源:用户投诉、日志系统、例行维护中发现、其它来源。
-93,uMAC网元VNFC有哪几种备份方式,index.txt,ZUF-76-04-005 VNFC支持1+1主备冗余,uMAC网元VFNC有3中备份方式: 支持1+1主备冗余,支持N+M负荷分担冗余, 支持1+1互备冗余。
-93,,index.txt,ZUF-76-04-006 VNFC支持N+M负荷分担冗余,
-93,,index.txt,ZUF-76-04-008 VNFC支持1+1互备冗余,
-```
+##### 4.1 Retrieve Context Tuning Configure
+You can configure the specific settings for each tuner.
+- Click `Run Tuners` to start retrieval-stage tuning.
+- Click `Cancel` and then `Skip` to skip the Retrieve Context Tuning stage.
+- Tuner configurations can be exported and imported with the `Export` and `Import` buttons.

-### ▶️ Use RAG Pilot with UI
+  
+##### 4.2 Retrieve Context Tuning Run & Results
+After clicking `Run Tuners`, these tuners will experiment with various parameter combinations to construct corresponding pipelines, ultimately selecting the most effective pipeline as the operational one.

-RAG Pilot provides an interactive UI interface to assist with usage, including the following stages:
-#### 1. Ground Truth Uploading
-Upload the QA List File mentioned above by click the `Upload` button:
+- Click numbers on the left to switch between different queries.
-
-Or you can also create your own ground truth by click the `Create Ground Truth` button, use `+` and `-` button to add or delete ground truth.
+  
-
-#### 2. Response Rating
-After groud truth loading, RAG Pilot wii generate response bases on EC-RAG current pipeline.
-You can rating each result after the responses generated.
-
+- Once the selected tuners have completed their tasks, the page will display the results, including the `ground truth hits` and the `retrieved chunks`.
-#### 3. Retrieve Context Tuning
-During this stage, RAG Pilot will execute four tuners: EmbeddingTuner, NodeParserTuner, SimpleNodeParserChunkTuner, and RerankerTopnTuner.
+- Users can search text via the search box in the upper-right corner to observe which parts of the context match the ground truth context. Text entered into the search box will be highlighted.
-These tuners will experiment with various parameter combinations to construct corresponding pipelines, ultimately selecting the most effective pipeline as the operational one.
-
-Once all four tuners have completed their tasks, the page will display the results, including the `ground truth hits` and the `retrieved chunks`.
-
+  
+- Click `Next` to proceed to the Postprocess Context Tuning stage (or fetch the same result over the API, as sketched below).
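If you prefer to script this stage instead of using the UI, the RESTful API section later in this guide exposes the same result. As a sketch, assuming RAG Pilot's default port 16030 and the `{stage}` placeholder filled in with `retrieval`, the id of the best retrieval-stage pipeline can be fetched with:

```bash
# Sketch only: endpoint and port are taken from the RESTful API section of this guide
curl -X GET http://localhost:16030/v1/tuners/stage/retrieval/pipelines/best/id | jq '.'
```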
-
-
+#### 5. Postprocess Context Tuning
+This stage includes one tuner: `RerankerTopnTuner`, which adjusts the number of top-ranked documents returned after reranking, optimizing relevance and conciseness.
+##### 5.1 Postprocess Context Tuning Configure
+Users can configure `RerankerTopnTuner` in the UI.
+- Click `Run Tuners` to start postprocessing-stage tuning.
+- Click `Cancel` and then `Skip` to skip the Postprocess Context Tuning stage.
+- Tuner configurations can be exported and imported with the `Export` and `Import` buttons.

-#### 4. Postprocess Context Tuning
-This stage adjusts the number of top-ranked documents returned after reranking, optimizing relevance and conciseness.
-After this tuner finished, the page will show recall plots of different `topn`
+  
+
+##### 5.2 Postprocess Context Tuning Run & Results
+
+After the tuning finishes, the page shows recall plots for different `topn` values.
 
 
-You can select the desired `Top n` value. The page will display the `ground truth hits` from both the postprocessing and retrieval stages, as well as the `retrieved chunks` from the postprocessing stage.
+- You can select the desired `Top n` value.
+
+- The page will display the `ground truth hits` from both the postprocessing and retrieval stages, as well as the `retrieved chunks` from the postprocessing stage.
+
+- Click numbers on the left to switch between different queries.
+
+- Users can search text via the search box in the upper-right corner to observe which parts of the context match the ground truth context. Text entered into the search box will be highlighted.
+
-
+  
-
-#### 5. Generation tuning
+- Click `Next` to proceed to the Generation Tuning stage.

-This page will displays the activated prompts sourced from text files located in the `./prompt_templates` folder, along with detailed prompt contents specified in the `prompt_contents` section of the ecrag.yaml file.
+#### 6. Generation tuning
+This stage includes one tuner: `PromptTuner`. You can add your own prompts to generate different responses.
+##### 6.1 Generation Tuning Configure
+Users can configure `PromptTuner` in the UI:
+- Click `Cancel` and then `Skip` to skip the Generation Tuning stage.
+- Tuner configurations can be exported and imported with the `Export` and `Import` buttons.

-
+  
+
+- Clicking `Run Tuners` displays all activated prompts:
+
+  
+
-After clicking the Next button, RAG Pilot will utilize these prompts to generate answers. You can then evaluate and rate the responses generated from different prompts.
+- Click `Next` to use these prompts to generate answers.
+
+##### 6.2 Generation Tuning Run & Results
+
+Once the responses are generated, you can evaluate and rate the responses produced by different prompts.
+
+Click numbers on the left to switch between different queries.
 
 
-#### 6. View Results
-After `Generation tuning` stage, you can see the overall user rating of different prompts. For each pipeline, you can view configuration details, download the specific pipeline configurations, and update them to EC-RAG.
+Click `Next` to move to the next stage.
+
+#### 7. View Results
+After the `Generation tuning` stage, you can see the overall rating of the different prompts. For each pipeline, you can view configuration details and apply them to EC-RAG.
 
 
+Note that once you run the `retrieval`, `postprocessing`, or `generation` stage, the EC-RAG active pipeline is changed; reset the pipeline on the EC-RAG server if needed.

 ### ▶️ Use RAG Pilot with RESTful API
+#### Set EC-RAG endpoint
+```bash
+curl -X POST http://localhost:16030/v1/pilot/settings \
+  -H 'Content-Type: application/json' \
+  -d '{"target_endpoint": "10.67.106.189:16010","target_type":"ecrag"}' | jq '.'
+```
 #### Upload ground truth
 ```bash
 curl -X POST http://localhost:16030/v1/pilot/ground_truth/file \
@@ -144,11 +208,6 @@
 curl -X GET http://localhost:16030/v1/tuners/stage/{stage}/pipelines/best/id | jq '.'
 ```
 #### Reset
-##### Restore EC-RAG current active pipeline
-Once you change EC-RAG pipeline ,you can restore RAG Pilot active pipeline to the new EC-RAG pipeline by:
-```bash
-curl -X POST http://localhost:16030/v1/pilot/pipeline/restore | jq '.'
-```
 ##### Reset stage
 ```bash
 curl -X POST http://localhost:16030/v1/tuners/stage/{stage}/reset | jq '.'
@@ -158,7 +217,7 @@
 Note that once you run `retrieval`,`postprocessing` or `generation` stage , the EC-RAG active pipeline will be changed, you have to reset EC-RAG pipeline in EC-RAG server if needed.

 ### 🧩 What's Nodes and Modules

-RAG Pilot represents each stage of the RAG pipeline as a **node**, such as `node_parser`, `indexer`, `retriever`, etc. Each node can have different **modules** that define its type and configuration. The nodes and modules are specified in a YAML file, allowing users to switch between different implementations easily.
+RAG Pilot represents each stage of the RAG pipeline as a **node**, such as `node_parser`, `indexer`, `retriever`, etc. Each node can have different **modules** that define its type and configuration. The nodes and modules are specified in a YAML file, allowing users to switch between different implementations easily.

 Here is an example of nodes and modules for EdgeCraftRAG.
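The example referenced here lives in the unchanged part of the guide, outside this hunk. For orientation, the following is a trimmed sketch consistent with the `configs/RAGPipeline.yaml` added in this patch; node names and attribute lists are abbreviated, so treat it as illustrative rather than the full file:

```yaml
# Trimmed sketch based on configs/RAGPipeline.yaml from this patch (not the full file)
nodes:
  - type: "retriever"            # one node per RAG stage
    params:
      name: "Retriever Node"
    modules:                     # interchangeable implementations of the stage
      - type: "vectorsimilarity"
        attributes:
          - type: "top_k"        # tunable attribute exposed to the tuners
  - type: "postprocessor"
    params:
      name: "Postprocessor Node"
    modules:
      - type: "reranker"
        attributes:
          - type: "top_n"
          - type: "model_name"
```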
diff --git a/evals/evaluation/rag_pilot/pics/activated_prompt.png b/evals/evaluation/rag_pilot/pics/activated_prompt.png index 1b96d0a2..64bf61d8 100644 Binary files a/evals/evaluation/rag_pilot/pics/activated_prompt.png and b/evals/evaluation/rag_pilot/pics/activated_prompt.png differ diff --git a/evals/evaluation/rag_pilot/pics/answer_wi_diff_prompts.png b/evals/evaluation/rag_pilot/pics/answer_wi_diff_prompts.png index b52396f7..8e18eab6 100644 Binary files a/evals/evaluation/rag_pilot/pics/answer_wi_diff_prompts.png and b/evals/evaluation/rag_pilot/pics/answer_wi_diff_prompts.png differ diff --git a/evals/evaluation/rag_pilot/pics/creat_gt1.png b/evals/evaluation/rag_pilot/pics/creat_gt1.png new file mode 100644 index 00000000..422f6c15 Binary files /dev/null and b/evals/evaluation/rag_pilot/pics/creat_gt1.png differ diff --git a/evals/evaluation/rag_pilot/pics/create_ground_truth.png b/evals/evaluation/rag_pilot/pics/create_ground_truth.png deleted file mode 100644 index 88a15c6a..00000000 Binary files a/evals/evaluation/rag_pilot/pics/create_ground_truth.png and /dev/null differ diff --git a/evals/evaluation/rag_pilot/pics/diff_prompt_res.png b/evals/evaluation/rag_pilot/pics/diff_prompt_res.png index 3d5f824a..4fe52e00 100644 Binary files a/evals/evaluation/rag_pilot/pics/diff_prompt_res.png and b/evals/evaluation/rag_pilot/pics/diff_prompt_res.png differ diff --git a/evals/evaluation/rag_pilot/pics/download.png b/evals/evaluation/rag_pilot/pics/download.png new file mode 100644 index 00000000..7625e676 Binary files /dev/null and b/evals/evaluation/rag_pilot/pics/download.png differ diff --git a/evals/evaluation/rag_pilot/pics/gt_select.png b/evals/evaluation/rag_pilot/pics/gt_select.png new file mode 100644 index 00000000..f4f37893 Binary files /dev/null and b/evals/evaluation/rag_pilot/pics/gt_select.png differ diff --git a/evals/evaluation/rag_pilot/pics/postprocess_chunks.png b/evals/evaluation/rag_pilot/pics/postprocess_chunks.png index c1c12fc5..52268b4d 100644 Binary files a/evals/evaluation/rag_pilot/pics/postprocess_chunks.png and b/evals/evaluation/rag_pilot/pics/postprocess_chunks.png differ diff --git a/evals/evaluation/rag_pilot/pics/postprocessing_config.png b/evals/evaluation/rag_pilot/pics/postprocessing_config.png new file mode 100644 index 00000000..23d2f4d6 Binary files /dev/null and b/evals/evaluation/rag_pilot/pics/postprocessing_config.png differ diff --git a/evals/evaluation/rag_pilot/pics/prompt_config.png b/evals/evaluation/rag_pilot/pics/prompt_config.png new file mode 100644 index 00000000..f20e826f Binary files /dev/null and b/evals/evaluation/rag_pilot/pics/prompt_config.png differ diff --git a/evals/evaluation/rag_pilot/pics/rating.png b/evals/evaluation/rag_pilot/pics/rating.png deleted file mode 100644 index 705da858..00000000 Binary files a/evals/evaluation/rag_pilot/pics/rating.png and /dev/null differ diff --git a/evals/evaluation/rag_pilot/pics/rating1.png b/evals/evaluation/rag_pilot/pics/rating1.png new file mode 100644 index 00000000..f0e89ad3 Binary files /dev/null and b/evals/evaluation/rag_pilot/pics/rating1.png differ diff --git a/evals/evaluation/rag_pilot/pics/rating2.png b/evals/evaluation/rag_pilot/pics/rating2.png new file mode 100644 index 00000000..15d38b44 Binary files /dev/null and b/evals/evaluation/rag_pilot/pics/rating2.png differ diff --git a/evals/evaluation/rag_pilot/pics/retrieval_config.png b/evals/evaluation/rag_pilot/pics/retrieval_config.png new file mode 100644 index 00000000..857b809b Binary files /dev/null and 
b/evals/evaluation/rag_pilot/pics/retrieval_config.png differ diff --git a/evals/evaluation/rag_pilot/pics/retrieved_chunks.png b/evals/evaluation/rag_pilot/pics/retrieved_chunks.png index 04886144..b15c33b8 100644 Binary files a/evals/evaluation/rag_pilot/pics/retrieved_chunks.png and b/evals/evaluation/rag_pilot/pics/retrieved_chunks.png differ diff --git a/evals/evaluation/rag_pilot/pics/retrieved_pipelines.png b/evals/evaluation/rag_pilot/pics/retrieved_pipelines.png index 84168ea6..31877743 100644 Binary files a/evals/evaluation/rag_pilot/pics/retrieved_pipelines.png and b/evals/evaluation/rag_pilot/pics/retrieved_pipelines.png differ diff --git a/evals/evaluation/rag_pilot/pics/set_ecragendpoint.png b/evals/evaluation/rag_pilot/pics/set_ecragendpoint.png new file mode 100644 index 00000000..a6a64e90 Binary files /dev/null and b/evals/evaluation/rag_pilot/pics/set_ecragendpoint.png differ diff --git a/evals/evaluation/rag_pilot/pics/upload_ground_truth.png b/evals/evaluation/rag_pilot/pics/upload_ground_truth.png deleted file mode 100644 index 312f0667..00000000 Binary files a/evals/evaluation/rag_pilot/pics/upload_ground_truth.png and /dev/null differ diff --git a/evals/evaluation/rag_pilot/pics/view_res.png b/evals/evaluation/rag_pilot/pics/view_res.png deleted file mode 100644 index 0c2059a1..00000000 Binary files a/evals/evaluation/rag_pilot/pics/view_res.png and /dev/null differ diff --git a/evals/evaluation/rag_pilot/requirements.txt b/evals/evaluation/rag_pilot/requirements.txt index 1a0bb29c..c11afd30 100644 --- a/evals/evaluation/rag_pilot/requirements.txt +++ b/evals/evaluation/rag_pilot/requirements.txt @@ -1,6 +1,11 @@ -fastapi>=0.115.0 -pandas>=2.3.0 -python-multipart>=0.0.20 -PyYAML>=6.0.2 -requests>=2.32.4 -uvicorn>=0.34.3 +beautifulsoup4==4.12.3 +deprecated==1.2.10 +fastapi==0.115.0 +fsspec==2024.3.1 +llama-index==0.12.37 +llama-index-core==0.12.37 +pandas==2.3.0 +python-multipart==0.0.20 +PyYAML==6.0.2 +requests==2.32.4 +uvicorn==0.34.3 diff --git a/evals/evaluation/rag_pilot/run_pilot.py b/evals/evaluation/rag_pilot/run_pilot.py deleted file mode 100644 index f8273767..00000000 --- a/evals/evaluation/rag_pilot/run_pilot.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import argparse -from collections import defaultdict -from enum import Enum -from time import sleep - -from api_schema import RAGStage -from components.connect_utils import get_active_pipeline, load_pipeline_from_json, reindex_data -from components.pilot.base import Metrics, RAGPipeline -from components.pilot.pilot import Pilot -from components.tuner.adaptor import Adaptor -from components.tuner.tuner import ( - EmbeddingTuner, - NodeParserTuner, - PromptTuner, - RerankerTopnTuner, - RetrievalTopkRerankerTopnTuner, - RetrievalTopkTuner, - SimpleNodeParserChunkTuner, - input_parser, -) -from components.utils import load_rag_results_from_csv, read_yaml - - -class Mode(str, Enum): - ONLINE = "online" - OFFLINE = "offline" - - -RESET = "\033[0m" -BOLD = "\033[1m" -YELLOW = "\033[93m" -GREEN = "\033[92m" - - -def main(): - parser = argparse.ArgumentParser() - - # common - parser.add_argument( - "-y", - "--rag_module_yaml", - default="configs/ecrag.yaml", - type=str, - help="Path to the YAML file containing all tunable rag configurations.", - ) - - # online - parser.add_argument( - "-q", - "--qa_list", - default="configs/netsec_sample.csv", - type=str, - help="Path to the file containing the list of queries.", - ) - - args = 
parser.parse_args() - - adaptor = Adaptor(read_yaml(args.rag_module_yaml)) - - retrieval_tuner_list = [ - EmbeddingTuner(adaptor), - NodeParserTuner(adaptor), - SimpleNodeParserChunkTuner(adaptor), - RetrievalTopkTuner(adaptor), - ] - postprocessing_tuner_list = [RerankerTopnTuner(adaptor)] - prompt_tuner_list = [PromptTuner(adaptor)] - rag_results = load_rag_results_from_csv(args.qa_list) - pilot = Pilot(rag_results_sample=rag_results, hit_threshold=0.9) - - active_pl = RAGPipeline(get_active_pipeline()) - active_pl.regenerate_id() - - pilot.add_rag_pipeline(active_pl) - pilot.run_pipeline() - - def ask_stage_satisfaction(stage) -> bool: - rag_results = pilot.get_curr_results() - if stage is RAGStage.RETRIEVAL: - recall_rate = rag_results.metadata.get(Metrics.RETRIEVAL_RECALL, None) - elif stage is RAGStage.POSTPROCESSING: - recall_rate = rag_results.metadata.get(Metrics.POSTPROCESSING_RECALL, None) - else: - recall_rate = None - - print(f"\n{BOLD}{YELLOW}[STAGE {stage.value}]{RESET} recall_rate is {recall_rate}") - print("Are you satisfied with this metric?\n 1: Yes and jump to next stage\n 2: No and keep tuning") - valid, user_input = input_parser(2) - return valid and user_input == 1 - - def run_tuner_stage(tuner_list, stage): - print(f"\n{BOLD}{YELLOW}🔄 Starting tuning stage: {stage.value}{RESET}") - - for i, tuner in enumerate(tuner_list): - active_pl = pilot.get_curr_pl() - adaptor.update_all_module_functions(active_pl) - - pl_list = [] - params_candidates = [] - - print("") - if tuner.request_feedback(): - pl_list, params_candidates = tuner.apply_suggestions() - for pl, params in zip(pl_list, params_candidates): - print(f"Trying to update pipeline to {params}") - is_prompt_tuning = stage == RAGStage.GENERATION and "prompt_content" in params - if pl.id != active_pl.id: - pilot.add_rag_pipeline(pl) - pilot.curr_pl_id = pl.id - if not is_prompt_tuning: - reindex_data() - pilot.run_pipeline() - print("Metrics of this pipeline:") - results = pilot.get_results(pl.id) - if results: - results.check_metadata() - - pilot.change_best_recall_pl(stage) - - print("") - for pl, params in zip(pl_list, params_candidates): - if pl.id == pilot.curr_pl_id: - print(f"{BOLD}{GREEN}✅ Changing pipeline to {params} with below metrics:{RESET}") - break - else: - print(f"{BOLD}{GREEN}↩️ Fallback to previous pipeline with below metrics:{RESET}") - pilot.get_curr_results().check_metadata() - - # Ask satisfaction only if not the last tuner - if i < len(tuner_list) - 1: - if ask_stage_satisfaction(stage): - return True - else: - print(f"{BOLD}{YELLOW}⏭️ All tuners tried for {stage.value}, proceeding to next stage...{RESET}") - - return False - - def run_full_tuning(): - # Step 1: POSTPROCESSING initial check - if ask_stage_satisfaction(RAGStage.POSTPROCESSING): - print("User satisfied with POSTPROCESSING. Exiting.") - return - - # Step 2: RETRIEVAL - if ask_stage_satisfaction(RAGStage.RETRIEVAL): - print("User satisfied with RETRIEVAL. Proceeding to POSTPROCESSING tuning...") - else: - _ = run_tuner_stage(retrieval_tuner_list, RAGStage.RETRIEVAL) - sleep(1) - if ask_stage_satisfaction(RAGStage.POSTPROCESSING): - print("User satisfied with POSTPROCESSING. 
Exiting.") - return - - # Step 3: POSTPROCESSING tuning - print("\nStarting POSTPROCESSING tuning...") - _ = run_tuner_stage(postprocessing_tuner_list, RAGStage.POSTPROCESSING) - - # Step 4: Optional PROMPT tuning - print("\nStarting PROMPT tuning...") - _ = run_tuner_stage(prompt_tuner_list, RAGStage.GENERATION) - - print(f"\n{BOLD}{GREEN}🎯 Tuning complete.{RESET}") - - run_full_tuning() - - print("Metrics of final pipeline:") - pilot.get_curr_results().check_metadata() - - pilot.save_dicts() - - -if __name__ == "__main__": - main() diff --git a/evals/evaluation/rag_pilot/server.py b/evals/evaluation/rag_pilot/server.py index 73c493b3..1eafae78 100644 --- a/evals/evaluation/rag_pilot/server.py +++ b/evals/evaluation/rag_pilot/server.py @@ -6,8 +6,8 @@ import uvicorn from api.v1.pilot import pilot_app from api.v1.tuner import tuner_app -from components.pilot.pilot import init_active_pipeline -from components.tuner.tunermgr import init_tuners +from components.adaptor.ecrag import ECRAGAdaptor +from components.pilot.pilot import pilot from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware @@ -29,8 +29,7 @@ @app.on_event("startup") def startup(): - init_active_pipeline() - init_tuners() + pilot.add_adaptor(ECRAGAdaptor()) if __name__ == "__main__": diff --git a/evals/evaluation/rag_pilot/ui/.env.development b/evals/evaluation/rag_pilot/ui/.env.development index bb77a0be..069a555c 100644 --- a/evals/evaluation/rag_pilot/ui/.env.development +++ b/evals/evaluation/rag_pilot/ui/.env.development @@ -2,4 +2,4 @@ ENV = development # Local Api -VITE_API_URL = http://10.67.106.189:16030/ +VITE_API_URL = http://10.67.106.238:16030/ diff --git a/evals/evaluation/rag_pilot/ui/components.d.ts b/evals/evaluation/rag_pilot/ui/components.d.ts index 63402aa7..b3e3bc58 100644 --- a/evals/evaluation/rag_pilot/ui/components.d.ts +++ b/evals/evaluation/rag_pilot/ui/components.d.ts @@ -14,7 +14,8 @@ declare module 'vue' { ABadgeRibbon: typeof import('ant-design-vue/es')['BadgeRibbon'] AButton: typeof import('ant-design-vue/es')['Button'] ACard: typeof import('ant-design-vue/es')['Card'] - ACardMeta: typeof import('ant-design-vue/es')['CardMeta'] + ACheckbox: typeof import('ant-design-vue/es')['Checkbox'] + ACheckboxGroup: typeof import('ant-design-vue/es')['CheckboxGroup'] ACol: typeof import('ant-design-vue/es')['Col'] ACollapse: typeof import('ant-design-vue/es')['Collapse'] ACollapsePanel: typeof import('ant-design-vue/es')['CollapsePanel'] @@ -22,18 +23,26 @@ declare module 'vue' { ADivider: typeof import('ant-design-vue/es')['Divider'] ADrawer: typeof import('ant-design-vue/es')['Drawer'] ADropdown: typeof import('ant-design-vue/es')['Dropdown'] + AEmpty: typeof import('ant-design-vue/es')['Empty'] AForm: typeof import('ant-design-vue/es')['Form'] AFormItem: typeof import('ant-design-vue/es')['FormItem'] + AFormItemRest: typeof import('ant-design-vue/es')['FormItemRest'] AInput: typeof import('ant-design-vue/es')['Input'] ALayout: typeof import('ant-design-vue/es')['Layout'] ALayoutContent: typeof import('ant-design-vue/es')['LayoutContent'] ALayoutHeader: typeof import('ant-design-vue/es')['LayoutHeader'] AMenu: typeof import('ant-design-vue/es')['Menu'] AMenuItem: typeof import('ant-design-vue/es')['MenuItem'] + AModal: typeof import('ant-design-vue/es')['Modal'] APagination: typeof import('ant-design-vue/es')['Pagination'] + ARadio: typeof import('ant-design-vue/es')['Radio'] + ARadioGroup: typeof import('ant-design-vue/es')['RadioGroup'] ARate: typeof 
import('ant-design-vue/es')['Rate'] ARow: typeof import('ant-design-vue/es')['Row'] + ASelect: typeof import('ant-design-vue/es')['Select'] + ASelectOption: typeof import('ant-design-vue/es')['SelectOption'] ASlider: typeof import('ant-design-vue/es')['Slider'] + ASpace: typeof import('ant-design-vue/es')['Space'] ASpin: typeof import('ant-design-vue/es')['Spin'] AStep: typeof import('ant-design-vue/es')['Step'] ASteps: typeof import('ant-design-vue/es')['Steps'] @@ -43,6 +52,7 @@ declare module 'vue' { ATag: typeof import('ant-design-vue/es')['Tag'] ATextarea: typeof import('ant-design-vue/es')['Textarea'] ATooltip: typeof import('ant-design-vue/es')['Tooltip'] + AUpload: typeof import('ant-design-vue/es')['Upload'] AUploadDragger: typeof import('ant-design-vue/es')['UploadDragger'] FormTooltip: typeof import('./src/components/FormTooltip.vue')['default'] RouterLink: typeof import('vue-router')['RouterLink'] diff --git a/evals/evaluation/rag_pilot/ui/index.html b/evals/evaluation/rag_pilot/ui/index.html index 1cacbdf1..77122547 100644 --- a/evals/evaluation/rag_pilot/ui/index.html +++ b/evals/evaluation/rag_pilot/ui/index.html @@ -11,7 +11,7 @@ -
${codeHtml}
+renderer.code = ({ text, lang }: CodeRenderParams) => {
+  const language = hljs.getLanguage(lang || "") ? lang : "plaintext";
+  const codeTitle = formatCapitalize(language || "Code");
+  const codeHtml = hljs.highlight(text, {
+    language: language || "plaintext",
+  }).value;
+
+  return `
+    ...
+    ${codeHtml}
+    ...
+  `;
+};
+ {{ $t("home.sizeFormat") }}
@@ -90,7 +87,7 @@
- {{ $t("home.createdText") }}
+ {{ $t("home.annotationDes") }}
  {{ $t("home.createdTip") }}
- {{ $t("pipeline.name") }}
- {{ formData.name }}
+ {{ $t("common.noData") }}
+ {{ $t("common.runTip") }}
- {{ describe }}
+ {{ describe ? describe : t("common.waitTip") }}
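A note on the renderer fragment above: in marked v13 and later, renderer hooks receive a single token object rather than positional arguments, which is why the new `renderer.code` destructures `{ text, lang }`. The sketch below shows how such an override can be wired up end to end. It is an illustration only: it assumes marked >= 13 and highlight.js >= 11, it stubs `CodeRenderParams` and `formatCapitalize` (which in this patch come from the surrounding UI code), and the wrapper markup is invented, since the real template was lost in the hunk above.

```ts
// Sketch of a highlight-aware code renderer for marked (assumed: marked >= 13,
// highlight.js >= 11). CodeRenderParams and formatCapitalize stand in for the
// repo's own helpers; the wrapper markup here is illustrative, not the real one.
import { marked, Renderer } from "marked";
import hljs from "highlight.js";

interface CodeRenderParams {
  text: string;
  lang?: string;
}

// e.g. "python" -> "Python", used for the block's title bar
const formatCapitalize = (s: string): string =>
  s.charAt(0).toUpperCase() + s.slice(1);

const renderer = new Renderer();

renderer.code = ({ text, lang }: CodeRenderParams): string => {
  // Fall back to plaintext when the fence's language tag is unknown to hljs
  const language = hljs.getLanguage(lang || "") ? (lang as string) : "plaintext";
  const codeTitle = formatCapitalize(language || "Code");
  const codeHtml = hljs.highlight(text, { language: language || "plaintext" }).value;

  // Wrap the highlighted markup in a titled container
  return `<div class="code-block">
  <div class="code-header">${codeTitle}</div>
  <pre><code class="hljs language-${language}">${codeHtml}</code></pre>
</div>`;
};

marked.use({ renderer });

// Usage: render a fenced code block to highlighted HTML
const sample = ["```python", "print('hello')", "```"].join("\n");
console.log(marked.parse(sample));
```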