
Commit

Merge branch 'main' into bedrock_embedding_support
Mini256 authored Dec 29, 2024
2 parents 2c155e4 + 9b05f10 commit d2b26d6
Showing 138 changed files with 4,335 additions and 1,771 deletions.
29 changes: 23 additions & 6 deletions CONTRIBUTING.md
@@ -2,7 +2,7 @@

## Contributing Guidelines

-pingcap/tidb.ai is an open-source project and we welcome contributions from the community. If you are interested in contributing to the project, please read the following guidelines.
+[pingcap/autoflow](https://github.com/pingcap/autoflow) is an open-source project and we welcome contributions from the community. If you are interested in contributing to the project, please read the following guidelines.

### Before You Get Started

@@ -22,25 +22,42 @@ Setting up the project on your local machine is the first step to contributing t
To test your local changes, you can build and run the project using:

```bash
-docker compose -f docker-compose.build.yml up
+docker compose -f docker-compose.dev.yml up
```

### Your First Contribution

-All set to participate in the project? You can start by looking at the [open issues](https://github.com/pingcap/tidb.ai/issues) in this repo.
+All set to participate in the project? You can start by looking at the [open issues](https://github.com/pingcap/autoflow/issues) in this repo.


### Components of the Project

The project is divided into several components, and you can contribute to any of the following components:
-* [Frontend](https://github.com/pingcap/tidb.ai/tree/main/frontend): The frontend of the project is built using Next.js.
+* [Frontend](https://github.com/pingcap/autoflow/tree/main/frontend): The frontend of the project is built using Next.js.
* [Backend](https://github.com/pingcap/tidb.ai/tree/main/backend): The backend of the project is built using FastAPI.
-* [Data Source](https://github.com/pingcap/tidb.ai/tree/main/backend/app/rag/datasource): The Data Source component is responsible for indexing the data from different type of sources. You can add more data source types to the project.
+* [Data Source](https://github.com/pingcap/autoflow/tree/main/backend/app/rag/datasource): The Data Source component is responsible for indexing the data from different types of sources. You can add more data source types to the project.
* [LLM](https://github.com/pingcap/tidb.ai/tree/main/backend/app/rag/llms): The LLM Engine component is responsible for extracting knowledge from docs and generating responses. You can add support for more LLM models to the project.
* [Reranker](https://github.com/pingcap/tidb.ai/blob/main/backend/app/rag/reranker_model_option.py): The Reranker Engine component is responsible for reranking the results retrieved from the database. You can add support for more Reranker models to the project.
* [Embedding](https://github.com/pingcap/tidb.ai/blob/main/backend/app/rag/embed_model_option.py): The Embedding Engine component is responsible for converting text into vectors. You can add support for more Embedding models to the project.
* [RAG & GraphRAG Engine](https://github.com/pingcap/tidb.ai/tree/main/backend/app/rag): This component is responsible for extracting knowledge from docs, then chunking, indexing, and storing the data in the database; it also retrieves data from the database and generates answers for the user.
-* [Documentations](https://github.com/pingcap/tidb.ai/tree/main/frontend/app/src/pages): The documentation of the project is written in Markdown files. You can contribute to the documentation by adding more content to the documentation.]
+* [Documentations](https://github.com/pingcap/tidb.ai/tree/main/frontend/app/src/pages): The documentation of the project is written in Markdown files. You can contribute to the documentation by adding more content to the documentation.

### How to Add an API?

Use the FastAPI framework. Follow the steps below; a complete sketch combining them appears after the list.
* Create an `APIRouter` instance.
```python
from fastapi import APIRouter
router = APIRouter()
```
* Use decorators to define API endpoints. For example:
```python
@router.get("/xxx")
```
* Implement the route handler function.

* Register the sub-router on the main router and tag it in `backend/app/api/main.py`:
```python
api_router.include_router(sub_router, tags=["xxxx"])
```
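
As promised above, here is a minimal sketch that combines these steps. The `/admin/examples` path, `list_examples` handler, and `examples` tag are illustrative placeholders, not existing routes in the project:

```python
from fastapi import APIRouter

# Hypothetical sub-router; path and tag below are for illustration only.
router = APIRouter()


@router.get("/admin/examples")
def list_examples() -> list[str]:
    # Route handler: FastAPI serializes the return value to JSON.
    return ["example-1", "example-2"]


# In backend/app/api/main.py, this sub-router would then be registered as:
# api_router.include_router(router, tags=["examples"])
```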

## Maintainers

50 changes: 35 additions & 15 deletions backend/app/api/admin_routes/evaluation/evaluation_dataset.py
@@ -1,15 +1,16 @@
import pandas as pd
from fastapi import APIRouter, status, HTTPException, Depends
-from fastapi_pagination import Params, Page
+from fastapi_pagination import Page
from fastapi_pagination.ext.sqlmodel import paginate
from sqlmodel import select, desc

from app.api.admin_routes.evaluation.models import (
    CreateEvaluationDataset,
    UpdateEvaluationDataset,
    ModifyEvaluationDatasetItem,
+    ParamsWithKeyword,
)
-from app.api.admin_routes.evaluation.tools import must_get, must_get_and_belong
+from app.api.admin_routes.evaluation.tools import must_get
from app.api.deps import SessionDep, CurrentSuperuserDep
from app.file_storage import default_file_storage
from app.models import Upload, EvaluationDataset, EvaluationDatasetItem
@@ -42,11 +43,11 @@ def create_evaluation_dataset(
        True if the evaluation dataset is created successfully.
    """
    name = evaluation_dataset.name
-    evaluation_file_id = evaluation_dataset.upload_id
-
-    if evaluation_file_id is not None:
+    evaluation_data_list = []
+    if evaluation_dataset.upload_id is not None:
        # If the evaluation_file_id is provided, validate the uploaded file
-        upload = must_get_and_belong(session, Upload, evaluation_file_id, user.id)
+        evaluation_file_id = evaluation_dataset.upload_id
+        upload = must_get(session, Upload, evaluation_file_id)

        if upload.mime_type != MimeTypes.CSV:
            raise HTTPException(
@@ -85,6 +86,7 @@ def create_evaluation_dataset(

    session.add(evaluation_dataset)
    session.commit()
+    session.refresh(evaluation_dataset)

    return evaluation_dataset

@@ -93,8 +95,8 @@ def create_evaluation_dataset(
def delete_evaluation_dataset(
    evaluation_dataset_id: int, session: SessionDep, user: CurrentSuperuserDep
) -> bool:
-    evaluation_dataset = must_get_and_belong(
-        session, EvaluationDataset, evaluation_dataset_id, user.id
+    evaluation_dataset = must_get(
+        session, EvaluationDataset, evaluation_dataset_id
    )

    session.delete(evaluation_dataset)
Expand All @@ -110,14 +112,15 @@ def update_evaluation_dataset(
session: SessionDep,
user: CurrentSuperuserDep,
) -> EvaluationDataset:
evaluation_dataset = must_get_and_belong(
session, EvaluationDataset, evaluation_dataset_id, user.id
evaluation_dataset = must_get(
session, EvaluationDataset, evaluation_dataset_id
)

evaluation_dataset.name = updated_evaluation_dataset.name

session.merge(evaluation_dataset)
session.commit()
session.refresh(evaluation_dataset)

return evaluation_dataset

@@ -126,13 +129,16 @@ def update_evaluation_dataset(
def list_evaluation_dataset(
    session: SessionDep,
    user: CurrentSuperuserDep,
-    params: Params = Depends(),
+    params: ParamsWithKeyword = Depends(),
) -> Page[EvaluationDataset]:
    stmt = (
        select(EvaluationDataset)
        .where(EvaluationDataset.user_id == user.id)
        .order_by(desc(EvaluationDataset.id))
    )

+    if params.keyword:
+        stmt = stmt.where(EvaluationDataset.name.ilike(f"%{params.keyword}%"))

    return paginate(session, stmt, params)


@@ -152,6 +158,7 @@ def create_evaluation_dataset_item(

    session.add(evaluation_dataset_item)
    session.commit()
+    session.refresh(evaluation_dataset_item)

    return evaluation_dataset_item

@@ -161,7 +168,7 @@ def delete_evaluation_dataset_item(
    evaluation_dataset_item_id: int, session: SessionDep, user: CurrentSuperuserDep
) -> bool:
    evaluation_dataset_item = must_get(
-        session, EvaluationDataset, evaluation_dataset_item_id
+        session, EvaluationDatasetItem, evaluation_dataset_item_id
    )

    session.delete(evaluation_dataset_item)
@@ -190,8 +197,9 @@ def update_evaluation_dataset_item(
    evaluation_dataset_item.evaluation_dataset_id = (
        updated_evaluation_dataset_item.evaluation_dataset_id
    )

    session.merge(evaluation_dataset_item)
    session.commit()
+    session.refresh(evaluation_dataset_item)

    return evaluation_dataset_item

@@ -201,11 +209,23 @@ def list_evaluation_dataset_item(
    session: SessionDep,
    user: CurrentSuperuserDep,
    evaluation_dataset_id: int,
-    params: Params = Depends(),
+    params: ParamsWithKeyword = Depends(),
) -> Page[EvaluationDatasetItem]:
    stmt = (
        select(EvaluationDatasetItem)
        .where(EvaluationDatasetItem.evaluation_dataset_id == evaluation_dataset_id)
        .order_by(EvaluationDatasetItem.id)
    )

+    if params.keyword:
+        stmt = stmt.where(EvaluationDatasetItem.query.ilike(f"%{params.keyword}%"))
    return paginate(session, stmt, params)
+
+
+@router.get("/admin/evaluation/dataset-items/{evaluation_dataset_item_id}")
+def get_evaluation_dataset_item(
+    session: SessionDep,
+    user: CurrentSuperuserDep,
+    evaluation_dataset_item_id: int,
+) -> EvaluationDatasetItem:
+    return must_get(session, EvaluationDatasetItem, evaluation_dataset_item_id)
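
A note on the keyword-filter changes above: `ParamsWithKeyword` is imported from `app.api.admin_routes.evaluation.models`, but its definition is not part of this diff. Assuming it simply extends fastapi_pagination's `Params` with an optional `keyword` query parameter (an assumption based on its usage here, not on the actual source), a minimal sketch would be:

```python
from typing import Optional

from fastapi_pagination import Params


class ParamsWithKeyword(Params):
    # Optional free-text filter; None disables keyword filtering.
    # Assumed definition inferred from usage in this diff, not the actual source.
    keyword: Optional[str] = None
```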