From affe319460e9fcf181a6b5f160aee38f6710d33f Mon Sep 17 00:00:00 2001
From: Deshraj Yadav
Date: Fri, 12 Jan 2024 21:29:59 +0530
Subject: [PATCH] [Refactor] Change evaluation script path (#1165)

---
 docs/api-reference/{pipeline => app}/add.mdx  | 0
 docs/api-reference/{pipeline => app}/chat.mdx | 0
 .../{pipeline => app}/delete.mdx              | 0
 .../{pipeline => app}/deploy.mdx              | 0
 .../{pipeline => app}/evaluate.mdx            | 0
 .../{pipeline => app}/overview.mdx            | 22 ++++++-------
 .../api-reference/{pipeline => app}/query.mdx | 0
 .../api-reference/{pipeline => app}/reset.mdx | 0
 .../{pipeline => app}/search.mdx              | 0
 docs/components/evaluation.mdx                | 33 ++++++++++---------
 embedchain/app.py                             | 18 ++++++----
 .../config/{eval => evaluation}/__init__.py   | 0
 .../config/{eval => evaluation}/base.py       | 0
 embedchain/{eval => evaluation}/__init__.py   | 0
 embedchain/{eval => evaluation}/base.py       | 2 +-
 .../{eval => evaluation}/metrics/__init__.py  | 0
 .../metrics/answer_relevancy.py               | 6 ++--
 .../metrics/context_relevancy.py              | 6 ++--
 .../metrics/groundedness.py                   | 6 ++--
 embedchain/utils/{eval.py => evaluation.py}   | 0
 pyproject.toml                                | 2 +-
 21 files changed, 50 insertions(+), 45 deletions(-)
 rename docs/api-reference/{pipeline => app}/add.mdx (100%)
 rename docs/api-reference/{pipeline => app}/chat.mdx (100%)
 rename docs/api-reference/{pipeline => app}/delete.mdx (100%)
 rename docs/api-reference/{pipeline => app}/deploy.mdx (100%)
 rename docs/api-reference/{pipeline => app}/evaluate.mdx (100%)
 rename docs/api-reference/{pipeline => app}/overview.mdx (76%)
 rename docs/api-reference/{pipeline => app}/query.mdx (100%)
 rename docs/api-reference/{pipeline => app}/reset.mdx (100%)
 rename docs/api-reference/{pipeline => app}/search.mdx (100%)
 rename embedchain/config/{eval => evaluation}/__init__.py (100%)
 rename embedchain/config/{eval => evaluation}/base.py (100%)
 rename embedchain/{eval => evaluation}/__init__.py (100%)
 rename embedchain/{eval => evaluation}/base.py (93%)
 rename embedchain/{eval => evaluation}/metrics/__init__.py (100%)
 rename embedchain/{eval => evaluation}/metrics/answer_relevancy.py (94%)
 rename embedchain/{eval => evaluation}/metrics/context_relevancy.py (93%)
 rename embedchain/{eval => evaluation}/metrics/groundedness.py (95%)
 rename embedchain/utils/{eval.py => evaluation.py} (100%)

diff --git a/docs/api-reference/pipeline/add.mdx b/docs/api-reference/app/add.mdx
similarity index 100%
rename from docs/api-reference/pipeline/add.mdx
rename to docs/api-reference/app/add.mdx
diff --git a/docs/api-reference/pipeline/chat.mdx b/docs/api-reference/app/chat.mdx
similarity index 100%
rename from docs/api-reference/pipeline/chat.mdx
rename to docs/api-reference/app/chat.mdx
diff --git a/docs/api-reference/pipeline/delete.mdx b/docs/api-reference/app/delete.mdx
similarity index 100%
rename from docs/api-reference/pipeline/delete.mdx
rename to docs/api-reference/app/delete.mdx
diff --git a/docs/api-reference/pipeline/deploy.mdx b/docs/api-reference/app/deploy.mdx
similarity index 100%
rename from docs/api-reference/pipeline/deploy.mdx
rename to docs/api-reference/app/deploy.mdx
diff --git a/docs/api-reference/pipeline/evaluate.mdx b/docs/api-reference/app/evaluate.mdx
similarity index 100%
rename from docs/api-reference/pipeline/evaluate.mdx
rename to docs/api-reference/app/evaluate.mdx
diff --git a/docs/api-reference/pipeline/overview.mdx b/docs/api-reference/app/overview.mdx
similarity index 76%
rename from docs/api-reference/pipeline/overview.mdx
rename to docs/api-reference/app/overview.mdx
index 2f6fc9f6c3..8c369cbf84 100644
--- a/docs/api-reference/pipeline/overview.mdx
+++ b/docs/api-reference/app/overview.mdx
@@ -1,34 +1,34 @@
 ---
-title: "Pipeline"
+title: "App"
 ---
-Create a RAG pipeline object on Embedchain. This is the main entrypoint for a developer to interact with Embedchain APIs. A pipeline configures the llm, vector database, embedding model, and retrieval strategy of your choice.
+Create a RAG app object on Embedchain. This is the main entrypoint for a developer to interact with Embedchain APIs. An app configures the llm, vector database, embedding model, and retrieval strategy of your choice.
 ### Attributes
- Pipeline ID
+ App ID
- Name of the pipeline
+ Name of the app
- Configuration of the pipeline
+ Configuration of the app
- Configured LLM for the RAG pipeline
+ Configured LLM for the RAG app
- Configured vector database for the RAG pipeline
+ Configured vector database for the RAG app
- Configured embedding model for the RAG pipeline
+ Configured embedding model for the RAG app
 Chunker configuration
- Client object (used to deploy a pipeline to Embedchain platform)
+ Client object (used to deploy an app to Embedchain platform)
 Logger object
@@ -36,7 +36,7 @@ Create a RAG pipeline object on Embedchain. This is the main entrypoint for a de
 ## Usage
-You can create an embedchain pipeline instance using the following methods:
+You can create an app instance using the following methods:
 ### Default setting
@@ -127,4 +127,4 @@ app = App.from_config(config_path="config.json")
 }
 ```
-
\ No newline at end of file
+
diff --git a/docs/api-reference/pipeline/query.mdx b/docs/api-reference/app/query.mdx
similarity index 100%
rename from docs/api-reference/pipeline/query.mdx
rename to docs/api-reference/app/query.mdx
diff --git a/docs/api-reference/pipeline/reset.mdx b/docs/api-reference/app/reset.mdx
similarity index 100%
rename from docs/api-reference/pipeline/reset.mdx
rename to docs/api-reference/app/reset.mdx
diff --git a/docs/api-reference/pipeline/search.mdx b/docs/api-reference/app/search.mdx
similarity index 100%
rename from docs/api-reference/pipeline/search.mdx
rename to docs/api-reference/app/search.mdx
diff --git a/docs/components/evaluation.mdx b/docs/components/evaluation.mdx
index 6eed725e9d..c1143d2ecd 100644
--- a/docs/components/evaluation.mdx
+++ b/docs/components/evaluation.mdx
@@ -84,7 +84,7 @@ Once you have created your dataset, you can run evaluation on the dataset by pic
 For example, you can run evaluation on context relevancy metric using the following code:
 ```python
-from embedchain.eval.metrics import ContextRelevance
+from embedchain.evaluation.metrics import ContextRelevance
 metric = ContextRelevance()
 score = metric.evaluate(dataset)
 print(score)
@@ -112,20 +112,21 @@ context_relevance_score = num_relevant_sentences_in_context / num_of_sentences_i
 You can run the context relevancy evaluation with the following simple code:
 ```python
-from embedchain.eval.metrics import ContextRelevance
+from embedchain.evaluation.metrics import ContextRelevance
 metric = ContextRelevance()
 score = metric.evaluate(dataset) # 'dataset' is definted in the create dataset section
 print(score)
 # 0.27975528364849833
 ```
+
 In the above example, we used sensible defaults for the evaluation. However, you can also configure the evaluation metric as per your needs using the `ContextRelevanceConfig` class.
 Here is a more advanced example of how to pass a custom evaluation config for evaluating on context relevance metric:
 ```python
-from embedchain.config.eval.base import ContextRelevanceConfig
-from embedchain.eval.metrics import ContextRelevance
+from embedchain.config.evaluation.base import ContextRelevanceConfig
+from embedchain.evaluation.metrics import ContextRelevance
 eval_config = ContextRelevanceConfig(model="gpt-4", api_key="sk-xxx", language="en")
 metric = ContextRelevance(config=eval_config)
@@ -144,7 +145,7 @@ metric.evaluate(dataset)
  The language of the dataset being evaluated. We need this to determine the understand the context provided in the dataset. Defaults to `en`.
- The prompt to extract the relevant sentences from the context. Defaults to `CONTEXT_RELEVANCY_PROMPT`, which can be found at `embedchain.config.eval.base` path.
+ The prompt to extract the relevant sentences from the context. Defaults to `CONTEXT_RELEVANCY_PROMPT`, which can be found at `embedchain.config.evaluation.base` path.
@@ -161,7 +162,7 @@ answer_relevancy_score = mean(cosine_similarity(generated_questions, original_qu
 You can run the answer relevancy evaluation with the following simple code:
 ```python
-from embedchain.eval.metrics import AnswerRelevance
+from embedchain.evaluation.metrics import AnswerRelevance
 metric = AnswerRelevance()
 score = metric.evaluate(dataset)
@@ -172,8 +173,8 @@ print(score)
 In the above example, we used sensible defaults for the evaluation. However, you can also configure the evaluation metric as per your needs using the `AnswerRelevanceConfig` class.
 Here is a more advanced example where you can provide your own evaluation config:
 ```python
-from embedchain.config.eval.base import AnswerRelevanceConfig
-from embedchain.eval.metrics import AnswerRelevance
+from embedchain.config.evaluation.base import AnswerRelevanceConfig
+from embedchain.evaluation.metrics import AnswerRelevance
 eval_config = AnswerRelevanceConfig(
     model='gpt-4',
@@ -200,7 +201,7 @@ score = metric.evaluate(dataset)
  The number of questions to generate for each answer. We use the generated questions to compare the similarity with the original question to determine the score. Defaults to `1`.
- The prompt to extract the `num_gen_questions` number of questions from the provided answer. Defaults to `ANSWER_RELEVANCY_PROMPT`, which can be found at `embedchain.config.eval.base` path.
+ The prompt to extract the `num_gen_questions` number of questions from the provided answer. Defaults to `ANSWER_RELEVANCY_PROMPT`, which can be found at `embedchain.config.evaluation.base` path.
 ## Groundedness
@@ -214,7 +215,7 @@ groundedness_score = (sum of all verdicts) / (total # of claims)
 You can run the groundedness evaluation with the following simple code:
 ```python
-from embedchain.eval.metrics import Groundedness
+from embedchain.evaluation.metrics import Groundedness
 metric = Groundedness()
 score = metric.evaluate(dataset) # dataset from above
 print(score)
@@ -224,8 +225,8 @@ print(score)
 In the above example, we used sensible defaults for the evaluation. However, you can also configure the evaluation metric as per your needs using the `GroundednessConfig` class.
 Here is a more advanced example where you can configure the evaluation config:
 ```python
-from embedchain.config.eval.base import GroundednessConfig
-from embedchain.eval.metrics import Groundedness
+from embedchain.config.evaluation.base import GroundednessConfig
+from embedchain.evaluation.metrics import Groundedness
 eval_config = GroundednessConfig(model='gpt-4', api_key="sk-xxx")
 metric = Groundedness(config=eval_config)
@@ -242,15 +243,15 @@ score = metric.evaluate(dataset)
  The openai api key to use for the evaluation. Defaults to `None`. If not provided, we will use the `OPENAI_API_KEY` environment variable.
- The prompt to extract the claims from the provided answer. Defaults to `GROUNDEDNESS_ANSWER_CLAIMS_PROMPT`, which can be found at `embedchain.config.eval.base` path.
+ The prompt to extract the claims from the provided answer. Defaults to `GROUNDEDNESS_ANSWER_CLAIMS_PROMPT`, which can be found at `embedchain.config.evaluation.base` path.
- The prompt to get verdicts on the claims from the answer from the given context. Defaults to `GROUNDEDNESS_CLAIMS_INFERENCE_PROMPT`, which can be found at `embedchain.config.eval.base` path.
+ The prompt to get verdicts on the claims from the answer from the given context. Defaults to `GROUNDEDNESS_CLAIMS_INFERENCE_PROMPT`, which can be found at `embedchain.config.evaluation.base` path.
 ## Custom
-You can also create your own evaluation metric by extending the `BaseMetric` class. You can find the source code for the existing metrics at `embedchain.eval.metrics` path.
+You can also create your own evaluation metric by extending the `BaseMetric` class. You can find the source code for the existing metrics at `embedchain.evaluation.metrics` path.
 You must provide the `name` of your custom metric in the `__init__` method of your class. This name will be used to identify your metric in the evaluation report.
@@ -260,7 +261,7 @@ You must provide the `name` of your custom metric in the `__init__` method of yo
 from typing import Optional
 from embedchain.config.base_config import BaseConfig
-from embedchain.eval.metrics import BaseMetric
+from embedchain.evaluation.metrics import BaseMetric
 from embedchain.utils.eval import EvalData
 class MyCustomMetric(BaseMetric):
diff --git a/embedchain/app.py b/embedchain/app.py
index a9e222fec0..966f5b6e88 100644
--- a/embedchain/app.py
+++ b/embedchain/app.py
@@ -11,24 +11,28 @@
 import yaml
 from tqdm import tqdm
-from embedchain.cache import (Config, ExactMatchEvaluation,
-                              SearchDistanceEvaluation, cache,
-                              gptcache_data_manager, gptcache_pre_function)
+from embedchain.cache import (
+    Config,
+    ExactMatchEvaluation,
+    SearchDistanceEvaluation,
+    cache,
+    gptcache_data_manager,
+    gptcache_pre_function,
+)
 from embedchain.client import Client
 from embedchain.config import AppConfig, CacheConfig, ChunkerConfig
 from embedchain.constants import SQLITE_PATH
 from embedchain.embedchain import EmbedChain
 from embedchain.embedder.base import BaseEmbedder
 from embedchain.embedder.openai import OpenAIEmbedder
-from embedchain.eval.base import BaseMetric
-from embedchain.eval.metrics import (AnswerRelevance, ContextRelevance,
-                                     Groundedness)
+from embedchain.evaluation.base import BaseMetric
+from embedchain.evaluation.metrics import AnswerRelevance, ContextRelevance, Groundedness
 from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.llm.base import BaseLlm
 from embedchain.llm.openai import OpenAILlm
 from embedchain.telemetry.posthog import AnonymousTelemetry
-from embedchain.utils.eval import EvalData, EvalMetric
+from embedchain.utils.evaluation import EvalData, EvalMetric
 from embedchain.utils.misc import validate_config
 from embedchain.vectordb.base import BaseVectorDB
 from embedchain.vectordb.chroma import ChromaDB
diff --git a/embedchain/config/eval/__init__.py b/embedchain/config/evaluation/__init__.py
similarity index 100%
rename from embedchain/config/eval/__init__.py
rename to embedchain/config/evaluation/__init__.py
diff --git a/embedchain/config/eval/base.py b/embedchain/config/evaluation/base.py
similarity index 100%
rename from embedchain/config/eval/base.py
rename to embedchain/config/evaluation/base.py
diff --git a/embedchain/eval/__init__.py b/embedchain/evaluation/__init__.py
similarity index 100%
rename from embedchain/eval/__init__.py
rename to embedchain/evaluation/__init__.py
diff --git a/embedchain/eval/base.py b/embedchain/evaluation/base.py
similarity index 93%
rename from embedchain/eval/base.py
rename to embedchain/evaluation/base.py
index d86a8be18e..4528e7689c 100644
--- a/embedchain/eval/base.py
+++ b/embedchain/evaluation/base.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
-from embedchain.utils.eval import EvalData
+from embedchain.utils.evaluation import EvalData
 class BaseMetric(ABC):
diff --git a/embedchain/eval/metrics/__init__.py b/embedchain/evaluation/metrics/__init__.py
similarity index 100%
rename from embedchain/eval/metrics/__init__.py
rename to embedchain/evaluation/metrics/__init__.py
diff --git a/embedchain/eval/metrics/answer_relevancy.py b/embedchain/evaluation/metrics/answer_relevancy.py
similarity index 94%
rename from embedchain/eval/metrics/answer_relevancy.py
rename to embedchain/evaluation/metrics/answer_relevancy.py
index 5335449c7a..588fc0fd13 100644
--- a/embedchain/eval/metrics/answer_relevancy.py
+++ b/embedchain/evaluation/metrics/answer_relevancy.py
@@ -8,9 +8,9 @@
 from openai import OpenAI
 from tqdm import tqdm
-from embedchain.config.eval.base import AnswerRelevanceConfig
-from embedchain.eval.base import BaseMetric
-from embedchain.utils.eval import EvalData, EvalMetric
+from embedchain.config.evaluation.base import AnswerRelevanceConfig
+from embedchain.evaluation.base import BaseMetric
+from embedchain.utils.evaluation import EvalData, EvalMetric
 class AnswerRelevance(BaseMetric):
diff --git a/embedchain/eval/metrics/context_relevancy.py b/embedchain/evaluation/metrics/context_relevancy.py
similarity index 93%
rename from embedchain/eval/metrics/context_relevancy.py
rename to embedchain/evaluation/metrics/context_relevancy.py
index 44bfe75d7e..f821713fa9 100644
--- a/embedchain/eval/metrics/context_relevancy.py
+++ b/embedchain/evaluation/metrics/context_relevancy.py
@@ -8,9 +8,9 @@
 from openai import OpenAI
 from tqdm import tqdm
-from embedchain.config.eval.base import ContextRelevanceConfig
-from embedchain.eval.base import BaseMetric
-from embedchain.utils.eval import EvalData, EvalMetric
+from embedchain.config.evaluation.base import ContextRelevanceConfig
+from embedchain.evaluation.base import BaseMetric
+from embedchain.utils.evaluation import EvalData, EvalMetric
 class ContextRelevance(BaseMetric):
diff --git a/embedchain/eval/metrics/groundedness.py b/embedchain/evaluation/metrics/groundedness.py
similarity index 95%
rename from embedchain/eval/metrics/groundedness.py
rename to embedchain/evaluation/metrics/groundedness.py
index eac76d4ee2..1feaab42e4 100644
--- a/embedchain/eval/metrics/groundedness.py
+++ b/embedchain/evaluation/metrics/groundedness.py
@@ -8,9 +8,9 @@
 from openai import OpenAI
 from tqdm import tqdm
-from embedchain.config.eval.base import GroundednessConfig
-from embedchain.eval.base import BaseMetric
-from embedchain.utils.eval import EvalData, EvalMetric
+from embedchain.config.evaluation.base import GroundednessConfig
+from embedchain.evaluation.base import BaseMetric
+from embedchain.utils.evaluation import EvalData, EvalMetric
 class Groundedness(BaseMetric):
diff --git a/embedchain/utils/eval.py b/embedchain/utils/evaluation.py
similarity index 100%
rename from embedchain/utils/eval.py
rename to embedchain/utils/evaluation.py
diff --git a/pyproject.toml b/pyproject.toml
index f70003c744..431f832415 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "embedchain"
-version = "0.1.63"
+version = "0.1.64"
 description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
 authors = [
     "Taranjeet Singh ",
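
For downstream code that imports the renamed modules directly, this refactor is a pure import-path change. Below is a minimal migration sketch using the context relevancy metric as an example; it assumes embedchain 0.1.64 (the version bumped in this patch), placeholder model and API-key values, and a `dataset` prepared as described in `docs/components/evaluation.mdx` — none of these specifics come from the patch itself beyond the paths it renames.

```python
# Minimal migration sketch for the eval -> evaluation rename in this patch.
# Assumes embedchain 0.1.64; `dataset` is a placeholder for an evaluation
# dataset built as shown in docs/components/evaluation.mdx, and the model /
# API key values below are illustrative only.

# Old import paths (before this patch):
# from embedchain.config.eval.base import ContextRelevanceConfig
# from embedchain.eval.metrics import ContextRelevance

# New import paths (after this patch):
from embedchain.config.evaluation.base import ContextRelevanceConfig
from embedchain.evaluation.metrics import ContextRelevance

eval_config = ContextRelevanceConfig(model="gpt-4", api_key="sk-xxx", language="en")
metric = ContextRelevance(config=eval_config)
score = metric.evaluate(dataset)  # `dataset` assumed to exist; see the docs diff above
print(score)
```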