From affe319460e9fcf181a6b5f160aee38f6710d33f Mon Sep 17 00:00:00 2001
From: Deshraj Yadav
Date: Fri, 12 Jan 2024 21:29:59 +0530
Subject: [PATCH] [Refactor] Change evaluation script path (#1165)

---
 docs/api-reference/{pipeline => app}/add.mdx  | 0
 docs/api-reference/{pipeline => app}/chat.mdx | 0
 .../{pipeline => app}/delete.mdx              | 0
 .../{pipeline => app}/deploy.mdx              | 0
 .../{pipeline => app}/evaluate.mdx            | 0
 .../{pipeline => app}/overview.mdx            | 22 ++++++-------
 .../api-reference/{pipeline => app}/query.mdx | 0
 .../api-reference/{pipeline => app}/reset.mdx | 0
 .../{pipeline => app}/search.mdx              | 0
 docs/components/evaluation.mdx                | 33 ++++++++++---------
 embedchain/app.py                             | 18 ++++++----
 .../config/{eval => evaluation}/__init__.py   | 0
 .../config/{eval => evaluation}/base.py       | 0
 embedchain/{eval => evaluation}/__init__.py   | 0
 embedchain/{eval => evaluation}/base.py       | 2 +-
 .../{eval => evaluation}/metrics/__init__.py  | 0
 .../metrics/answer_relevancy.py               | 6 ++--
 .../metrics/context_relevancy.py              | 6 ++--
 .../metrics/groundedness.py                   | 6 ++--
 embedchain/utils/{eval.py => evaluation.py}   | 0
 pyproject.toml                                | 2 +-
 21 files changed, 50 insertions(+), 45 deletions(-)
 rename docs/api-reference/{pipeline => app}/add.mdx (100%)
 rename docs/api-reference/{pipeline => app}/chat.mdx (100%)
 rename docs/api-reference/{pipeline => app}/delete.mdx (100%)
 rename docs/api-reference/{pipeline => app}/deploy.mdx (100%)
 rename docs/api-reference/{pipeline => app}/evaluate.mdx (100%)
 rename docs/api-reference/{pipeline => app}/overview.mdx (76%)
 rename docs/api-reference/{pipeline => app}/query.mdx (100%)
 rename docs/api-reference/{pipeline => app}/reset.mdx (100%)
 rename docs/api-reference/{pipeline => app}/search.mdx (100%)
 rename embedchain/config/{eval => evaluation}/__init__.py (100%)
 rename embedchain/config/{eval => evaluation}/base.py (100%)
 rename embedchain/{eval => evaluation}/__init__.py (100%)
 rename embedchain/{eval => evaluation}/base.py (93%)
 rename embedchain/{eval => evaluation}/metrics/__init__.py (100%)
 rename embedchain/{eval => evaluation}/metrics/answer_relevancy.py (94%)
 rename embedchain/{eval => evaluation}/metrics/context_relevancy.py (93%)
 rename embedchain/{eval => evaluation}/metrics/groundedness.py (95%)
 rename embedchain/utils/{eval.py => evaluation.py} (100%)

diff --git a/docs/api-reference/pipeline/add.mdx b/docs/api-reference/app/add.mdx
similarity index 100%
rename from docs/api-reference/pipeline/add.mdx
rename to docs/api-reference/app/add.mdx
diff --git a/docs/api-reference/pipeline/chat.mdx b/docs/api-reference/app/chat.mdx
similarity index 100%
rename from docs/api-reference/pipeline/chat.mdx
rename to docs/api-reference/app/chat.mdx
diff --git a/docs/api-reference/pipeline/delete.mdx b/docs/api-reference/app/delete.mdx
similarity index 100%
rename from docs/api-reference/pipeline/delete.mdx
rename to docs/api-reference/app/delete.mdx
diff --git a/docs/api-reference/pipeline/deploy.mdx b/docs/api-reference/app/deploy.mdx
similarity index 100%
rename from docs/api-reference/pipeline/deploy.mdx
rename to docs/api-reference/app/deploy.mdx
diff --git a/docs/api-reference/pipeline/evaluate.mdx b/docs/api-reference/app/evaluate.mdx
similarity index 100%
rename from docs/api-reference/pipeline/evaluate.mdx
rename to docs/api-reference/app/evaluate.mdx
diff --git a/docs/api-reference/pipeline/overview.mdx b/docs/api-reference/app/overview.mdx
similarity index 76%
rename from docs/api-reference/pipeline/overview.mdx
rename to docs/api-reference/app/overview.mdx
index 2f6fc9f6c3..8c369cbf84 100644
--- a/docs/api-reference/pipeline/overview.mdx
+++ b/docs/api-reference/app/overview.mdx
@@ -1,34 +1,34 @@
 ---
-title: "Pipeline"
+title: "App"
 ---
-Create a RAG pipeline object on Embedchain. This is the main entrypoint for a developer to interact with Embedchain APIs. A pipeline configures the llm, vector database, embedding model, and retrieval strategy of your choice.
+Create a RAG app object on Embedchain. This is the main entrypoint for a developer to interact with Embedchain APIs. An app configures the llm, vector database, embedding model, and retrieval strategy of your choice.
 ### Attributes
- Pipeline ID
+ App ID
- Name of the pipeline
+ Name of the app
- Configuration of the pipeline
+ Configuration of the app
- Configured LLM for the RAG pipeline
+ Configured LLM for the RAG app
- Configured vector database for the RAG pipeline
+ Configured vector database for the RAG app
- Configured embedding model for the RAG pipeline
+ Configured embedding model for the RAG app
 Chunker configuration
- Client object (used to deploy a pipeline to Embedchain platform)
+ Client object (used to deploy an app to Embedchain platform)
 Logger object
@@ -36,7 +36,7 @@ Create a RAG pipeline object on Embedchain. This is the main entrypoint for a de
 ## Usage
-You can create an embedchain pipeline instance using the following methods:
+You can create an app instance using the following methods:
 ### Default setting
@@ -127,4 +127,4 @@ app = App.from_config(config_path="config.json")
 }
 ```
-
\ No newline at end of file
+
diff --git a/docs/api-reference/pipeline/query.mdx b/docs/api-reference/app/query.mdx
similarity index 100%
rename from docs/api-reference/pipeline/query.mdx
rename to docs/api-reference/app/query.mdx
diff --git a/docs/api-reference/pipeline/reset.mdx b/docs/api-reference/app/reset.mdx
similarity index 100%
rename from docs/api-reference/pipeline/reset.mdx
rename to docs/api-reference/app/reset.mdx
diff --git a/docs/api-reference/pipeline/search.mdx b/docs/api-reference/app/search.mdx
similarity index 100%
rename from docs/api-reference/pipeline/search.mdx
rename to docs/api-reference/app/search.mdx
diff --git a/docs/components/evaluation.mdx b/docs/components/evaluation.mdx
index 6eed725e9d..c1143d2ecd 100644
--- a/docs/components/evaluation.mdx
+++ b/docs/components/evaluation.mdx
@@ -84,7 +84,7 @@ Once you have created your dataset, you can run evaluation on the dataset by pic
 For example, you can run evaluation on context relevancy metric using the following code:
 ```python
-from embedchain.eval.metrics import ContextRelevance
+from embedchain.evaluation.metrics import ContextRelevance
 metric = ContextRelevance()
 score = metric.evaluate(dataset)
 print(score)
@@ -112,20 +112,21 @@ context_relevance_score = num_relevant_sentences_in_context / num_of_sentences_i
 You can run the context relevancy evaluation with the following simple code:
 ```python
-from embedchain.eval.metrics import ContextRelevance
+from embedchain.evaluation.metrics import ContextRelevance
 metric = ContextRelevance()
 score = metric.evaluate(dataset) # 'dataset' is definted in the create dataset section
 print(score)
 # 0.27975528364849833
 ```
+
 In the above example, we used sensible defaults for the evaluation. However, you can also configure the evaluation metric as per your needs using the `ContextRelevanceConfig` class.
 Here is a more advanced example of how to pass a custom evaluation config for evaluating on context relevance metric:
 ```python
-from embedchain.config.eval.base import ContextRelevanceConfig
-from embedchain.eval.metrics import ContextRelevance
+from embedchain.config.evaluation.base import ContextRelevanceConfig
+from embedchain.evaluation.metrics import ContextRelevance
 eval_config = ContextRelevanceConfig(model="gpt-4", api_key="sk-xxx", language="en")
 metric = ContextRelevance(config=eval_config)
@@ -144,7 +145,7 @@ metric.evaluate(dataset)
  The language of the dataset being evaluated. We need this to determine the understand the context provided in the dataset. Defaults to `en`.
- The prompt to extract the relevant sentences from the context. Defaults to `CONTEXT_RELEVANCY_PROMPT`, which can be found at `embedchain.config.eval.base` path.
+ The prompt to extract the relevant sentences from the context. Defaults to `CONTEXT_RELEVANCY_PROMPT`, which can be found at `embedchain.config.evaluation.base` path.
@@ -161,7 +162,7 @@ answer_relevancy_score = mean(cosine_similarity(generated_questions, original_qu
 You can run the answer relevancy evaluation with the following simple code:
 ```python
-from embedchain.eval.metrics import AnswerRelevance
+from embedchain.evaluation.metrics import AnswerRelevance
 metric = AnswerRelevance()
 score = metric.evaluate(dataset)
@@ -172,8 +173,8 @@ print(score)
 In the above example, we used sensible defaults for the evaluation. However, you can also configure the evaluation metric as per your needs using the `AnswerRelevanceConfig` class.
 Here is a more advanced example where you can provide your own evaluation config:
 ```python
-from embedchain.config.eval.base import AnswerRelevanceConfig
-from embedchain.eval.metrics import AnswerRelevance
+from embedchain.config.evaluation.base import AnswerRelevanceConfig
+from embedchain.evaluation.metrics import AnswerRelevance
 eval_config = AnswerRelevanceConfig(
     model='gpt-4',
@@ -200,7 +201,7 @@ score = metric.evaluate(dataset)
  The number of questions to generate for each answer. We use the generated questions to compare the similarity with the original question to determine the score. Defaults to `1`.
- The prompt to extract the `num_gen_questions` number of questions from the provided answer. Defaults to `ANSWER_RELEVANCY_PROMPT`, which can be found at `embedchain.config.eval.base` path.
+ The prompt to extract the `num_gen_questions` number of questions from the provided answer. Defaults to `ANSWER_RELEVANCY_PROMPT`, which can be found at `embedchain.config.evaluation.base` path.
 ## Groundedness
@@ -214,7 +215,7 @@ groundedness_score = (sum of all verdicts) / (total # of claims)
 You can run the groundedness evaluation with the following simple code:
 ```python
-from embedchain.eval.metrics import Groundedness
+from embedchain.evaluation.metrics import Groundedness
 metric = Groundedness()
 score = metric.evaluate(dataset) # dataset from above
 print(score)
@@ -224,8 +225,8 @@ print(score)
 In the above example, we used sensible defaults for the evaluation. However, you can also configure the evaluation metric as per your needs using the `GroundednessConfig` class.
 Here is a more advanced example where you can configure the evaluation config:
 ```python
-from embedchain.config.eval.base import GroundednessConfig
-from embedchain.eval.metrics import Groundedness
+from embedchain.config.evaluation.base import GroundednessConfig
+from embedchain.evaluation.metrics import Groundedness
 eval_config = GroundednessConfig(model='gpt-4', api_key="sk-xxx")
 metric = Groundedness(config=eval_config)
@@ -242,15 +243,15 @@ score = metric.evaluate(dataset)
  The openai api key to use for the evaluation. Defaults to `None`. If not provided, we will use the `OPENAI_API_KEY` environment variable.
- The prompt to extract the claims from the provided answer. Defaults to `GROUNDEDNESS_ANSWER_CLAIMS_PROMPT`, which can be found at `embedchain.config.eval.base` path.
+ The prompt to extract the claims from the provided answer. Defaults to `GROUNDEDNESS_ANSWER_CLAIMS_PROMPT`, which can be found at `embedchain.config.evaluation.base` path.
- The prompt to get verdicts on the claims from the answer from the given context. Defaults to `GROUNDEDNESS_CLAIMS_INFERENCE_PROMPT`, which can be found at `embedchain.config.eval.base` path.
+ The prompt to get verdicts on the claims from the answer from the given context. Defaults to `GROUNDEDNESS_CLAIMS_INFERENCE_PROMPT`, which can be found at `embedchain.config.evaluation.base` path.
 ## Custom
-You can also create your own evaluation metric by extending the `BaseMetric` class. You can find the source code for the existing metrics at `embedchain.eval.metrics` path.
+You can also create your own evaluation metric by extending the `BaseMetric` class. You can find the source code for the existing metrics at `embedchain.evaluation.metrics` path.
 You must provide the `name` of your custom metric in the `__init__` method of your class. This name will be used to identify your metric in the evaluation report.
@@ -260,7 +261,7 @@ You must provide the `name` of your custom metric in the `__init__` method of yo
 from typing import Optional
 from embedchain.config.base_config import BaseConfig
-from embedchain.eval.metrics import BaseMetric
+from embedchain.evaluation.metrics import BaseMetric
 from embedchain.utils.eval import EvalData
 class MyCustomMetric(BaseMetric):
diff --git a/embedchain/app.py b/embedchain/app.py
index a9e222fec0..966f5b6e88 100644
--- a/embedchain/app.py
+++ b/embedchain/app.py
@@ -11,24 +11,28 @@
 import yaml
 from tqdm import tqdm
-from embedchain.cache import (Config, ExactMatchEvaluation,
-                              SearchDistanceEvaluation, cache,
-                              gptcache_data_manager, gptcache_pre_function)
+from embedchain.cache import (
+    Config,
+    ExactMatchEvaluation,
+    SearchDistanceEvaluation,
+    cache,
+    gptcache_data_manager,
+    gptcache_pre_function,
+)
 from embedchain.client import Client
 from embedchain.config import AppConfig, CacheConfig, ChunkerConfig
 from embedchain.constants import SQLITE_PATH
 from embedchain.embedchain import EmbedChain
 from embedchain.embedder.base import BaseEmbedder
 from embedchain.embedder.openai import OpenAIEmbedder
-from embedchain.eval.base import BaseMetric
-from embedchain.eval.metrics import (AnswerRelevance, ContextRelevance,
-                                     Groundedness)
+from embedchain.evaluation.base import BaseMetric
+from embedchain.evaluation.metrics import AnswerRelevance, ContextRelevance, Groundedness
 from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.llm.base import BaseLlm
 from embedchain.llm.openai import OpenAILlm
 from embedchain.telemetry.posthog import AnonymousTelemetry
-from embedchain.utils.eval import EvalData, EvalMetric
+from embedchain.utils.evaluation import EvalData, EvalMetric
 from embedchain.utils.misc import validate_config
 from embedchain.vectordb.base import BaseVectorDB
 from embedchain.vectordb.chroma import ChromaDB
diff --git a/embedchain/config/eval/__init__.py b/embedchain/config/evaluation/__init__.py
similarity index 100%
rename from embedchain/config/eval/__init__.py
rename to embedchain/config/evaluation/__init__.py
diff --git a/embedchain/config/eval/base.py b/embedchain/config/evaluation/base.py
similarity index 100%
rename from embedchain/config/eval/base.py
rename to embedchain/config/evaluation/base.py
diff --git a/embedchain/eval/__init__.py b/embedchain/evaluation/__init__.py
similarity index 100%
rename from embedchain/eval/__init__.py
rename to embedchain/evaluation/__init__.py
diff --git a/embedchain/eval/base.py b/embedchain/evaluation/base.py
similarity index 93%
rename from embedchain/eval/base.py
rename to embedchain/evaluation/base.py
index d86a8be18e..4528e7689c 100644
--- a/embedchain/eval/base.py
+++ b/embedchain/evaluation/base.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
-from embedchain.utils.eval import EvalData
+from embedchain.utils.evaluation import EvalData
 class BaseMetric(ABC):
diff --git a/embedchain/eval/metrics/__init__.py b/embedchain/evaluation/metrics/__init__.py
similarity index 100%
rename from embedchain/eval/metrics/__init__.py
rename to embedchain/evaluation/metrics/__init__.py
diff --git a/embedchain/eval/metrics/answer_relevancy.py b/embedchain/evaluation/metrics/answer_relevancy.py
similarity index 94%
rename from embedchain/eval/metrics/answer_relevancy.py
rename to embedchain/evaluation/metrics/answer_relevancy.py
index 5335449c7a..588fc0fd13 100644
--- a/embedchain/eval/metrics/answer_relevancy.py
+++ b/embedchain/evaluation/metrics/answer_relevancy.py
@@ -8,9 +8,9 @@
 from openai import OpenAI
 from tqdm import tqdm
-from embedchain.config.eval.base import AnswerRelevanceConfig
-from embedchain.eval.base import BaseMetric
-from embedchain.utils.eval import EvalData, EvalMetric
+from embedchain.config.evaluation.base import AnswerRelevanceConfig
+from embedchain.evaluation.base import BaseMetric
+from embedchain.utils.evaluation import EvalData, EvalMetric
 class AnswerRelevance(BaseMetric):
diff --git a/embedchain/eval/metrics/context_relevancy.py b/embedchain/evaluation/metrics/context_relevancy.py
similarity index 93%
rename from embedchain/eval/metrics/context_relevancy.py
rename to embedchain/evaluation/metrics/context_relevancy.py
index 44bfe75d7e..f821713fa9 100644
--- a/embedchain/eval/metrics/context_relevancy.py
+++ b/embedchain/evaluation/metrics/context_relevancy.py
@@ -8,9 +8,9 @@
 from openai import OpenAI
 from tqdm import tqdm
-from embedchain.config.eval.base import ContextRelevanceConfig
-from embedchain.eval.base import BaseMetric
-from embedchain.utils.eval import EvalData, EvalMetric
+from embedchain.config.evaluation.base import ContextRelevanceConfig
+from embedchain.evaluation.base import BaseMetric
+from embedchain.utils.evaluation import EvalData, EvalMetric
 class ContextRelevance(BaseMetric):
diff --git a/embedchain/eval/metrics/groundedness.py b/embedchain/evaluation/metrics/groundedness.py
similarity index 95%
rename from embedchain/eval/metrics/groundedness.py
rename to embedchain/evaluation/metrics/groundedness.py
index eac76d4ee2..1feaab42e4 100644
--- a/embedchain/eval/metrics/groundedness.py
+++ b/embedchain/evaluation/metrics/groundedness.py
@@ -8,9 +8,9 @@
 from openai import OpenAI
 from tqdm import tqdm
-from embedchain.config.eval.base import GroundednessConfig
-from embedchain.eval.base import BaseMetric
-from embedchain.utils.eval import EvalData, EvalMetric
+from embedchain.config.evaluation.base import GroundednessConfig
+from embedchain.evaluation.base import BaseMetric
+from embedchain.utils.evaluation import EvalData, EvalMetric
 class Groundedness(BaseMetric):
diff --git a/embedchain/utils/eval.py b/embedchain/utils/evaluation.py
similarity index 100%
rename from embedchain/utils/eval.py
rename to embedchain/utils/evaluation.py
diff --git a/pyproject.toml b/pyproject.toml
index f70003c744..431f832415 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "embedchain"
-version = "0.1.63"
+version = "0.1.64"
 description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
 authors = [
     "Taranjeet Singh ",
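
For downstream code that imports the renamed modules directly, this refactor is a pure import-path change. Below is a minimal migration sketch using the context relevancy metric as an example; it assumes embedchain 0.1.64 (the version bumped in this patch), placeholder model and API-key values, and a `dataset` prepared as described in `docs/components/evaluation.mdx` — none of these specifics come from the patch itself beyond the paths it renames.

```python
# Minimal migration sketch for the eval -> evaluation rename in this patch.
# Assumes embedchain 0.1.64; `dataset` is a placeholder for an evaluation
# dataset built as shown in docs/components/evaluation.mdx, and the model /
# API key values below are illustrative only.

# Old import paths (before this patch):
# from embedchain.config.eval.base import ContextRelevanceConfig
# from embedchain.eval.metrics import ContextRelevance

# New import paths (after this patch):
from embedchain.config.evaluation.base import ContextRelevanceConfig
from embedchain.evaluation.metrics import ContextRelevance

eval_config = ContextRelevanceConfig(model="gpt-4", api_key="sk-xxx", language="en")
metric = ContextRelevance(config=eval_config)
score = metric.evaluate(dataset)  # `dataset` assumed to exist; see the docs diff above
print(score)
```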