Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "Python 3 with Jupyter",
"image": "mcr.microsoft.com/devcontainers/python:3.11",
"postCreateCommand": "pip3 install --user -r requirements.txt && pip3 install --user -r document-search/requirements.txt",
"postCreateCommand": "pip3 install --user -r requirements.txt",
"customizations": {
"vscode": {
"extensions": [
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.
33 changes: 27 additions & 6 deletions TESTING.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,29 @@
# Testing
# Testing Documentation

Run:
This document provides detailed instructions and procedures for manually testing
the various functionalities of LlamaIndex, ensuring that all features operate
correctly and as expected before deployment or release.

```bash
cd <package-name>
python -m unittest discover -s tests
```
## Test Case: test

**Objective:** Verify test

**Preconditions:**

- [ ] test.

**Test Steps:**

1. test

**Expected Results:**

- [ ] test

**Actual Results:**

- [ ] test

**Pass/Fail Criteria:**

- [ ] test
180 changes: 180 additions & 0 deletions data/QnA/good_question.csv

Large diffs are not rendered by default.

230 changes: 230 additions & 0 deletions data/document_test/chunk_slice.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions index/graph_store.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"graph_dict": {}}
1 change: 1 addition & 0 deletions index/image__vector_store.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
1 change: 1 addition & 0 deletions index/index_store.json

Large diffs are not rendered by default.

244 changes: 244 additions & 0 deletions notebooks/llamaindex-load-latest-version.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import sys\n",
"from dotenv import load_dotenv\n",
"from llama_index.core import StorageContext, get_response_synthesizer, load_index_from_storage\n",
"from llama_index.core.postprocessor import SimilarityPostprocessor\n",
"from llama_index.core.query_engine import RetrieverQueryEngine\n",
"from llama_index.core.retrievers import VectorIndexRetriever\n",
"from llama_index.llms.azure_openai import AzureOpenAI\n",
"from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding\n",
"from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
"from llama_index.core import Settings\n",
"import pickle\n",
"import os\n",
"\n",
"# Load API_KEY, AZURE_ENDPOINT and API_VERSION (read by later cells) from a .env file\n",
"load_dotenv()\n",
"\n",
"# Send INFO-level logs to stdout through a single root handler.\n",
"# force=True replaces handlers left over from earlier runs of this cell, so every\n",
"# record is printed once instead of once per accumulated handler.\n",
"logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download the persisted index files from the Azure Blob Storage container\n",
"import os\n",
"\n",
"from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient\n",
"\n",
"# Your storage account connection string. AZURE_STORAGE_CONNECTION_STRING from the\n",
"# environment (.env) takes precedence so the account key does not have to be written\n",
"# into the notebook; the literal below is only the fallback.\n",
"connection_string = os.getenv(\n",
"    \"AZURE_STORAGE_CONNECTION_STRING\",\n",
"    \"DefaultEndpointsProtocol=https;AccountName=finessetestblobstorage;AccountKey=;EndpointSuffix=core.windows.net\",\n",
")\n",
"\n",
"# The name of your container\n",
"container_name = \"llamaindex-v1\"\n",
"\n",
"# Local directory the blobs are written to. It must be the directory that\n",
"# StorageContext.from_defaults(persist_dir=\"../index\") reads from later on.\n",
"download_dir = \"../index\"\n",
"\n",
"# Initialize the BlobServiceClient\n",
"blob_service_client = BlobServiceClient.from_connection_string(connection_string)\n",
"\n",
"# Get the container client\n",
"container_client = blob_service_client.get_container_client(container_name)\n",
"\n",
"# List all blobs in the container (at the root) and download each one\n",
"for blob in container_client.list_blobs():\n",
"    print(\"Blob name: \" + blob.name)\n",
"    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob.name)\n",
"    download_file_path = os.path.join(download_dir, blob.name)\n",
"    # The target directory may not exist yet, and blob names may contain \"/\"\n",
"    os.makedirs(os.path.dirname(download_file_path), exist_ok=True)\n",
"    # Download the blob to a local file\n",
"    with open(download_file_path, \"wb\") as download_file:\n",
"        download_file.write(blob_client.download_blob().readall())\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Azure OpenAI connection settings shared by the chat model and the embedding model\n",
"azure_openai_connection = {\n",
"    \"api_key\": os.getenv(\"API_KEY\"),\n",
"    \"azure_endpoint\": os.getenv(\"AZURE_ENDPOINT\"),\n",
"    \"api_version\": os.getenv(\"API_VERSION\"),\n",
"}\n",
"\n",
"# Chat model used to synthesize answers\n",
"llm = AzureOpenAI(model=\"gpt-4\", deployment_name=\"ailab-llm\", **azure_openai_connection)\n",
"\n",
"# Embedding model used to embed queries\n",
"embed_model = AzureOpenAIEmbedding(\n",
"    model=\"text-embedding-ada-002\", deployment_name=\"ada\", **azure_openai_connection\n",
")\n",
"\n",
"# Register both models as the global LlamaIndex defaults\n",
"Settings.llm = llm\n",
"Settings.embed_model = embed_model"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rebuild storage context\n"
]
}
],
"source": [
"# Rebuild the storage context from the persisted index files in ../index\n",
"print(\"rebuild storage context\")\n",
"storage_context = StorageContext.from_defaults(persist_dir=\"../index\")\n",
"\n",
"# Keep a pickled copy next to the notebook\n",
"with open('storage_context.pkl', mode='wb') as file:\n",
"    file.write(pickle.dumps(storage_context))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"load storage_context\n",
"load index\n",
"INFO:llama_index.core.indices.loading:Loading all indices.\n",
"Loading all indices.\n",
"Loading all indices.\n",
"Loading all indices.\n",
"configure retriever for debugging and retrieving metadata\n",
"configure response synthesizer\n",
"assemble query engine\n"
]
}
],
"source": [
"# Uses the storage_context object built in the previous cell.\n",
"print(\"load index\")\n",
"index = load_index_from_storage(storage_context)\n",
"\n",
"# Retrieve the 15 most similar nodes for each query\n",
"print(\"configure retriever for debugging and retrieving metadata\")\n",
"retriever = VectorIndexRetriever(index=index, similarity_top_k=15)\n",
"\n",
"print(\"configure response synthesizer\")\n",
"response_synthesizer = get_response_synthesizer()\n",
"\n",
"# Retrieved nodes pass through a similarity cutoff of 0.7 before synthesis\n",
"print(\"assemble query engine\")\n",
"query_engine = RetrieverQueryEngine(\n",
"    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],\n",
"    response_synthesizer=response_synthesizer,\n",
"    retriever=retriever,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
"INFO:httpx:HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ailab-llm/chat/completions?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ailab-llm/chat/completions?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ailab-llm/chat/completions?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ailab-llm/chat/completions?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
"For importing a domestic cat from France to Canada, you need to ensure that the cat is accompanied by a veterinarian's certificate in English or French that clearly identifies the animal and indicates one of the following: the cat is currently vaccinated against rabies, or the cat is being imported from a country designated as free from rabies, in which it was living during the six-month period immediately before the date of entry, or the cat has a Rabies Neutralising Antibody Titre Test (RNATT) resulting in a titre of at least 0.5 IU/ml, taken at least 30 days after any prior rabies vaccination. The cat must be properly identified on the laboratory report and the lab report must accompany the animal. If the cat does not meet these requirements, an inspector may order the person importing the cat to have the cat vaccinated against rabies within a specified period of time at the owner’s expense, and present the vaccination certificate to an inspector. The person must comply with this order.\n"
]
}
],
"source": [
"# Example: run a query against the assembled query engine\n",
"response = query_engine.query(\"How do I import a cat from France to Canada?\")\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'id_': '558835ec-1285-428c-bc83-89c30128f68a'}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get the top k results into a list, in order of match score.\n",
"# The retriever asks for similarity_top_k=15 nodes, but the similarity cutoff can\n",
"# leave fewer than 15 in response.source_nodes. Slicing copes with a short list,\n",
"# whereas indexing a fixed range(15) raised IndexError.\n",
"top_k_result = list(response.source_nodes[:15])\n",
"\n",
"# The accessors below read the best match; they need at least one source node.\n",
"# get content\n",
"response.source_nodes[0].get_content()\n",
"# get embedding\n",
"response.source_nodes[0].embedding\n",
"# get score\n",
"response.source_nodes[0].get_score()\n",
"# get customized metadata. In this example, this retrieves chunk_id\n",
"response.source_nodes[0].metadata"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llamaindex-db",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading