ai-cfia · leejaeka · Feb 1, 2024 · Feb 1, 2024 · Feb 8, 2024 · Feb 13, 2024
@@ -1,7 +1,7 @@
 {
   "name": "Python 3 with Jupyter",
   "image": "mcr.microsoft.com/devcontainers/python:3.11",
-  "postCreateCommand": "pip3 install --user -r requirements.txt && pip3 install --user -r document-search/requirements.txt",
+  "postCreateCommand": "pip3 install --user -r requirements.txt",
   "customizations": {
     "vscode": {
       "extensions": [

@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
@@ -1,8 +1,29 @@
-# Testing
+# Testing Documentation
 
-Run:
+This document provides detailed instructions and procedures for manually testing
+the various functionalities of LlamaIndex, ensuring that all features operate
+correctly and as expected before deployment or release.
 
-```bash
-cd <package-name>
-python -m unittest discover -s tests
-```
+## Test Case: test
+
+**Objective:** Verify test
+
+**Preconditions:**
+
+- [ ] test.
+
+**Test Steps:**
+
+1. test
+
+**Expected Results:**
+
+- [ ] test
+
+**Actual Results:**
+
+- [ ] test
+
+**Pass/Fail Criteria:**
+
+- [ ] test
@@ -0,0 +1 @@
+{"graph_dict": {}}
@@ -0,0 +1 @@
+{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
@@ -0,0 +1,244 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "import sys\n",
+    "from dotenv import load_dotenv\n",
+    "from llama_index.core import StorageContext, get_response_synthesizer, load_index_from_storage\n",
+    "from llama_index.core.postprocessor import SimilarityPostprocessor\n",
+    "from llama_index.core.query_engine import RetrieverQueryEngine\n",
+    "from llama_index.core.retrievers import VectorIndexRetriever\n",
+    "from llama_index.llms.azure_openai import AzureOpenAI\n",
+    "from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding\n",
+    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
+    "from llama_index.core import Settings\n",
+    "import pickle\n",
+    "import os\n",
+    "\n",
+    "load_dotenv()\n",
+    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
+    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download the index files from the Azure Blob Storage container\n",
+    "from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient\n",
+    "\n",
+    "# Your storage account connection string\n",
+    "connection_string = \"DefaultEndpointsProtocol=https;AccountName=finessetestblobstorage;AccountKey=;EndpointSuffix=core.windows.net\"\n",
+    "\n",
+    "# The name of your container\n",
+    "container_name = \"llamaindex-v1\"\n",
+    "\n",
+    "# The name of the virtual folder you want to list files from\n",
+    "folder_name = \"index\"\n",
+    "\n",
+    "# Initialize the BlobServiceClient\n",
+    "blob_service_client = BlobServiceClient.from_connection_string(connection_string)\n",
+    "\n",
+    "# Get the container client\n",
+    "container_client = blob_service_client.get_container_client(container_name)\n",
+    "\n",
+    "# List all blobs in the specified folder\n",
+    "blobs_list = container_client.list_blobs(name_starts_with=folder_name)\n",
+    "\n",
+    "# List every blob in the container; this overwrites the folder-filtered list above\n",
+    "blobs_list = container_client.list_blobs()\n",
+    "\n",
+    "for blob in blobs_list:\n",
+    "    print(\"Blob name: \" + blob.name)\n",
+    "    blob_name = blob.name\n",
+    "    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)\n",
+    "    # Download the blob to a local file\n",
+    "    download_file_path = \"./index/\" + blob_name\n",
+    "    with open(download_file_path, \"wb\") as download_file:\n",
+    "        download_file.write(blob_client.download_blob().readall())\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = AzureOpenAI(\n",
+    "    model=\"gpt-4\",\n",
+    "    deployment_name=\"ailab-llm\",\n",
+    "    api_key=os.getenv(\"API_KEY\"),\n",
+    "    azure_endpoint=os.getenv(\"AZURE_ENDPOINT\"),\n",
+    "    api_version=os.getenv(\"API_VERSION\"),\n",
+    ")\n",
+    "\n",
+    "embed_model = AzureOpenAIEmbedding(\n",
+    "    model=\"text-embedding-ada-002\",\n",
+    "    deployment_name=\"ada\",\n",
+    "    api_key=os.getenv(\"API_KEY\"),\n",
+    "    azure_endpoint=os.getenv(\"AZURE_ENDPOINT\"),\n",
+    "    api_version=os.getenv(\"API_VERSION\"),\n",
+    ")\n",
+    "\n",
+    "Settings.llm = llm\n",
+    "Settings.embed_model = embed_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "rebuild storage context\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"rebuild storage context\")\n",
+    "storage_context = StorageContext.from_defaults(persist_dir=\"../index\")\n",
+    "with open('storage_context.pkl', 'wb') as file:\n",
+    "    pickle.dump(storage_context, file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "load storage_context\n",
+      "load index\n",
+      "INFO:llama_index.core.indices.loading:Loading all indices.\n",
+      "Loading all indices.\n",
+      "Loading all indices.\n",
+      "Loading all indices.\n",
+      "configure retriever for debugging and retrieving metadata\n",
+      "configure response synthesizer\n",
+      "assemble query engine\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "# print(\"load storage_context\")\n",
+    "# with open('storage_context.pkl', 'rb') as file:\n",
+    "#     storage_context = pickle.load(file)\n",
+    "\n",
+    "print(\"load index\")\n",
+    "index = load_index_from_storage(storage_context)\n",
+    "\n",
+    "print(\"configure retriever for debugging and retrieving metadata\")\n",
+    "retriever = VectorIndexRetriever(\n",
+    "    index=index,\n",
+    "    similarity_top_k=15,\n",
+    ")\n",
+    "\n",
+    "print(\"configure response synthesizer\")\n",
+    "response_synthesizer = get_response_synthesizer()\n",
+    "\n",
+    "print(\"assemble query engine\")\n",
+    "query_engine = RetrieverQueryEngine(\n",
+    "    retriever=retriever,\n",
+    "    response_synthesizer=response_synthesizer,\n",
+    "    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "INFO:httpx:HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ailab-llm/chat/completions?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ailab-llm/chat/completions?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ailab-llm/chat/completions?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ailab-llm/chat/completions?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "For importing a domestic cat from France to Canada, you need to ensure that the cat is accompanied by a veterinarian's certificate in English or French that clearly identifies the animal and indicates one of the following: the cat is currently vaccinated against rabies, or the cat is being imported from a country designated as free from rabies, in which it was living during the six-month period immediately before the date of entry, or the cat has a Rabies Neutralising Antibody Titre Test (RNATT) resulting in a titre of at least 0.5 IU/ml, taken at least 30 days after any prior rabies vaccination. The cat must be properly identified on the laboratory report and the lab report must accompany the animal. If the cat does not meet these requirements, an inspector may order the person importing the cat to have the cat vaccinated against rabies within a specified period of time at the owner’s expense, and present the vaccination certificate to an inspector. The person must comply with this order.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example of executing a query\n",
+    "response = query_engine.query(\"How do I import a cat from France to Canada?\")\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'id_': '558835ec-1285-428c-bc83-89c30128f68a'}"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# collect the top-k results into a list, ordered by match score\n",
+    "top_k_result = []\n",
+    "for i in range(15): # arbitrary 15 because similarity_top_k=15 in this example\n",
+    "    top_k_result.append(response.source_nodes[i])\n",
+    "    \n",
+    "# get content\n",
+    "response.source_nodes[0].get_content()\n",
+    "# get embedding\n",
+    "response.source_nodes[0].embedding\n",
+    "# get score\n",
+    "response.source_nodes[0].get_score()\n",
+    "# get customized metadata. In this example, this retrieves chunk_id\n",
+    "response.source_nodes[0].metadata"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llamaindex-db",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
rngadam marked this conversation as resolved. Show resolved Hide resolved