added kiara endpoint

haesleinhuepf · haesleinhuepf · commit ebb3ca26baa7 · 2025-07-09T10:48:06.000+02:00
diff --git a/docs/15_endpoint_apis/09_kiara_llm_endpoint.ipynb b/docs/15_endpoint_apis/09_kiara_llm_endpoint.ipynb
@@ -0,0 +1,162 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "87404224-b84b-409f-8683-c4a243d29722",
+   "metadata": {},
+   "source": [
+    "# Kiara LLM endpoint\n",
+    "In this notebook we will use the still experimental Kiara LLM infrastructure. To use it, you must set the two environment variables `KIARA_API_KEY` and `KIARA_LLM_SERVER`. This method also uses the OpenAI API; we just change the `base_url`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "752e974d-9aaf-44aa-80fb-01a042cf5774",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'1.90.0'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import openai\n",
+    "openai.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ab55e229-93b9-4e9b-974d-037002690bf0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def prompt_kiara(message: str, model=\"ollama-llama3-3-70b\"):\n",
+    "    \"\"\"Send a message to the Kiara LLM server and return only the text response.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    message : str or list of dict\n",
+    "        A plain prompt, or an already formatted chat history in the OpenAI\n",
+    "        chat format (dictionaries with `role` and `content` keys).\n",
+    "    model : str\n",
+    "        Name of the model to request from the endpoint.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    str\n",
+    "        The text content of the first choice in the response.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    RuntimeError\n",
+    "        If `KIARA_LLM_SERVER` or `KIARA_API_KEY` is not set.\n",
+    "    \"\"\"\n",
+    "    import os\n",
+    "\n",
+    "    # Fail early with a readable message. Without this check a missing server\n",
+    "    # variable ends in `None + \"api/\"` (TypeError), and a missing key is passed\n",
+    "    # on as None, in which case the openai client falls back to OPENAI_API_KEY\n",
+    "    # and would send that unrelated key to this server.\n",
+    "    server = os.environ.get('KIARA_LLM_SERVER')\n",
+    "    api_key = os.environ.get('KIARA_API_KEY')\n",
+    "    if not server or not api_key:\n",
+    "        raise RuntimeError(\"Please set the environment variables KIARA_LLM_SERVER and KIARA_API_KEY.\")\n",
+    "\n",
+    "    # convert message in the right format if necessary\n",
+    "    if isinstance(message, str):\n",
+    "        message = [{\"role\": \"user\", \"content\": message}]\n",
+    "\n",
+    "    # setup connection to the LLM; accept a server URL with or without trailing slash\n",
+    "    client = openai.OpenAI(base_url=server.rstrip(\"/\") + \"/api/\",\n",
+    "                           api_key=api_key)\n",
+    "\n",
+    "    response = client.chat.completions.create(\n",
+    "        model=model,\n",
+    "        messages=message\n",
+    "    )\n",
+    "\n",
+    "    # extract answer\n",
+    "    return response.choices[0].message.content"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a7654a20-a307-4b26-8d25-bef20b70224e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"It's nice to meet you. Is there something I can help you with or would you like to chat?\""
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prompt_kiara(\"Hi!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "578e9edd-b58f-4fd0-a56d-1966105221dc",
+   "metadata": {},
+   "source": [
+    "## Exercise\n",
+    "List the models available in the endpoint and try them out by specifying them when calling `prompt_kiara()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "05171ba7-a775-41c5-954d-7d4fc2b5b625",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ollama-llama3-3-70b\n",
+      "vllm-baai-bge-m3\n",
+      "vllm-deepseek-coder-33b-instruct\n",
+      "vllm-deepseek-r1-distill-llama-70b\n",
+      "vllm-llama-3-3-nemotron-super-49b-v1\n",
+      "vllm-llama-4-scout-17b-16e-instruct\n",
+      "vllm-meta-llama-llama-3-3-70b-instruct\n",
+      "vllm-mistral-small-24b-instruct-2501\n",
+      "vllm-multilingual-e5-large-instruct\n",
+      "vllm-nvidia-llama-3-3-70b-instruct-fp8\n"
+     ]
+    }
+   ],
+   "source": [
+    "# connect to the Kiara endpoint using the settings from the environment\n",
+    "client = openai.OpenAI(\n",
+    "    api_key=os.environ.get('KIARA_API_KEY'),\n",
+    "    base_url=os.environ.get('KIARA_LLM_SERVER') + \"api/\",\n",
+    ")\n",
+    "\n",
+    "# print the identifier of every available model, one per line\n",
+    "print(*(entry.id for entry in client.models.list().data), sep=\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e810ee2-4d22-42f6-add5-532cf95b4b9c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/_toc.yml b/docs/_toc.yml
@@ -24,6 +24,7 @@ parts:
       - file: 15_endpoint_apis/01_openai_api.ipynb
       - file: 15_endpoint_apis/02_ollama_endpoint.ipynb
       - file: 15_endpoint_apis/04_scadsai_llm_endpoint.ipynb
+      - file: 15_endpoint_apis/09_kiara_llm_endpoint.ipynb
       - file: 15_endpoint_apis/06_kisski_endpoint.ipynb
       - file: 15_endpoint_apis/03_blablador_endpoint.ipynb
       - file: 15_endpoint_apis/05_azure_endpoints.ipynb