From bec6917a92f027e75c67ebcd0726e58bbd652306 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Tue, 1 Oct 2024 15:09:58 -0400 Subject: [PATCH 1/2] Add AzureDBForPostgres connector --- .../third_party/postgres-memory.ipynb | 173 ++++++++++++++++-- .../memory/azure_db_for_postgres/__init__.py | 1 + .../azure_db_for_postgres_collection.py | 57 ++++++ .../azure_db_for_postgres_settings.py | 43 +++++ .../azure_db_for_postgres_store.py | 9 + .../memory/azure_db_for_postgres/constants.py | 3 + .../memory/azure_db_for_postgres/utils.py | 25 +++ .../memory/postgres/postgres_settings.py | 14 +- 8 files changed, 309 insertions(+), 16 deletions(-) create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_store.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py diff --git a/python/samples/getting_started/third_party/postgres-memory.ipynb b/python/samples/getting_started/third_party/postgres-memory.ipynb index b0069a59a1c7..872273efb9a4 100644 --- a/python/samples/getting_started/third_party/postgres-memory.ipynb +++ b/python/samples/getting_started/third_party/postgres-memory.ipynb @@ -33,6 +33,9 @@ ")\n", "from semantic_kernel.connectors.ai.open_ai.services.azure_text_embedding import AzureTextEmbedding\n", "from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding\n", + "from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_collection import (\n", + " AzureDBForPostgresCollection,\n", + ")\n", "from semantic_kernel.connectors.memory.postgres.postgres_collection import PostgresCollection\n", "from semantic_kernel.data.const import DistanceFunction, IndexKind\n", "from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel\n", @@ -55,10 +58,23 @@ "\n", "To do this, copy the `.env.example` file to `.env` and fill in the necessary information.\n", "\n", + "Note that if you are using VS Code to execute this notebook, ensure you don't have alternate values in the .env file at the root of the workspace, as that will take precedence over the .env file in the notebook.\n", + "\n", "### Postgres configuration\n", "\n", "You'll need to provide a connection string to a Postgres database. You can use a local Postgres instance, or a cloud-hosted one.\n", - "You can provide a connection string, or provide environment variables with the connection information. See the .env.example file for `POSTGRES_` settings.\n", + "You can provide a connection string, or provide environment variables with the connection information. See the .env.example file for `POSTGRES_CONNECTION_STRING` and `PG*` settings.\n", + "\n", + "#### Using Azure DB for Postgres\n", + "\n", + "You can use Azure DB for Postgres by following the steps below:\n", + "\n", + "1. Create an Azure DB for Postgres instance. You can set the database to only allow Entra authentication to avoid\n", + " storing the password in the `.env` file.\n", + "2. Set the `PG*` settings, except for the password if using Entra authentication. 
If using Entra, ensure you\n",
Passing this to the super class will enforce using Azure DB settings.\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m connection_pool \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m settings:\n\u001b[0;32m---> 47\u001b[0m settings \u001b[38;5;241m=\u001b[39m \u001b[43mAzureDBForPostgresSettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[43m \u001b[49m\u001b[43menv_file_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menv_file_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_encoding\u001b[49m\n\u001b[1;32m 49\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 51\u001b[0m collection_name\u001b[38;5;241m=\u001b[39mcollection_name,\n\u001b[1;32m 52\u001b[0m data_model_type\u001b[38;5;241m=\u001b[39mdata_model_type,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 56\u001b[0m settings\u001b[38;5;241m=\u001b[39msettings,\n\u001b[1;32m 57\u001b[0m )\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/semantic_kernel/kernel_pydantic.py:56\u001b[0m, in \u001b[0;36mKernelBaseSettings.create\u001b[0;34m(cls, **data)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menv_file_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menv_file_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 55\u001b[0m data \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m}\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic_settings/main.py:144\u001b[0m, in \u001b[0;36mBaseSettings.__init__\u001b[0;34m(__pydantic_self__, _case_sensitive, _env_prefix, _env_file, _env_file_encoding, _env_ignore_empty, _env_nested_delimiter, _env_parse_none_str, _env_parse_enums, _cli_prog_name, _cli_parse_args, _cli_settings_source, _cli_parse_none_str, _cli_hide_none_type, _cli_avoid_json, _cli_enforce_required, _cli_use_class_docs_for_groups, _cli_exit_on_error, _cli_prefix, _secrets_dir, **values)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 121\u001b[0m __pydantic_self__,\n\u001b[1;32m 122\u001b[0m _case_sensitive: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 142\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m 
\u001b[38;5;66;03m# Uses something other than `self` the first arg to allow \"self\" as a settable attribute\u001b[39;00m\n\u001b[0;32m--> 144\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m__pydantic_self__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_settings_build_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 146\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[43m \u001b[49m\u001b[43m_case_sensitive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_case_sensitive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 148\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 149\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 150\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_file_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_file_encoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_ignore_empty\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_ignore_empty\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 152\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_nested_delimiter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_nested_delimiter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 153\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_parse_none_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_parse_none_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_parse_enums\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_parse_enums\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 155\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_prog_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_prog_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_parse_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_parse_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_settings_source\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_settings_source\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_parse_none_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_parse_none_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_hide_none_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_hide_none_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_avoid_json\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_avoid_json\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_enforce_required\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_enforce_required\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_use_class_docs_for_groups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_use_class_docs_for_groups\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m_cli_exit_on_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_exit_on_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m_secrets_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_secrets_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + " \u001b[0;31m[... skipping hidden 1 frame]\u001b[0m\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/_internal/_model_construction.py:292\u001b[0m, in \u001b[0;36minit_private_attributes\u001b[0;34m(self, context)\u001b[0m\n\u001b[1;32m 290\u001b[0m pydantic_private \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name, private_attr \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__private_attributes__\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 292\u001b[0m default \u001b[38;5;241m=\u001b[39m \u001b[43mprivate_attr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m default \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m PydanticUndefined:\n\u001b[1;32m 294\u001b[0m pydantic_private[name] \u001b[38;5;241m=\u001b[39m default\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/fields.py:936\u001b[0m, in \u001b[0;36mModelPrivateAttr.get_default\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_default\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 927\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Retrieve the default value of the object.\u001b[39;00m\n\u001b[1;32m 928\u001b[0m \n\u001b[1;32m 929\u001b[0m \u001b[38;5;124;03m If `self.default_factory` is `None`, the method will return a deep copy of the `self.default` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;124;03m The default value of the object.\u001b[39;00m\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 936\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msmart_deepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_factory \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_factory()\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/_internal/_utils.py:318\u001b[0m, in \u001b[0;36msmart_deepcopy\u001b[0;34m(obj)\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mRuntimeError\u001b[39;00m):\n\u001b[1;32m 315\u001b[0m 
\u001b[38;5;66;03m# do we really dare to catch ALL errors? Seems a bit risky\u001b[39;00m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m--> 318\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/share/uv/python/cpython-3.10.15-linux-x86_64-gnu/lib/python3.10/copy.py:161\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 159\u001b[0m reductor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__reduce_ex__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m reductor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 161\u001b[0m rv \u001b[38;5;241m=\u001b[39m \u001b[43mreductor\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 163\u001b[0m reductor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__reduce__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", + "\u001b[0;31mTypeError\u001b[0m: cannot pickle '_thread.lock' object" + ] + } + ], "source": [ - "collection = PostgresCollection[str, ArxivPaper](\n", - " collection_name=\"arxiv_papers\", data_model_type=ArxivPaper, env_file_path=env_file_path\n", - ")" + "if USE_AZURE_DB_FOR_POSTGRES:\n", + " collection = AzureDBForPostgresCollection[str, ArxivPaper](\n", + " collection_name=\"arxiv_papers\", data_model_type=ArxivPaper, env_file_path=env_file_path\n", + " )\n", + "else:\n", + " collection = PostgresCollection[str, ArxivPaper](\n", + " collection_name=\"arxiv_papers\", data_model_type=ArxivPaper, env_file_path=env_file_path\n", + " )" ] }, { @@ -279,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -305,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -321,9 +376,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SYNC TOKEN: 
<token redacted>\n"
<token redacted>\n",
-0.00220003 -0.01004753\n", + " 0.0266567 ]\n", + "\n", + "\n", + "# Speech to Reality: On-Demand Production using Natural Language, 3D Generative AI, and Discrete Robotic Assembly\n", + "\n", + "Abstract: We present a system that transforms speech into physical objects by combining\n", + "3D generative Artificial Intelligence with robotic assembly. The system\n", + "leverages natural language input to make design and manufacturing more\n", + "accessible, enabling individuals without expertise in 3D modeling or robotic\n", + "programming to create physical objects. We propose utilizing discrete robotic\n", + "assembly of lattice-based voxel components to address the challenges of using\n", + "generative AI outputs in physical production, such as design variability,\n", + "fabrication speed, structural integrity, and material waste. The system\n", + "interprets speech to generate 3D objects, discretizes them into voxel\n", + "components, computes an optimized assembly sequence, and generates a robotic\n", + "toolpath. The results are demonstrated through the assembly of various objects,\n", + "ranging from chairs to shelves, which are prompted via speech and realized\n", + "within 5 minutes using a 6-axis robotic arm.\n", + "Published: 2024-09-27 02:12:56\n", + "Link: http://arxiv.org/abs/2409.18390v1\n", + "PDF Link: http://arxiv.org/abs/2409.18390v1\n", + "Authors: Alexander Htet Kyaw, Se Hwan Jeon, Miana Smith, Neil Gershenfeld\n", + "Embedding: [ 0.00662689 0.03477224 0.01948195 ... 0.02736436 -0.00416199\n", + " 0.01341196]\n", + "\n", + "\n", + "# Deep Generative Model for Mechanical System Configuration Design\n", + "\n", + "Abstract: Generative AI has made remarkable progress in addressing various design\n", + "challenges. One prominent area where generative AI could bring significant value\n", + "is in engineering design. In particular, selecting an optimal set of components\n", + "and their interfaces to create a mechanical system that meets design\n", + "requirements is one of the most challenging and time-consuming tasks for\n", + "engineers. This configuration design task is inherently challenging due to its\n", + "categorical nature, multiple design requirements a solution must satisfy, and\n", + "the reliance on physics simulations for evaluating potential solutions. These\n", + "characteristics entail solving a combinatorial optimization problem with\n", + "multiple constraints involving black-box functions. To address this challenge,\n", + "we propose a deep generative model to predict the optimal combination of\n", + "components and interfaces for a given design problem. To demonstrate our\n", + "approach, we solve a gear train synthesis problem by first creating a synthetic\n", + "dataset using a grammar, a parts catalogue, and a physics simulator. We then\n", + "train a Transformer using this dataset, named GearFormer, which can not only\n", + "generate quality solutions on its own, but also augment search methods such as\n", + "an evolutionary algorithm and Monte Carlo tree search. 
We show that GearFormer\n", + "outperforms such search methods on their own in terms of satisfying the\n", + "specified design requirements with orders of magnitude faster generation time.\n", + "Additionally, we showcase the benefit of hybrid methods that leverage both\n", + "GearFormer and search methods, which further improve the quality of the\n", + "solutions.\n", + "Published: 2024-09-09 19:15:45\n", + "Link: http://arxiv.org/abs/2409.06016v2\n", + "PDF Link: http://arxiv.org/abs/2409.06016v2\n", + "Authors: Yasaman Etesam, Hyunmin Cheong, Mohammadmehdi Ataei, Pradeep Kumar Jayaraman\n", + "Embedding: [0.02961224 0.01000344 0.03739412 ... 0.00147997 0.02606801 0.044123 ]\n", + "\n", + "\n" + ] + } + ], "source": [ "async with collection:\n", " results = await collection.get_batch(keys[:3])\n", diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py new file mode 100644 index 000000000000..2a50eae89411 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Microsoft. All rights reserved. diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py new file mode 100644 index 000000000000..f0a408e58e94 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py @@ -0,0 +1,57 @@ +# Copyright (c) Microsoft. All rights reserved. +from typing import TypeVar + +from psycopg_pool import AsyncConnectionPool + +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import ( + AzureDBForPostgresSettings, +) +from semantic_kernel.connectors.memory.postgres.constants import DEFAULT_SCHEMA +from semantic_kernel.connectors.memory.postgres.postgres_collection import PostgresCollection +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition + +TKey = TypeVar("TKey", str, int) +TModel = TypeVar("TModel") + + +class AzureDBForPostgresCollection(PostgresCollection[TKey, TModel]): + """AzureDBForPostgresCollection class.""" + + def __init__( + self, + collection_name: str, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + connection_pool: AsyncConnectionPool | None = None, + db_schema: str = DEFAULT_SCHEMA, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + settings: AzureDBForPostgresSettings | None = None, + ): + """Initialize the collection. + + Args: + collection_name: The name of the collection, which corresponds to the table name. + data_model_type (type[TModel]): The type of the data model. + data_model_definition: The data model definition. + connection_pool: The connection pool. + db_schema: The database schema. + env_file_path (str): Use the environment settings file as a fallback to environment variables. + env_file_encoding (str): The encoding of the environment settings file. + settings: The settings for the Azure DB for Postgres connection. If not provided, the settings will be + created from the environment. + """ + # If the connection pool or settings were not provided, create the settings from the environment. + # Passing this to the super class will enforce using Azure DB settings. 
+ if not connection_pool and not settings: + settings = AzureDBForPostgresSettings.create( + env_file_path=env_file_path, env_file_encoding=env_file_encoding + ) + super().__init__( + collection_name=collection_name, + data_model_type=data_model_type, + data_model_definition=data_model_definition, + connection_pool=connection_pool, + db_schema=db_schema, + settings=settings, + ) diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py new file mode 100644 index 000000000000..669dd0f3f0c4 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py @@ -0,0 +1,43 @@ +# Copyright (c) Microsoft. All rights reserved. +import sys +from typing import Any + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +from azure.core.credentials import TokenCredential +from azure.core.credentials_async import AsyncTokenCredential +from azure.identity import DefaultAzureCredential +from psycopg.conninfo import conninfo_to_dict + +from semantic_kernel.connectors.memory.azure_db_for_postgres.utils import get_entra_token, get_entra_token_aysnc +from semantic_kernel.connectors.memory.postgres.postgres_settings import PostgresSettings + + +class AzureDBForPostgresSettings(PostgresSettings): + """Azure DB for Postgres model settings. + + This is the same as PostgresSettings, but does not a require a password. + If a password is not supplied, then Entra will use the Azure AD token. + You can also supply an Azure credential directly. + """ + + credential: AsyncTokenCredential | TokenCredential | None = None + + @override + def get_connection_args(self, **kwargs) -> dict[str, Any]: + """Get connection arguments.""" + password: Any = self.password.get_secret_value() if self.password else None + if not password and self.connection_string: + password = conninfo_to_dict(self.connection_string.get_secret_value()).get("password") + + if not password: + self.credential = self.credential or DefaultAzureCredential() + if isinstance(self.credential, AsyncTokenCredential): + password = get_entra_token_aysnc(self.credential) + else: + password = get_entra_token(self.credential) + + return super().get_connection_args(password=password) diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_store.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_store.py new file mode 100644 index 000000000000..eba3138b4cfe --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_store.py @@ -0,0 +1,9 @@ +# Copyright (c) Microsoft. All rights reserved. + +from semantic_kernel.connectors.memory.postgres.postgres_store import PostgresStore + + +class AzureDBForPostgresStore(PostgresStore): + """AzureDBForPostgresStore class.""" + + pass diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py new file mode 100644 index 000000000000..612b938173e1 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft. All rights reserved. 
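+            # AzureDBForPostgresSettings resolves the password via an Entra token when none is configured
+            # (see get_connection_args below).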
+ +AZURE_DB_FOR_POSTGRES_SCOPE = "https://ossrdbms-aad.database.windows.net/.default" diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py new file mode 100644 index 000000000000..f7dee848eea1 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft. All rights reserved. +import logging + +from azure.core.credentials import TokenCredential +from azure.core.credentials_async import AsyncTokenCredential + +from semantic_kernel.connectors.memory.azure_db_for_postgres.constants import AZURE_DB_FOR_POSTGRES_SCOPE + +logger = logging.getLogger(__name__) + + +async def get_entra_token_aysnc(credential: AsyncTokenCredential) -> str: + """Get the password from Entra using the provided credential.""" + logger.info("Acquiring Entra token for postgres password") + + async with credential: + cred = await credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE) + return cred.token + + +def get_entra_token(credential: TokenCredential) -> str: + """Get the password from Entra using the provided credential.""" + logger.info("Acquiring Entra token for postgres password") + + return credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE).token diff --git a/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py b/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py index 32cd56f7b079..c90275237e58 100644 --- a/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py +++ b/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py @@ -71,8 +71,16 @@ class PostgresSettings(KernelBaseSettings): default_dimensionality: int = 100 max_rows_per_transaction: int = 1000 - def get_connection_args(self) -> dict[str, Any]: - """Get connection arguments.""" + def get_connection_args(self, **kwargs) -> dict[str, Any]: + """Get connection arguments. + + Args: + kwargs: dict[str, Any] - Additional arguments + Use this to override any connection arguments. + + Returns: + dict[str, Any]: Connection arguments that can be passed to psycopg.connect + """ result = conninfo_to_dict(self.connection_string.get_secret_value()) if self.connection_string else {} if self.host: @@ -86,6 +94,8 @@ def get_connection_args(self) -> dict[str, Any]: if self.password: result["password"] = self.password.get_secret_value() + result = {**result, **kwargs} + # Ensure required values if "host" not in result: raise MemoryConnectorInitializationError("host is required. 
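+# OAuth scope used to request Entra (Azure AD) access tokens for Azure Database for PostgreSQL.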
Please set PGHOST or connection_string.") From c758b123064e3f78e50a3c95786cd7df7ab5641a Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Fri, 18 Oct 2024 11:41:36 -0400 Subject: [PATCH 2/2] Add dynamic Entra credentials to AsyncConnectionPool --- python/samples/concepts/memory/new_memory.py | 7 + .../third_party/postgres-memory.ipynb | 187 ++++++------------ .../azure_db_for_postgres_collection.py | 6 +- .../azure_db_for_postgres_settings.py | 83 ++++++-- .../azure_db_for_postgres/entra_connection.py | 81 ++++++++ .../memory/azure_db_for_postgres/utils.py | 17 +- 6 files changed, 230 insertions(+), 151 deletions(-) create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py diff --git a/python/samples/concepts/memory/new_memory.py b/python/samples/concepts/memory/new_memory.py index a76716659ad2..0819fd2aeadf 100644 --- a/python/samples/concepts/memory/new_memory.py +++ b/python/samples/concepts/memory/new_memory.py @@ -12,6 +12,9 @@ from semantic_kernel.connectors.ai.open_ai import OpenAIEmbeddingPromptExecutionSettings, OpenAITextEmbedding from semantic_kernel.connectors.ai.open_ai.services.azure_text_embedding import AzureTextEmbedding from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_collection import ( + AzureDBForPostgresCollection, +) from semantic_kernel.connectors.memory.postgres.postgres_collection import PostgresCollection from semantic_kernel.connectors.memory.qdrant import QdrantCollection from semantic_kernel.connectors.memory.redis import RedisHashsetCollection, RedisJsonCollection @@ -88,6 +91,10 @@ class MyDataModelList: "ai_search": lambda: AzureAISearchCollection[MyDataModel]( data_model_type=MyDataModel, ), + "azure_db_for_postgres": lambda: AzureDBForPostgresCollection[str, MyDataModel]( + data_model_type=MyDataModel, + collection_name=collection_name, + ), "postgres": lambda: PostgresCollection[str, MyDataModel]( data_model_type=MyDataModel, collection_name=collection_name, diff --git a/python/samples/getting_started/third_party/postgres-memory.ipynb b/python/samples/getting_started/third_party/postgres-memory.ipynb index 872273efb9a4..1aeca721b2d7 100644 --- a/python/samples/getting_started/third_party/postgres-memory.ipynb +++ b/python/samples/getting_started/third_party/postgres-memory.ipynb @@ -262,17 +262,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 10 papers on 'generative ai'\n" - ] - } - ], + "outputs": [], "source": [ "arxiv_papers: list[ArxivPaper] = [\n", " ArxivPaper.from_arxiv_info(paper)\n", @@ -293,27 +285,7 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "cannot pickle '_thread.lock' object", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m USE_AZURE_DB_FOR_POSTGRES:\n\u001b[0;32m----> 2\u001b[0m collection \u001b[38;5;241m=\u001b[39m \u001b[43mAzureDBForPostgresCollection\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mArxivPaper\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marxiv_papers\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_model_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mArxivPaper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menv_file_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_path\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6\u001b[0m collection \u001b[38;5;241m=\u001b[39m PostgresCollection[\u001b[38;5;28mstr\u001b[39m, ArxivPaper](\n\u001b[1;32m 7\u001b[0m collection_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marxiv_papers\u001b[39m\u001b[38;5;124m\"\u001b[39m, data_model_type\u001b[38;5;241m=\u001b[39mArxivPaper, env_file_path\u001b[38;5;241m=\u001b[39menv_file_path\n\u001b[1;32m 8\u001b[0m )\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py:47\u001b[0m, in \u001b[0;36mAzureDBForPostgresCollection.__init__\u001b[0;34m(self, collection_name, data_model_type, data_model_definition, connection_pool, db_schema, env_file_path, env_file_encoding, settings)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;66;03m# If the connection pool or settings were not provided, create the settings from the environment.\u001b[39;00m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;66;03m# Passing this to the super class will enforce using Azure DB settings.\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m connection_pool \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m settings:\n\u001b[0;32m---> 47\u001b[0m settings \u001b[38;5;241m=\u001b[39m \u001b[43mAzureDBForPostgresSettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[43m \u001b[49m\u001b[43menv_file_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menv_file_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_encoding\u001b[49m\n\u001b[1;32m 49\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 51\u001b[0m collection_name\u001b[38;5;241m=\u001b[39mcollection_name,\n\u001b[1;32m 52\u001b[0m data_model_type\u001b[38;5;241m=\u001b[39mdata_model_type,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 56\u001b[0m settings\u001b[38;5;241m=\u001b[39msettings,\n\u001b[1;32m 57\u001b[0m )\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/semantic_kernel/kernel_pydantic.py:56\u001b[0m, in \u001b[0;36mKernelBaseSettings.create\u001b[0;34m(cls, **data)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menv_file_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menv_file_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 55\u001b[0m data \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m}\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic_settings/main.py:144\u001b[0m, in \u001b[0;36mBaseSettings.__init__\u001b[0;34m(__pydantic_self__, _case_sensitive, _env_prefix, _env_file, _env_file_encoding, _env_ignore_empty, _env_nested_delimiter, _env_parse_none_str, _env_parse_enums, _cli_prog_name, _cli_parse_args, _cli_settings_source, _cli_parse_none_str, _cli_hide_none_type, _cli_avoid_json, _cli_enforce_required, _cli_use_class_docs_for_groups, _cli_exit_on_error, _cli_prefix, _secrets_dir, **values)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 121\u001b[0m __pydantic_self__,\n\u001b[1;32m 122\u001b[0m _case_sensitive: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 142\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m \u001b[38;5;66;03m# Uses something other than `self` the first arg to allow \"self\" as a settable attribute\u001b[39;00m\n\u001b[0;32m--> 144\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m__pydantic_self__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_settings_build_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 146\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[43m \u001b[49m\u001b[43m_case_sensitive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_case_sensitive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 148\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 149\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 150\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_file_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_file_encoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_ignore_empty\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_ignore_empty\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 152\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_nested_delimiter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_nested_delimiter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 153\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m_env_parse_none_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_parse_none_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_parse_enums\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_parse_enums\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 155\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_prog_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_prog_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_parse_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_parse_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_settings_source\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_settings_source\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_parse_none_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_parse_none_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_hide_none_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_hide_none_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_avoid_json\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_avoid_json\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_enforce_required\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_enforce_required\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_use_class_docs_for_groups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_use_class_docs_for_groups\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_exit_on_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_exit_on_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m_secrets_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_secrets_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - " \u001b[0;31m[... 
skipping hidden 1 frame]\u001b[0m\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/_internal/_model_construction.py:292\u001b[0m, in \u001b[0;36minit_private_attributes\u001b[0;34m(self, context)\u001b[0m\n\u001b[1;32m 290\u001b[0m pydantic_private \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name, private_attr \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__private_attributes__\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 292\u001b[0m default \u001b[38;5;241m=\u001b[39m \u001b[43mprivate_attr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m default \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m PydanticUndefined:\n\u001b[1;32m 294\u001b[0m pydantic_private[name] \u001b[38;5;241m=\u001b[39m default\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/fields.py:936\u001b[0m, in \u001b[0;36mModelPrivateAttr.get_default\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_default\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 927\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Retrieve the default value of the object.\u001b[39;00m\n\u001b[1;32m 928\u001b[0m \n\u001b[1;32m 929\u001b[0m \u001b[38;5;124;03m If `self.default_factory` is `None`, the method will return a deep copy of the `self.default` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;124;03m The default value of the object.\u001b[39;00m\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 936\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msmart_deepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_factory \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_factory()\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/_internal/_utils.py:318\u001b[0m, in \u001b[0;36msmart_deepcopy\u001b[0;34m(obj)\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mRuntimeError\u001b[39;00m):\n\u001b[1;32m 315\u001b[0m \u001b[38;5;66;03m# do we really dare to catch ALL errors? 
Seems a bit risky\u001b[39;00m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m--> 318\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/share/uv/python/cpython-3.10.15-linux-x86_64-gnu/lib/python3.10/copy.py:161\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 159\u001b[0m reductor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__reduce_ex__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m reductor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 161\u001b[0m rv \u001b[38;5;241m=\u001b[39m \u001b[43mreductor\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 163\u001b[0m reductor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__reduce__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", - "\u001b[0;31mTypeError\u001b[0m: cannot pickle '_thread.lock' object" - ] - } - ], + "outputs": [], "source": [ "if USE_AZURE_DB_FOR_POSTGRES:\n", " collection = AzureDBForPostgresCollection[str, ArxivPaper](\n", @@ -334,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -360,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -376,17 +348,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SYNC TOKEN: 
<token redacted>\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import (\n",
+    "    AzureDBForPostgresSettings,\n",
+    ")\n",
+    "from semantic_kernel.connectors.memory.postgres.postgres_settings import PostgresSettings\n",
+    "\n",
+    "if USE_AZURE_DB_FOR_POSTGRES:\n",
+    "    settings = AzureDBForPostgresSettings(env_file_path=env_file_path)\n",
+    "    connection_pool = await settings.create_connection_pool()\n",
+    "    collection = AzureDBForPostgresCollection[str, ArxivPaper](\n",
+    "        collection_name=\"arxiv_papers\",\n",
+    "        data_model_type=ArxivPaper,\n",
+    "        connection_pool=connection_pool,\n",
+    "        settings=settings,\n",
+    "    )\n",
+    "else:\n",
+    "    settings = PostgresSettings(env_file_path=env_file_path)\n",
+    "    connection_pool = await settings.create_connection_pool()\n",
+    "    collection = PostgresCollection[str, ArxivPaper](\n",
+    "        collection_name=\"arxiv_papers\",\n",
+    "        data_model_type=ArxivPaper,\n",
+    "        connection_pool=connection_pool,\n",
+    "        env_file_path=env_file_path,\n",
+    "    )\n",
+    "\n",
+    "# Open the connection pool; create_connection_pool returns it unopened.\n",
+    "await connection_pool.open()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "SYNC TOKEN: <Entra access token redacted>\n",
-      "# Data Analysis in the Era of Generative AI\n",
-      "\n",
-      
"Abstract: This paper explores the potential of AI-powered tools to reshape data\n", - "analysis, focusing on design considerations and challenges. We explore how the\n", - "emergence of large language and multimodal models offers new opportunities to\n", - "enhance various stages of data analysis workflow by translating high-level user\n", - "intentions into executable code, charts, and insights. We then examine human-\n", - "centered design principles that facilitate intuitive interactions, build user\n", - "trust, and streamline the AI-assisted analysis workflow across multiple apps.\n", - "Finally, we discuss the research challenges that impede the development of these\n", - "AI-based systems such as enhancing model capabilities, evaluating and\n", - "benchmarking, and understanding end-user needs.\n", - "Published: 2024-09-27 06:31:03\n", - "Link: http://arxiv.org/abs/2409.18475v1\n", - "PDF Link: http://arxiv.org/abs/2409.18475v1\n", - "Authors: Jeevana Priya Inala, Chenglong Wang, Steven Drucker, Gonzalo Ramos, Victor Dibia, Nathalie Riche, Dave Brown, Dan Marshall, Jianfeng Gao\n", - "Embedding: [ 0.0324665 0.03064382 0.04381268 ... -0.00220003 -0.01004753\n", - " 0.0266567 ]\n", - "\n", - "\n", - "# Speech to Reality: On-Demand Production using Natural Language, 3D Generative AI, and Discrete Robotic Assembly\n", - "\n", - "Abstract: We present a system that transforms speech into physical objects by combining\n", - "3D generative Artificial Intelligence with robotic assembly. The system\n", - "leverages natural language input to make design and manufacturing more\n", - "accessible, enabling individuals without expertise in 3D modeling or robotic\n", - "programming to create physical objects. We propose utilizing discrete robotic\n", - "assembly of lattice-based voxel components to address the challenges of using\n", - "generative AI outputs in physical production, such as design variability,\n", - "fabrication speed, structural integrity, and material waste. The system\n", - "interprets speech to generate 3D objects, discretizes them into voxel\n", - "components, computes an optimized assembly sequence, and generates a robotic\n", - "toolpath. The results are demonstrated through the assembly of various objects,\n", - "ranging from chairs to shelves, which are prompted via speech and realized\n", - "within 5 minutes using a 6-axis robotic arm.\n", - "Published: 2024-09-27 02:12:56\n", - "Link: http://arxiv.org/abs/2409.18390v1\n", - "PDF Link: http://arxiv.org/abs/2409.18390v1\n", - "Authors: Alexander Htet Kyaw, Se Hwan Jeon, Miana Smith, Neil Gershenfeld\n", - "Embedding: [ 0.00662689 0.03477224 0.01948195 ... 0.02736436 -0.00416199\n", - " 0.01341196]\n", - "\n", - "\n", - "# Deep Generative Model for Mechanical System Configuration Design\n", - "\n", - "Abstract: Generative AI has made remarkable progress in addressing various design\n", - "challenges. One prominent area where generative AI could bring significant value\n", - "is in engineering design. In particular, selecting an optimal set of components\n", - "and their interfaces to create a mechanical system that meets design\n", - "requirements is one of the most challenging and time-consuming tasks for\n", - "engineers. This configuration design task is inherently challenging due to its\n", - "categorical nature, multiple design requirements a solution must satisfy, and\n", - "the reliance on physics simulations for evaluating potential solutions. 
These\n", - "characteristics entail solving a combinatorial optimization problem with\n", - "multiple constraints involving black-box functions. To address this challenge,\n", - "we propose a deep generative model to predict the optimal combination of\n", - "components and interfaces for a given design problem. To demonstrate our\n", - "approach, we solve a gear train synthesis problem by first creating a synthetic\n", - "dataset using a grammar, a parts catalogue, and a physics simulator. We then\n", - "train a Transformer using this dataset, named GearFormer, which can not only\n", - "generate quality solutions on its own, but also augment search methods such as\n", - "an evolutionary algorithm and Monte Carlo tree search. We show that GearFormer\n", - "outperforms such search methods on their own in terms of satisfying the\n", - "specified design requirements with orders of magnitude faster generation time.\n", - "Additionally, we showcase the benefit of hybrid methods that leverage both\n", - "GearFormer and search methods, which further improve the quality of the\n", - "solutions.\n", - "Published: 2024-09-09 19:15:45\n", - "Link: http://arxiv.org/abs/2409.06016v2\n", - "PDF Link: http://arxiv.org/abs/2409.06016v2\n", - "Authors: Yasaman Etesam, Hyunmin Cheong, Mohammadmehdi Ataei, Pradeep Kumar Jayaraman\n", - "Embedding: [0.02961224 0.01000344 0.03739412 ... 0.00147997 0.02606801 0.044123 ]\n", - "\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "async with collection:\n", " results = await collection.get_batch(keys[:3])\n", @@ -505,6 +422,22 @@ " print()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we need to close the connection pool explicitly." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "await connection_pool.close()" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py index f0a408e58e94..9427a390831a 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py @@ -32,12 +32,12 @@ def __init__( Args: collection_name: The name of the collection, which corresponds to the table name. - data_model_type (type[TModel]): The type of the data model. + data_model_type: The type of the data model. data_model_definition: The data model definition. connection_pool: The connection pool. db_schema: The database schema. - env_file_path (str): Use the environment settings file as a fallback to environment variables. - env_file_encoding (str): The encoding of the environment settings file. + env_file_path: Use the environment settings file as a fallback to environment variables. + env_file_encoding: The encoding of the environment settings file. settings: The settings for the Azure DB for Postgres connection. If not provided, the settings will be created from the environment. 
""" diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py index 669dd0f3f0c4..ea5e61e646b6 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py @@ -2,17 +2,21 @@ import sys from typing import Any +from psycopg.conninfo import conninfo_to_dict +from psycopg_pool import AsyncConnectionPool + +from semantic_kernel.connectors.memory.azure_db_for_postgres.entra_connection import AsyncEntraConnection +from semantic_kernel.exceptions.memory_connector_exceptions import MemoryConnectorInitializationError + if sys.version_info >= (3, 12): - from typing import override # pragma: no cover + pass # pragma: no cover else: - from typing_extensions import override # pragma: no cover + pass # pragma: no cover from azure.core.credentials import TokenCredential from azure.core.credentials_async import AsyncTokenCredential -from azure.identity import DefaultAzureCredential -from psycopg.conninfo import conninfo_to_dict -from semantic_kernel.connectors.memory.azure_db_for_postgres.utils import get_entra_token, get_entra_token_aysnc +from semantic_kernel import __version__ from semantic_kernel.connectors.memory.postgres.postgres_settings import PostgresSettings @@ -26,18 +30,59 @@ class AzureDBForPostgresSettings(PostgresSettings): credential: AsyncTokenCredential | TokenCredential | None = None - @override def get_connection_args(self, **kwargs) -> dict[str, Any]: - """Get connection arguments.""" - password: Any = self.password.get_secret_value() if self.password else None - if not password and self.connection_string: - password = conninfo_to_dict(self.connection_string.get_secret_value()).get("password") - - if not password: - self.credential = self.credential or DefaultAzureCredential() - if isinstance(self.credential, AsyncTokenCredential): - password = get_entra_token_aysnc(self.credential) - else: - password = get_entra_token(self.credential) - - return super().get_connection_args(password=password) + """Get connection arguments. + + Args: + kwargs: dict[str, Any] - Additional arguments + Use this to override any connection arguments. + + Returns: + dict[str, Any]: Connection arguments that can be passed to psycopg.connect + """ + result = conninfo_to_dict(self.connection_string.get_secret_value()) if self.connection_string else {} + + if self.host: + result["host"] = self.host + if self.port: + result["port"] = self.port + if self.dbname: + result["dbname"] = self.dbname + if self.user: + result["user"] = self.user + if self.password: + result["password"] = self.password.get_secret_value() + + result = {**result, **kwargs} + + # Ensure required values + if "host" not in result: + raise MemoryConnectorInitializationError("host is required. Please set PGHOST or connection_string.") + if "dbname" not in result: + raise MemoryConnectorInitializationError( + "database is required. Please set PGDATABASE or connection_string." + ) + + return result + + async def create_connection_pool(self) -> AsyncConnectionPool: + """Creates a connection pool based off of settings. + + Uses AsyncEntraConnection as the connection class, which + can set the user and password based on a Entra token. 
+        """
+        pool: AsyncConnectionPool = AsyncConnectionPool(
+            min_size=self.min_pool,
+            max_size=self.max_pool,
+            open=False,
+            kwargs={
+                **self.get_connection_args(),
+                # The credential is consumed by AsyncEntraConnection.connect, not by psycopg.
+                "credential": self.credential,
+                "application_name": f"semantic_kernel (python) v{__version__}",
+            },
+            connection_class=AsyncEntraConnection,
+        )
+        return pool
diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py
new file mode 100644
index 000000000000..2a2d044e3802
--- /dev/null
+++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py
@@ -0,0 +1,81 @@
+# Copyright (c) Microsoft. All rights reserved.
+import base64
+import json
+import logging
+from functools import lru_cache
+from typing import Any
+
+from azure.core.credentials import TokenCredential
+from azure.core.credentials_async import AsyncTokenCredential
+from azure.identity import DefaultAzureCredential
+from psycopg import AsyncConnection
+
+from semantic_kernel.connectors.memory.azure_db_for_postgres.constants import AZURE_DB_FOR_POSTGRES_SCOPE
+
+logger = logging.getLogger(__name__)
+
+
+async def get_entra_token_async(credential: AsyncTokenCredential) -> str:
+    """Get the password from Entra using the provided credential."""
+    logger.info("Acquiring Entra token for postgres password")
+
+    async with credential:
+        cred = await credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE)
+        return cred.token
+
+
+def get_entra_token(credential: TokenCredential | None) -> str:
+    """Get the password from Entra using the provided credential."""
+    logger.info("Acquiring Entra token for postgres password")
+    credential = credential or get_default_azure_credentials()
+
+    return credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE).token
+
+
+@lru_cache(maxsize=1)
+def get_default_azure_credentials() -> DefaultAzureCredential:
+    """Get the default Azure credentials.
+
+    This method caches the credentials to avoid creating new instances.
+    """
+    return DefaultAzureCredential()
+
+
+def decode_jwt(token: str) -> dict[str, Any]:
+    """Decode the JWT payload to extract claims."""
+    payload = token.split(".")[1]
+    # base64url payloads are unpadded; add only the padding that is missing.
+    padding = "=" * (-len(payload) % 4)
+    decoded_payload = base64.urlsafe_b64decode(payload + padding)
+    return json.loads(decoded_payload)
+
+
+async def get_entra_conninfo(credential: TokenCredential | AsyncTokenCredential | None) -> dict[str, str]:
+    """Fetch an Entra token and return the username and token extracted from it."""
+    # Fetch a new token and extract the username.
+    if isinstance(credential, AsyncTokenCredential):
+        token = await get_entra_token_async(credential)
+    else:
+        token = get_entra_token(credential)
+    claims = decode_jwt(token)
+    username = claims.get("upn") or claims.get("preferred_username") or claims.get("unique_name")
+    if not username:
+        raise ValueError(
+            "Could not extract username from token. Have you logged in?"
+        )
+
+    return {"user": username, "password": token}
+
+
+class AsyncEntraConnection(AsyncConnection):
+    """Asynchronous connection class for using Entra auth with Azure DB for PostgreSQL."""
+
+    @classmethod
+    async def connect(cls, *args, **kwargs):
+        """Establish an asynchronous connection using Entra auth with Azure DB for PostgreSQL."""
+        credential = kwargs.pop("credential", None)
+        if credential and not isinstance(credential, (TokenCredential, AsyncTokenCredential)):
+            raise ValueError("credential must be a TokenCredential or AsyncTokenCredential")
+        if credential or not kwargs.get("user") or not kwargs.get("password"):
+            entra_conninfo = await get_entra_conninfo(credential)
+            # Use the Entra token as the password.
+            kwargs["password"] = entra_conninfo["password"]
+            if not kwargs.get("user"):
+                # If user isn't already set, use the username from the token.
+                kwargs["user"] = entra_conninfo["user"]
+        return await super().connect(*args, **kwargs)
diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py
index f7dee848eea1..ea693f908d3c 100644
--- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py
+++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py
@@ -1,15 +1,17 @@
 # Copyright (c) Microsoft. All rights reserved.
 import logging
+from functools import lru_cache
 
 from azure.core.credentials import TokenCredential
 from azure.core.credentials_async import AsyncTokenCredential
+from azure.identity import DefaultAzureCredential
 
 from semantic_kernel.connectors.memory.azure_db_for_postgres.constants import AZURE_DB_FOR_POSTGRES_SCOPE
 
 logger = logging.getLogger(__name__)
 
 
-async def get_entra_token_aysnc(credential: AsyncTokenCredential) -> str:
+async def get_entra_token_async(credential: AsyncTokenCredential) -> str:
     """Get the password from Entra using the provided credential."""
     logger.info("Acquiring Entra token for postgres password")
 
@@ -18,8 +20,19 @@ async def get_entra_token_aysnc(credential: AsyncTokenCredential) -> str:
     return cred.token
 
 
-def get_entra_token(credential: TokenCredential) -> str:
+def get_entra_token(credential: TokenCredential | None) -> str:
     """Get the password from Entra using the provided credential."""
     logger.info("Acquiring Entra token for postgres password")
+    credential = credential or get_default_azure_credentials()
 
     return credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE).token
+
+
+@lru_cache(maxsize=1)
+def get_default_azure_credentials() -> DefaultAzureCredential:
+    """Get the default Azure credentials.
+
+    This method caches the credentials to avoid creating new instances.
+    """
+    return DefaultAzureCredential()
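
Taken together, a minimal sketch of how the settings, the pool, and the Entra connection class
compose outside the notebook. This is a sketch under stated assumptions, not part of the patch:
it assumes PGHOST and PGDATABASE (and optionally PGUSER) are set in the environment, and that
DefaultAzureCredential can find an identity (for example via `az login`). No PGPASSWORD is
needed, because AsyncEntraConnection exchanges the credential for an Entra token on each new
connection:

    import asyncio

    from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import (
        AzureDBForPostgresSettings,
    )


    async def main() -> None:
        # Settings are read from the environment (PGHOST, PGDATABASE, ...).
        settings = AzureDBForPostgresSettings()
        pool = await settings.create_connection_pool()
        await pool.open()  # the pool comes back unopened
        try:
            async with pool.connection() as conn:
                # Each connection authenticated with an Entra token as the password.
                cur = await conn.execute("SELECT version()")
                print(await cur.fetchone())
        finally:
            await pool.close()


    asyncio.run(main())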
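get_connection_args resolves values in three layers: values parsed from connection_string are
lowest, the individual PG* settings fields override them, and explicit kwargs win over both. A
small illustration, assuming the fields can be passed directly to the settings constructor; the
host and database values here are hypothetical:

    from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import (
        AzureDBForPostgresSettings,
    )

    settings = AzureDBForPostgresSettings(
        connection_string="host=ignored.example.com dbname=my_database",  # hypothetical
        host="my-server.postgres.database.azure.com",  # field overrides the connection string
    )
    # Explicit kwargs win over both the connection string and the fields.
    args = settings.get_connection_args(dbname="other_database")
    assert args["host"] == "my-server.postgres.database.azure.com"
    assert args["dbname"] == "other_database"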
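And the notebook's point about sharing one connection pool across multiple collections looks
roughly like this, reusing the `settings` and opened `pool` from the first sketch above;
ArxivPaper is the model defined in the notebook, and the second collection name is hypothetical:

    from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_collection import (
        AzureDBForPostgresCollection,
    )

    # ArxivPaper is the vectorstoremodel-decorated class from the notebook.
    papers = AzureDBForPostgresCollection[str, ArxivPaper](
        collection_name="arxiv_papers",
        data_model_type=ArxivPaper,
        connection_pool=pool,
        settings=settings,
    )
    archive = AzureDBForPostgresCollection[str, ArxivPaper](
        collection_name="arxiv_papers_archive",  # hypothetical second table
        data_model_type=ArxivPaper,
        connection_pool=pool,
        settings=settings,
    )
    # Both collections share the same connections; close the pool once, at shutdown.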