diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md
index 91c686b69..3d5bc76c2 100755
--- a/kedro-datasets/RELEASE.md
+++ b/kedro-datasets/RELEASE.md
@@ -5,21 +5,22 @@
 - Dropped support for Python 3.9 (EOL Oct 2025). Minimum supported version is now 3.10.
 - Removed the deprecated `MatplotlibWriter` datset. Matplotlib objects can now be handled using `MatplotlibDataset`.
 - Group datasets documentation according to the dependencies to clean up the nav bar.
+- Added `mode` save argument to `ibis.TableDataset`, supporting "append", "overwrite", "error"/"errorifexists", and "ignore" save modes. The deprecated `overwrite` save argument is mapped to `mode` for backward compatibility and will be removed in a future release. Specifying both `mode` and `overwrite` results in an error.
+- Added `credentials` support to `ibis.TableDataset`.
 - Added the following new datasets:

-| Type                  | Description                                                                              | Location                 |
-|-----------------------|-----------------------------------------------------------------------------------|--------------------------|
+| Type                  | Description                                                                              | Location                 |
+|-----------------------|-----------------------------------------------------------------------------------------|--------------------------|
 | `openxml.PptxDataset` | A dataset for loading and saving .pptx files (Microsoft PowerPoint) using `python-pptx` | `kedro_datasets.openxml` |

 - Added the following new **experimental** datasets:

-| Type | Description | Location |
-|--------------------------------|-------------------------------------------------------------|--------------------------------------|
-| `langfuse.LangfuseTraceDataset` | Kedro dataset to provide Langfuse tracing clients and callbacks | `kedro_datasets_experimental.langfuse` |
-| `langchain.LangChainPromptDataset` | Kedro dataset for loading LangChain prompts | `kedro_datasets_experimental.langchain` |
-| `pypdf.PDFDataset` | Kedro dataset to read PDF files and extract text using pypdf | `kedro_datasets_experimental.pypdf` |
-| `langfuse.LangfusePromptDataset` | Kedro dataset for managing Langfuse prompts | `kedro_datasets_experimental.langfuse` |
-
+| Type                               | Description                                                      | Location                                |
+|------------------------------------|------------------------------------------------------------------|-----------------------------------------|
+| `langfuse.LangfuseTraceDataset`    | Kedro dataset to provide Langfuse tracing clients and callbacks   | `kedro_datasets_experimental.langfuse`  |
+| `langchain.LangChainPromptDataset` | Kedro dataset for loading LangChain prompts                       | `kedro_datasets_experimental.langchain` |
+| `pypdf.PDFDataset`                 | Kedro dataset to read PDF files and extract text using pypdf      | `kedro_datasets_experimental.pypdf`     |
+| `langfuse.LangfusePromptDataset`   | Kedro dataset for managing Langfuse prompts                       | `kedro_datasets_experimental.langfuse`  |

 ## Bug fixes and other changes

 - Add HTMLPreview type.
@@ -29,8 +30,10 @@ Many thanks to the following Kedroids for contributing PRs to this release:

 - [Guillaume Tauzin](https://github.com/gtauzin)
+- [gitgud5000](https://github.com/gitgud5000)

 # Release 8.1.0
+
 ## Major features and improvements

 - Added the following new experimental datasets:

@@ -39,8 +42,6 @@ Many thanks to the following Kedroids for contributing PRs to this release:
 | ------------------------------ | ------------------------------------------------------------- | ------------------------------------ |
 | `polars.PolarsDatabaseDataset` | A dataset to load and save data to a SQL backend using Polars | `kedro_datasets_experimental.polars` |

-- Added `mode` save argument to `ibis.TableDataset`, supporting "append", "overwrite", "error"/"errorifexists", and "ignore" save modes. The deprecated `overwrite` save argument is mapped to `mode` for backward compatibility and will be removed in a future release. Specifying both `mode` and `overwrite` results in an error.
-
 ## Bug fixes and other changes

 - Added primary key constraint to BaseTable.
@@ -82,7 +83,6 @@ Many thanks to the following Kedroids for contributing PRs to this release:
 - [Seohyun Park](https://github.com/soyamimi)
 - [Daniel Russell-Brain](https://github.com/killerfridge)

-
 # Release 7.0.0

 ## Major features and improvements
@@ -116,7 +116,6 @@ Many thanks to the following Kedroids for contributing PRs to this release:
 - [Abhishek Bhatia](https://github.com/abhi8893)
 - [Guillaume Tauzin](https://github.com/gtauzin)

-
 # Release 6.0.0

 ## Major features and improvements
diff --git a/kedro-datasets/kedro_datasets/ibis/table_dataset.py b/kedro-datasets/kedro_datasets/ibis/table_dataset.py
index 879efa78a..50eacebc2 100644
--- a/kedro-datasets/kedro_datasets/ibis/table_dataset.py
+++ b/kedro-datasets/kedro_datasets/ibis/table_dataset.py
@@ -33,7 +33,7 @@ class SaveMode(StrEnum):


 class TableDataset(ConnectionMixin, AbstractDataset[ir.Table, ir.Table]):
-    """`TableDataset` loads/saves data from/to Ibis table expressions.
+    """``TableDataset`` loads/saves data from/to Ibis table expressions.

     Examples:
         Using the [YAML API](https://docs.kedro.org/en/stable/catalog-data/data_catalog_yaml_examples/):
@@ -58,7 +58,7 @@ class TableDataset(ConnectionMixin, AbstractDataset[ir.Table, ir.Table]):
           save_args:
             materialized: view
             mode: overwrite
-        ```
+        ```

         Using the [Python API](https://docs.kedro.org/en/stable/catalog-data/advanced_data_catalog_usage/):

@@ -96,6 +96,7 @@ def __init__(  # noqa: PLR0913
         table_name: str,
         database: str | None = None,
         connection: dict[str, Any] | None = None,
+        credentials: dict[str, Any] | None = None,
         load_args: dict[str, Any] | None = None,
         save_args: dict[str, Any] | None = None,
         metadata: dict[str, Any] | None = None,
@@ -126,6 +127,9 @@ def __init__(  # noqa: PLR0913
                 in a multi-level table hierarchy.
             connection: Configuration for connecting to an Ibis backend.
                 If not provided, connect to DuckDB in in-memory mode.
+            credentials: Connection information (e.g.
+                user, password, token, account). If provided, these values
+                override the base `connection` configuration.
             load_args: Additional arguments passed to the Ibis backend's
                 `read_{file_format}` method.
             save_args: Additional arguments passed to the Ibis backend's
@@ -144,7 +148,9 @@ def __init__(  # noqa: PLR0913
         self._table_name = table_name
         self._database = database
-        self._connection_config = connection or self.DEFAULT_CONNECTION_CONFIG
+        _connection_config = connection or self.DEFAULT_CONNECTION_CONFIG
+        _credentials = deepcopy(credentials) or {}
+        self._connection_config = {**_connection_config, **_credentials}
         self.metadata = metadata

         # Set load and save arguments, overwriting defaults if provided.
diff --git a/kedro-datasets/static/jsonschema/kedro-catalog-1.0.0.json b/kedro-datasets/static/jsonschema/kedro-catalog-1.0.0.json
index 32d7fc688..74cbe3dfa 100644
--- a/kedro-datasets/static/jsonschema/kedro-catalog-1.0.0.json
+++ b/kedro-datasets/static/jsonschema/kedro-catalog-1.0.0.json
@@ -512,6 +512,10 @@
             "type": ["string", "null"],
             "description": "The name of the database to read from or create in. Can be a dotted string or tuple for multi-level hierarchy."
           },
+          "credentials": {
+            "type": ["object", "null"],
+            "description": "Credentials required to get access to the underlying database."
+          },
           "connection": {
             "type": "object",
             "description": "Configuration for connecting to an Ibis backend. E.g. {\"backend\": \"duckdb\", \"database\": \"company.db\"}. If not provided, defaults to DuckDB in-memory."
diff --git a/kedro-datasets/tests/ibis/test_table_dataset.py b/kedro-datasets/tests/ibis/test_table_dataset.py
index 0cfcac7de..ce0da804d 100644
--- a/kedro-datasets/tests/ibis/test_table_dataset.py
+++ b/kedro-datasets/tests/ibis/test_table_dataset.py
@@ -38,12 +38,24 @@ def connection_config(request, database):
     )


+@pytest.fixture(params=[_SENTINEL])
+def credentials_config(request, database):
+    return (
+        None
+        if request.param is _SENTINEL  # `None` is a valid value to test
+        else request.param
+    )
+
+
 @pytest.fixture
-def table_dataset(database_name, connection_config, load_args, save_args):
+def table_dataset(
+    database_name, connection_config, credentials_config, load_args, save_args
+):
     ds = TableDataset(
         table_name="test",
         database=database_name,
         connection=connection_config,
+        credentials=credentials_config,
         load_args=load_args,
         save_args=save_args,
     )
@@ -348,6 +360,61 @@ def test_connection_config(self, mocker, table_dataset, connection_config, key):
         table_dataset.load()
         assert ("ibis", key) in table_dataset._connections

+    @pytest.mark.parametrize(
+        ("connection_config", "credentials_config", "key"),
+        [
+            (
+                {"backend": "duckdb", "database": "file.db", "extensions": ["spatial"]},
+                {"user": "admin", "password": "secret"},  # pragma: allowlist secret
+                (
+                    ("backend", "duckdb"),
+                    ("database", "file.db"),
+                    ("extensions", ("spatial",)),
+                    ("password", "secret"),
+                    ("user", "admin"),
+                ),
+            ),
+            (
+                [],
+                {
+                    "host": "xxx.sql.azuresynapse.net",
+                    "database": "xxx",
+                    "query": {"driver": "ODBC Driver 17 for SQL Server"},
+                    "backend": "mssql",
+                },
+                (
+                    ("backend", "mssql"),
+                    ("database", "xxx"),
+                    ("host", "xxx.sql.azuresynapse.net"),
+                    ("query", (("driver", "ODBC Driver 17 for SQL Server"),)),
+                ),
+            ),
+            (
+                None,
+                None,
+                (
+                    ("backend", "duckdb"),
+                    ("database", ":memory:"),
+                ),
+            ),
+            (
+                {"backend": "duckdb", "database": "file.db"},
+                {"backend": "mssql", "password": "secret"},  # pragma: allowlist secret
+                (
+                    ("backend", "mssql"),
+                    ("database", "file.db"),
+                    ("password", "secret"),
+                ),
+            ),
+        ],
+        indirect=["connection_config", "credentials_config"],
+    )
+    def test_connection_config_with_credentials(self, mocker, table_dataset, key):
+        backend = table_dataset._connection_config["backend"]
+        mocker.patch(f"ibis.{backend}")
+        table_dataset.load()
+        assert ("ibis", key) in table_dataset._connections
+
     def test_save_data_loaded_using_file_dataset(self, file_dataset, table_dataset):
         """Test interoperability of Ibis datasets sharing a database."""
         dummy_table = file_dataset.load()
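
A minimal usage sketch of the new `credentials` argument, assuming the merge behaviour shown in `table_dataset.py` above; the table names, DuckDB file, and user/password values are illustrative only, not part of this change:

```python
from kedro_datasets.ibis import TableDataset

# Base connection details live in `connection`; secrets can be supplied separately
# via `credentials` (for example, resolved from conf/local/credentials.yml).
# Keys present in both dicts are taken from `credentials`, mirroring
# `self._connection_config = {**_connection_config, **_credentials}` in this diff.
cars = TableDataset(
    table_name="cars",  # illustrative table name
    connection={"backend": "duckdb", "database": "company.db"},
    credentials={"user": "admin", "password": "secret"},  # illustrative secrets
)

# Omitting both arguments still falls back to the default in-memory DuckDB backend,
# as the None/None case in the new test parametrization checks.
scratch = TableDataset(table_name="scratch")
```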
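And a similarly hedged sketch of the `mode` save argument mentioned in the release notes above; the sample data is made up, the target table is assumed to already exist, and backend support for each mode ("append", "overwrite", "error"/"errorifexists", "ignore") may vary:

```python
import ibis

from kedro_datasets.ibis import TableDataset

# Append rows to an existing table instead of replacing it; "overwrite",
# "error"/"errorifexists" and "ignore" are the other modes listed in the release notes.
dataset = TableDataset(
    table_name="cars",
    connection={"backend": "duckdb", "database": "company.db"},
    save_args={"materialized": "table", "mode": "append"},
)

dataset.save(ibis.memtable({"model": ["Ranger"], "mpg": [21.0]}))
```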