Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improvement: add 'auto' as an option for auto_discovery #4043

Merged
merged 3 commits into from
Mar 9, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/guides/working_with_data/sql.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,13 +189,13 @@ marimo will automatically discover the database connection and display the datab

???+ note

By default, marimo auto-discovers databases and schemas, but not tables and columns (to avoid performance issues with large databases). You can configure this behavior in your `pyproject.toml` file:
By default, marimo auto-discovers databases and schemas. Table discovery defaults to `"auto"`, and columns are not auto-discovered (to avoid performance issues with large databases). You can configure this behavior in your `pyproject.toml` file. Each option accepts `true`, `false`, or `"auto"`. With `"auto"`, marimo decides whether to auto-discover based on the type of database: for example, Snowflake and BigQuery will not auto-discover tables and columns, while SQLite, Postgres, and MySQL will:

```toml title="pyproject.toml"
[tool.marimo.datasources]
auto_discover_schemas = true # Default: true
auto_discover_tables = false # Default: false - enable for table auto-discovery
auto_discover_columns = false # Default: false - enable for column auto-discovery
auto_discover_tables = "auto" # Default: "auto"
auto_discover_columns = "auto" # Default: false ("auto" is shown here as an example override)
```

## Interactive tutorial
Expand Down
6 changes: 3 additions & 3 deletions marimo/_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,9 @@ class DatasourcesConfig(TypedDict):
- `auto_discover_columns`: if `True`, include columns & table metadata in the datasource
"""

auto_discover_schemas: NotRequired[bool]
auto_discover_tables: NotRequired[bool]
auto_discover_columns: NotRequired[bool]
auto_discover_schemas: NotRequired[Union[bool, Literal["auto"]]]
auto_discover_tables: NotRequired[Union[bool, Literal["auto"]]]
auto_discover_columns: NotRequired[Union[bool, Literal["auto"]]]


@mddoc
Expand Down
7 changes: 4 additions & 3 deletions marimo/_islands/_island_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
from marimo._ast.cell import Cell, CellConfig
from marimo._ast.compiler import compile_cell
from marimo._messaging.cell_output import CellOutput
from marimo._output.formatting import as_html, mime_to_html
from marimo._output.utils import uri_encode_component
from marimo._plugins.ui import code_editor
from marimo._server.export import run_app_until_completion
from marimo._server.file_manager import AppFileManager
from marimo._server.file_router import AppFileRouter
from marimo._types.ids import CellId_t
Expand Down Expand Up @@ -86,6 +83,8 @@ def render(

- str: The HTML code.
"""
from marimo._output.formatting import as_html, mime_to_html
from marimo._plugins.ui import code_editor

is_reactive = (
is_reactive if is_reactive is not None else self._is_reactive
Expand Down Expand Up @@ -309,6 +308,8 @@ async def build(self) -> App:

- App: The built app.
"""
from marimo._server.export import run_app_until_completion

if self.has_run:
raise ValueError("You can only call build() once")

Expand Down
29 changes: 22 additions & 7 deletions marimo/_sql/engines.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2024 Marimo. All rights reserved.
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Optional, cast
from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast

from marimo import _loggers
from marimo._data.get_datasets import get_databases_from_duckdb
Expand Down Expand Up @@ -247,9 +247,9 @@ def _get_default_schema(self) -> Optional[str]:
def get_databases(
self,
*,
include_schemas: bool = False,
include_tables: bool = False,
include_table_details: bool = False,
include_schemas: Union[bool, Literal["auto"]],
include_tables: Union[bool, Literal["auto"]],
include_table_details: Union[bool, Literal["auto"]],
) -> list[Database]:
"""Get all databases from the engine.

Expand All @@ -271,10 +271,14 @@ def get_databases(

schemas = (
self._get_schemas(
include_tables=include_tables,
include_table_details=include_table_details,
include_tables=self._resolve_should_auto_discover(
include_tables
),
include_table_details=self._resolve_should_auto_discover(
include_table_details
),
)
if include_schemas
if self._resolve_should_auto_discover(include_schemas)
else []
)
databases.append(
Expand Down Expand Up @@ -460,6 +464,17 @@ def _get_generic_type(
LOGGER.debug("Failed to get generic type", exc_info=True)
return None

def _resolve_should_auto_discover(
    self,
    value: Union[bool, Literal["auto"]],
) -> bool:
    """Resolve an auto-discovery setting to a concrete boolean.

    Args:
        value: `True`, `False`, or the string `"auto"`.

    Returns:
        The result of `self._is_cheap_discovery()` when `value` is
        `"auto"`; otherwise `value` itself, unchanged.
    """
    if value == "auto":
        # Defer the decision to the engine: "auto" only enables discovery
        # when the engine reports that discovery is cheap.
        return self._is_cheap_discovery()
    return value

def _is_cheap_discovery(self) -> bool:
    """Return whether this engine's dialect is on the cheap-discovery list.

    The check is case-insensitive: `self.dialect` is lowercased and
    compared against `sqlite`, `mysql`, and `postgresql`.

    Returns:
        `True` if the dialect is one of the listed names, else `False`.
    """
    return self.dialect.lower() in ("sqlite", "mysql", "postgresql")


def _sql_type_to_data_type(type_str: str) -> DataType:
"""Convert SQL type string to DataType"""
Expand Down
2 changes: 1 addition & 1 deletion marimo/_sql/get_engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def engine_to_data_source_connection(
default_schema = engine.default_schema
databases = engine.get_databases(
include_schemas=config.get("auto_discover_schemas", True),
include_tables=config.get("auto_discover_tables", False),
include_tables=config.get("auto_discover_tables", "auto"),
include_table_details=config.get("auto_discover_columns", False),
)
elif isinstance(engine, DuckDBEngine):
Expand Down
53 changes: 53 additions & 0 deletions tests/_sql/test_sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from unittest import mock

import pytest

Expand Down Expand Up @@ -443,6 +444,58 @@ def test_sqlalchemy_get_databases(sqlite_engine: sa.Engine) -> None:
]


@pytest.mark.skipif(not HAS_SQLALCHEMY, reason="SQLAlchemy not installed")
def test_sqlalchemy_get_databases_auto(sqlite_engine: sa.Engine) -> None:
    """Test SQLAlchemyEngine get_databases method with 'auto' option.

    Covers both outcomes of 'auto': the real SQLite engine (where
    `_is_cheap_discovery()` returns True) and a patched engine that
    reports discovery as not cheap.
    """
    engine = SQLAlchemyEngine(
        sqlite_engine, engine_name=VariableName("test_sqlite")
    )

    # For SQLite, _is_cheap_discovery() returns True, so 'auto' should
    # behave like True
    databases = engine.get_databases(
        include_schemas="auto",
        include_tables="auto",
        include_table_details="auto",
    )

    # Should be equivalent to setting all params to True since sqlite is a
    # "cheap" dialect
    tables_main = get_expected_table("test", include_table_details=True)
    tables_my_schema = get_expected_table("test2", include_table_details=True)
    assert tables_main.columns == tables_my_schema.columns
    assert tables_main.primary_keys == tables_my_schema.primary_keys
    assert databases == [
        Database(
            name=":memory:",
            dialect="sqlite",
            schemas=[
                get_expected_schema("main", "test"),
                get_expected_schema("my_schema", "test2"),
            ],
            engine=VariableName("test_sqlite"),
        )
    ]

    # Test with a mock to simulate a non-cheap dialect
    with mock.patch.object(
        SQLAlchemyEngine, "_is_cheap_discovery", return_value=False
    ):
        # For a non-cheap dialect, 'auto' should behave like False
        databases = engine.get_databases(
            include_schemas="auto",
            include_tables="auto",
            include_table_details="auto",
        )

        # With include_schemas resolving to False, no schemas are listed
        assert databases == [
            Database(
                name=":memory:",
                dialect="sqlite",
                schemas=[],
                engine=VariableName("test_sqlite"),
            )
        ]


@pytest.mark.skipif(
not HAS_SQLALCHEMY or not HAS_PANDAS,
reason="SQLAlchemy and Pandas not installed",
Expand Down
Loading