Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
9024730
Fix accidental boolean value where a string is intended.
asnare Oct 2, 2025
7225971
Implement support for --overrides-file and --target-technology argume…
asnare Oct 2, 2025
aeb6c3f
Mark static fixtures as session-scope.
asnare Oct 2, 2025
cbdec92
Refactor Bladebridge integration tests to use the CLI entrypoint.
asnare Oct 2, 2025
f1fea8c
Merge branch 'main' into bladebridge-dialect-options
asnare Oct 3, 2025
aacadf2
Formatting.
asnare Oct 3, 2025
da249c5
Merge branch 'main' into bladebridge-dialect-options
asnare Oct 6, 2025
af75dd0
Missed a stray encoding.
asnare Oct 6, 2025
3dbab6f
Ensure existing unit tests properly set and verify the transpiler_opt…
asnare Oct 6, 2025
5a1eb0e
Fix some old invalid fixtures for dialect flags that include a leadin…
asnare Oct 6, 2025
734d469
Internal documentation, to help the next person who looks at this.
asnare Oct 6, 2025
77e733f
Split some TODO markers into multiple actions.
asnare Oct 6, 2025
070dd38
Don't bail out if transpiler_options is not a mapping: log a warning …
asnare Oct 6, 2025
13cd9ed
Properly detect no answer from the user.
asnare Oct 6, 2025
ec0f36b
Internal documentation on why `prompts.question()` is being used this…
asnare Oct 6, 2025
ae3077c
Don't prompt for optional transpiler-specific options: they're not ne…
asnare Oct 6, 2025
79e9d40
Internal documentation for how the transpiler-specific options are co…
asnare Oct 6, 2025
0c711c3
Merge branch 'main' into bladebridge-dialect-options
asnare Oct 6, 2025
96b1925
Simplify function.
asnare Oct 6, 2025
a95f4c2
Integration tests that cover the overrides-file and target-technology…
asnare Oct 6, 2025
10ba183
Update documentation to include the new options.
asnare Oct 6, 2025
9b5738d
Merge branch 'main' into bladebridge-dialect-options
asnare Oct 6, 2025
f280424
Merge branch 'main' into bladebridge-dialect-options
asnare Oct 7, 2025
5d50103
Capture and log the content of errors.log during transpiler tests.
asnare Oct 7, 2025
a600885
Capture the logs from the Bladebridge LSP server during transpile tests.
asnare Oct 7, 2025
b79a94a
Formatting.
asnare Oct 7, 2025
52b9fd0
Merge branch 'main' into bladebridge-dialect-options
asnare Oct 8, 2025
a65907f
Encapsulate the idea of an option being optional within the option, r…
asnare Oct 8, 2025
ff56327
Revert change in PR in this PR; another will fix this anyway.
asnare Oct 8, 2025
b6e7793
Update the `labs.yml` description of the new arguments.
asnare Oct 8, 2025
39a35b3
Merge branch 'main' into bladebridge-dialect-options
asnare Oct 9, 2025
a1f6db4
Merge branch 'main' into bladebridge-dialect-options
asnare Oct 9, 2025
268b31e
Merge branch 'bladebridge-dialect-options' of github.com:databricksla…
asnare Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ commands:
- name: source-dialect
description: Dialect name as selected during `install-transpile` or refer to documentation
default: null
- name: overrides-file
description: Path to a file containing transpiler overrides, if supported by the transpiler in use.
default: null
- name: target-technology
description: Target technology to use for code generation, if supported by the transpiler in use.
default: null
- name: input-source
description: Input Script Folder or File
default: null
Expand All @@ -37,7 +43,7 @@ commands:
default: null
- name: skip-validation
description: Validate Transpiled Code, default True validation skipped, False validate
default: true
default: "true"
- name: catalog-name
description: Catalog Name Applicable only when Validation Mode is DATABRICKS
default: null
Expand Down
98 changes: 91 additions & 7 deletions src/databricks/labs/lakebridge/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from databricks.labs.blueprint.cli import App
from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug
from databricks.labs.blueprint.installation import RootJsonValue
from databricks.labs.blueprint.installation import RootJsonValue, JsonObject, JsonValue
from databricks.labs.blueprint.tui import Prompts


Expand All @@ -25,7 +25,7 @@
PROFILER_SOURCE_SYSTEM,
)

from databricks.labs.lakebridge.config import TranspileConfig
from databricks.labs.lakebridge.config import TranspileConfig, LSPConfigOptionV1
from databricks.labs.lakebridge.contexts.application import ApplicationContext
from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
Expand Down Expand Up @@ -86,26 +86,33 @@ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str):


@lakebridge.command
def transpile(
def transpile( # pylint: disable=too-many-arguments
*,
w: WorkspaceClient,
transpiler_config_path: str | None = None,
source_dialect: str | None = None,
overrides_file: str | None = None,
target_technology: str | None = None,
input_source: str | None = None,
output_folder: str | None = None,
error_file_path: str | None = None,
skip_validation: str | None = None,
catalog_name: str | None = None,
schema_name: str | None = None,
ctx: ApplicationContext | None = None,
transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
):
"""Transpiles source dialect to databricks dialect"""
ctx = ApplicationContext(w)
if ctx is None:
ctx = ApplicationContext(w)
del w
logger.debug(f"Preconfigured transpiler config: {ctx.transpile_config!r}")
ctx.add_user_agent_extra("cmd", "execute-transpile")
checker = _TranspileConfigChecker(ctx.transpile_config, ctx.prompts, transpiler_repository)
checker.use_transpiler_config_path(transpiler_config_path)
checker.use_source_dialect(source_dialect)
checker.use_overrides_file(overrides_file)
checker.use_target_technology(target_technology)
checker.use_input_source(input_source)
checker.use_output_folder(output_folder)
checker.use_error_file_path(error_file_path)
Expand Down Expand Up @@ -217,6 +224,36 @@ def use_source_dialect(self, source_dialect: str | None) -> None:
logger.debug(f"Pending source_dialect override: {source_dialect!r}")
self._source_dialect_override = source_dialect

@staticmethod
def _validate_overrides_file(overrides_file: str, msg: str) -> None:
"""Validate the overrides file: it must be a valid path that exists."""
# Note: in addition to this check, later we verify the transpiler supports it.
if not Path(overrides_file).exists():
raise_validation_exception(msg)

def use_overrides_file(self, overrides_file: str | None) -> None:
if overrides_file is not None:
logger.debug(f"Setting overrides_file to: {overrides_file!r}")
msg = f"Invalid path for '--overrides-file', does not exist: {overrides_file}"
self._validate_overrides_file(overrides_file, msg)
try:
self._set_config_transpiler_option("overrides-file", overrides_file)
except ValueError:
# TODO: Update the `config.yml` format to disallow incompatible `transpiler_options`.
msg = "Cannot use --overrides-file; workspace config.yml has incompatible transpiler_options."
raise_validation_exception(msg)

def use_target_technology(self, target_technology: str | None) -> None:
if target_technology is not None:
logger.debug(f"Setting target_technology to: {target_technology!r}")
# Cannot validate this here: depends on the transpiler engine, and will be checked later.
try:
self._set_config_transpiler_option("target-tech", target_technology)
except ValueError:
# TODO: Update the `config.yml` format to disallow incompatible `transpiler_options`.
msg = "Cannot use --target-technology; workspace config.yml has incompatible transpiler_options."
raise_validation_exception(msg)

@staticmethod
def _validate_input_source(input_source: str, msg: str) -> None:
"""Validate the input source: it must be a path that exists."""
Expand Down Expand Up @@ -321,6 +358,19 @@ def use_schema_name(self, schema_name: str | None) -> None:
logger.debug(f"Setting schema_name to: {schema_name!r}")
self._config = dataclasses.replace(self._config, schema_name=schema_name)

def _set_config_transpiler_option(self, flag: str, value: str) -> None:
transpiler_options: JsonObject
match self._config.transpiler_options:
case None:
transpiler_options = {flag: value}
case Mapping() as found_options:
transpiler_options = {**found_options, flag: value}
case found_options:
# TODO: Update `config.yml' to constrain `transpiler_options` to be a dict[str, str].
msg = f"Incompatible transpiler options configured, must be a mapping: {found_options!r}"
raise ValueError(msg)
self._config = dataclasses.replace(self._config, transpiler_options=transpiler_options)

def _configure_transpiler_config_path(self, source_dialect: str) -> TranspileEngine | None:
"""Configure the transpiler config path based on the requested source dialect."""
# Names of compatible transpiler engines for the given dialect.
Expand Down Expand Up @@ -448,18 +498,52 @@ def _check_transpiler_options(self, engine: TranspileEngine) -> None:
assert self._config.source_dialect is not None, "Source dialect must be set before checking transpiler options."
options_for_dialect = engine.options_for_dialect(self._config.source_dialect)
transpiler_options = self._config.transpiler_options
if not isinstance(transpiler_options, Mapping):
return
if transpiler_options is None:
transpiler_options = {}
elif not isinstance(transpiler_options, Mapping):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we properly type this since we know what it looks like instead of passing around Maps e.g.

class  TranspilerOptions
  target_tech: str | None
  overrides_file: Path | None

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't really know what it looks like though: it's filled in during the questionnaire based on a configuration file bundled with each LSP server.

It certainly can be more constrained than RootJsonValue, but we can't just use a static dataclass.

This PR adds specific support for 2 known options, but there might be others passed via this mechanism. (The code is supposed to prompt for missing values, but #2086 describes how this is futile and leads to the CLI simply hanging.)

logger.warning(f"Ignoring transpiler_options in config.yml, must be a mapping: {transpiler_options!r}")
transpiler_options = {}
# Only checks if the option is present, does not validate the value.
# TODO: Validate the value for CHOICE/CONFIRM options.
# TODO: Handle FORCE options: these are fixed by the transpiler, and cannot be overridden.
checked_options = {
option.flag: (
transpiler_options[option.flag]
if option.flag in transpiler_options
else option.prompt_for_value(self._prompts)
else self._handle_missing_transpiler_option(option)
)
for option in options_for_dialect
}
self._config = dataclasses.replace(self._config, transpiler_options=checked_options)

def _handle_missing_transpiler_option(self, option: LSPConfigOptionV1) -> JsonValue:
# Semantics during configuration:
# - Entries are present in the config file for all options the LSP server needs for a dialect.
# - If a value is `None`, it means the user wants the value to be left unset.
# - There is no 'provide it later' option: either it's set, or it's unset.
# As a corner case, if there is no entry present it means the user wasn't prompted. Here we have
# some complexity. We have two ways of obtaining a value:
# - The user could provide it on the command-line, using --target-technology or --overrides-file.
# Problem: via command-line options there's no way to indicate 'no value'.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not use_target_technology and use_overrides_file set <none> if user does not provide it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's not the way the code was authored, and configuration files are already out there.

# - We could prompt for it, assuming the user is running interactively.
# In terms of what is required by the option:
# - If the option has a default of <none>, it means that no value is required.
# - Everything else requires a value.
#
# This leads to the following business rules:
# - If the option has a default of <none> that means that no value is required, no further action is required.
# - Otherwise, a value is required: prompt for it.
#
# TODO: When adding non-interactive support, the otherwise branch need to be modified:
# 1. If it can be provided by the command-line, fail and ask the user to provide it.
# 2. If it cannot be provided by the command-line, prompt for it if we are running interactively.
# 3. If we cannot prompt because we are not running interactively, use the default if there is one.
# 4. Fail: the only way to provide a value is via the config.yml, which can be set via 'install-transpile'.

if option.default == "<none>":
return None
return option.prompt_for_value(self._prompts)

def check(self) -> tuple[TranspileConfig, TranspileEngine]:
"""Checks that all configuration parameters are present and valid."""
logger.debug(f"Checking config: {self._config!r}")
Expand Down
10 changes: 8 additions & 2 deletions src/databricks/labs/lakebridge/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,15 @@ def prompt_for_value(self, prompts: Prompts) -> JsonValue:
if self.method == LSPPromptMethod.CONFIRM:
return prompts.confirm(self.prompt)
if self.method == LSPPromptMethod.QUESTION:
default = self.default if self.default else "None"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sundarshankar89: This is the bit where I think it would insert the string "None" as a value into the config.yml instead of null.

# Hack to:
# - trick prompts.question() into indicating that no answer is required;
# - allow no answer to be given.
# Normally prompts.confirm() requires an answer, or returns the default, and the default can't be None.
# Note: LSP servers use '<none>' as a default to indicate that no answer is required.
no_answer = "<none>"
default = self.default if self.default else no_answer
result = prompts.question(self.prompt, default=default)
if result == "<none>":
if result == no_answer:
return None
return result
if self.method == LSPPromptMethod.CHOICE:
Expand Down
5 changes: 5 additions & 0 deletions src/databricks/labs/lakebridge/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,11 @@ def _prompt_for_transpiler_options(self, transpiler_name: str, source_dialect: s
config_options = self._transpiler_repository.transpiler_config_options(transpiler_name, source_dialect)
if len(config_options) == 0:
return None
# Semantics here are different from the other properties of a TranspileConfig. Specifically:
# - Entries are present for all options.
# - If the value is None, it means the user chose to not provide a value. (This differs to other
# attributes, where None means the user chose to provide a value later.)
# - There is no way to express 'provide a value later'.
return {option.flag: option.prompt_for_value(self._prompts) for option in config_options}

def _configure_catalog(self) -> str:
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def mock_data_source():
return MockDataSource({}, {})


@pytest.fixture
@pytest.fixture(scope="session")
def bladebridge_artifact() -> Path:
artifact = (
Path(__file__).parent
Expand All @@ -298,7 +298,7 @@ def bladebridge_artifact() -> Path:
return artifact


@pytest.fixture
@pytest.fixture(scope="session")
def morpheus_artifact() -> Path:
artifact = (
Path(__file__).parent
Expand Down
10 changes: 3 additions & 7 deletions tests/integration/transpile/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,10 @@


def assert_sql_outputs(output_folder: Path, expected_sql: str, expected_failure_sql: str) -> None:
assert (output_folder / "create_ddl.sql").exists()
with open(output_folder / "create_ddl.sql", "r", encoding="utf-8") as f:
actual_sql = f.read()
assert actual_sql.strip() == expected_sql.strip()
actual_sql = (output_folder / "create_ddl.sql").read_text(encoding="utf-8")
actual_failure_sql = (output_folder / "dummy_function.sql").read_text(encoding="utf-8")

assert (output_folder / "dummy_function.sql").exists()
with open(output_folder / "dummy_function.sql", "r", encoding="utf-8") as f:
actual_failure_sql = f.read()
assert actual_sql.strip() == expected_sql.strip()
assert actual_failure_sql.strip() == expected_failure_sql.strip()


Expand Down
Loading
Loading