Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
dd726b5
feat: integrate Switch transpiler with Lakebridge installer
hiroyukinakazato-db Sep 30, 2025
febb62d
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Sep 30, 2025
fa26b4c
fix: remove undefined URLError from exception handling
hiroyukinakazato-db Sep 30, 2025
6511e20
refactor: streamline SwitchInstaller deployment logic and update tests
hiroyukinakazato-db Oct 3, 2025
33ea7de
refactor: simplify SwitchInstaller test structure and improve assertions
hiroyukinakazato-db Oct 3, 2025
d0c63c3
Merge remote-tracking branch 'origin/main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 3, 2025
7cb9ea9
feat: add Switch transpiler installer for Lakebridge integration
hiroyukinakazato-db Oct 7, 2025
467dea9
fix: support case-insensitive config lookup in SwitchInstaller
hiroyukinakazato-db Oct 8, 2025
57298b0
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 8, 2025
09c0eb8
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 9, 2025
8439314
refactor: separate Switch installation from workspace deployment
hiroyukinakazato-db Oct 9, 2025
5f66f3f
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 9, 2025
fae9880
feat: add llm-transpile command with Switch integration
hiroyukinakazato-db Oct 7, 2025
2ee157f
refactor: encapsulate Switch package path resolution in SwitchDeployment
hiroyukinakazato-db Oct 9, 2025
b736965
test: update Switch installation tests for refactored interface
hiroyukinakazato-db Oct 9, 2025
bacd5f6
fix: update error messages to include 'true' flag for install-transpi…
hiroyukinakazato-db Oct 9, 2025
21b6629
Merge branch 'main' into feature/llm-transpile
hiroyukinakazato-db Oct 9, 2025
81c32e5
fix: exclude wait_for_completion from Switch job parameters
hiroyukinakazato-db Oct 10, 2025
13bcc15
chore: update Switch wheel with wait_for_completion fix
hiroyukinakazato-db Oct 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ remorph_transpile/
/linter/src/main/antlr4/library/gen/
.databricks-login.json
.mypy_cache
.env
16 changes: 16 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ commands:
{{range .}}{{.total_files_processed}}\t{{.total_queries_processed}}\t{{.analysis_error_count}}\t{{.parsing_error_count}}\t{{.validation_error_count}}\t{{.generation_error_count}}\t{{.error_log_file}}
{{end}}

- name: llm-transpile
description: Transpile source code to Databricks using LLM Transpiler (Switch)
flags:
- name: input-source
description: Input Script Folder or File (local path)
default: null
- name: output-ws-folder
description: Output folder path (Databricks Workspace path starting with /Workspace/)
default: null
- name: source-dialect
description: Source dialect name (e.g., 'snowflake', 'teradata')
default: null

- name: reconcile
description: Reconcile source and target data residing on Databricks

Expand All @@ -59,6 +72,9 @@ commands:
- name: interactive
description: (Optional) Whether installing in interactive mode (`true|false|auto`); configuration settings are prompted for when interactive
default: auto
- name: include-llm-transpiler
description: (Optional) Whether to include LLM-based transpiler in installation ('true'|'false')
default: false

- name: describe-transpile
description: Describe installed transpilers
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ bad-functions = ["map", "input"]
# ignored-parents =

# Maximum number of arguments for function / method.
max-args = 12
max-args = 13

# Maximum number of attributes for a class (see R0902).
max-attributes = 13
Expand Down
239 changes: 237 additions & 2 deletions src/databricks/labs/lakebridge/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from databricks.labs.blueprint.cli import App
from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug
from databricks.labs.blueprint.installation import RootJsonValue
from databricks.labs.blueprint.installer import InstallState
from databricks.labs.blueprint.tui import Prompts


Expand All @@ -33,9 +34,10 @@
from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME
from databricks.labs.lakebridge.transpiler.describe import TranspilersDescription
from databricks.labs.lakebridge.transpiler.execute import transpile as do_transpile
from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine
from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig, LSPEngine
from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
from databricks.labs.lakebridge.transpiler.switch_runner import SwitchConfig, SwitchRunner
from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine

from databricks.labs.lakebridge.transpiler.transpile_status import ErrorSeverity
Expand Down Expand Up @@ -530,6 +532,234 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s
ctx.connect_config.cluster_id = cluster_id


@lakebridge.command
def llm_transpile(
    *,
    w: WorkspaceClient,
    input_source: str | None = None,
    output_ws_folder: str | None = None,
    source_dialect: str | None = None,
    transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
) -> None:
    """Transpile source code to Databricks using LLM Transpiler (Switch)

    Any of ``input_source``, ``output_ws_folder`` and ``source_dialect`` that is
    left as ``None`` is resolved by the configuration checker (which prompts for
    it). The value returned by the Switch run is printed to stdout as JSON.
    """
    app_ctx = ApplicationContext(w)
    app_ctx.add_user_agent_extra("cmd", "llm-transpile")
    logger.debug(f"User: {app_ctx.current_user}")

    # Feed the command-line values to the checker first; check() fills in and validates the rest.
    config_checker = _LLMTranspileConfigChecker(
        app_ctx.transpile_config,
        app_ctx.prompts,
        app_ctx.install_state,
        transpiler_repository,
    )
    config_checker.use_input_source(input_source)
    config_checker.use_output_ws_folder(output_ws_folder)
    config_checker.use_source_dialect(source_dialect)
    job_params = config_checker.check()

    outcome = _llm_transpile(app_ctx, job_params)
    print(json.dumps(outcome))


class _LLMTranspileConfigChecker:
    """Helper class for 'llm-transpile' command configuration validation.

    Typical use: call the ``use_*`` methods with the values given on the command
    line (each may be ``None``), then call :meth:`check`, which prompts for every
    value that is still missing, validates it, and returns the parameters as a dict.
    """

    _transpile_config: TranspileConfig | None
    _prompts: Prompts
    _install_state: InstallState
    _transpiler_repository: TranspilerRepository
    _input_source: str | None = None
    _output_ws_folder: str | None = None
    _source_dialect: str | None = None

    # Raised whenever no Switch dialects can be found. The flag is documented as taking
    # an explicit 'true'|'false' value, so the hint spells out the value.
    _NO_DIALECTS_MSG = (
        "No Switch dialects available. "
        "Install with: databricks labs lakebridge install-transpile --include-llm-transpiler true"
    )

    def __init__(
        self,
        transpile_config: TranspileConfig | None,
        prompts: Prompts,
        install_state: InstallState,
        transpiler_repository: TranspilerRepository,
    ):
        """Store the collaborators; nothing is validated or prompted for here."""
        self._transpile_config = transpile_config
        self._prompts = prompts
        self._install_state = install_state
        self._transpiler_repository = transpiler_repository

    @staticmethod
    def _validate_input_source_path(input_source: str, msg: str) -> None:
        """Validate the input source: it must be a non-blank path that exists.

        Reports ``msg`` through ``raise_validation_exception`` when the check fails.
        """
        # Path("") is the same as Path("."), which always exists, so a blank value
        # would otherwise be silently accepted as "the current directory".
        if not input_source.strip() or not Path(input_source).exists():
            raise_validation_exception(msg)

    def use_input_source(self, input_source: str | None) -> None:
        """Record and validate ``--input-source``; ``None`` leaves it to be prompted for."""
        if input_source is None:
            return
        logger.debug(f"Setting input_source to: {input_source!r}")
        self._validate_input_source_path(input_source, f"Invalid path for '--input-source': {input_source}")
        self._input_source = input_source

    def _prompt_input_source(self) -> None:
        """Ask for the input source, offering the transpile config's value as the fallback."""
        default_input = self._transpile_config.input_source if self._transpile_config else None

        if default_input:
            prompt_text = f"Enter input source path (press <enter> for default: {default_input})"
            # A blank answer falls back to the configured input source.
            answer = self._prompts.question(prompt_text).strip() or default_input
        else:
            answer = self._prompts.question("Enter input source path (directory or file)").strip()

        self._input_source = answer
        logger.debug(f"Setting input_source to: {answer!r}")
        self._validate_input_source_path(answer, f"Invalid input source: {answer}")

    def _check_input_source(self) -> None:
        """Prompt for the input source only when it was not supplied."""
        if self._input_source is None:
            self._prompt_input_source()

    def use_output_ws_folder(self, output_ws_folder: str | None) -> None:
        """Record and validate ``--output-ws-folder``; ``None`` leaves it to be prompted for."""
        if output_ws_folder is None:
            return
        logger.debug(f"Setting output_ws_folder to: {output_ws_folder!r}")
        self._validate_output_ws_folder_path(
            output_ws_folder, f"Invalid path for '--output-ws-folder': {output_ws_folder}"
        )
        self._output_ws_folder = output_ws_folder

    @staticmethod
    def _validate_output_ws_folder_path(output_ws_folder: str, msg: str) -> None:
        """Validate output folder is a Workspace path (it must start with ``/Workspace/``)."""
        if not output_ws_folder.startswith("/Workspace/"):
            raise_validation_exception(f"{msg}. Must start with /Workspace/")

    def _prompt_output_ws_folder(self) -> None:
        """Ask for the output folder and validate the answer before storing it."""
        prompted_output_ws_folder = self._prompts.question(
            "Enter output folder path (Databricks Workspace path starting with /Workspace/)"
        ).strip()
        logger.debug(f"Setting output_ws_folder to: {prompted_output_ws_folder!r}")
        self._validate_output_ws_folder_path(
            prompted_output_ws_folder, f"Invalid output folder: {prompted_output_ws_folder}"
        )
        self._output_ws_folder = prompted_output_ws_folder

    def _check_output_ws_folder(self) -> None:
        """Prompt for the output folder only when it was not supplied."""
        if self._output_ws_folder is None:
            self._prompt_output_ws_folder()

    def use_source_dialect(self, source_dialect: str | None) -> None:
        """Record ``--source-dialect``; it is validated later by :meth:`check`."""
        if source_dialect is not None:
            logger.debug(f"Setting source_dialect to: {source_dialect!r}")
        self._source_dialect = source_dialect

    def _prompt_source_dialect(self, available_dialects: set[str] | None = None) -> None:
        """Prompt for source dialect from Switch dialects.

        ``available_dialects`` lets a caller that has already loaded the dialects
        pass them in; when omitted they are loaded from the Switch config here.
        """
        if available_dialects is None:
            available_dialects = self._get_switch_dialects()

        if not available_dialects:
            raise_validation_exception(self._NO_DIALECTS_MSG)

        logger.debug(f"Available dialects: {available_dialects!r}")
        self._source_dialect = self._prompts.choice("Select the source dialect:", sorted(available_dialects))

    def _check_source_dialect(self) -> None:
        """Validate and prompt for source dialect if not provided."""
        # Load once and share with the prompt, so the config file is not parsed twice.
        available_dialects = self._get_switch_dialects()

        if self._source_dialect is None:
            self._prompt_source_dialect(available_dialects)
        elif not available_dialects:
            # An empty "Available:" list is useless to the user; point at the install command instead.
            raise_validation_exception(self._NO_DIALECTS_MSG)
        elif self._source_dialect not in available_dialects:
            supported = ", ".join(sorted(available_dialects))
            raise_validation_exception(f"Invalid source-dialect: '{self._source_dialect}'. Available: {supported}")

    def _load_switch_config(self, failure_msg: str) -> LSPConfig | None:
        """Load the Switch transpiler's config file via ``LSPConfig.load``.

        Returns ``None`` when the file does not exist, or when loading raises
        ``OSError``/``ValueError`` (a warning prefixed with ``failure_msg`` is logged).
        """
        config_path = self._transpiler_repository.transpiler_config_path("Switch")
        if not config_path.exists():
            return None

        try:
            return LSPConfig.load(config_path)
        except (OSError, ValueError) as e:
            logger.warning(f"{failure_msg}: {e}")
            return None

    def _get_switch_dialects(self) -> set[str]:
        """Get Switch dialects from config.yml using LSPConfig (empty set when unavailable)."""
        lsp_config = self._load_switch_config("Failed to load Switch dialects")
        if lsp_config is None:
            return set()
        return set(lsp_config.remorph.dialects)

    def _get_switch_options_with_defaults(self) -> dict[str, str]:
        """Get default values for Switch options from config.yml.

        Only the options returned by ``options_for_dialect("all")`` are read;
        options with an empty default or the ``"<none>"`` placeholder are left out.
        """
        lsp_config = self._load_switch_config("Failed to load Switch options")
        if lsp_config is None:
            return {}

        result = {
            option.flag: option.default
            for option in lsp_config.options_for_dialect("all")
            if option.default and option.default != "<none>"
        }

        logger.debug(f"Loaded {len(result)} Switch options with defaults from config.yml")
        return result

    def _validate_switch_options(self, options: dict[str, str]) -> None:
        """Validate options against config.yml choices.

        Validation is skipped when the Switch config cannot be loaded.
        """
        lsp_config = self._load_switch_config("Failed to validate Switch options")
        if lsp_config is None:
            return

        for option in lsp_config.options_for_dialect("all"):
            # Options without declared choices accept any value.
            if option.flag not in options or not option.choices:
                continue
            value = options[option.flag]
            if value not in option.choices:
                raise_validation_exception(
                    f"Invalid value for '{option.flag}': {value!r}. Must be one of: {', '.join(option.choices)}"
                )

    def check(self) -> dict:
        """Validate all parameters and return configuration dict.

        The returned dict has the keys ``input_source``, ``output_ws_folder``,
        ``source_dialect``, ``switch_options`` and ``wait_for_completion``.
        """
        logger.debug("Checking llm-transpile configuration")

        self._check_input_source()
        self._check_output_ws_folder()
        self._check_source_dialect()

        switch_options = self._get_switch_options_with_defaults()
        self._validate_switch_options(switch_options)

        # wait_for_completion is reported as its own boolean, so it is removed from the Switch options.
        wait_for_completion = str(switch_options.pop("wait_for_completion", "false")).lower() == "true"

        return {
            "input_source": self._input_source,
            "output_ws_folder": self._output_ws_folder,
            "source_dialect": self._source_dialect,
            "switch_options": switch_options,
            "wait_for_completion": wait_for_completion,
        }


def _llm_transpile(ctx: ApplicationContext, params: dict) -> RootJsonValue:
    """Execute LLM transpilation via Switch job.

    The catalog, schema, volume and job id are read from a ``SwitchConfig`` built
    on the install state; ``params`` is forwarded to the runner as keyword arguments.
    """
    switch_config = SwitchConfig(ctx.install_state)
    switch_resources = switch_config.get_resources()
    switch_job_id = switch_config.get_job_id()

    switch_runner = SwitchRunner(ctx.workspace_client, ctx.installation)
    return switch_runner.run(
        catalog=switch_resources["catalog"],
        schema=switch_resources["schema"],
        volume=switch_resources["volume"],
        job_id=switch_job_id,
        **params,
    )


@lakebridge.command
def reconcile(*, w: WorkspaceClient) -> None:
"""[EXPERIMENTAL] Reconciles source to Databricks datasets"""
Expand Down Expand Up @@ -623,6 +853,7 @@ def install_transpile(
w: WorkspaceClient,
artifact: str | None = None,
interactive: str | None = None,
include_llm_transpiler: bool = False,
transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
) -> None:
"""Install or upgrade the Lakebridge transpilers."""
Expand All @@ -631,9 +862,13 @@ def install_transpile(
ctx.add_user_agent_extra("cmd", "install-transpile")
if artifact:
ctx.add_user_agent_extra("artifact-overload", Path(artifact).name)
if include_llm_transpiler:
ctx.add_user_agent_extra("include-llm-transpiler", "true")
user = w.current_user
logger.debug(f"User: {user}")
transpile_installer = installer(w, transpiler_repository, is_interactive=is_interactive)
transpile_installer = installer(
w, transpiler_repository, is_interactive=is_interactive, include_llm=include_llm_transpiler
)
transpile_installer.run(module="transpile", artifact=artifact)


Expand Down
9 changes: 9 additions & 0 deletions src/databricks/labs/lakebridge/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,13 @@ def prompt_for_value(self, prompts: Prompts) -> JsonValue:
raise ValueError(f"Unsupported prompt method: {self.method}")


@dataclass
class SwitchResourcesConfig:
    """Names of the catalog, schema and volume recorded for Switch."""

    # Catalog name.
    catalog: str
    # Schema name.
    schema: str
    # Volume name.
    volume: str


@dataclass
class TranspileConfig:
__file__ = "config.yml"
Expand All @@ -152,9 +159,11 @@ class TranspileConfig:
error_file_path: str | None = None
sdk_config: dict[str, str] | None = None
skip_validation: bool = False
include_llm: bool = False
catalog_name: str = "remorph"
schema_name: str = "transpiler"
transpiler_options: JsonValue = None
switch_resources: SwitchResourcesConfig | None = None

@property
def transpiler_path(self) -> Path | None:
Expand Down
14 changes: 14 additions & 0 deletions src/databricks/labs/lakebridge/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment
from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
from databricks.labs.lakebridge.deployment.recon import TableDeployment, JobDeployment, ReconDeployment
from databricks.labs.lakebridge.deployment.switch import SwitchDeployment
from databricks.labs.lakebridge.helpers.metastore import CatalogOperations
from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -119,13 +121,25 @@ def recon_deployment(self) -> ReconDeployment:
self.dashboard_deployment,
)

@cached_property
def switch_deployment(self) -> SwitchDeployment:
    """Build the ``SwitchDeployment`` for this context; cached after the first access."""
    # Arguments are gathered in the order the SwitchDeployment call receives them.
    deployment_args = (
        self.workspace_client,
        self.installation,
        self.install_state,
        self.product_info,
        self.job_deployment,
        TranspilerRepository.user_home(),
    )
    return SwitchDeployment(*deployment_args)

@cached_property
def workspace_installation(self) -> WorkspaceInstallation:
    """Build the ``WorkspaceInstallation`` for this context; cached after the first access."""
    # Arguments are gathered in the order the WorkspaceInstallation call receives them.
    installation_args = (
        self.workspace_client,
        self.prompts,
        self.installation,
        self.recon_deployment,
        self.switch_deployment,
        self.product_info,
        self.upgrades,
    )
    return WorkspaceInstallation(*installation_args)
Expand Down
Loading
Loading