diff --git a/labs.yml b/labs.yml
index 82481aadf..5ffdcc5ed 100644
--- a/labs.yml
+++ b/labs.yml
@@ -78,3 +78,9 @@ commands:
   - name: configure-reconcile
     description: Configure 'reconcile' dependencies
+
+  - name: execute-database-profiler
+    description: Profile the source system database
+    flags:
+      - name: source-tech
+        description: (Optional) The technology/platform of the sources to Profile
diff --git a/src/databricks/labs/lakebridge/assessments/_constants.py b/src/databricks/labs/lakebridge/assessments/_constants.py
index 4f81c3885..9a793b19d 100644
--- a/src/databricks/labs/lakebridge/assessments/_constants.py
+++ b/src/databricks/labs/lakebridge/assessments/_constants.py
@@ -8,7 +8,7 @@
 }
 # TODO modify this PLATFORM_TO_SOURCE_TECHNOLOGY.keys() once all platforms are supported
-PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]
+PROFILER_SOURCE_SYSTEM = ["synapse"]
 # This flag indicates whether a connector is required for the source system when pipeline is trigger
diff --git a/src/databricks/labs/lakebridge/cli.py b/src/databricks/labs/lakebridge/cli.py
index e309bcb62..2b031170b 100644
--- a/src/databricks/labs/lakebridge/cli.py
+++ b/src/databricks/labs/lakebridge/cli.py
@@ -21,10 +21,12 @@
 from databricks.labs.lakebridge.assessments.configure_assessment import create_assessment_configurator
-from databricks.labs.lakebridge.assessments import PROFILER_SOURCE_SYSTEM
+from databricks.labs.lakebridge.assessments import PROFILER_SOURCE_SYSTEM, PRODUCT_NAME
+from databricks.labs.lakebridge.assessments.profiler import Profiler
 from databricks.labs.lakebridge.config import TranspileConfig, LSPConfigOptionV1
 from databricks.labs.lakebridge.contexts.application import ApplicationContext
+from databricks.labs.lakebridge.connections.credential_manager import cred_file
 from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
 from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
 from databricks.labs.lakebridge.install import installer
@@ -701,18 +703,19 @@ def configure_secrets(*, w: WorkspaceClient) -> None:
     recon_conf.prompt_and_save_connection_details()
-@lakebridge.command(is_unauthenticated=True)
-def configure_database_profiler() -> None:
-    """[Experimental] Install the lakebridge Assessment package"""
-    prompts = Prompts()
-
-    # Prompt for source system
-    source_system = str(
-        prompts.choice("Please select the source system you want to configure", PROFILER_SOURCE_SYSTEM)
-    ).lower()
+@lakebridge.command
+def configure_database_profiler(w: WorkspaceClient) -> None:
+    """[Experimental] Prompt for a source technology and run its assessment configurator"""
+    ctx = ApplicationContext(w)
+    ctx.add_user_agent_extra("cmd", "configure-profiler")
+    prompts = ctx.prompts
+    source_tech = prompts.choice("Select the source technology", PROFILER_SOURCE_SYSTEM).lower()
+    ctx.add_user_agent_extra("profiler_source_tech", make_alphanum_or_semver(source_tech))
+    user = ctx.current_user
+    logger.debug(f"User: {user}")
     # Create appropriate assessment configurator
-    assessment = create_assessment_configurator(source_system=source_system, product_name="lakebridge", prompts=prompts)
+    assessment = create_assessment_configurator(source_system=source_tech, product_name=PRODUCT_NAME, prompts=prompts)
     assessment.run()
@@ -810,6 +813,36 @@ def analyze(
     logger.debug(f"User: {ctx.current_user}")
+@lakebridge.command
+def execute_database_profiler(w: WorkspaceClient, source_tech: str | None = None) -> None:
+    """Run the profiler extraction for the given source technology, prompting for one when omitted"""
+    ctx = ApplicationContext(w)
+    ctx.add_user_agent_extra("cmd", "execute-profiler")
+    prompts = ctx.prompts
+    if source_tech is None:
+        source_tech = prompts.choice("Select the source technology", PROFILER_SOURCE_SYSTEM)
+    source_tech = source_tech.lower()
+
+    if source_tech not in PROFILER_SOURCE_SYSTEM:
+        logger.error(f"Only the following source systems are supported: {PROFILER_SOURCE_SYSTEM}")
+        raise_validation_exception(f"Invalid source technology {source_tech}")
+
+    ctx.add_user_agent_extra("profiler_source_tech", make_alphanum_or_semver(source_tech))
+    user = ctx.current_user
+    logger.debug(f"User: {user}")
+    # Fail early when the credentials file holding the source connection details is missing
+    file = cred_file(PRODUCT_NAME)
+    if not file.exists():
+        raise_validation_exception(
+            f"Connection details not found. Please run `databricks labs lakebridge configure-database-profiler` "
+            f"to set up connection details for {source_tech}."
+        )
+    profiler = Profiler.create(source_tech)
+
+    # TODO: Add extractor logic to ApplicationContext instead of creating inside the Profiler class
+    profiler.profile()
+
+
 if __name__ == "__main__":
     lakebridge()
     if is_in_debug():