diff --git a/docs/lakebridge/docs/reconcile/index.mdx b/docs/lakebridge/docs/reconcile/index.mdx
index 8a111d5162..e5da59035a 100644
--- a/docs/lakebridge/docs/reconcile/index.mdx
+++ b/docs/lakebridge/docs/reconcile/index.mdx
@@ -38,6 +38,76 @@ Refer to [Reconcile Configuration Guide](reconcile_configuration) for detailed i
 > 2. Setup the connection properties
 
+#### Option A: Using the Lakebridge credentials mechanism
+Reconcile connection properties are configured through a dynamic mapping of connection properties to values.
+Values can be used directly, loaded from environment variables, or read from Databricks secrets, depending on the `vault_type` configured in `reconcile.yml`:
+```yaml
+...
+creds_or_secret_scope:
+  vault_type: local
+  source_creds:
+    some_property = some_value
+```
+Or, to use Databricks secrets, set `vault_type: databricks`; each value must then be in the form `<secret_scope>/<secret_key>`:
+```yaml
+...
+creds_or_secret_scope:
+  vault_type: databricks
+  source_creds:
+    some_property = <secret_scope>/<secret_key>
+  ...
+```
+The expected connection properties under `source_creds` per data source are:
+
+**Snowflake**
+  ```yaml
+  sfUrl = [local_or_databricks_mapping]
+  account = [local_or_databricks_mapping]
+  sfUser = [local_or_databricks_mapping]
+  sfPassword = [local_or_databricks_mapping]
+  sfDatabase = [local_or_databricks_mapping]
+  sfSchema = [local_or_databricks_mapping]
+  sfWarehouse = [local_or_databricks_mapping]
+  sfRole = [local_or_databricks_mapping]
+  pem_private_key = [local_or_databricks_mapping]
+  pem_private_key_password = [local_or_databricks_mapping]
+  ```
+
+  :::note
+  For Snowflake authentication, either sfPassword or pem_private_key is required.
+  Priority is given to pem_private_key, and if it is not found, sfPassword will be used.
+  If neither is available, an exception will be raised.
+
+  When using an encrypted pem_private_key, you'll need to provide the pem_private_key_password.
+  This password is used to decrypt the private key for authentication.
+  :::
+
+**Oracle**
+  ```yaml
+  user = [local_or_databricks_mapping]
+  password = [local_or_databricks_mapping]
+  host = [local_or_databricks_mapping]
+  port = [local_or_databricks_mapping]
+  database = [local_or_databricks_mapping]
+  ```
+
+**MSSQL / Synapse**
+  ```yaml
+  user = [local_or_databricks_mapping]
+  password = [local_or_databricks_mapping]
+  host = [local_or_databricks_mapping]
+  port = [local_or_databricks_mapping]
+  database = [local_or_databricks_mapping]
+  encrypt = [local_or_databricks_mapping]
+  trustServerCertificate = [local_or_databricks_mapping]
+  ```
+
+#### Option B: Using secret scopes
+:::warning
+Deprecated in favor of the Lakebridge credentials mechanism
+:::
 Lakebridge-Reconcile manages connection properties by utilizing secrets stored in the Databricks workspace.
 Below is the default secret naming convention for managing connection properties.
@@ -66,17 +136,11 @@ Below are the connection properties required for each source:
   sfSchema = [schema]
   sfWarehouse = [warehouse_name]
   sfRole = [role_name]
-  pem_private_key = [pkcs8_pem_private_key]
-  pem_private_key_password = [pkcs8_pem_private_key]
   ```
 
   :::note
-  For Snowflake authentication, either sfPassword or pem_private_key is required.
-  Priority is given to pem_private_key, and if it is not found, sfPassword will be used.
-  If neither is available, an exception will be raised.
-
-  When using an encrypted pem_private_key, you'll need to provide the pem_private_key_password.
-  This password is used to decrypt the private key for authentication.
+  For Snowflake authentication, sfPassword is required. To use pem_private_key,
+  and optionally pem_private_key_password, please use the Lakebridge credentials mechanism.
 :::
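+
+For completeness, a minimal sketch of the `env` vault type mentioned under Option A, assuming
+(by analogy with the `databricks` form above) that each value names the environment variable to
+read; the variable name below is hypothetical:
+```yaml
+...
+creds_or_secret_scope:
+  vault_type: env
+  source_creds:
+    some_property = SOME_ENVIRONMENT_VARIABLE
+  ...
+```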
diff --git a/docs/lakebridge/docs/reconcile/recon_notebook.mdx b/docs/lakebridge/docs/reconcile/recon_notebook.mdx
index b2203bbd08..fb8ea71c34 100644
--- a/docs/lakebridge/docs/reconcile/recon_notebook.mdx
+++ b/docs/lakebridge/docs/reconcile/recon_notebook.mdx
@@ -72,12 +72,13 @@ class ReconcileConfig:
     secret_scope: str
     database_config: DatabaseConfig
     metadata_config: ReconcileMetadataConfig
+    creds_or_secret_scope: ReconcileCredentialConfig | str | None = None
 ```
 
 Parameters:
 - `data_source`: The data source to be reconciled. Supported values: `snowflake`, `teradata`, `oracle`, `mssql`, `synapse`, `databricks`.
 - `report_type`: The type of report to be generated. Available report types are `schema`, `row`, `data` or `all`. For details check [here](./dataflow_example.mdx).
-- `secret_scope`: The secret scope name used to store the connection credentials for the source database system.
+- `secret_scope`: (Deprecated in favor of `creds_or_secret_scope` and kept for backwards compatibility) The secret scope name used to store the connection credentials for the source database system.
 - `database_config`: The database configuration for connecting to the source database. expects a `DatabaseConfig` object.
   - `source_schema`: The source schema name.
   - `target_catalog`: The target catalog name.
@@ -104,6 +105,16 @@ class ReconcileMetadataConfig:
 ```
 If not set the default values will be used to store the metadata. The default resources are created during the installation of Lakebridge.
+- `creds_or_secret_scope`: The credentials used to connect to the data source. Optional for backwards compatibility.
+It can also be a plain string naming a secret scope, mimicking the old `secret_scope` behavior (see the sketch at the end of this section). If set, `secret_scope` is ignored.
+  - `vault_type`: `local` to use the values directly, `env` to load them from environment variables, or `databricks` to load them from Databricks secrets.
+  - `source_creds`: A mapping of reconcile credential keys to values, resolved according to the vault type.
+```python
+@dataclass
+class ReconcileCredentialConfig:
+    vault_type: str
+    source_creds: dict[str, str]
+```
 
 An Example of configuring the Reconcile properties:
@@ -111,13 +122,14 @@
 from databricks.labs.lakebridge.config import (
     DatabaseConfig,
     ReconcileConfig,
-    ReconcileMetadataConfig
+    ReconcileMetadataConfig,
+    ReconcileCredentialConfig
 )
 
 reconcile_config = ReconcileConfig(
     data_source = "snowflake",
     report_type = "all",
-    secret_scope = "snowflake-credential",
+    secret_scope = "NOT_USED",
     database_config= DatabaseConfig(source_catalog="source_sf_catalog",
                                     source_schema="source_sf_schema",
                                     target_catalog="target_databricks_catalog",
@@ -126,9 +138,25 @@ reconcile_config = ReconcileConfig(
     metadata_config = ReconcileMetadataConfig(
         catalog = "lakebridge_metadata",
         schema= "reconcile"
-    )
+    ),
+    creds_or_secret_scope=ReconcileCredentialConfig(
+        vault_type="local",
+        source_creds={"sfUrl": "xxx@snowflakecomputing.com", "sfUser": "app", "sfPassword": "the P@ssword", "sfRole": "app"}
+    )
 )
 ```
+An example of using Databricks secrets for the source credentials:
+```python
+reconcile_config = ReconcileConfig(
+    ...,
+    creds_or_secret_scope=ReconcileCredentialConfig(
+        vault_type="databricks",
+        source_creds={"sfUrl": "some_secret_scope/some_key", "sfUser": "another_secret_scope/user_key", "sfPassword": "scope/key", "sfRole": "scope/key"}
+    )
+)
+```
+All of the expected credential keys for the chosen data source must be configured.
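+
+For illustration, the two remaining forms are sketched below. The `env` variant assumes, by
+analogy with the examples above, that each value names the environment variable to read (the
+variable names here are hypothetical); the plain-string variant mimics the deprecated
+`secret_scope` field:
+```python
+# Values resolved from environment variables at runtime (assumed semantics).
+reconcile_config = ReconcileConfig(
+    ...,
+    creds_or_secret_scope=ReconcileCredentialConfig(
+        vault_type="env",
+        source_creds={"sfUrl": "SNOWFLAKE_URL", "sfUser": "SNOWFLAKE_USER", "sfPassword": "SNOWFLAKE_PASSWORD", "sfRole": "SNOWFLAKE_ROLE"}
+    )
+)
+
+# Backwards-compatible string form: behaves like the deprecated `secret_scope` field.
+reconcile_config = ReconcileConfig(
+    ...,
+    creds_or_secret_scope="snowflake-credential"
+)
+```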
 ## Configure Table Properties
diff --git a/docs/lakebridge/docs/reconcile/reconcile_automation.mdx b/docs/lakebridge/docs/reconcile/reconcile_automation.mdx
index 80fc668940..0d62825a1f 100644
--- a/docs/lakebridge/docs/reconcile/reconcile_automation.mdx
+++ b/docs/lakebridge/docs/reconcile/reconcile_automation.mdx
@@ -116,7 +116,7 @@ To run the utility, the following parameters must be set:
 - `remorph_catalog`: The catalog configured through CLI.
 - `remorph_schema`: The schema configured through CLI.
 - `remorph_config_table`: The table configs created as a part of the pre-requisites.
-- `secret_scope`: The Databricks secret scope for accessing the source system. Refer to the Lakebridge documentation for the specific keys required to be configured as per the source system.
+- `secret_scope`: (Deprecated) The Databricks secret scope for accessing the source system. Refer to the Lakebridge documentation for the specific keys required to be configured as per the source system.
 - `source_system`: The source system against which reconciliation is performed.
 - `table_recon_summary`: The target summary table created as a part of the pre-requisites.
 
diff --git a/src/databricks/labs/lakebridge/cli.py b/src/databricks/labs/lakebridge/cli.py
index bb76743b66..a042803dfe 100644
--- a/src/databricks/labs/lakebridge/cli.py
+++ b/src/databricks/labs/lakebridge/cli.py
@@ -27,7 +27,6 @@
 from databricks.labs.lakebridge.config import TranspileConfig, LSPConfigOptionV1
 from databricks.labs.lakebridge.contexts.application import ApplicationContext
 from databricks.labs.lakebridge.connections.credential_manager import cred_file
-from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
 from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
 from databricks.labs.lakebridge.install import installer
 from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner
@@ -699,18 +698,6 @@ def generate_lineage(
     lineage_generator(engine, source_dialect, input_source, output_folder)
 
 
-@lakebridge.command
-def configure_secrets(*, w: WorkspaceClient) -> None:
-    """Setup reconciliation connection profile details as Secrets on Databricks Workspace"""
-    recon_conf = ReconConfigPrompts(w)
-
-    # Prompt for source
-    source = recon_conf.prompt_source()
-
-    logger.info(f"Setting up Scope, Secrets for `{source}` reconciliation")
-    recon_conf.prompt_and_save_connection_details()
-
-
 @lakebridge.command
 def configure_database_profiler(w: WorkspaceClient) -> None:
     """[Experimental] Installs and runs the Lakebridge Assessment package for database profiling"""
diff --git a/src/databricks/labs/lakebridge/helpers/recon_config_utils.py b/src/databricks/labs/lakebridge/helpers/recon_config_utils.py
index e798edbf77..236d9fb16e 100644
--- a/src/databricks/labs/lakebridge/helpers/recon_config_utils.py
+++ b/src/databricks/labs/lakebridge/helpers/recon_config_utils.py
@@ -3,98 +3,16 @@
 from databricks.labs.blueprint.tui import Prompts
 from databricks.labs.lakebridge.reconcile.constants import ReconSourceType
 from databricks.sdk import WorkspaceClient
-from databricks.sdk.errors.platform import ResourceDoesNotExist
 
 logger = logging.getLogger(__name__)
 
 
 class ReconConfigPrompts:
     def __init__(self, ws: WorkspaceClient, prompts: Prompts = Prompts()):
-        self._source = None
         self._prompts = prompts
         self._ws = ws
 
-    def _scope_exists(self, scope_name: str) -> bool:
-        scope_exists = scope_name in [scope.name for scope in self._ws.secrets.list_scopes()]
-
-        if not scope_exists:
-            logger.error(
- f"Error: Cannot find Secret Scope: `{scope_name}` in Databricks Workspace." - f"\nUse `remorph configure-secrets` to setup Scope and Secrets" - ) - return False - logger.debug(f"Found Scope: `{scope_name}` in Databricks Workspace") - return True - - def _ensure_scope_exists(self, scope_name: str): - """ - Get or Create a new Scope in Databricks Workspace - :param scope_name: - """ - scope_exists = self._scope_exists(scope_name) - if not scope_exists: - allow_scope_creation = self._prompts.confirm("Do you want to create a new one?") - if not allow_scope_creation: - msg = "Scope is needed to store Secrets in Databricks Workspace" - raise SystemExit(msg) - - try: - logger.debug(f" Creating a new Scope: `{scope_name}`") - self._ws.secrets.create_scope(scope_name) - except Exception as ex: - logger.error(f"Exception while creating Scope `{scope_name}`: {ex}") - raise ex - - logger.info(f" Created a new Scope: `{scope_name}`") - logger.info(f" Using Scope: `{scope_name}`...") - - def _secret_key_exists(self, scope_name: str, secret_key: str) -> bool: - try: - self._ws.secrets.get_secret(scope_name, secret_key) - logger.info(f"Found Secret key `{secret_key}` in Scope `{scope_name}`") - return True - except ResourceDoesNotExist: - logger.debug(f"Secret key `{secret_key}` not found in Scope `{scope_name}`") - return False - - def _store_secret(self, scope_name: str, secret_key: str, secret_value: str): - try: - logger.debug(f"Storing Secret: *{secret_key}* in Scope: `{scope_name}`") - self._ws.secrets.put_secret(scope=scope_name, key=secret_key, string_value=secret_value) - except Exception as ex: - logger.error(f"Exception while storing Secret `{secret_key}`: {ex}") - raise ex - - def store_connection_secrets(self, scope_name: str, conn_details: tuple[str, dict[str, str]]): - engine = conn_details[0] - secrets = conn_details[1] - - logger.debug(f"Storing `{engine}` Connection Secrets in Scope: `{scope_name}`") - - for key, value in secrets.items(): - secret_key = key - logger.debug(f"Processing Secret: *{secret_key}*") - debug_op = "Storing" - info_op = "Stored" - if self._secret_key_exists(scope_name, secret_key): - overwrite_secret = self._prompts.confirm(f"Do you want to overwrite `{secret_key}`?") - if not overwrite_secret: - continue - debug_op = "Overwriting" - info_op = "Overwritten" - - logger.debug(f"{debug_op} Secret: *{secret_key}* in Scope: `{scope_name}`") - self._store_secret(scope_name, secret_key, value) - logger.info(f"{info_op} Secret: *{secret_key}* in Scope: `{scope_name}`") - - def prompt_source(self): - source = self._prompts.choice( - "Select the source dialect", [source_type.value for source_type in ReconSourceType] - ) - self._source = source - return source - - def _prompt_snowflake_connection_details(self) -> tuple[str, dict[str, str]]: + def _prompt_snowflake_connection_details(self) -> dict[str, str]: """ Prompt for Snowflake connection details :return: tuple[str, dict[str, str]] @@ -103,30 +21,40 @@ def _prompt_snowflake_connection_details(self) -> tuple[str, dict[str, str]]: f"Please answer a couple of questions to configure `{ReconSourceType.SNOWFLAKE.value}` Connection profile" ) - sf_url = self._prompts.question("Enter Snowflake URL") - account = self._prompts.question("Enter Account Name") - sf_user = self._prompts.question("Enter User") - sf_password = self._prompts.question("Enter Password") - sf_db = self._prompts.question("Enter Database") - sf_schema = self._prompts.question("Enter Schema") - sf_warehouse = self._prompts.question("Enter Snowflake 
Warehouse") - sf_role = self._prompts.question("Enter Role", default=" ") + sf_url = self._prompts.question("Enter Snowflake URL Secret Name") + sf_user = self._prompts.question("Enter User Secret Name") + password_dict = {} + sf_password = self._prompts.question( + "Enter Password Secret Name or use `None` to use key-based auth", default="None" + ) + if sf_password.lower() == "none": + logger.info("Proceeding with PEM Private Key authentication...") + sf_pem_key = self._prompts.question("Enter PEM Private Key Secret Name") + password_dict["pem_private_key"] = sf_pem_key + sf_pem_key_password = self._prompts.question( + "Enter PEM Private Key Password Secret Name or use `None`", default="None" + ) + if sf_pem_key_password.lower() == "none": + password_dict["pem_private_key_password"] = sf_pem_key_password + else: + password_dict["sfPassword"] = sf_password + sf_db = self._prompts.question("Enter Database Secret Name") + sf_schema = self._prompts.question("Enter Schema Secret Name") + sf_warehouse = self._prompts.question("Enter Snowflake Warehouse Secret Name") + sf_role = self._prompts.question("Enter Role Secret Name") sf_conn_details = { "sfUrl": sf_url, - "account": account, "sfUser": sf_user, - "sfPassword": sf_password, "sfDatabase": sf_db, "sfSchema": sf_schema, "sfWarehouse": sf_warehouse, "sfRole": sf_role, - } + } | password_dict - sf_conn_dict = (ReconSourceType.SNOWFLAKE.value, sf_conn_details) - return sf_conn_dict + return sf_conn_details - def _prompt_oracle_connection_details(self) -> tuple[str, dict[str, str]]: + def _prompt_oracle_connection_details(self) -> dict[str, str]: """ Prompt for Oracle connection details :return: tuple[str, dict[str, str]] @@ -134,43 +62,55 @@ def _prompt_oracle_connection_details(self) -> tuple[str, dict[str, str]]: logger.info( f"Please answer a couple of questions to configure `{ReconSourceType.ORACLE.value}` Connection profile" ) - user = self._prompts.question("Enter User") - password = self._prompts.question("Enter Password") - host = self._prompts.question("Enter host") - port = self._prompts.question("Enter port") - database = self._prompts.question("Enter database/SID") + user = self._prompts.question("Enter User Secret Name") + password = self._prompts.question("Enter Password Secret Name") + host = self._prompts.question("Enter host Secret Name") + port = self._prompts.question("Enter port Secret Name") + database = self._prompts.question("Enter database/SID Secret Name") oracle_conn_details = {"user": user, "password": password, "host": host, "port": port, "database": database} - oracle_conn_dict = (ReconSourceType.ORACLE.value, oracle_conn_details) - return oracle_conn_dict + return oracle_conn_details - def _connection_details(self): + def _prompt_mssql_connection_details(self) -> dict[str, str]: """ - Prompt for connection details based on the source - :return: None + Prompt for Oracle connection details + :return: tuple[str, dict[str, str]] """ - logger.debug(f"Prompting for `{self._source}` connection details") - match self._source: + logger.info( + f"Please answer a couple of questions to configure `{ReconSourceType.MSSQL.value}`/`{ReconSourceType.SYNAPSE.value}` Connection profile" + ) + user = self._prompts.question("Enter User Secret Name") + password = self._prompts.question("Enter Password Secret Name") + host = self._prompts.question("Enter host Secret Name") + port = self._prompts.question("Enter port Secret Name") + database = self._prompts.question("Enter database Secret Name") + encrypt = self._prompts.question("Enter 
+        trust_server_certificate = self._prompts.question("Enter Trust Server Certificate Secret Name")
+
+        tsql_conn_details = {
+            "user": user,
+            "password": password,
+            "host": host,
+            "port": port,
+            "database": database,
+            "encrypt": encrypt,
+            "trustServerCertificate": trust_server_certificate,
+        }
+
+        return tsql_conn_details
+
+    def _connection_details(self, source: str):
+        logger.debug(f"Prompting for `{source}` connection details")
+        match source:
             case ReconSourceType.SNOWFLAKE.value:
                 return self._prompt_snowflake_connection_details()
             case ReconSourceType.ORACLE.value:
                 return self._prompt_oracle_connection_details()
+            case ReconSourceType.MSSQL.value | ReconSourceType.SYNAPSE.value:
+                return self._prompt_mssql_connection_details()
 
-    def prompt_and_save_connection_details(self):
-        """
-        Prompt for connection details and save them as Secrets in Databricks Workspace
-        """
-        # prompt for connection_details only if source is other than Databricks
-        if self._source == ReconSourceType.DATABRICKS.value:
-            logger.info("*Databricks* as a source is supported only for **Hive MetaStore (HMS) setup**")
-            return
-
-        # Prompt for secret scope
-        scope_name = self._prompts.question("Enter Secret Scope name")
-        self._ensure_scope_exists(scope_name)
-
-        # Prompt for connection details
-        connection_details = self._connection_details()
-        logger.debug(f"Storing `{self._source}` connection details as Secrets in Databricks Workspace...")
-        self.store_connection_secrets(scope_name, connection_details)
+    def prompt_recon_creds(self, source: str) -> tuple[str, dict[str, str]]:
+        logger.info("Please provide secret names in the following steps in the format <secret_scope>/<secret_key>")
+        connection_details = self._connection_details(source)
+        return "databricks", connection_details
diff --git a/src/databricks/labs/lakebridge/install.py b/src/databricks/labs/lakebridge/install.py
index 9d5cdb2c91..552c971f63 100644
--- a/src/databricks/labs/lakebridge/install.py
+++ b/src/databricks/labs/lakebridge/install.py
@@ -25,6 +25,7 @@
 from databricks.labs.lakebridge.contexts.application import ApplicationContext
 from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
 from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
+from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
 from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
 from databricks.labs.lakebridge.transpiler.installers import (
     BladebridgeInstaller,
@@ -48,6 +49,7 @@ def __init__(  # pylint: disable=too-many-arguments
         install_state: InstallState,
         product_info: ProductInfo,
         resource_configurator: ResourceConfigurator,
+        recon_creds_prompts: ReconConfigPrompts,
         workspace_installation: WorkspaceInstallation,
         environ: dict[str, str] | None = None,
         *,
@@ -65,6 +67,7 @@
         self._install_state = install_state
         self._product_info = product_info
         self._resource_configurator = resource_configurator
+        self._recon_creds_prompts = recon_creds_prompts
         self._ws_installation = workspace_installation
 
         # TODO: Refactor the 'prompts' property in preference to using this flag, which should be redundant.
         self._is_interactive = is_interactive
 
@@ -326,10 +329,11 @@ def _prompt_for_new_reconcile_installation(self) -> ReconcileConfig:
         report_type = self._prompts.choice(
             "Select the report type:", [report_type.value for report_type in ReconReportType]
         )
-        scope_name = self._prompts.question(  # TODO deprecate
-            f"Enter Secret scope name to store `{data_source.capitalize()}` connection details / secrets",
-            default=f"remorph_{data_source}",
-        )
+        if data_source != ReconSourceType.DATABRICKS.value:
+            vault, credentials = self._recon_creds_prompts.prompt_recon_creds(data_source)
+            creds = ReconcileCredentialConfig(vault, credentials)
+        else:
+            creds = ReconcileCredentialConfig("databricks", {})
 
         db_config = self._prompt_for_reconcile_database_config(data_source)
         metadata_config = self._prompt_for_reconcile_metadata_config()
@@ -337,7 +341,7 @@
         return ReconcileConfig(
             data_source=data_source,
             report_type=report_type,
-            creds=ReconcileCredentialConfig(vault_type="databricks", source_creds={"__secret_scope": scope_name}),
+            creds=creds,
             database_config=db_config,
             metadata_config=metadata_config,
         )
@@ -415,6 +419,7 @@ def installer(
         app_context.install_state,
         app_context.product_info,
         app_context.resource_configurator,
+        ReconConfigPrompts(ws, app_context.prompts),
        app_context.workspace_installation,
         transpiler_repository=transpiler_repository,
         is_interactive=is_interactive,
diff --git a/tests/integration/config/test_config.py b/tests/integration/config/test_config.py
index afc61c1e5c..57965b2d5e 100644
--- a/tests/integration/config/test_config.py
+++ b/tests/integration/config/test_config.py
@@ -1,3 +1,5 @@
+from unittest.mock import MagicMock
+
 from databricks.sdk import WorkspaceClient
 
 from databricks.labs.blueprint.tui import MockPrompts
@@ -26,6 +28,7 @@ def test_stores_and_fetches_config(ws: WorkspaceClient) -> None:
         context.install_state,
         context.product_info,
         context.resource_configurator,
+        MagicMock(),
         context.workspace_installation,
     )
     config = TranspileConfig(
diff --git a/tests/unit/helpers/test_recon_config_utils.py b/tests/unit/helpers/test_recon_config_utils.py
index 84558295b3..8519f7aa09 100644
--- a/tests/unit/helpers/test_recon_config_utils.py
+++ b/tests/unit/helpers/test_recon_config_utils.py
@@ -1,145 +1,77 @@
-from unittest.mock import patch
-
-import pytest
-
 from databricks.labs.blueprint.tui import MockPrompts
 from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
-from databricks.sdk.errors.platform import ResourceDoesNotExist
-from databricks.sdk.service.workspace import SecretScope
 
-SOURCE_DICT = {"databricks": "0", "mssql": "1", "oracle": "2", "snowflake": "3", "synapse": "4"}
-SCOPE_NAME = "dummy_scope"
+from databricks.labs.lakebridge.reconcile.constants import ReconSourceType
 
 
-def test_configure_secrets_snowflake_overwrite(mock_workspace_client):
+def test_configure_secrets_snowflake(mock_workspace_client):
     prompts = MockPrompts(
         {
-            r"Select the source": SOURCE_DICT["snowflake"],
-            r"Enter Secret Scope name": SCOPE_NAME,
+            r"Enter secret vault type": "0",
             r"Enter Snowflake URL": "dummy",
-            r"Enter Account Name": "dummy",
             r"Enter User": "dummy",
-            r"Enter Password": "dummy",
+            r"Enter Password*": "dummy",
             r"Enter Database": "dummy",
             r"Enter Schema": "dummy",
             r"Enter Snowflake Warehouse": "dummy",
             r"Enter Role": "dummy",
-            r"Do you want to overwrite.*": "yes",
         }
     )
 
-    mock_workspace_client.secrets.list_scopes.side_effect = [[SecretScope(name=SCOPE_NAME)]]
 
     recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
-    recon_conf.prompt_source()
-
-    recon_conf.prompt_and_save_connection_details()
+    recon_conf.prompt_recon_creds(ReconSourceType.SNOWFLAKE.value)
 
 
-def test_configure_secrets_oracle_insert(mock_workspace_client):
-    # mock prompts for Oracle
+def test_configure_secrets_snowflake_pem(mock_workspace_client):
     prompts = MockPrompts(
         {
-            r"Select the source": SOURCE_DICT["oracle"],
-            r"Enter Secret Scope name": SCOPE_NAME,
-            r"Do you want to create a new one?": "yes",
+            r"Enter secret vault type": "0",
+            r"Enter Snowflake URL": "dummy",
             r"Enter User": "dummy",
-            r"Enter Password": "dummy",
-            r"Enter host": "dummy",
-            r"Enter port": "dummy",
-            r"Enter database/SID": "dummy",
-        }
-    )
-
-    mock_workspace_client.secrets.list_scopes.side_effect = [[SecretScope(name="scope_name")]]
-
-    with patch(
-        "databricks.labs.lakebridge.helpers.recon_config_utils.ReconConfigPrompts._secret_key_exists",
-        return_value=False,
-    ):
-        recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
-        recon_conf.prompt_source()
-
-        recon_conf.prompt_and_save_connection_details()
-
-
-def test_configure_secrets_invalid_source(mock_workspace_client):
-    prompts = MockPrompts(
-        {
-            r"Select the source": "100",  # Invalid source
-            r"Enter Secret Scope name": SCOPE_NAME,
-        }
-    )
-
-    with patch(
-        "databricks.labs.lakebridge.helpers.recon_config_utils.ReconConfigPrompts._scope_exists",
-        return_value=True,
-    ):
-        recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
-        with pytest.raises(ValueError, match="cannot get answer within 10 attempt"):
-            recon_conf.prompt_source()
-
-
-def test_store_connection_secrets_exception(mock_workspace_client):
-    prompts = MockPrompts(
-        {
-            r"Do you want to overwrite `source_key`?": "no",
-        }
-    )
-
-    mock_workspace_client.secrets.get_secret.side_effect = ResourceDoesNotExist("Not Found")
-    mock_workspace_client.secrets.put_secret.side_effect = Exception("Timed out")
-
-    recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
-
-    with pytest.raises(Exception, match="Timed out"):
-        recon_conf.store_connection_secrets("scope_name", ("source", {"key": "value"}))
-
-
-def test_configure_secrets_no_scope(mock_workspace_client):
-    prompts = MockPrompts(
-        {
-            r"Select the source": SOURCE_DICT["snowflake"],
-            r"Enter Secret Scope name": SCOPE_NAME,
-            r"Do you want to create a new one?": "no",
+            r"Enter Password*": "none",
+            r"Enter PEM*": "dummy",
+            r"Enter PEM*Password*": "none",
+            r"Enter Database": "dummy",
+            r"Enter Schema": "dummy",
+            r"Enter Snowflake Warehouse": "dummy",
+            r"Enter Role": "dummy",
         }
     )
 
-    mock_workspace_client.secrets.list_scopes.side_effect = [[SecretScope(name="scope_name")]]
-
     recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
-    recon_conf.prompt_source()
+    recon_conf.prompt_recon_creds(ReconSourceType.SNOWFLAKE.value)
 
-    with pytest.raises(SystemExit, match="Scope is needed to store Secrets in Databricks Workspace"):
-        recon_conf.prompt_and_save_connection_details()
 
-
-def test_configure_secrets_create_scope_exception(mock_workspace_client):
+def test_configure_secrets_oracle(mock_workspace_client):
+    # mock prompts for Oracle
     prompts = MockPrompts(
         {
-            r"Select the source": SOURCE_DICT["snowflake"],
-            r"Enter Secret Scope name": SCOPE_NAME,
+            r"Enter secret vault type": "1",
             r"Do you want to create a new one?": "yes",
+            r"Enter User": "dummy",
+            r"Enter Password": "dummy",
+            r"Enter host": "dummy",
+            r"Enter port": "dummy",
+            r"Enter database/SID": "dummy",
         }
     )
 
-    mock_workspace_client.secrets.list_scopes.side_effect = [[SecretScope(name="scope_name")]]
-    mock_workspace_client.secrets.create_scope.side_effect = Exception("Network Error")
-
     recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
-    recon_conf.prompt_source()
-
-    with pytest.raises(Exception, match="Network Error"):
-        recon_conf.prompt_and_save_connection_details()
+    recon_conf.prompt_recon_creds(ReconSourceType.ORACLE.value)
 
 
-def test_store_connection_secrets_overwrite(mock_workspace_client):
+def test_configure_secrets_tsql(mock_workspace_client):
     prompts = MockPrompts(
         {
-            r"Do you want to overwrite `key`?": "no",
+            r"Enter secret vault type": "2",
+            r"Enter User": "dummy",
+            r"Enter Password": "dummy",
+            r"Enter host": "dummy",
+            r"Enter port": "dummy",
+            r"Enter database": "dummy",
+            r"Enter Encrypt": "dummy",
+            r"Enter Trust Server Certificate": "dummy",
         }
     )
 
-    with patch(
-        "databricks.labs.lakebridge.helpers.recon_config_utils.ReconConfigPrompts._secret_key_exists", return_value=True
-    ):
-        recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
-        recon_conf.store_connection_secrets("scope_name", ("source", {"key": "value"}))
+    recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
+    recon_conf.prompt_recon_creds(ReconSourceType.MSSQL.value)
+    recon_conf.prompt_recon_creds(ReconSourceType.SYNAPSE.value)
diff --git a/tests/unit/test_cli_other.py b/tests/unit/test_cli_other.py
index 5d184bffc0..c2a6568709 100644
--- a/tests/unit/test_cli_other.py
+++ b/tests/unit/test_cli_other.py
@@ -6,21 +6,6 @@
 from databricks.labs.blueprint.tui import MockPrompts
 from databricks.labs.lakebridge import cli
 from databricks.labs.lakebridge.config import LSPConfigOptionV1, LSPPromptMethod
-from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
-
-
-def test_configure_secrets_databricks(mock_workspace_client):
-    source_dict = {"databricks": "0", "netezza": "1", "oracle": "2", "snowflake": "3"}
-    prompts = MockPrompts(
-        {
-            r"Select the source": source_dict["databricks"],
-        }
-    )
-
-    recon_conf = ReconConfigPrompts(mock_workspace_client, prompts)
-    recon_conf.prompt_source()
-
-    recon_conf.prompt_and_save_connection_details()
 
 
 @pytest.mark.parametrize(
@@ -60,12 +45,6 @@ def test_interactive_argument_auto(is_tty: bool) -> None:
     assert interactive_mode is is_tty
 
 
-def test_cli_configure_secrets_config(mock_workspace_client):
-    with patch("databricks.labs.lakebridge.cli.ReconConfigPrompts") as mock_recon_config:
-        cli.configure_secrets(w=mock_workspace_client)
-        mock_recon_config.assert_called_once_with(mock_workspace_client)
-
-
 def test_cli_reconcile(mock_workspace_client):
     with patch("databricks.labs.lakebridge.reconcile.runner.ReconcileRunner.run", return_value=True):
         cli.reconcile(w=mock_workspace_client)
diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py
index 0c80635485..802ee7c785 100644
--- a/tests/unit/test_install.py
+++ b/tests/unit/test_install.py
@@ -1,7 +1,7 @@
 import logging
 from collections.abc import Callable, Generator, Sequence
 from pathlib import Path
-from unittest.mock import create_autospec, patch
+from unittest.mock import create_autospec, patch, MagicMock
 
 import pytest
 from databricks.labs.blueprint.installation import JsonObject, MockInstallation
@@ -22,6 +22,7 @@
 from databricks.labs.lakebridge.contexts.application import ApplicationContext
 from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
 from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
+from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
 from databricks.labs.lakebridge.install import WorkspaceInstaller
 from databricks.labs.lakebridge.reconcile.constants import ReconSourceType, ReconReportType
 from databricks.labs.lakebridge.transpiler.installers import (
@@ -92,6 +93,7 @@ def test_workspace_installer_run_raise_error_in_dbr(ws: WorkspaceClient) -> None
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         environ=environ,
     )
@@ -117,6 +119,7 @@ def test_workspace_installer_run_install_not_called_in_test(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
 
@@ -144,6 +147,7 @@ def test_workspace_installer_run_install_called_with_provided_config(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
 
@@ -166,6 +170,7 @@ def test_configure_error_if_invalid_module_selected(ws: WorkspaceClient) -> None
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
 
@@ -205,6 +210,7 @@ def test_workspace_installer_run_install_called_with_generated_config(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
     workspace_installer.run("transpile")
@@ -255,6 +261,7 @@ def test_configure_transpile_no_existing_installation(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
 
@@ -324,6 +331,7 @@ def test_configure_transpile_installation_no_override(ws: WorkspaceClient) -> No
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
     remorph_config = workspace_installer.configure(module="transpile")
@@ -385,6 +393,7 @@ def test_configure_transpile_installation_config_error_continue_install(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
 
@@ -448,6 +457,7 @@ def test_configure_transpile_installation_with_no_validation(ws, ws_installer):
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
 
@@ -519,6 +529,7 @@ def test_configure_transpile_installation_with_validation_and_warehouse_id_from_
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
 
@@ -595,6 +606,7 @@ def test_configure_reconcile_installation_no_override(ws: WorkspaceClient) -> No
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
     with pytest.raises(SystemExit):
@@ -647,6 +659,9 @@ def test_configure_reconcile_installation_config_error_continue_install(ws: Work
         workspace_installation=create_autospec(WorkspaceInstallation),
     )
 
+    creds_mock = MagicMock(ReconConfigPrompts)
+    creds_sample = ReconcileCredentialConfig("local", {"test_secret": "dummy"})
+    creds_mock.prompt_recon_creds.return_value = (creds_sample.vault_type, creds_sample.source_creds)
     workspace_installer = WorkspaceInstaller(
         ctx.workspace_client,
         ctx.prompts,
@@ -654,6 +669,7 @@
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        creds_mock,
         ctx.workspace_installation,
     )
     config = workspace_installer.configure(module="reconcile")
@@ -662,7 +678,7 @@
         reconcile=ReconcileConfig(
             data_source="oracle",
             report_type="all",
-            creds=ReconcileCredentialConfig(vault_type="databricks", source_creds={"__secret_scope": "remorph_oracle"}),
+            creds=ReconcileCredentialConfig(vault_type="local", source_creds={"test_secret": "dummy"}),
             database_config=DatabaseConfig(
                 source_schema="tpch_sf1000",
                 target_catalog="tpch",
@@ -682,10 +698,7 @@
         {
             "data_source": "oracle",
             "report_type": "all",
-            "creds": {
-                "vault_type": "databricks",
-                "source_creds": {"__secret_scope": "remorph_oracle"},
-            },
+            "creds": {"vault_type": "local", "source_creds": {"test_secret": "dummy"}},
             "database_config": {
                 "source_schema": "tpch_sf1000",
                 "target_catalog": "tpch",
@@ -707,7 +720,6 @@ def test_configure_reconcile_no_existing_installation(ws: WorkspaceClient) -> No
         {
             r"Select the Data Source": str(RECONCILE_DATA_SOURCES.index("snowflake")),
             r"Select the report type": str(RECONCILE_REPORT_TYPES.index("all")),
-            r"Enter Secret scope name to store .* connection details / secrets": "remorph_snowflake",
             r"Enter source catalog name for .*": "snowflake_sample_data",
             r"Enter source schema name for .*": "tpch_sf1000",
             r"Enter target catalog name for Databricks": "tpch",
@@ -729,6 +741,9 @@
         workspace_installation=create_autospec(WorkspaceInstallation),
     )
 
+    creds_mock = MagicMock(ReconConfigPrompts)
+    creds_sample = ReconcileCredentialConfig("local", {"test_secret": "dummy"})
+    creds_mock.prompt_recon_creds.return_value = (creds_sample.vault_type, creds_sample.source_creds)
     workspace_installer = WorkspaceInstaller(
         ctx.workspace_client,
         ctx.prompts,
@@ -736,6 +751,7 @@
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        creds_mock,
         ctx.workspace_installation,
     )
     config = workspace_installer.configure(module="reconcile")
@@ -744,9 +760,7 @@
         reconcile=ReconcileConfig(
             data_source="snowflake",
             report_type="all",
-            creds=ReconcileCredentialConfig(
-                vault_type="databricks", source_creds={"__secret_scope": "remorph_snowflake"}
-            ),
+            creds=ReconcileCredentialConfig(vault_type="local", source_creds={"test_secret": "dummy"}),
             database_config=DatabaseConfig(
                 source_schema="tpch_sf1000",
                 target_catalog="tpch",
@@ -767,10 +781,7 @@
         {
             "data_source": "snowflake",
             "report_type": "all",
-            "creds": {
-                "vault_type": "databricks",
-                "source_creds": {"__secret_scope": "remorph_snowflake"},
-            },
+            "creds": {"vault_type": "local", "source_creds": {"test_secret": "dummy"}},
             "database_config": {
                 "source_catalog": "snowflake_sample_data",
                 "source_schema": "tpch_sf1000",
@@ -792,7 +803,6 @@ def test_configure_reconcile_databricks_no_existing_installation(ws: WorkspaceCl
     prompts = MockPrompts(
         {
             r"Select the Data Source": str(RECONCILE_DATA_SOURCES.index("databricks")),
-            r"Enter Secret scope name to store .* connection details / secrets": "remorph_databricks",
             r"Select the report type": str(RECONCILE_REPORT_TYPES.index("all")),
             r"Enter source catalog name for .*": "databricks_catalog",
             r"Enter source schema name for .*": "some_schema",
@@ -815,6 +825,7 @@
         workspace_installation=create_autospec(WorkspaceInstallation),
     )
 
+    creds_mock = MagicMock(ReconConfigPrompts)  # user not prompted if databricks source
     workspace_installer = WorkspaceInstaller(
         ctx.workspace_client,
         ctx.prompts,
@@ -822,6 +833,7 @@
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        creds_mock,
         ctx.workspace_installation,
     )
     config = workspace_installer.configure(module="reconcile")
@@ -842,7 +854,8 @@
                 volume="reconcile_volume",
             ),
             creds=ReconcileCredentialConfig(
-                vault_type="databricks", source_creds={"__secret_scope": "remorph_databricks"}
+                vault_type="databricks",
+                source_creds={},
             ),
         ),
         transpile=None,
@@ -855,7 +868,6 @@
             "report_type": "all",
             "creds": {
                 "vault_type": "databricks",
-                "source_creds": {"__secret_scope": "remorph_databricks"},
             },
             "database_config": {
                 "source_catalog": "databricks_catalog",
@@ -889,7 +901,6 @@ def test_configure_all_override_installation(
             r"Open .* in the browser?": "no",
             r"Select the Data Source": str(RECONCILE_DATA_SOURCES.index("snowflake")),
             r"Select the report type": str(RECONCILE_REPORT_TYPES.index("all")),
-            r"Enter Secret scope name to store .* connection details / secrets": "remorph_snowflake",
             r"Enter source catalog name for .*": "snowflake_sample_data",
             r"Enter source schema name for .*": "tpch_sf1000",
             r"Enter target catalog name for Databricks": "tpch",
@@ -914,7 +925,7 @@
             "reconcile.yml": {
                 "data_source": "snowflake",
                 "report_type": "all",
-                "secret_scope": "remorph_snowflake",  # v1
+                "secret_scope": "NOT_USED",  # v1
                 "database_config": {
                     "source_catalog": "snowflake_sample_data",
                     "source_schema": "tpch_sf1000",
@@ -944,6 +955,9 @@
         workspace_installation=create_autospec(WorkspaceInstallation),
     )
 
+    creds_mock = MagicMock(ReconConfigPrompts)
+    creds_sample = ReconcileCredentialConfig("local", {"test_secret": "dummy"})
+    creds_mock.prompt_recon_creds.return_value = (creds_sample.vault_type, creds_sample.source_creds)
     workspace_installer = ws_installer(
         ctx.workspace_client,
         ctx.prompts,
         ctx.installation,
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        creds_mock,
         ctx.workspace_installation,
     )
 
@@ -971,7 +986,7 @@
     expected_reconcile_config = ReconcileConfig(
         data_source="snowflake",
         report_type="all",
-        creds=ReconcileCredentialConfig(vault_type="databricks", source_creds={"__secret_scope": "remorph_snowflake"}),
+        creds=ReconcileCredentialConfig(vault_type="local", source_creds={"test_secret": "dummy"}),
         database_config=DatabaseConfig(
             source_schema="tpch_sf1000",
             target_catalog="tpch",
@@ -1006,10 +1021,7 @@
         {
             "data_source": "snowflake",
             "report_type": "all",
-            "creds": {
-                "vault_type": "databricks",
-                "source_creds": {"__secret_scope": "remorph_snowflake"},
-            },
+            "creds": {"vault_type": "local", "source_creds": {"test_secret": "dummy"}},
             "database_config": {
                 "source_catalog": "snowflake_sample_data",
                 "source_schema": "tpch_sf1000",
@@ -1087,6 +1099,7 @@ def test_runs_upgrades_on_more_recent_version(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
     )
 
@@ -1157,6 +1170,7 @@ def transpilers_path(self) -> Path:
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         transpiler_repository=_TranspilerRepository(),
     )
@@ -1246,6 +1260,7 @@ def test_runs_and_stores_force_config_option(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         transpiler_repository=transpiler_repository,
     )
@@ -1328,6 +1343,7 @@ def test_runs_and_stores_question_config_option(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         transpiler_repository=transpiler_repository,
     )
@@ -1416,6 +1432,7 @@ def test_runs_and_stores_choice_config_option(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         transpiler_repository=transpiler_repository,
     )
@@ -1470,6 +1487,7 @@ def test_installer_detects_installed_transpilers(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         transpiler_repository=mock_repository,
     )
@@ -1531,6 +1549,7 @@ def mock_factory(self, repository: TranspilerRepository) -> TranspilerInstaller:
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         transpiler_repository=mock_repository,
         transpiler_installers=(baz_installer.mock_factory, bar_installer.mock_factory),
@@ -1608,6 +1627,7 @@ def install(self, artifact: Path | None = None) -> bool:
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         transpiler_repository=mock_repository,
         transpiler_installers=(MockTranspilerInstaller,),
@@ -1666,6 +1686,7 @@ def test_no_reconfigure_if_noninteractive(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         is_interactive=False,
     )
@@ -1699,6 +1720,7 @@ def test_no_configure_if_noninteractive(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         is_interactive=False,
     )
@@ -1739,6 +1761,7 @@ def test_transpiler_installers_llm_flag(
         ctx.install_state,
         ctx.product_info,
         ctx.resource_configurator,
+        MagicMock(),
         ctx.workspace_installation,
         is_interactive=False,
         **kw_args,