Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ dependencies = [
"standard-distutils~=3.11.9; python_version>='3.11'",
"databricks-bb-analyzer~=0.1.9",
"sqlglot==26.1.3",
"databricks-labs-blueprint[yaml]>=0.11.3,<0.12.0",
"databricks-labs-blueprint @ git+https://github.com/databrickslabs/blueprint@main",
"databricks-labs-lsql==0.16.0",
"cryptography>=44.0.2,<45.1.0",
"pyodbc~=5.2.0",
Expand All @@ -56,6 +56,10 @@ build-backend = "hatchling.build"
sources = ["src"]
include = ["src"]


[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.version]
path = "src/databricks/labs/lakebridge/__about__.py"

Expand Down Expand Up @@ -459,7 +463,7 @@ max-bool-expr = 5
max-branches = 20

# Maximum number of locals for function / method body.
max-locals = 19
max-locals = 20

# Maximum number of parents for a class (see R0901).
max-parents = 7
Expand Down
15 changes: 15 additions & 0 deletions src/databricks/labs/lakebridge/assessments/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Public API of the assessments package.

Re-exports the shared constants defined in the private ``_constants``
module so callers can import them from ``...assessments`` directly.
"""

from ._constants import (
    CONNECTOR_REQUIRED,
    PLATFORM_TO_SOURCE_TECHNOLOGY,
    PRODUCT_NAME,
    PRODUCT_PATH_PREFIX,
    PROFILER_SOURCE_SYSTEM,
)

__all__ = [
    "CONNECTOR_REQUIRED",
    "PLATFORM_TO_SOURCE_TECHNOLOGY",
    "PRODUCT_NAME",
    "PRODUCT_PATH_PREFIX",
    "PROFILER_SOURCE_SYSTEM",
]
20 changes: 20 additions & 0 deletions src/databricks/labs/lakebridge/assessments/_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Shared constants for the assessments package.

Centralizes the product identifier, local install path, and the
per-source-system profiler configuration used by the assessment tooling.
"""

from pathlib import Path

# Product identifier; also used as the final path component of the install prefix.
PRODUCT_NAME: str = "lakebridge"
# Local install location of the product's library payload
# (resolves to ~/.databricks/labs/lakebridge/lib).
PRODUCT_PATH_PREFIX: Path = Path.home() / ".databricks" / "labs" / PRODUCT_NAME / "lib"

# Maps a source platform name to the profiler pipeline config file shipped
# in the product's resources.
PLATFORM_TO_SOURCE_TECHNOLOGY: dict[str, str] = {
    "synapse": "src/databricks/labs/lakebridge/resources/assessments/synapse/pipeline_config.yml",
}

# TODO modify this PLATFORM_TO_SOURCE_TECHNOLOGY.keys() once all platforms are supported
PROFILER_SOURCE_SYSTEM: list[str] = ["mssql", "synapse"]

# This flag indicates whether a connector is required for the source system when the pipeline is triggered.
# For example in the case of synapse no connector is required and the python scripts
# manage the connection by directly reading the credentials files
# Revisit this when more source systems are added to standardize the approach
CONNECTOR_REQUIRED: dict[str, bool] = {
    "synapse": False,
    "mssql": True,
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod
from pathlib import Path
import logging
import shutil
import yaml
Expand All @@ -12,20 +13,32 @@
)
from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
from databricks.labs.lakebridge.connections.env_getter import EnvGetter
from databricks.labs.lakebridge.assessments import CONNECTOR_REQUIRED

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]

def _save_to_disk(credential: dict, cred_file: Path) -> None:
    """Write *credential* to *cred_file* as YAML, preserving any prior file.

    If *cred_file* already exists it is first copied to a sibling ``.bak``
    file so the previous credentials are not lost on overwrite.
    """
    if cred_file.exists():
        backup_filename = cred_file.with_suffix('.bak')
        shutil.copy(cred_file, backup_filename)
        logger.debug(f"Backup of the existing file created at {backup_filename}")

    with cred_file.open('w', encoding='utf-8') as handle:
        yaml.dump(credential, handle, default_flow_style=False)


class AssessmentConfigurator(ABC):
"""Abstract base class for assessment configuration."""

def __init__(self, product_name: str, prompts: Prompts, credential_file=None):
def __init__(
self, product_name: str, prompts: Prompts, source_name: str, credential_file: Path | str | None = None
):
self.prompts = prompts
self._product_name = product_name
self._credential_file = creds(product_name) if not credential_file else credential_file
self._credential_file = creds(product_name) if not credential_file else Path(credential_file)
self._source_name = source_name

@abstractmethod
def _configure_credentials(self) -> str:
Expand All @@ -52,10 +65,11 @@ def run(self):
logger.info(f"Welcome to the {self._product_name} Assessment Configuration")
source = self._configure_credentials()
logger.info(f"{source.capitalize()} details and credentials received.")
if self.prompts.confirm(f"Do you want to test the connection to {source}?"):
cred_manager = create_credential_manager("lakebridge", EnvGetter())
if cred_manager:
self._test_connection(source, cred_manager)
if CONNECTOR_REQUIRED.get(self._source_name, True):
if self.prompts.confirm(f"Do you want to test the connection to {source}?"):
cred_manager = create_credential_manager("lakebridge", EnvGetter())
if cred_manager:
self._test_connection(source, cred_manager)
logger.info(f"{source.capitalize()} Assessment Configuration Completed")


Expand All @@ -64,7 +78,7 @@ class ConfigureSqlServerAssessment(AssessmentConfigurator):

def _configure_credentials(self) -> str:
cred_file = self._credential_file
source = "mssql"
source = self._source_name

logger.info(
"\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
Expand All @@ -84,18 +98,11 @@ def _configure_credentials(self) -> str:
"server": self.prompts.question("Enter the server or host details"),
"port": int(self.prompts.question("Enter the port details", valid_number=True)),
"user": self.prompts.question("Enter the user details"),
"password": self.prompts.question("Enter the password details"),
"password": self.prompts.password("Enter the password details"),
},
}

if cred_file.exists():
backup_filename = cred_file.with_suffix('.bak')
shutil.copy(cred_file, backup_filename)
logger.debug(f"Backup of the existing file created at {backup_filename}")

with open(cred_file, 'w', encoding='utf-8') as file:
yaml.dump(credential, file, default_flow_style=False)

_save_to_disk(credential, cred_file)
logger.info(f"Credential template created for {source}.")
return source

Expand All @@ -105,7 +112,7 @@ class ConfigureSynapseAssessment(AssessmentConfigurator):

def _configure_credentials(self) -> str:
cred_file = self._credential_file
source = "synapse"
source = self._source_name

logger.info(
"\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
Expand All @@ -116,23 +123,23 @@ def _configure_credentials(self) -> str:

# Synapse Workspace Settings
logger.info("Please provide Synapse Workspace settings:")
workspace_name = self.prompts.question("Enter Synapse workspace name")
synapse_workspace = {
"name": self.prompts.question("Enter Synapse workspace name"),
"dedicated_sql_endpoint": self.prompts.question("Enter dedicated SQL endpoint"),
"serverless_sql_endpoint": self.prompts.question("Enter serverless SQL endpoint"),
"name": workspace_name,
"dedicated_sql_endpoint": f"{workspace_name}.sql.azuresynapse.net",
"serverless_sql_endpoint": f"{workspace_name}-ondemand.sql.azuresynapse.net",
"sql_user": self.prompts.question("Enter SQL user"),
"sql_password": self.prompts.question("Enter SQL password"),
"sql_password": self.prompts.password("Enter SQL password"),
"tz_info": self.prompts.question("Enter timezone (e.g. America/New_York)", default="UTC"),
"driver": self.prompts.question(
"Enter the ODBC driver installed locally", default="ODBC Driver 18 for SQL Server"
),
}

# Azure API Access Settings
logger.info("Please provide Azure API access settings:")
azure_api_access = {
"development_endpoint": self.prompts.question("Enter development endpoint"),
"azure_client_id": self.prompts.question("Enter Azure client ID"),
"azure_tenant_id": self.prompts.question("Enter Azure tenant ID"),
"azure_client_secret": self.prompts.question("Enter Azure client secret"),
}
logger.info("Please provide Azure access settings:")
# Users use az cli to login to their Azure account and we just need the endpoint
azure_api_access = {"development_endpoint": self.prompts.question("Enter development endpoint")}

# JDBC Settings
logger.info("Please select JDBC authentication type:")
Expand Down Expand Up @@ -166,14 +173,7 @@ def _configure_credentials(self) -> str:
"profiler": synapse_profiler,
},
}

if cred_file.exists():
backup_filename = cred_file.with_suffix('.bak')
shutil.copy(cred_file, backup_filename)
logger.debug(f"Backup of the existing file created at {backup_filename}")

with open(cred_file, 'w', encoding='utf-8') as file:
yaml.dump(credential, file, default_flow_style=False)
_save_to_disk(credential, cred_file)

logger.info(f"Credential template created for {source}.")
return source
Expand All @@ -191,4 +191,4 @@ def create_assessment_configurator(
if source_system not in configurators:
raise ValueError(f"Unsupported source system: {source_system}")

return configurators[source_system](product_name, prompts, credential_file)
return configurators[source_system](product_name, prompts, source_system, credential_file)
Loading
Loading