Skip to content

Commit c112a30

Browse files
Introduce a new feature to establish base configurator for Profiler Assessment (#1981)
<!-- REMOVE IRRELEVANT COMMENTS BEFORE CREATING A PULL REQUEST -->

## Changes
<!-- Summary of your changes that are easy to understand. Add screenshots when necessary, they're helpful to illustrate the before and after state -->

### What does this PR do?
- Introduces a new feature to establish a base configurator for Profiler Assessment within the project.

### Relevant implementation details
- Adds foundational logic for configurable Profiler Assessments.
- Updates or adds new CLI commands to support configuration.
- Modifies existing command: `databricks labs lakebridge configure-database-profiler`
- Merges changes from PR #1940, co-authored by @goodwillpunning.

### Caveats/things to watch out for when reviewing:

### Linked issues
<!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved. See https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword -->
Resolves #..

### Functionality
- [ ] added relevant user documentation
- [x] added new CLI command
- [ ] modified existing command: `databricks labs lakebridge ...`
- [ ] ... +add your own

### Tests
<!-- How is this tested? Please see the checklist below and also describe any other relevant tests -->
- [x] manually tested
- [ ] added unit tests
- [ ] added integration tests

---------

Co-authored-by: Guenia Izquierdo <[email protected]>
1 parent 7548e63 commit c112a30

File tree

11 files changed

+272
-123
lines changed

11 files changed

+272
-123
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ max-bool-expr = 5
459459
max-branches = 20
460460

461461
# Maximum number of locals for function / method body.
462-
max-locals = 19
462+
max-locals = 20
463463

464464
# Maximum number of parents for a class (see R0901).
465465
max-parents = 7
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from ._constants import (
2+
PRODUCT_NAME,
3+
PRODUCT_PATH_PREFIX,
4+
PROFILER_SOURCE_SYSTEM,
5+
PLATFORM_TO_SOURCE_TECHNOLOGY,
6+
CONNECTOR_REQUIRED,
7+
)
8+
9+
__all__ = [
10+
"PRODUCT_NAME",
11+
"PRODUCT_PATH_PREFIX",
12+
"PROFILER_SOURCE_SYSTEM",
13+
"PLATFORM_TO_SOURCE_TECHNOLOGY",
14+
"CONNECTOR_REQUIRED",
15+
]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from pathlib import Path
2+
3+
PRODUCT_NAME = "lakebridge"
4+
PRODUCT_PATH_PREFIX = Path.home() / ".databricks" / "labs" / PRODUCT_NAME / "lib"
5+
6+
PLATFORM_TO_SOURCE_TECHNOLOGY = {
7+
"synapse": "src/databricks/labs/lakebridge/resources/assessments/synapse/pipeline_config.yml",
8+
}
9+
10+
# TODO: replace this with PLATFORM_TO_SOURCE_TECHNOLOGY.keys() once all platforms are supported
11+
PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]
12+
13+
# This flag indicates whether a connector is required for the source system when the pipeline is triggered.
14+
# For example, in the case of Synapse, no connector is required and the Python scripts
15+
# manage the connection by directly reading the credentials files
16+
# Revisit this when more source systems are added to standardize the approach
17+
CONNECTOR_REQUIRED = {
18+
"synapse": False,
19+
"mssql": True,
20+
}

src/databricks/labs/lakebridge/assessments/configure_assessment.py

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from abc import ABC, abstractmethod
2+
from pathlib import Path
23
import logging
34
import shutil
45
import yaml
@@ -12,20 +13,32 @@
1213
)
1314
from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
1415
from databricks.labs.lakebridge.connections.env_getter import EnvGetter
16+
from databricks.labs.lakebridge.assessments import CONNECTOR_REQUIRED
1517

1618
logger = logging.getLogger(__name__)
1719
logger.setLevel(logging.INFO)
1820

19-
PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]
21+
22+
def _save_to_disk(credential: dict, cred_file: Path) -> None:
23+
if cred_file.exists():
24+
backup_filename = cred_file.with_suffix('.bak')
25+
shutil.copy(cred_file, backup_filename)
26+
logger.debug(f"Backup of the existing file created at {backup_filename}")
27+
28+
with open(cred_file, 'w', encoding='utf-8') as file:
29+
yaml.dump(credential, file, default_flow_style=False)
2030

2131

2232
class AssessmentConfigurator(ABC):
2333
"""Abstract base class for assessment configuration."""
2434

25-
def __init__(self, product_name: str, prompts: Prompts, credential_file=None):
35+
def __init__(
36+
self, product_name: str, prompts: Prompts, source_name: str, credential_file: Path | str | None = None
37+
):
2638
self.prompts = prompts
2739
self._product_name = product_name
28-
self._credential_file = creds(product_name) if not credential_file else credential_file
40+
self._credential_file = creds(product_name) if not credential_file else Path(credential_file)
41+
self._source_name = source_name
2942

3043
@abstractmethod
3144
def _configure_credentials(self) -> str:
@@ -52,10 +65,11 @@ def run(self):
5265
logger.info(f"Welcome to the {self._product_name} Assessment Configuration")
5366
source = self._configure_credentials()
5467
logger.info(f"{source.capitalize()} details and credentials received.")
55-
if self.prompts.confirm(f"Do you want to test the connection to {source}?"):
56-
cred_manager = create_credential_manager("lakebridge", EnvGetter())
57-
if cred_manager:
58-
self._test_connection(source, cred_manager)
68+
if CONNECTOR_REQUIRED.get(self._source_name, True):
69+
if self.prompts.confirm(f"Do you want to test the connection to {source}?"):
70+
cred_manager = create_credential_manager("lakebridge", EnvGetter())
71+
if cred_manager:
72+
self._test_connection(source, cred_manager)
5973
logger.info(f"{source.capitalize()} Assessment Configuration Completed")
6074

6175

@@ -64,7 +78,7 @@ class ConfigureSqlServerAssessment(AssessmentConfigurator):
6478

6579
def _configure_credentials(self) -> str:
6680
cred_file = self._credential_file
67-
source = "mssql"
81+
source = self._source_name
6882

6983
logger.info(
7084
"\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
@@ -84,18 +98,11 @@ def _configure_credentials(self) -> str:
8498
"server": self.prompts.question("Enter the server or host details"),
8599
"port": int(self.prompts.question("Enter the port details", valid_number=True)),
86100
"user": self.prompts.question("Enter the user details"),
87-
"password": self.prompts.question("Enter the password details"),
101+
"password": self.prompts.password("Enter the password details"),
88102
},
89103
}
90104

91-
if cred_file.exists():
92-
backup_filename = cred_file.with_suffix('.bak')
93-
shutil.copy(cred_file, backup_filename)
94-
logger.debug(f"Backup of the existing file created at {backup_filename}")
95-
96-
with open(cred_file, 'w', encoding='utf-8') as file:
97-
yaml.dump(credential, file, default_flow_style=False)
98-
105+
_save_to_disk(credential, cred_file)
99106
logger.info(f"Credential template created for {source}.")
100107
return source
101108

@@ -105,7 +112,7 @@ class ConfigureSynapseAssessment(AssessmentConfigurator):
105112

106113
def _configure_credentials(self) -> str:
107114
cred_file = self._credential_file
108-
source = "synapse"
115+
source = self._source_name
109116

110117
logger.info(
111118
"\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
@@ -116,23 +123,23 @@ def _configure_credentials(self) -> str:
116123

117124
# Synapse Workspace Settings
118125
logger.info("Please provide Synapse Workspace settings:")
126+
workspace_name = self.prompts.question("Enter Synapse workspace name")
119127
synapse_workspace = {
120-
"name": self.prompts.question("Enter Synapse workspace name"),
121-
"dedicated_sql_endpoint": self.prompts.question("Enter dedicated SQL endpoint"),
122-
"serverless_sql_endpoint": self.prompts.question("Enter serverless SQL endpoint"),
128+
"name": workspace_name,
129+
"dedicated_sql_endpoint": f"{workspace_name}.sql.azuresynapse.net",
130+
"serverless_sql_endpoint": f"{workspace_name}-ondemand.sql.azuresynapse.net",
123131
"sql_user": self.prompts.question("Enter SQL user"),
124-
"sql_password": self.prompts.question("Enter SQL password"),
132+
"sql_password": self.prompts.password("Enter SQL password"),
125133
"tz_info": self.prompts.question("Enter timezone (e.g. America/New_York)", default="UTC"),
134+
"driver": self.prompts.question(
135+
"Enter the ODBC driver installed locally", default="ODBC Driver 18 for SQL Server"
136+
),
126137
}
127138

128139
# Azure API Access Settings
129-
logger.info("Please provide Azure API access settings:")
130-
azure_api_access = {
131-
"development_endpoint": self.prompts.question("Enter development endpoint"),
132-
"azure_client_id": self.prompts.question("Enter Azure client ID"),
133-
"azure_tenant_id": self.prompts.question("Enter Azure tenant ID"),
134-
"azure_client_secret": self.prompts.question("Enter Azure client secret"),
135-
}
140+
logger.info("Please provide Azure access settings:")
141+
# Users log in to their Azure account with the az CLI, so we only need the endpoint here
142+
azure_api_access = {"development_endpoint": self.prompts.question("Enter development endpoint")}
136143

137144
# JDBC Settings
138145
logger.info("Please select JDBC authentication type:")
@@ -166,14 +173,7 @@ def _configure_credentials(self) -> str:
166173
"profiler": synapse_profiler,
167174
},
168175
}
169-
170-
if cred_file.exists():
171-
backup_filename = cred_file.with_suffix('.bak')
172-
shutil.copy(cred_file, backup_filename)
173-
logger.debug(f"Backup of the existing file created at {backup_filename}")
174-
175-
with open(cred_file, 'w', encoding='utf-8') as file:
176-
yaml.dump(credential, file, default_flow_style=False)
176+
_save_to_disk(credential, cred_file)
177177

178178
logger.info(f"Credential template created for {source}.")
179179
return source
@@ -191,4 +191,4 @@ def create_assessment_configurator(
191191
if source_system not in configurators:
192192
raise ValueError(f"Unsupported source system: {source_system}")
193193

194-
return configurators[source_system](product_name, prompts, credential_file)
194+
return configurators[source_system](product_name, prompts, source_system, credential_file)

0 commit comments

Comments
 (0)