Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
9e56b8e
Initial Commit of configure assessment
sundarshankar89 Sep 2, 2025
a230739
Fixed unnecessary exclude lists
sundarshankar89 Sep 2, 2025
be9b358
Merged UX Recommendation
sundarshankar89 Sep 2, 2025
dcf8ee9
Added more comments
sundarshankar89 Sep 2, 2025
aeb2e00
Merge branch 'main' into feature/configure-assessment
sundarshankar89 Sep 3, 2025
18ff36c
Merge branch 'main' into feature/configure-assessment
gueniai Sep 3, 2025
26fc1ed
Merge branch 'main' into feature/configure-assessment
sundarshankar89 Sep 8, 2025
eee4c24
Updated with custom gen ai
sundarshankar89 Sep 8, 2025
ffc33aa
Merge branch 'main' into feature/configure-assessment
sundarshankar89 Sep 8, 2025
ac77f81
Merge branch 'main' into feature/configure-assessment
sundarshankar89 Sep 15, 2025
afd8272
Scripts for Synapse Profiling re-imagined from existing profiler
sundarshankar89 Sep 15, 2025
75c5b5b
Introduced Profiler Skeleton
sundarshankar89 Sep 15, 2025
8a631ed
Merge branch 'feature/configure-assessment' into feature/add_profiler…
sundarshankar89 Sep 15, 2025
26fd78a
Addressed review comments
sundarshankar89 Sep 29, 2025
11e38d3
Merge branch 'main' into feature/configure-assessment
sundarshankar89 Sep 29, 2025
e1e23f2
Merge branch 'main' into feature/synapse_profiler_scripts
sundarshankar89 Sep 29, 2025
baef44e
Merge branch 'feature/configure-assessment' into feature/add_profiler…
sundarshankar89 Sep 29, 2025
9111c31
Addressed review comments
sundarshankar89 Sep 29, 2025
6d0de29
Merge branch 'main' into feature/configure-assessment
sundarshankar89 Oct 6, 2025
a604e0b
Merge branch 'main' into feature/synapse_profiler_scripts
sundarshankar89 Oct 6, 2025
ec2c1e4
Merge branch 'feature/configure-assessment' into feature/add_profiler…
sundarshankar89 Oct 6, 2025
599cbf5
fixed review comment
sundarshankar89 Oct 6, 2025
baf0eb3
fmt fixes
sundarshankar89 Oct 6, 2025
da0c85c
update pyproject.toml
sundarshankar89 Oct 6, 2025
e8f59a4
bump blueprint
sundarshankar89 Oct 6, 2025
b8a6c70
bump blueprint and fmt fixes
sundarshankar89 Oct 6, 2025
ae841e7
addressing review comments
sundarshankar89 Oct 6, 2025
354a461
Merge branch 'bump/blueprint-0.11.4' into feature/configure-assessment
sundarshankar89 Oct 6, 2025
d56958c
Merge branch 'feature/configure-assessment' into feature/add_profiler…
sundarshankar89 Oct 6, 2025
9925b44
Merge branch 'main' into feature/synapse_profiler_scripts
sundarshankar89 Oct 6, 2025
d305754
Merge branch 'main' into feature/add_profiler_skeleton
sundarshankar89 Oct 7, 2025
eac40a3
Merge branch 'main' into feature/synapse_profiler_scripts
sundarshankar89 Oct 7, 2025
c79c01a
fixing review comments
sundarshankar89 Oct 7, 2025
2195df6
fixing review comments-2
sundarshankar89 Oct 7, 2025
ccba2df
fixing review comments-3
sundarshankar89 Oct 7, 2025
9a9e909
fixing review comments-3
sundarshankar89 Oct 7, 2025
635b84a
fixed failing tests
sundarshankar89 Oct 7, 2025
c23cb0e
Merge branch 'main' into feature/synapse_profiler_scripts
sundarshankar89 Oct 7, 2025
53b3fa9
Merge branch 'main' into feature/add_profiler_skeleton
sundarshankar89 Oct 7, 2025
be777f6
Merge branch 'feature/synapse_profiler_scripts' into feature/add_prof…
sundarshankar89 Oct 7, 2025
4763885
fix a typo
sundarshankar89 Oct 7, 2025
6f8997c
Merge branch 'feature/synapse_profiler_scripts' into feature/add_prof…
sundarshankar89 Oct 7, 2025
4992e6b
added close connection
sundarshankar89 Oct 7, 2025
1a5288f
Merge branch 'feature/synapse_profiler_scripts' into feature/add_prof…
sundarshankar89 Oct 7, 2025
9df6d0b
fixes review comments
sundarshankar89 Oct 8, 2025
9e55fe8
fixes review comments
sundarshankar89 Oct 8, 2025
393dbe9
Merge branch 'main' into feature/synapse_profiler_scripts
sundarshankar89 Oct 9, 2025
11da6d9
Merge branch 'main' into feature/synapse_profiler_scripts
sundarshankar89 Oct 9, 2025
3dc6f88
added context manager
sundarshankar89 Oct 9, 2025
6085fca
Merge branch 'feature/synapse_profiler_scripts' into feature/add_prof…
sundarshankar89 Oct 9, 2025
cb38bf5
added connection close
sundarshankar89 Oct 9, 2025
13417bf
Merge branch 'feature/synapse_profiler_scripts' into feature/add_prof…
sundarshankar89 Oct 9, 2025
53429c4
Merge branch 'main' into feature/synapse_profiler_scripts
sundarshankar89 Oct 10, 2025
b660e4a
addressed merge conflicts
sundarshankar89 Oct 10, 2025
3dd7455
Merge branch 'feature/synapse_profiler_scripts' into feature/add_prof…
sundarshankar89 Oct 10, 2025
5dbb664
removed print
sundarshankar89 Oct 10, 2025
b262013
Merge branch 'feature/synapse_profiler_scripts' into feature/add_prof…
sundarshankar89 Oct 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ asyncio_mode = "auto"
asyncio_default_fixture_loop_scope="function"

[tool.mypy]
exclude = ["tests/resources/.*"]
exclude = ["tests/resources/.*", "src/databricks/labs/lakebridge/resources/assessments/.*"]

[tool.black]
target-version = ["py310"]
Expand Down Expand Up @@ -211,7 +211,7 @@ fail-under = 10.0
ignore-patterns = ["^\\.#"]

# Ignore files under tests/resources
ignore-paths = ["tests/resources"]
ignore-paths = ["tests/resources", "src/databricks/labs/lakebridge/resources/assessments"]

# List of module names for which member attributes should not be checked (useful
# for modules/projects where namespaces are manipulated during runtime and thus
Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/lakebridge/assessments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
PRODUCT_NAME,
PRODUCT_PATH_PREFIX,
PROFILER_SOURCE_SYSTEM,
PLATFORM_TO_SOURCE_TECHNOLOGY,
PLATFORM_TO_SOURCE_TECHNOLOGY_CFG,
CONNECTOR_REQUIRED,
)

__all__ = [
"PRODUCT_NAME",
"PRODUCT_PATH_PREFIX",
"PROFILER_SOURCE_SYSTEM",
"PLATFORM_TO_SOURCE_TECHNOLOGY",
"PLATFORM_TO_SOURCE_TECHNOLOGY_CFG",
"CONNECTOR_REQUIRED",
]
3 changes: 2 additions & 1 deletion src/databricks/labs/lakebridge/assessments/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
PRODUCT_NAME = "lakebridge"
PRODUCT_PATH_PREFIX = Path.home() / ".databricks" / "labs" / PRODUCT_NAME / "lib"

PLATFORM_TO_SOURCE_TECHNOLOGY = {
PLATFORM_TO_SOURCE_TECHNOLOGY_CFG = {
"synapse": "src/databricks/labs/lakebridge/resources/assessments/synapse/pipeline_config.yml",
}

# TODO: replace this list with PLATFORM_TO_SOURCE_TECHNOLOGY_CFG.keys() once all platforms are supported
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]


# This flag indicates whether a connector is required for the source system when the pipeline is triggered.
# For example in the case of synapse no connector is required and the python scripts
# manage the connection by directly reading the credentials files
Expand Down
88 changes: 88 additions & 0 deletions src/databricks/labs/lakebridge/assessments/profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import logging
from pathlib import Path

from databricks.labs.lakebridge.assessments.pipeline import PipelineClass
from databricks.labs.lakebridge.assessments.profiler_config import PipelineConfig
from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
from databricks.labs.lakebridge.connections.credential_manager import (
create_credential_manager,
)
from databricks.labs.lakebridge.connections.env_getter import EnvGetter
from databricks.labs.lakebridge.assessments import (
PRODUCT_NAME,
PRODUCT_PATH_PREFIX,
PLATFORM_TO_SOURCE_TECHNOLOGY_CFG,
CONNECTOR_REQUIRED,
)

logger = logging.getLogger(__name__)


class Profiler:
    """Run the assessment profiling pipeline for a source platform.

    A profiler is bound to a platform name and, optionally, to a
    ``PipelineConfig``. Use :meth:`create` to build one from the packaged
    configuration registered in ``PLATFORM_TO_SOURCE_TECHNOLOGY_CFG``.
    """

    def __init__(self, platform: str, pipeline_configs: PipelineConfig | None = None):
        """Store the platform name and an optional pipeline configuration."""
        self._platform = platform
        self._pipeline_config = pipeline_configs

    @classmethod
    def create(cls, platform: str) -> "Profiler":
        """Build a profiler using the packaged pipeline config for ``platform``.

        The registry lookup is case-insensitive, matching :meth:`profile`,
        which lowercases the platform before executing. When the platform has
        no registered configuration the profiler is created without one.

        Raises:
            FileNotFoundError: if the registered configuration file does not
                exist under ``PRODUCT_PATH_PREFIX``.
        """
        # Registry keys are lowercase; normalise so "Synapse" and "synapse" agree.
        pipeline_config_path = PLATFORM_TO_SOURCE_TECHNOLOGY_CFG.get(platform.lower())
        pipeline_config = None
        if pipeline_config_path:
            pipeline_config_absolute_path = cls._locate_config(pipeline_config_path)
            pipeline_config = cls.path_modifier(config_file=pipeline_config_absolute_path)
        return cls(platform, pipeline_config)

    @classmethod
    def supported_platforms(cls) -> list[str]:
        """Return the platform names that have a registered pipeline config."""
        return list(PLATFORM_TO_SOURCE_TECHNOLOGY_CFG.keys())

    @staticmethod
    def path_modifier(*, config_file: str | Path, path_prefix: Path = PRODUCT_PATH_PREFIX) -> PipelineConfig:
        """Load a pipeline config and prefix every step's ``extract_source``.

        Args:
            config_file: YAML pipeline configuration to load.
            path_prefix: Prefix prepended to each step's ``extract_source``.

        Returns:
            The loaded configuration, with its steps modified in place.
        """
        # TODO: Make this work in developer mode (`databricks labs install .`).
        # With the default prefix, the latest local changes are not picked up
        # unless the product is installed under ~/.databricks/labs.
        config = PipelineClass.load_config_from_yaml(config_file)
        for step in config.steps:
            step.extract_source = f"{path_prefix}/{step.extract_source}"
        return config

    def profile(
        self,
        *,
        extractor: DatabaseManager | None = None,
        pipeline_config: PipelineConfig | None = None,
    ) -> None:
        """Execute the profiling pipeline for this profiler's platform.

        Args:
            extractor: Database manager to use; when omitted, one is set up
                from stored credentials if the platform requires a connector.
            pipeline_config: Configuration overriding the one held by the
                instance.

        Raises:
            ValueError: if no configuration is passed and the instance has none.
            FileNotFoundError: if the pipeline reports a missing file.
            RuntimeError: if pipeline execution fails for any other reason.
        """
        platform = self._platform.lower()
        # NOTE(review): a reviewer suggested collapsing profile() and _execute()
        # into a single method, since this one only validates the configuration.
        if not pipeline_config:
            if not self._pipeline_config:
                raise ValueError(f"Cannot Proceed without a valid pipeline configuration for {platform}")
            pipeline_config = self._pipeline_config
        self._execute(platform, pipeline_config, extractor)

    @staticmethod
    def _setup_extractor(platform: str) -> DatabaseManager | None:
        """Create a ``DatabaseManager`` for ``platform``, or ``None`` if no connector is required."""
        if not CONNECTOR_REQUIRED[platform]:
            return None
        cred_manager = create_credential_manager(PRODUCT_NAME, EnvGetter())
        connect_config = cred_manager.get_credentials(platform)
        return DatabaseManager(platform, connect_config)

    def _execute(
        self,
        platform: str,
        pipeline_config: PipelineConfig,
        extractor: DatabaseManager | None = None,
    ) -> None:
        """Run the pipeline, logging and re-raising failures with platform context."""
        try:
            if extractor is None:
                extractor = Profiler._setup_extractor(platform)

            result = PipelineClass(pipeline_config, extractor).execute()
            logger.info(f"Profile execution has completed successfully for {platform} for more info check: {result}.")
        except FileNotFoundError as e:
            logger.error(f"Configuration file not found for source {platform}: {e}")
            raise FileNotFoundError(f"Configuration file not found for source {platform}: {e}") from e
        except Exception as e:
            # Any other failure is surfaced to callers as a RuntimeError.
            logger.error(f"Error executing pipeline for source {platform}: {e}")
            raise RuntimeError(f"Pipeline execution failed for source {platform} : {e}") from e

    @staticmethod
    def _locate_config(config_path: str | Path) -> Path:
        """Resolve ``config_path`` under ``PRODUCT_PATH_PREFIX``.

        Raises:
            FileNotFoundError: if the resolved file does not exist.
        """
        config_file = PRODUCT_PATH_PREFIX / config_path
        if not config_file.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_file}")
        return config_file
23 changes: 17 additions & 6 deletions src/databricks/labs/lakebridge/connections/database_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,29 @@ def _connect(self) -> Engine:

class MSSQLConnector(_BaseConnector):
    """Connector that builds an engine for the ``mssql+pyodbc`` driver."""

    def _connect(self) -> Engine:
        """Create an engine from the connector's configuration.

        Reads ``driver``, ``user``, ``password`` and ``server`` (required),
        plus ``port`` (default 1433), ``database`` and ``auth_type``
        (default ``"sql_authentication"``) from ``self.config``.

        Raises:
            NotImplementedError: if ``auth_type`` is ``"spn_authentication"``.
        """
        auth_type = self.config.get('auth_type', 'sql_authentication')
        db_name = self.config.get('database')

        # Parameters forwarded to the driver through the URL query string.
        query_params = {
            "driver": self.config['driver'],
            "loginTimeout": "30",
        }

        if auth_type == "ad_passwd_authentication":
            query_params = {
                **query_params,
                "authentication": "ActiveDirectoryPassword",
            }
        elif auth_type == "spn_authentication":
            raise NotImplementedError("SPN Authentication not implemented yet")

        connection_string = URL.create(
            drivername="mssql+pyodbc",
            username=self.config['user'],
            password=self.config['password'],
            host=self.config['server'],
            port=self.config.get('port', 1433),
            database=db_name,
            query=query_params,
        )
        return create_engine(connection_string)
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from databricks.labs.lakebridge.connections.database_manager import DatabaseManager


def get_sqlpool_reader(
    input_cred: dict,
    db_name: str,
    *,
    endpoint_key: str = 'dedicated_sql_endpoint',
    auth_type: str = 'sql_authentication',
) -> DatabaseManager:
    """Build a ``DatabaseManager`` for a SQL pool database.

    Args:
        input_cred: Credentials mapping; must contain ``driver``,
            ``sql_user``, ``sql_password`` and the key named by
            ``endpoint_key``. ``port`` is optional and defaults to 1433.
        db_name: Database to connect to.
        endpoint_key: Key in ``input_cred`` holding the server endpoint.
        auth_type: Authentication type forwarded in the connection config.

    Returns:
        A ``DatabaseManager`` created for the ``"mssql"`` source.
    """
    # Synapse shares the MSSQL connector, so the source name is "mssql".
    connector_source = "mssql"
    connection_settings = dict(
        driver=input_cred['driver'],
        server=input_cred[endpoint_key],
        database=db_name,
        user=input_cred['sql_user'],
        password=input_cred['sql_password'],
        port=input_cred.get('port', 1433),
        auth_type=auth_type,
    )
    return DatabaseManager(connector_source, connection_settings)
Loading
Loading