Skip to content

Commit cb194dc

Browse files
committed
refactor: move parse_lakebase_config to lakebasechecksstorageconfig
1 parent 2862b22 commit cb194dc

File tree

2 files changed

+66
-80
lines changed

2 files changed

+66
-80
lines changed

src/databricks/labs/dqx/checks_storage.py

Lines changed: 2 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
from databricks.labs.dqx.telemetry import telemetry_logger
5050
from databricks.labs.dqx.utils import TABLE_PATTERN
5151
from databricks.labs.dqx.checks_serializer import FILE_SERIALIZERS
52-
from urllib.parse import urlparse, unquote
5352

5453

5554
logger = logging.getLogger(__name__)
@@ -441,74 +440,6 @@ def save(self, checks: list[dict], config: InstallationChecksStorageConfig) -> N
441440
handler, config = self._get_storage_handler_and_config(config)
442441
return handler.save(checks, config)
443442

444-
def _parse_lakebase_config(
445-
self, config: InstallationChecksStorageConfig
446-
) -> tuple[str | None, str | None, str | None, str | None]:
447-
"""
448-
Parse PostgreSQL connection string to extract connection parameters.
449-
450-
Expected format: postgresql://user:password@instance_name:port/database?params
451-
Examples:
452-
User: postgresql://user@example.com:${PGPASSWORD}@instance-1234.database.azuredatabricks.net:5432/databricks_postgres?sslmode=require
453-
Service Principal: postgresql://1234567890:${PGPASSWORD}@instance-1234.database.azuredatabricks.net:5432/databricks_postgres?sslmode=require
454-
455-
Args:
456-
config: Installation checks storage configuration containing the location URL
457-
458-
Returns:
459-
Tuple of (user, instance_name, port, database) - user and port are None when absent from the URL; a missing hostname or database raises ValueError instead
460-
461-
Raises:
462-
ValueError: If the URL format is invalid or required components are missing
463-
"""
464-
if not config.location:
465-
raise ValueError("Location field is empty or None - cannot parse Lakebase configuration")
466-
467-
try:
468-
parsed = urlparse(config.location)
469-
except Exception as e:
470-
raise ValueError(f"Failed to parse URL '{config.location}': {e}") from e
471-
472-
if parsed.scheme != "postgresql":
473-
raise ValueError(
474-
f"Invalid URL scheme '{parsed.scheme}'. Expected 'postgresql' for Lakebase connections. "
475-
f"URL: {config.location}"
476-
)
477-
478-
try:
479-
user = unquote(parsed.username) if parsed.username else None
480-
except Exception as e:
481-
raise ValueError(f"Failed to decode username from URL: {e}") from e
482-
483-
instance_name = parsed.hostname
484-
if not instance_name:
485-
raise ValueError(f"Missing hostname in URL: {config.location}")
486-
487-
port = None
488-
if parsed.port:
489-
try:
490-
port = str(parsed.port)
491-
except (ValueError, TypeError) as e:
492-
raise ValueError(f"Invalid port '{parsed.port}' in URL: {e}") from e
493-
494-
database = None
495-
if parsed.path:
496-
try:
497-
database = parsed.path.lstrip("/")
498-
if not database:
499-
raise ValueError("Database name is missing")
500-
except Exception as e:
501-
raise ValueError(f"Failed to extract database name from connection string '{parsed.path}': {e}") from e
502-
503-
if not database:
504-
raise ValueError(f"Missing required database name in connection string: {config.location}")
505-
506-
logger.debug(
507-
f"Parsed Lakebase config - User: {user}, Instance name: {instance_name}, Port: {port}, Database: {database}"
508-
)
509-
510-
return user, instance_name, port, database
511-
512443
def _get_storage_handler_and_config(
513444
self, config: InstallationChecksStorageConfig
514445
) -> tuple[ChecksStorageHandler, InstallationChecksStorageConfig]:
@@ -524,17 +455,16 @@ def _get_storage_handler_and_config(
524455

525456
config.location = run_config.checks_location
526457

527-
528458
if TABLE_PATTERN.match(config.location) and not config.location.lower().endswith(
529459
tuple(FILE_SERIALIZERS.keys())
530460
):
531461
return self.table_handler, config
532462

533463
if config.location.startswith("/Volumes/"):
534464
return self.volume_handler, config
535-
465+
536466
if config.location.startswith("postgresql://"):
537-
user, instance_name, port, database = self._parse_lakebase_config(config)
467+
user, instance_name, port, database = config._parse_lakebase_config(config.location)
538468
config.user = user
539469
config.instance_name = instance_name
540470
config.port = port

src/databricks/labs/dqx/config.py

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import abc
22
from datetime import datetime, timezone
33
from dataclasses import dataclass, field
4+
from urllib.parse import urlparse, unquote
45

56
__all__ = [
67
"WorkspaceConfig",
@@ -213,10 +214,72 @@ class LakebaseChecksStorageConfig(BaseChecksStorageConfig):
213214
schema: str = "config"
214215
table: str = "checks"
215216
port: str = "5432"
216-
user: str | None = None
217+
user: str
217218
run_config_name: str = "default"
218219
mode: str = "overwrite"
219220

221+
def _parse_lakebase_config(self, location: str) -> tuple[str | None, str | None, str | None, str | None]:
222+
"""
223+
Parse PostgreSQL connection string to extract connection parameters.
224+
225+
Expected format: postgresql://user:password@instance_name:port/database?params
226+
Examples:
227+
User: postgresql://user@example.com:${PGPASSWORD}@instance-1234.database.azuredatabricks.net:5432/databricks_postgres?sslmode=require
228+
Service Principal: postgresql://1234567890:${PGPASSWORD}@instance-1234.database.azuredatabricks.net:5432/databricks_postgres?sslmode=require
229+
230+
Args:
231+
location: PostgreSQL connection string (URL) to parse
232+
233+
Returns:
234+
Tuple of (user, instance_name, port, database) - user and port are None when absent from the URL; a missing hostname or database raises ValueError instead
235+
236+
Raises:
237+
ValueError: If the URL format is invalid or required components are missing
238+
"""
239+
if not location:
240+
raise ValueError("Location field is empty or None - cannot parse Lakebase configuration")
241+
242+
try:
243+
parsed = urlparse(location)
244+
except Exception as e:
245+
raise ValueError(f"Failed to parse URL '{location}': {e}") from e
246+
247+
if parsed.scheme != "postgresql":
248+
raise ValueError(
249+
f"Invalid URL scheme '{parsed.scheme}'. Expected 'postgresql' for Lakebase connections. "
250+
f"URL: {location}"
251+
)
252+
253+
try:
254+
user = unquote(parsed.username) if parsed.username else None
255+
except Exception as e:
256+
raise ValueError(f"Failed to decode username from URL: {e}") from e
257+
258+
instance_name = parsed.hostname
259+
if not instance_name:
260+
raise ValueError(f"Missing hostname in URL: {location}")
261+
262+
port = None
263+
if parsed.port:
264+
try:
265+
port = str(parsed.port)
266+
except (ValueError, TypeError) as e:
267+
raise ValueError(f"Invalid port '{parsed.port}' in URL: {e}") from e
268+
269+
database = None
270+
if parsed.path:
271+
try:
272+
database = parsed.path.lstrip("/")
273+
if not database:
274+
raise ValueError("Database name is missing")
275+
except Exception as e:
276+
raise ValueError(f"Failed to extract database name from connection string '{parsed.path}': {e}") from e
277+
278+
if not database:
279+
raise ValueError(f"Missing required database name in connection string: {location}")
280+
281+
return user, instance_name, port, database
282+
220283
def __post_init__(self):
221284
if not self.instance_name:
222285
raise ValueError("The instance name ('instance_name' field) must not be empty or None.")
@@ -265,10 +328,3 @@ class InstallationChecksStorageConfig(
265328
product_name: str = "dqx"
266329
assume_user: bool = True
267330
install_folder: str | None = None
268-
instance_name: str | None = None
269-
database: str = "dqx"
270-
schema: str = "config"
271-
table: str = "checks"
272-
port: str = "5432"
273-
user: str | None = None
274-
mode: str = "overwrite"

0 commit comments

Comments
 (0)