diff --git a/odd_collector/adapters/tableau/adapter.py b/odd_collector/adapters/tableau/adapter.py index bbdea968..7dcc2fbb 100644 --- a/odd_collector/adapters/tableau/adapter.py +++ b/odd_collector/adapters/tableau/adapter.py @@ -1,69 +1,67 @@ -from typing import Dict, List, Type +from typing import Type +from urllib.parse import urlparse -from odd_collector_sdk.domain.adapter import AbstractAdapter -from odd_models.models import DataEntity, DataEntityList -from oddrn_generator import TableauGenerator +from odd_collector_sdk.domain.adapter import BaseAdapter +from odd_models.models import DataEntityList +from oddrn_generator import Generator, TableauGenerator +from odd_collector.adapters.tableau.domain.table import EmbeddedTable from odd_collector.domain.plugin import TableauPlugin -from .client import TableauBaseClient, TableauClient -from .domain.table import EmbeddedTable, Table +from .client import TableauClient +from .logger import logger from .mappers.sheets import map_sheet from .mappers.tables import map_table -class Adapter(AbstractAdapter): +class Adapter(BaseAdapter): + config: TableauPlugin + generator: TableauGenerator + def __init__( - self, config: TableauPlugin, client: Type[TableauBaseClient] = None + self, config: TableauPlugin, client: Type[TableauClient] = TableauClient ) -> None: - client = client or TableauClient + super().__init__(config) self.client = client(config) - self.__oddrn_generator = TableauGenerator( - host_settings=self.client.get_server_host(), sites=config.site - ) - - def get_data_source_oddrn(self) -> str: - return self.__oddrn_generator.get_data_source_oddrn() + def create_generator(self) -> Generator: + site = self.config.site or "default" + host = urlparse(self.config.server).netloc + return TableauGenerator(host_settings=host, sites=site) def get_data_entity_list(self) -> DataEntityList: - sheets = self._get_sheets() - tables = self._get_tables() + sheets = self.client.get_sheets() + tables = self.client.get_tables() - tables_data_entities_by_id: Dict[str, DataEntity] = { - table_id: map_table(self.__oddrn_generator, table) - for table_id, table in tables.items() + embedded_tables: list[EmbeddedTable] = [ + t for t in tables.values() if isinstance(t, EmbeddedTable) + ] + + tbl_entities = { + table.id: map_table(self.generator, table) for table in embedded_tables } - tables_data_entities = tables_data_entities_by_id.values() sheets_data_entities = [] for sheet in sheets: - sheet_tables = [ - tables_data_entities_by_id[table_id] for table_id in sheet.tables_id - ] - data_entity = map_sheet(self.__oddrn_generator, sheet, sheet_tables) - sheets_data_entities.append(data_entity) - - return DataEntityList( - data_source_oddrn=self.get_data_source_oddrn(), - items=[*tables_data_entities, *sheets_data_entities], - ) - - def _get_tables(self) -> Dict[str, Table]: - tables: List[Table] = self.client.get_tables() - tables_by_id: Dict[str, Table] = {table.id: table for table in tables} + sheet_entity = map_sheet(self.generator, sheet) - ids = tables_ids_to_load(tables) - tables_columns = self.client.get_tables_columns(ids) + for table_id in sheet.tables_id: + table = tables.get(table_id) - for table_id, columns in tables_columns.items(): - tables_by_id[table_id].columns = columns + if not table: + logger.warning(f"Table {table_id} not found in tables, skipping it") + continue - return tables_by_id + if table.is_embedded: + oddrn = tbl_entities[table_id].oddrn + else: + oddrn = tables.get(table_id).get_oddrn() - def _get_sheets(self): - return self.client.get_sheets() + sheet_entity.data_consumer.inputs.append(oddrn) + sheets_data_entities.append(sheet_entity) -def tables_ids_to_load(tables: List[Table]): - return [table.id for table in tables if isinstance(table, EmbeddedTable)] + return DataEntityList( + data_source_oddrn=self.get_data_source_oddrn(), + items=[*tbl_entities.values(), *sheets_data_entities], + ) diff --git a/odd_collector/adapters/tableau/client.py b/odd_collector/adapters/tableau/client.py index c0bcde40..cbfc5e0e 100644 --- a/odd_collector/adapters/tableau/client.py +++ b/odd_collector/adapters/tableau/client.py @@ -1,17 +1,15 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Union -from urllib.parse import urlparse +from typing import Any, Union import tableauserverclient as TSC -from funcy import lmap from odd_collector_sdk.errors import DataSourceAuthorizationError, DataSourceError from tableauserverclient import PersonalAccessTokenAuth, TableauAuth +from odd_collector.adapters.tableau.domain.table import Table from odd_collector.domain.plugin import TableauPlugin -from .domain.column import Column +from .domain.database import ConnectionParams, EmbeddedDatabase, ExternalDatabase from .domain.sheet import Sheet -from .domain.table import Table, databases_to_tables +from .logger import logger sheets_query = """ query GetSheets($count: Int, $after: String) { @@ -49,6 +47,7 @@ nodes { id name + isEmbedded connectionType downstreamOwners { name @@ -58,6 +57,13 @@ schema name description + columns { + id + name + remoteType + isNullable + description + } } } pageInfo { @@ -67,18 +73,18 @@ } } """ -tables_columns_query = """ -query GetTablesColumns($ids: [ID], $count: Int, $after: String){ - tablesConnection(filter: {idWithin: $ids}, first: $count, after: $after, orderBy: {field: NAME, direction: ASC}) { + +database_servers_query = """ +query DatabaseServersConnection($count: Int, $after: String) { + databaseServersConnection(first: $count, after: $after, orderBy: {field: NAME, direction: ASC}) { nodes { id - columns { - id - name - remoteType - isNullable - description - } + name + isEmbedded + connectionType + hostName + port + service } pageInfo { hasNextPage @@ -89,64 +95,69 @@ """ -class TableauBaseClient(ABC): - @abstractmethod - def get_server_host(self): - raise NotImplementedError - - @abstractmethod - def get_sheets(self) -> List[Sheet]: - raise NotImplementedError +class TableauClient: + def __init__(self, config: TableauPlugin) -> None: + self.config = config + self.__auth = self._get_auth(config) + self.server = TSC.Server(config.server, use_server_version=True) - @abstractmethod - def get_tables(self) -> List[Table]: - raise NotImplementedError + def get_sheets(self) -> list[Sheet]: + sheets_response = self._query(query=sheets_query, root_key="sheetsConnection") - @abstractmethod - def get_tables_columns(self, tables_ids: List[str]) -> Dict[str, Table]: - raise NotImplementedError + return [Sheet.from_response(response) for response in sheets_response] + def get_databases(self) -> dict[str, Union[EmbeddedDatabase, ExternalDatabase]]: + logger.debug("Getting databases") + databases = self._query(query=databases_query, root_key="databasesConnection") -class TableauClient(TableauBaseClient): - def __init__(self, config: TableauPlugin) -> None: - self.__config = config - self.__auth = self.__get_auth(config) - self.server = TSC.Server(config.server, use_server_version=True) + connection_params = self.get_servers() - def get_server_host(self): - return urlparse(self.__config.server).netloc + result = {} + for db in databases: + if db.get("isEmbedded"): + result[db.get("id")] = EmbeddedDatabase.from_dict(**db) + else: + try: + database = ExternalDatabase( + id=db.get("id"), + name=db.get("name"), + connection_type=db.get("connectionType"), + connection_params=connection_params[db.get("id")], + tables=db.get("tables"), + ) + result[database.id] = database + except Exception as e: + logger.warning(f"Couldn't get database: {db.get('name')} {e}") + continue - def get_sheets(self) -> List[Sheet]: - sheets_response = self.__query(query=sheets_query, root_key="sheetsConnection") + logger.debug(f"Got {len(result)} databases") + return result - return [Sheet.from_response(response) for response in sheets_response] + def get_tables(self) -> dict[str, Table]: + databases = self.get_databases() - def get_tables(self) -> List[Table]: - databases_response = self.__query( - query=databases_query, root_key="databasesConnection" - ) - return databases_to_tables(databases_response) + return { + table.id: table + for database in databases.values() + for table in database.tables + } - def get_tables_columns(self, table_ids: List[str]) -> Dict[str, List[Column]]: - response: List = self.__query( - query=tables_columns_query, - variables={"ids": table_ids}, - root_key="tablesConnection", + def get_servers(self) -> dict[str, ConnectionParams]: + servers = self._query( + query=database_servers_query, root_key="databaseServersConnection" ) - return { - table.get("id"): lmap(Column.from_response, table.get("columns")) - for table in response + server.get("id"): ConnectionParams.from_dict(**server) for server in servers } - def __query( + def _query( self, query: str, root_key: str, variables: object = None, ) -> Any: if variables is None: - variables = {"count": self.__config.pagination_size} + variables = {"count": self.config.pagination_size} with self.server.auth.sign_in(self.__auth): try: @@ -173,18 +184,18 @@ def __query( ) from e @staticmethod - def __get_auth( + def _get_auth( config: TableauPlugin, ) -> Union[PersonalAccessTokenAuth, TableauAuth]: try: if config.token_value and config.token_name: - return TSC.PersonalAccessTokenAuth( + return PersonalAccessTokenAuth( config.token_name, config.token_value.get_secret_value(), config.site, ) else: - return TSC.TableauAuth( + return TableauAuth( config.user, config.password.get_secret_value(), config.site, diff --git a/odd_collector/adapters/tableau/domain/column.py b/odd_collector/adapters/tableau/domain/column.py index 15e02f5c..3cdd2749 100644 --- a/odd_collector/adapters/tableau/domain/column.py +++ b/odd_collector/adapters/tableau/domain/column.py @@ -1,24 +1,21 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass class Column: - def __init__( - self, - id: str, - name: str, - is_nullable: bool, - remote_type: str = None, - description: str = None, - ): - self.id = id - self.name = name - self.remote_type = remote_type - self.is_nullable = is_nullable - self.description = description or None + id: str + name: str + is_nullable: bool + remote_type: Optional[str] = None + description: Optional[str] = None - @staticmethod - def from_response(response): - return Column( - response.get("id"), - response.get("name"), - response.get("isNullable"), - response.get("remoteType"), - response.get("description"), + @classmethod + def from_dict(cls, **data) -> "Column": + return cls( + data["id"], + data["name"], + data["isNullable"], + data.get("remoteType"), + data.get("description"), ) diff --git a/odd_collector/adapters/tableau/domain/connection_params.py b/odd_collector/adapters/tableau/domain/connection_params.py new file mode 100644 index 00000000..21e0b6d7 --- /dev/null +++ b/odd_collector/adapters/tableau/domain/connection_params.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class ConnectionParams: + id: str + name: str + connection_type: str + host: str + port: int + service: Optional[str] + + @classmethod + def from_dict(cls, **kwargs): + return cls( + id=kwargs["id"], + name=kwargs["name"], + connection_type=kwargs["connectionType"], + host=kwargs["hostName"], + port=kwargs["port"], + service=kwargs["service"], + ) diff --git a/odd_collector/adapters/tableau/domain/database.py b/odd_collector/adapters/tableau/domain/database.py new file mode 100644 index 00000000..6a668aa0 --- /dev/null +++ b/odd_collector/adapters/tableau/domain/database.py @@ -0,0 +1,88 @@ +from abc import abstractmethod +from dataclasses import dataclass, field +from typing import ClassVar, Optional, Union + +from funcy import first, get_lax + +from .connection_params import ConnectionParams +from .table import EmbeddedTable, SnowflakeTable + + +@dataclass +class EmbeddedDatabase: + id: str + name: str + connection_type: str + owner: Optional[str] + tables: list = field(default_factory=list) + + @classmethod + def from_dict(cls, **kwargs): + tables = [] + for table in kwargs.get("tables", []): + table = EmbeddedTable.from_dict( + **dict( + id=table["id"], + name=table["name"], + db_id=kwargs["id"], + db_name=kwargs["name"], + connection_type=kwargs["connectionType"], + schema=table["schema"], + columns=table["columns"], + ) + ) + tables.append(table) + + return cls( + id=kwargs["id"], + name=kwargs["name"], + connection_type=kwargs["connectionType"], + owner=get_lax(first(kwargs["downstreamOwners"]), "name"), + tables=tables, + ) + + @property + def is_embedded(self): + return True + + +@dataclass +class ExternalDatabase: + _CONNECTION_TYPE: ClassVar[tuple[str]] + + id: str + name: str + connection_params: ConnectionParams + connection_type: str + tables: list = field(default_factory=list) + + def __new__(cls, *args, **kwargs): + for subclass in cls.__subclasses__(): + if kwargs["connection_type"].lower() in subclass._CONNECTION_TYPE: + return super().__new__(subclass) + + raise NotImplementedError( + f"Database {kwargs['connection_type']} is not supported" + ) + + def __post_init__(self): + self.tables = [self.create_table(**table) for table in self.tables] + + @abstractmethod + def create_table(self, **data) -> Union[EmbeddedTable, SnowflakeTable]: + raise NotImplementedError + + +@dataclass +class SnowflakeDatabase(ExternalDatabase): + _CONNECTION_TYPE: ClassVar[tuple[str]] = ("snowflake",) + + def create_table(self, **data) -> SnowflakeTable: + return SnowflakeTable( + id=data["id"], + host=self.connection_params.host, + database=self.connection_params.name, + name=data["name"], + connection_type="snowflake", + schema=data["schema"], + ) diff --git a/odd_collector/adapters/tableau/domain/table.py b/odd_collector/adapters/tableau/domain/table.py index 87170867..4bcd8186 100644 --- a/odd_collector/adapters/tableau/domain/table.py +++ b/odd_collector/adapters/tableau/domain/table.py @@ -1,92 +1,96 @@ -from typing import Any, List, Optional - -from funcy import lmapcat, lpluck - -from odd_collector.adapters.tableau.domain.column import Column - - -class Table: - def __init__( - self, - id: str, - name: str, - schema: Optional[str], - db_id: str, - db_name: str, - connection_type: str, - columns: List[Column] = None, - owners: List[str] = None, - description: str = None, - ): - self.id = id - self.name = name - self.schema = schema or "unknown_schema" - self.database_name = db_name - self.database_id = db_id - self.connection_type = connection_type - self.columns = columns or [] - self.owners = owners or [] - self.description = description or None - - def get_oddrn(self, oddrn_generator): - oddrn_generator.set_oddrn_paths( - databases=self.database_id, - schemas=self.schema, - tables=self.name, +from dataclasses import dataclass, field +from typing import Optional, Union + +from oddrn_generator import BigQueryStorageGenerator, SnowflakeGenerator + +from .column import Column + + +@dataclass +class EmbeddedTable: + id: str + name: str + db_id: str + db_name: str + connection_type: str + schema: Optional[str] = field(default="unknown_schema") + columns: Optional[list[Column]] = field(default_factory=list) + owners: Optional[list[str]] = field(default_factory=list) + description: Optional[str] = field(default=None) + + @property + def is_embedded(self): + return True + + @classmethod + def from_dict(cls, **kwargs): + return cls( + id=kwargs["id"], + name=kwargs["name"], + db_id=kwargs["db_id"], + db_name=kwargs["db_name"], + connection_type=kwargs["connection_type"], + schema=kwargs["schema"], + columns=[ + Column.from_dict(**response) for response in kwargs.get("columns", []) + ], + owners=kwargs.get("owners"), + description=kwargs.get("description"), ) - return oddrn_generator.get_oddrn_by_path("tables") -class BigqueryTable(Table): - def get_oddrn(self, oddrn_generator): - db_name = self.database_name.lower() - schema = self.schema - name = self.name - return f"//bigquery_storage/cloud/gcp/project/{db_name}/datasets/{schema}/tables/{name}" +@dataclass +class ExternalTable: + id: str + name: str + connection_type: str + @property + def is_embedded(self): + return False -class EmbeddedTable(Table): - pass + @classmethod + def from_dict(cls, **kwargs) -> "ExternalTable": + raise NotImplementedError -def create_table(**kwargs) -> Table: - """Factory Method""" - connection_type = kwargs.get("connection_type") - constructors = {"bigquery": BigqueryTable} - constructor = constructors.get(connection_type, EmbeddedTable) +@dataclass +class SnowflakeTable(ExternalTable): + id: str + host: str + database: str + name: str + schema: str - return constructor(**kwargs) + def get_oddrn(self): + suffix = ".snowflakecomputing.com" + host = self.host.split(suffix)[0].upper() + suffix - -def databases_to_tables(databases_response: List[Any]) -> List[Table]: - return lmapcat(traverse_tables, databases_response) + generator = SnowflakeGenerator( + host_settings=host, + databases=self.database.upper(), + schemas=self.schema.upper(), + tables=self.name.upper(), + ) + return generator.get_oddrn_by_path("databases") -def traverse_tables(database_response) -> List[Table]: - connection_type = database_response.get("connectionType") - db_name = database_response.get("name") - db_id = database_response.get("id") - owners = lpluck("name", database_response.get("downstreamOwners")) +@dataclass +class BigqueryTable(ExternalTable): + database: str + name: str + schema: str - tables = [] + def get_oddrn(self): + db_name = self.database.lower() + schema = self.schema + name = self.name - for table in database_response.get("tables"): - tbl_id = table.get("id") - tbl_name = table.get("name") - tbl_schema = table.get("schema") - description = table.get("description") + return BigQueryStorageGenerator( + google_cloud_settings={"project": db_name}, + datasets=schema, + tables=name, + ).get_oddrn_by_path("tables") - tables.append( - create_table( - id=tbl_id, - name=tbl_name, - schema=tbl_schema, - db_id=db_id, - db_name=db_name, - connection_type=connection_type, - description=description, - owners=owners, - ) - ) - return tables +Table = Union[EmbeddedTable, ExternalTable] diff --git a/odd_collector/adapters/tableau/mappers/columns.py b/odd_collector/adapters/tableau/mappers/columns.py index 3c486caf..0d6b3b8b 100644 --- a/odd_collector/adapters/tableau/mappers/columns.py +++ b/odd_collector/adapters/tableau/mappers/columns.py @@ -26,6 +26,7 @@ def map_column(oddrn_generator: TableauGenerator, column: Column) -> DataSetFiel oddrn=oddrn_generator.get_oddrn_by_path("columns", column_name), name=column_name, metadata=extract_metadata(metadata=metadata), + owner=None, type=DataSetFieldType( type=map_type(column_type), logical_type=column_type, diff --git a/odd_collector/adapters/tableau/mappers/sheets.py b/odd_collector/adapters/tableau/mappers/sheets.py index f6c227d5..745100a7 100644 --- a/odd_collector/adapters/tableau/mappers/sheets.py +++ b/odd_collector/adapters/tableau/mappers/sheets.py @@ -1,18 +1,17 @@ from datetime import datetime from functools import partial -from typing import List, Optional +from typing import Optional import pytz from odd_collector_sdk.errors import MappingDataError from odd_models.models import DataConsumer, DataEntity, DataEntityType -from oddrn_generator import TableauGenerator from ..domain.sheet import Sheet from . import DATA_CONSUMER_EXCLUDED_KEYS, DATA_CONSUMER_SCHEMA, TABLEAU_DATETIME_FORMAT from .metadata import extract_metadata -def __map_date(date: str = None) -> Optional[str]: +def __map_date(date: Optional[str] = None) -> Optional[str]: if not date: return None @@ -30,7 +29,7 @@ def __map_date(date: str = None) -> Optional[str]: ) -def map_sheet(oddrn_generator, sheet: Sheet, tables: List[DataEntity]) -> DataEntity: +def map_sheet(oddrn_generator, sheet: Sheet) -> DataEntity: """ Args: oddrn_generator: Generator @@ -51,15 +50,7 @@ def map_sheet(oddrn_generator, sheet: Sheet, tables: List[DataEntity]) -> DataEn created_at=__map_date(sheet.created), updated_at=__map_date(sheet.updated), type=DataEntityType.DASHBOARD, - data_consumer=DataConsumer(inputs=[de.oddrn for de in tables]), + data_consumer=DataConsumer(inputs=[]), ) except Exception as e: raise MappingDataError(f"Mapping sheet {sheet.name} failed") from e - - -def map_sheets( - oddrn_generator: TableauGenerator, - sheets: List[Sheet], - tables: List[DataEntity], -) -> List[DataEntity]: - return [map_sheet(oddrn_generator, sheet, tables) for sheet in sheets] diff --git a/odd_collector/adapters/tableau/mappers/tables.py b/odd_collector/adapters/tableau/mappers/tables.py index b79bf03e..aa6d8625 100644 --- a/odd_collector/adapters/tableau/mappers/tables.py +++ b/odd_collector/adapters/tableau/mappers/tables.py @@ -1,11 +1,9 @@ -from typing import List - -from funcy import first, partial +from funcy import partial from odd_collector_sdk.errors import MappingDataError from odd_models.models import DataEntity, DataEntityType, DataSet from oddrn_generator import TableauGenerator -from ..domain.table import Table +from ..domain.table import EmbeddedTable from . import DATA_SET_EXCLUDED_KEYS, DATA_SET_SCHEMA from .columns import map_column from .metadata import extract_metadata @@ -15,32 +13,32 @@ ) -def map_table(oddrn_generator: TableauGenerator, table: Table) -> DataEntity: +def map_table(generator: TableauGenerator, table: EmbeddedTable) -> DataEntity: # TODO: Now table model doesn't have metadata field, need to add it metadata = extract_metadata(metadata={}) - # Each database has multiple owners, by odd specification we can attach only 1 owner, take first owner or None - owner = first(table.owners) + # Each database has multiple owners, by odd specification we can attach only 1 owner + # take first owner or None + owner = None + generator.set_oddrn_paths( + databases=table.db_id, + schemas=table.schema or "unknown_schema", + tables=table.id, + ) try: return DataEntity( - oddrn=table.get_oddrn(oddrn_generator), + oddrn=generator.get_oddrn_by_path("tables"), name=table.name, owner=owner, metadata=metadata, description=table.description, type=DataEntityType.TABLE, - dataset=create_dataset(oddrn_generator, table), + dataset=create_dataset(generator, table), ) except Exception as e: raise MappingDataError(f"Mapping table {table.name} failed") from e -def map_tables( - oddrn_generator: TableauGenerator, tables: List[Table] -) -> List[DataEntity]: - return [map_table(oddrn_generator, table) for table in tables] - - -def create_dataset(oddrn_generator, table: Table): +def create_dataset(oddrn_generator, table: EmbeddedTable): parent_oddrn = oddrn_generator.get_oddrn_by_path("tables") columns = [map_column(oddrn_generator, column) for column in table.columns] diff --git a/odd_collector/adapters/tableau/tests/__init__.py b/odd_collector/adapters/tableau/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/odd_collector/adapters/tableau/tests/test_map_column.py b/odd_collector/adapters/tableau/tests/test_map_column.py deleted file mode 100644 index 256372d1..00000000 --- a/odd_collector/adapters/tableau/tests/test_map_column.py +++ /dev/null @@ -1,41 +0,0 @@ -import pytest -from odd_models.models import Type -from oddrn_generator import TableauGenerator - -from odd_collector.adapters.tableau.domain.column import Column -from odd_collector.adapters.tableau.mappers.columns import map_column - - -@pytest.fixture -def generator(): - return TableauGenerator( - host_settings="host", - sites="site", - databases="db", - schemas="schema", - tables="table", - ) - - -def test_map_column(generator): - column = Column( - id="1", - name="Age", - remote_type="I1", - is_nullable=False, - description="some description", - ) - data_entity = map_column(generator, column) - - assert ( - data_entity.oddrn - == "//tableau/host/host/sites/site/databases/db/schemas/schema/tables/table/columns/Age" - ) - - assert data_entity.name == "Age" - assert data_entity.description == "some description" - data_entity_type = data_entity.type - - assert data_entity_type.type == Type.TYPE_INTEGER - assert data_entity_type.logical_type == "I1" - assert data_entity_type.is_nullable == False diff --git a/odd_collector/adapters/tableau/tests/test_map_sheet.py b/odd_collector/adapters/tableau/tests/test_map_sheet.py deleted file mode 100644 index 06a57ef1..00000000 --- a/odd_collector/adapters/tableau/tests/test_map_sheet.py +++ /dev/null @@ -1,87 +0,0 @@ -import datetime -from typing import List - -import pytest -from funcy import first -from odd_models.models import DataEntity, DataEntityType -from oddrn_generator import TableauGenerator - -from odd_collector.adapters.tableau.domain.column import Column -from odd_collector.adapters.tableau.domain.sheet import Sheet -from odd_collector.adapters.tableau.domain.table import Table -from odd_collector.adapters.tableau.mappers.sheets import map_sheet -from odd_collector.adapters.tableau.mappers.tables import map_table - - -@pytest.fixture -def generator(): - return TableauGenerator(host_settings="host", sites="site") - - -@pytest.fixture -def tables() -> List[DataEntity]: - table = Table( - id="table-id-1", - name="table-name", - schema="schema", - db_id="db-id-1", - db_name="db-name", - connection_type="textscan", - columns=[ - Column( - id="1", - name="Age", - remote_type="I1", - is_nullable=False, - description="some description", - ), - Column( - id="1", - name="Age", - remote_type="I1", - is_nullable=False, - description="some description", - ), - ], - owners=["user1", "user2"], - description="table description", - ) - - generator = TableauGenerator(host_settings="host", sites="site") - - return [map_table(generator, table)] - - -def test_map_sheet(generator, tables: List[DataEntity]): - sheet = Sheet( - id="id", - name="name", - workbook="workbook", - owner="pmakarichev", - created="2022-08-09T08:12:45Z", - updated="2022-08-09T08:12:45Z", - table_ids=["table-id-1"], - ) - - data_entity = map_sheet(generator, sheet, tables) - assert ( - data_entity.oddrn - == "//tableau/host/host/sites/site/workbooks/workbook/sheets/name" - ) - assert data_entity.name == "name" - assert data_entity.owner == "pmakarichev" - assert data_entity.created_at == datetime.datetime( - 2022, 8, 9, 8, 12, 45, tzinfo=datetime.timezone.utc - ) - assert data_entity.updated_at == datetime.datetime( - 2022, 8, 9, 8, 12, 45, tzinfo=datetime.timezone.utc - ) - assert data_entity.type == DataEntityType.DASHBOARD - assert data_entity.data_consumer is not None - - data_consumer = data_entity.data_consumer - assert len(data_consumer.inputs) == 1 - assert ( - first(data_consumer.inputs) - == "//tableau/host/host/sites/site/databases/db-id-1/schemas/schema/tables/table-name" - ) diff --git a/odd_collector/adapters/tableau/tests/test_map_table.py b/odd_collector/adapters/tableau/tests/test_map_table.py deleted file mode 100644 index 1cf9b872..00000000 --- a/odd_collector/adapters/tableau/tests/test_map_table.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest -from odd_models.models import DataEntityType -from oddrn_generator import TableauGenerator - -from odd_collector.adapters.tableau.domain.column import Column -from odd_collector.adapters.tableau.domain.table import Table -from odd_collector.adapters.tableau.mappers.tables import map_table - - -@pytest.fixture -def generator(): - return TableauGenerator(host_settings="host", sites="site") - - -def test_table_to_data_entity(generator): - table = Table( - id="table-id-1", - name="table-name", - schema="schema", - db_id="db-id-1", - db_name="db-name", - connection_type="textscan", - columns=[ - Column( - id="1", - name="Age", - remote_type="I1", - is_nullable=False, - description="some description", - ), - Column( - id="2", - name="Height", - remote_type="I1", - is_nullable=True, - description="some description", - ), - ], - owners=["user1", "user2"], - description="table description", - ) - - data_entity = map_table(generator, table) - - assert ( - data_entity.oddrn - == "//tableau/host/host/sites/site/databases/db-id-1/schemas/schema/tables/table-name" - ) - assert data_entity.name == "table-name" - assert data_entity.owner == "user1" - assert data_entity.description == "table description" - assert data_entity.type == DataEntityType.TABLE - - assert data_entity.dataset is not None - - dataset = data_entity.dataset - assert ( - dataset.parent_oddrn - == "//tableau/host/host/sites/site/databases/db-id-1/schemas/schema/tables/table-name" - ) - assert len(data_entity.dataset.field_list) == 2 - - age_field = data_entity.dataset.field_list[0] - assert age_field.name == "Age" - assert ( - age_field.oddrn - == "//tableau/host/host/sites/site/databases/db-id-1/schemas/schema/tables/table-name/columns/Age" - ) diff --git a/odd_collector/adapters/tableau/tests/test_parse_column.py b/odd_collector/adapters/tableau/tests/test_parse_column.py deleted file mode 100644 index 3e6ee153..00000000 --- a/odd_collector/adapters/tableau/tests/test_parse_column.py +++ /dev/null @@ -1,35 +0,0 @@ -from odd_collector.adapters.tableau.domain.column import Column - - -def test_column_from_response(): - column_response = { - "id": "77b5f158-1c74-7cc7-f9f0-89b9044581ee", - "isNullable": True, - "name": "Category", - "remoteType": "WSTR", - "description": "PII Column", - } - - column = Column.from_response(column_response) - - assert column.name == "Category" - assert column.is_nullable == True - assert column.remote_type == "WSTR" - assert column.id == "77b5f158-1c74-7cc7-f9f0-89b9044581ee" - assert column.description == "PII Column" - - column_response = { - "id": "77b5f158-1c74-7cc7-f9f0-89b9044581ee", - "isNullable": True, - "name": "Category", - "remoteType": "WSTR", - "description": "", - } - - column = Column.from_response(column_response) - - assert column.name == "Category" - assert column.is_nullable == True - assert column.remote_type == "WSTR" - assert column.id == "77b5f158-1c74-7cc7-f9f0-89b9044581ee" - assert column.description is None diff --git a/odd_collector/adapters/tableau/tests/test_parse_sheets.py b/odd_collector/adapters/tableau/tests/test_parse_sheets.py deleted file mode 100644 index ad317c81..00000000 --- a/odd_collector/adapters/tableau/tests/test_parse_sheets.py +++ /dev/null @@ -1,44 +0,0 @@ -from odd_collector.adapters.tableau.domain.sheet import Sheet - - -def test_sheet_from_response(): - response = { - "id": "ff156326-811f-fd5e-0a36-c8c02f69a4ad", - "name": "S&P Forward Returns", - "createdAt": "2022-08-09T08:12:45Z", - "updatedAt": "2022-08-09T08:12:45Z", - "upstreamFields": [ - { - "id": "40fb92c0-6fe9-9590-8ed2-c290933cc949", - "name": "Decade", - "upstreamTables": [{"id": "f275901d-3d2c-aa1d-3ce6-266877fea80d"}], - }, - { - "id": "50da20ee-33a4-d048-a4fd-dd144855dafe", - "name": "Value", - "upstreamTables": [{"id": "f275901d-3d2c-aa1d-3ce6-266877fea80d"}], - }, - { - "id": "ab8d4f55-8215-d607-7087-8b8184366a72", - "name": "Date", - "upstreamTables": [{"id": "f275901d-3d2c-aa1d-3ce6-266877fea80d"}], - }, - { - "id": "ec4f6949-94fd-3c8f-6f80-b6e0940df045", - "name": "Metric", - "upstreamTables": [{"id": "f275901d-3d2c-aa1d-3ce6-266877fea80d"}], - }, - ], - "workbook": {"name": "Regional", "owner": {"name": "pmakarichev"}}, - } - - sheet = Sheet.from_response(response) - - assert sheet.name == "S&P Forward Returns" - assert sheet.id == "ff156326-811f-fd5e-0a36-c8c02f69a4ad" - assert len(sheet.tables_id) == 1 - assert sheet.tables_id[0] == "f275901d-3d2c-aa1d-3ce6-266877fea80d" - assert sheet.workbook == "Regional" - assert sheet.owner == "pmakarichev" - assert sheet.created == "2022-08-09T08:12:45Z" - assert sheet.updated == "2022-08-09T08:12:45Z" diff --git a/odd_collector/adapters/tableau/tests/test_parse_tables.py b/odd_collector/adapters/tableau/tests/test_parse_tables.py deleted file mode 100644 index da7aa124..00000000 --- a/odd_collector/adapters/tableau/tests/test_parse_tables.py +++ /dev/null @@ -1,112 +0,0 @@ -from odd_collector.adapters.tableau.domain.table import ( - BigqueryTable, - EmbeddedTable, - databases_to_tables, - traverse_tables, -) - - -def test_databases_to_tables(): - databases_response = [ - { - "connectionType": "textscan", - "id": "02c3936a-8b08-032b-912b-27b83240a123", - "name": "orders_south_2015.csv", - "downstreamOwners": [{"name": "pmack"}], - "tables": [ - { - "id": "ef5fc198-8311-5285-d525-3e78b8311c77", - "name": "orders_south_2015.csv", - "schema": "", - } - ], - }, - { - "connectionType": "excel-direct", - "id": "0f1558d0-c481-7b62-c161-f2e620ff67f3", - "name": "return reasons_new.xlsx", - "downstreamOwners": [{"name": "pmack"}], - "tables": [ - { - "id": "7a9a6203-7777-0334-9a8e-f995fc1ceb34", - "name": "returns_new", - "schema": "", - } - ], - }, - ] - - tables = databases_to_tables(databases_response) - assert len(tables) == 2 - assert tables[0].id == "ef5fc198-8311-5285-d525-3e78b8311c77" - assert tables[1].id == "7a9a6203-7777-0334-9a8e-f995fc1ceb34" - - -def test_traverse_tables(): - table_response = { - "connectionType": "bigquery", - "id": "15995808-6508-dded-fb32-e7905a6d61d1", - "name": "publicdata", - "downstreamOwners": [{"name": "pmakarichev"}], - "tables": [ - { - "id": "0c2df5d9-85e6-5682-69be-20b54fbd39d9", - "name": "github_nested", - "schema": "samples", - "description": "", - }, - { - "id": "29e00772-c836-b5b1-e726-afa64dc72251", - "name": "natality", - "schema": "samples", - "description": "dataset", - }, - ], - } - - tables = traverse_tables(table_response) - - assert len(tables) == 2 - table = tables[0] - assert table.id == "0c2df5d9-85e6-5682-69be-20b54fbd39d9" - assert table.name == "github_nested" - assert table.connection_type == "bigquery" - assert table.database_name == "publicdata" - assert table.database_id == "15995808-6508-dded-fb32-e7905a6d61d1" - assert table.schema == "samples" - assert table.description is None - assert table.owners == ["pmakarichev"] - assert isinstance(table, BigqueryTable) - - table = tables[1] - assert table.id == "29e00772-c836-b5b1-e726-afa64dc72251" - assert table.name == "natality" - assert table.connection_type == "bigquery" - assert table.database_name == "publicdata" - assert table.database_id == "15995808-6508-dded-fb32-e7905a6d61d1" - assert table.schema == "samples" - assert table.description == "dataset" - assert table.owners == ["pmakarichev"] - assert isinstance(table, BigqueryTable) - - -def test_parse_bigquery_table(): - response = { - "connectionType": "textscan", - "id": "02c3936a-8b08-032b-912b-27b83240a123", - "name": "orders_south_2015.csv", - "downstreamOwners": [{"name": "pmack"}], - "tables": [ - { - "id": "ef5fc198-8311-5285-d525-3e78b8311c77", - "name": "orders_south_2015.csv", - "schema": "", - } - ], - } - - tables = traverse_tables(response) - - assert len(tables) == 1 - table = tables[0] - assert isinstance(table, EmbeddedTable) diff --git a/odd_collector/adapters/tableau/tests/test_table_model.py b/odd_collector/adapters/tableau/tests/test_table_model.py deleted file mode 100644 index 8d5f5f93..00000000 --- a/odd_collector/adapters/tableau/tests/test_table_model.py +++ /dev/null @@ -1,47 +0,0 @@ -import pytest -from oddrn_generator import TableauGenerator - -from odd_collector.adapters.tableau.domain.table import BigqueryTable, EmbeddedTable - - -@pytest.fixture -def generator(): - return TableauGenerator(host_settings="host", sites="site") - - -def test_emdedded_table_oddrn_generation(generator): - table = EmbeddedTable( - id="table-id-1", - name="table-name", - schema="schema", - db_id="db-id-1", - db_name="db-name", - connection_type="textscan", - columns=[], - owners=["user1", "user2"], - description="table description", - ) - - assert ( - table.get_oddrn(generator) - == "//tableau/host/host/sites/site/databases/db-id-1/schemas/schema/tables/table-name" - ) - - -def test_biqquery_table_oddrn_generation(generator): - table = BigqueryTable( - id="table-id-1", - name="table-name", - schema="schema", - db_id="db-id-1", - db_name="db-name", - connection_type="tableau", - columns=[], - owners=["user1", "user2"], - description="table description", - ) - - assert ( - table.get_oddrn(generator) - == "//bigquery_storage/cloud/gcp/project/db-name/datasets/schema/tables/table-name" - ) diff --git a/odd_collector/adapters/tableau/tests/test_tableau_adapter.py b/odd_collector/adapters/tableau/tests/test_tableau_adapter.py deleted file mode 100644 index afddb626..00000000 --- a/odd_collector/adapters/tableau/tests/test_tableau_adapter.py +++ /dev/null @@ -1,138 +0,0 @@ -from typing import Dict, List - -import pytest -from odd_models.models import DataEntity, DataEntityType -from pydantic import SecretStr - -from odd_collector.adapters.tableau.adapter import Adapter, tables_ids_to_load -from odd_collector.adapters.tableau.client import TableauBaseClient -from odd_collector.adapters.tableau.domain.column import Column -from odd_collector.adapters.tableau.domain.sheet import Sheet -from odd_collector.adapters.tableau.domain.table import ( - BigqueryTable, - EmbeddedTable, - Table, -) -from odd_collector.domain.plugin import TableauPlugin - - -class TableauTestClient(TableauBaseClient): - def get_tables_columns(self, tables_ids: List[str]) -> Dict[str, List[Column]]: - return { - "table-id-1": [ - Column( - id="1", - name="Age", - remote_type="I1", - is_nullable=False, - description="some description", - ), - Column( - id="1", - name="Height", - remote_type="I1", - is_nullable=True, - description="some description", - ), - ] - } - - def __init__(self, config: TableauPlugin): - self.config = config - - def get_tables(self) -> List[Table]: - return [ - EmbeddedTable( - id="table-id-1", - name="table-name", - schema="schema", - db_id="db-id-1", - db_name="db-name", - connection_type="textscan", - columns=[], - owners=["user1", "user2"], - description="table description", - ) - ] - - def get_sheets(self) -> List[Sheet]: - return [ - Sheet( - id="id", - name="name", - workbook="workbook", - owner="pmakarichev", - created="2022-08-09T08:12:45Z", - updated="2022-08-09T08:12:45Z", - table_ids=["table-id-1"], - ) - ] - - def get_server_host(self): - return "host" - - -@pytest.fixture -def client(): - return TableauTestClient - - -@pytest.fixture -def config() -> TableauPlugin: - return TableauPlugin( - name="tableau", - description="teableau_adapter", - namespace="odd", - type="tableau", - server="server", - site="site", - user="user", - password=SecretStr("password"), - ) - - -def test_adapter(config, client): - adapter = Adapter(config, client) - - data_entity_list = adapter.get_data_entity_list() - assert len(data_entity_list.items) == 2 - - table_elements = [ - de for de in data_entity_list.items if de.type == DataEntityType.TABLE - ] - - assert len(table_elements) == 1 - table: DataEntity = table_elements[0] - assert len(table.dataset.field_list) == 2 - - -def test_tables_ids_to_load(): - tables = [ - EmbeddedTable( - id="table-id-1", - name="table-name", - schema="schema", - db_id="db-id-1", - db_name="db-name", - connection_type="textscan", - columns=[], - owners=["user1", "user2"], - description="table description", - ), - BigqueryTable( - id="table-id-2", - name="bigquery-table", - schema="schema", - db_id="db-id-2", - db_name="db-name-2", - connection_type="bigquery", - columns=[], - owners=["user1", "user2"], - description="table description", - ), - ] - - ids = tables_ids_to_load(tables) - - assert len(ids) == 1 - assert ids[0] == "table-id-1" diff --git a/odd_collector/domain/plugin.py b/odd_collector/domain/plugin.py index 81e49eb3..af4349d5 100644 --- a/odd_collector/domain/plugin.py +++ b/odd_collector/domain/plugin.py @@ -170,7 +170,7 @@ class Neo4jPlugin(DatabasePlugin): class TableauPlugin(BasePlugin): type: Literal["tableau"] server: str - site: str + site: Optional[str] user: Optional[str] password: Optional[SecretStr] token_name: Optional[str]