Skip to content

Commit

Permalink
feat(primitives): dd
Browse files Browse the repository at this point in the history
  • Loading branch information
vladyoslav committed Oct 27, 2024
1 parent 84016bd commit d7d5597
Show file tree
Hide file tree
Showing 11 changed files with 132 additions and 0 deletions.
1 change: 1 addition & 0 deletions internal/domain/task/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from internal.domain.task.entities import AindTask # noqa: F401
from internal.domain.task.entities import ArTask # noqa: F401
from internal.domain.task.entities import CfdTask # noqa: F401
from internal.domain.task.entities import DdTask # noqa: F401
3 changes: 3 additions & 0 deletions internal/domain/task/entities/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from internal.domain.task.entities.aind import AindTask
from internal.domain.task.entities.ar import ArTask
from internal.domain.task.entities.cfd import CfdTask
from internal.domain.task.entities.dd import DdTask
from internal.domain.task.value_objects import PrimitiveName


Expand Down Expand Up @@ -38,4 +39,6 @@ def match_task_by_primitive_name(primitive_name: PrimitiveName):
return ArTask()
case PrimitiveName.cfd:
return CfdTask()
case PrimitiveName.dd:
return DdTask()
assert_never(primitive_name)
1 change: 1 addition & 0 deletions internal/domain/task/entities/dd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from internal.domain.task.entities.dd.dd_task import DdTask # noqa: F401
48 changes: 48 additions & 0 deletions internal/domain/task/entities/dd/dd_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from desbordante.dd.algorithms import Split
from internal.domain.task.entities.task import Task
from internal.domain.task.value_objects import PrimitiveName, IncorrectAlgorithmName
from internal.domain.task.value_objects.dd import DdTaskConfig, DdTaskResult
from internal.domain.task.value_objects.dd import DdAlgoName, DdModel, DdAlgoResult


class DdTask(Task[Split, DdTaskConfig, DdTaskResult]):
    """
    Task for discovering differential dependencies (DD).

    Supplies the two DD-specific pieces of a task: resolving an algorithm
    name to an algorithm object, and packing the algorithm's output into
    a ``DdTaskResult``.

    Methods:
        - _match_algo_by_name(algo_name) -> Split:
            Resolve an algorithm name to a ``Split`` instance.
        - _collect_result(algo) -> DdTaskResult:
            Wrap the dependencies found by the algorithm into a result.
    """

    def _match_algo_by_name(self, algo_name: str) -> Split:
        """
        Resolve a DD algorithm name to a new algorithm instance.

        Args:
            algo_name: Requested algorithm name; it is compared for
                equality with the members of ``DdAlgoName``.
        Returns:
            Split: A freshly constructed ``Split`` algorithm.
        Raises:
            IncorrectAlgorithmName: If the name is not ``DdAlgoName.Split``.
        """
        # Split is the only DD algorithm known here; everything else is rejected.
        if algo_name == DdAlgoName.Split:
            return Split()
        raise IncorrectAlgorithmName(algo_name, "DD")

    def _collect_result(self, algo: Split) -> DdTaskResult:
        """
        Turn the dependencies held by the algorithm into a task result.

        Args:
            algo: ``Split`` instance whose ``get_dds()`` output is collected.
        Returns:
            DdTaskResult: Result tagged with ``PrimitiveName.dd`` that
            carries one ``DdModel`` per dependency returned by the algorithm.
        """
        found = algo.get_dds()
        models = list(map(DdModel.from_dd, found))
        return DdTaskResult(
            primitive_name=PrimitiveName.dd,
            result=DdAlgoResult(dds=models),
        )
3 changes: 3 additions & 0 deletions internal/domain/task/value_objects/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from internal.domain.task.value_objects.aind import AindTaskConfig, AindTaskResult
from internal.domain.task.value_objects.ar import ArTaskConfig, ArTaskResult
from internal.domain.task.value_objects.cfd import CfdTaskConfig, CfdTaskResult
from internal.domain.task.value_objects.dd import DdTaskConfig, DdTaskResult

from internal.domain.task.value_objects.config import TaskConfig # noqa: F401
from internal.domain.task.value_objects.result import TaskResult # noqa: F401
Expand All @@ -34,6 +35,7 @@
AindTaskConfig,
ArTaskConfig,
CfdTaskConfig,
DdTaskConfig,
],
Field(discriminator="primitive_name"),
]
Expand All @@ -47,6 +49,7 @@
AindTaskResult,
ArTaskResult,
CfdTaskResult,
DdTaskResult,
],
Field(discriminator="primitive_name"),
]
23 changes: 23 additions & 0 deletions internal/domain/task/value_objects/dd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from typing import Literal

from pydantic import BaseModel

from internal.domain.task.value_objects.primitive_name import PrimitiveName
from internal.domain.task.value_objects.dd.algo_config import OneOfDdAlgoConfig
from internal.domain.task.value_objects.dd.algo_result import ( # noqa: F401
DdAlgoResult,
DdModel,
)
from internal.domain.task.value_objects.dd.algo_name import DdAlgoName # noqa: F401


# Common base of the DD task config and result models.
# NOTE: documented with comments on purpose - a class docstring on a pydantic
# model would end up as the `description` of its generated JSON schema.
class BaseDdTaskModel(BaseModel):
    # Tag field: only the literal PrimitiveName.dd is a valid value.
    primitive_name: Literal[PrimitiveName.dd]


# Input of a DD task: the `primitive_name` tag inherited from BaseDdTaskModel
# plus the configuration of the chosen DD algorithm.
class DdTaskConfig(BaseDdTaskModel):
    # One of the supported DD algorithm configs (see OneOfDdAlgoConfig).
    config: OneOfDdAlgoConfig


# Output of a DD task: the `primitive_name` tag inherited from BaseDdTaskModel
# plus the dependencies produced by the algorithm.
class DdTaskResult(BaseDdTaskModel):
    # Algorithm output (a DdAlgoResult holding the list of found DDs).
    result: DdAlgoResult
28 changes: 28 additions & 0 deletions internal/domain/task/value_objects/dd/algo_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Literal, Annotated
from pydantic import Field
from internal.domain.common import OptionalModel
from internal.domain.task.value_objects.dd.algo_name import DdAlgoName
from internal.domain.task.value_objects.dd.algo_descriptions import descriptions


# Base class for the configs of all DD algorithms.
# NOTE(review): presumably OptionalModel makes every field optional except the
# ones named in `__non_optional_fields__` - confirm against
# internal.domain.common.OptionalModel.
class BaseDdConfig(OptionalModel):
    # Names of the fields that are listed as non-optional: only `algo_name`.
    __non_optional_fields__ = {
        "algo_name",
    }


# Configuration of the Split DD algorithm.
# Field descriptions are taken from the shared `descriptions` mapping.
class SplitConfig(BaseDdConfig):
    # Tag field: only the literal DdAlgoName.Split is a valid value.
    algo_name: Literal[DdAlgoName.Split]

    # Both limits are integers constrained to be >= 1 (ge=1).
    num_rows: Annotated[int, Field(ge=1, description=descriptions["num_rows"])]
    num_columns: Annotated[int, Field(ge=1, description=descriptions["num_columns"])]
    # TODO: the difference table is not really a string; `str` is a placeholder
    # until a proper type for it is chosen.
    difference_table: Annotated[
        str, Field(description=descriptions["difference_table"])
    ]


# Type of the `config` field of a DD task, discriminated by `algo_name`.
# SplitConfig is currently the only variant.
OneOfDdAlgoConfig = Annotated[
    SplitConfig,
    Field(discriminator="algo_name"),
]
5 changes: 5 additions & 0 deletions internal/domain/task/value_objects/dd/algo_descriptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Human-readable descriptions of the DD algorithm options, keyed by option name.
descriptions = {
    "num_rows": "Number of rows from the dataset to process",
    "num_columns": "Number of columns from the dataset to process",
    "difference_table": "CSV table with difference limits for each column",
}
5 changes: 5 additions & 0 deletions internal/domain/task/value_objects/dd/algo_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from enum import StrEnum, auto


# Names of the supported DD algorithms.
class DdAlgoName(StrEnum):
    # In a StrEnum, auto() yields the lower-cased member name, i.e. "split".
    Split = auto()
14 changes: 14 additions & 0 deletions internal/domain/task/value_objects/dd/algo_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from pydantic import BaseModel
from desbordante.dd import DD


# Serializable form of a single DD.
# Documented with comments on purpose - a class docstring on a pydantic model
# would end up as the `description` of its generated JSON schema.
class DdModel(BaseModel):
    # Text form of the dependency, i.e. the result of str() on the source DD.
    description: str

    @classmethod
    def from_dd(cls, dd: DD):
        """Build a model from a Desbordante ``DD`` by storing its ``str()`` form."""
        text = str(dd)
        return cls(description=text)


# Result payload of a DD algorithm run.
class DdAlgoResult(BaseModel):
    # One DdModel per discovered dependency.
    dds: list[DdModel]
1 change: 1 addition & 0 deletions internal/domain/task/value_objects/primitive_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class PrimitiveName(StrEnum):
ind = auto()
aind = auto()
cfd = auto()
dd = auto()
# fd_verification = auto()
# mfd_verification = auto()
# statistics = auto()
Expand Down

0 comments on commit d7d5597

Please sign in to comment.