-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
60dacbe
commit cd8095f
Showing
20 changed files
with
324 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from internal.domain.task.entities import FdTask # noqa: F401 | ||
from internal.domain.task.entities import AfdTask # noqa: F401 | ||
from internal.domain.task.entities import AcTask # noqa: F401 | ||
from internal.domain.task.entities import IndTask # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from internal.domain.task.entities.aind.aind_task import AindTask # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
from desbordante.ind import IndAlgorithm | ||
from desbordante.aind.algorithms import Mind, Spider | ||
from internal.domain.task.entities.task import Task | ||
from internal.domain.task.value_objects import PrimitiveName, IncorrectAlgorithmName | ||
from internal.domain.task.value_objects.aind import ( | ||
AindAlgoName, | ||
AindTaskConfig, | ||
AindTaskResult, | ||
) | ||
from internal.domain.task.value_objects.aind import AindAlgoResult, AindModel | ||
|
||
|
||
class AindTask(Task[IndAlgorithm, AindTaskConfig, AindTaskResult]):
    """
    Task that runs an approximate inclusion dependency (AIND) algorithm.

    Provides the two ``Task`` hooks defined below: one picks the algorithm
    object for a given name, the other wraps the algorithm output in an
    ``AindTaskResult``.
    """

    def _collect_result(self, algo: IndAlgorithm) -> AindTaskResult:
        """
        Wrap the dependencies reported by ``algo`` in a task result.

        Args:
            algo (IndAlgorithm): Algorithm whose ``get_inds()`` output is read.
        Returns:
            AindTaskResult: Result tagged with ``PrimitiveName.aind``.
        """
        models = [AindModel.from_ind(found) for found in algo.get_inds()]
        return AindTaskResult(
            primitive_name=PrimitiveName.aind,
            result=AindAlgoResult(inds=models),
        )

    def _match_algo_by_name(self, algo_name: str) -> IndAlgorithm:
        """
        Create the AIND algorithm object that corresponds to ``algo_name``.

        Args:
            algo_name (str): Expected to equal one of the ``AindAlgoName`` members.
        Returns:
            IndAlgorithm: A new ``Mind`` or ``Spider`` instance.
        Raises:
            IncorrectAlgorithmName: If ``algo_name`` equals neither member.
        """
        if algo_name == AindAlgoName.Mind:
            return Mind()
        if algo_name == AindAlgoName.Spider:
            return Spider()
        raise IncorrectAlgorithmName(algo_name, "AIND")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from internal.domain.task.entities.ind.ind_task import IndTask # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from desbordante.ind import IndAlgorithm | ||
from desbordante.ind.algorithms import Faida, Mind, Spider | ||
from internal.domain.task.entities.task import Task | ||
from internal.domain.task.value_objects import PrimitiveName, IncorrectAlgorithmName | ||
from internal.domain.task.value_objects.ind import ( | ||
IndAlgoName, | ||
IndTaskConfig, | ||
IndTaskResult, | ||
) | ||
from internal.domain.task.value_objects.ind import IndAlgoResult, IndModel | ||
|
||
|
||
class IndTask(Task[IndAlgorithm, IndTaskConfig, IndTaskResult]):
    """
    Task that runs an inclusion dependency (IND) algorithm.

    Provides the two ``Task`` hooks defined below: one picks the algorithm
    object for a given name, the other wraps the algorithm output in an
    ``IndTaskResult``.
    """

    def _collect_result(self, algo: IndAlgorithm) -> IndTaskResult:
        """
        Wrap the dependencies reported by ``algo`` in a task result.

        Args:
            algo (IndAlgorithm): Algorithm whose ``get_inds()`` output is read.
        Returns:
            IndTaskResult: Result tagged with ``PrimitiveName.ind``.
        """
        models = [IndModel.from_ind(found) for found in algo.get_inds()]
        return IndTaskResult(
            primitive_name=PrimitiveName.ind,
            result=IndAlgoResult(inds=models),
        )

    def _match_algo_by_name(self, algo_name: str) -> IndAlgorithm:
        """
        Create the IND algorithm object that corresponds to ``algo_name``.

        Args:
            algo_name (str): Expected to equal one of the ``IndAlgoName`` members.
        Returns:
            IndAlgorithm: A new ``Faida``, ``Mind`` or ``Spider`` instance.
        Raises:
            IncorrectAlgorithmName: If ``algo_name`` equals none of the members.
        """
        if algo_name == IndAlgoName.Faida:
            return Faida()
        if algo_name == IndAlgoName.Mind:
            return Mind()
        if algo_name == IndAlgoName.Spider:
            return Spider()
        raise IncorrectAlgorithmName(algo_name, "IND")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from typing import Literal | ||
|
||
from pydantic import BaseModel | ||
|
||
from internal.domain.task.value_objects.primitive_name import PrimitiveName | ||
from internal.domain.task.value_objects.aind.algo_config import OneOfAindAlgoConfig | ||
from internal.domain.task.value_objects.aind.algo_result import ( # noqa: F401 | ||
AindAlgoResult, | ||
AindModel, | ||
) | ||
from internal.domain.task.value_objects.aind.algo_name import AindAlgoName # noqa: F401 | ||
|
||
|
||
# Common base for the AIND task models: pins ``primitive_name`` to the single
# literal value ``PrimitiveName.aind``.
class BaseAindTaskModel(BaseModel):
    primitive_name: Literal[PrimitiveName.aind]
|
||
|
||
# Input model for an AIND task: the primitive tag from the base class plus one
# of the algorithm configurations covered by ``OneOfAindAlgoConfig``.
class AindTaskConfig(BaseAindTaskModel):
    config: OneOfAindAlgoConfig
|
||
|
||
# Output model for an AIND task: the primitive tag from the base class plus the
# algorithm result payload.
class AindTaskResult(BaseAindTaskModel):
    result: AindAlgoResult
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from typing import Literal, Annotated | ||
from pydantic import Field | ||
from internal.domain.common import OptionalModel | ||
from internal.domain.task.value_objects.aind.algo_name import AindAlgoName | ||
from internal.domain.task.value_objects.aind.algo_descriptions import descriptions | ||
|
||
|
||
# Base for every AIND algorithm configuration.
# NOTE(review): presumably OptionalModel makes all fields optional except the
# ones named in ``__non_optional_fields__`` - confirm in internal.domain.common.
class BaseAindConfig(OptionalModel):
    __non_optional_fields__ = {"algo_name"}
|
||
|
||
# Configuration selected when ``algo_name`` is ``AindAlgoName.Mind``.
class MindConfig(BaseAindConfig):
    # Discriminator value for the config union.
    algo_name: Literal[AindAlgoName.Mind]

    # Strictly positive integer.
    max_arity: Annotated[
        int,
        Field(gt=0, description=descriptions["max_arity"]),
    ]
    # Float in the closed interval [0, 1].
    error: Annotated[
        float,
        Field(ge=0, le=1.0, description=descriptions["error"]),
    ]
|
||
|
||
# Configuration selected when ``algo_name`` is ``AindAlgoName.Spider``.
class SpiderConfig(BaseAindConfig):
    # Discriminator value for the config union.
    algo_name: Literal[AindAlgoName.Spider]

    # Float in the closed interval [0, 1].
    error: Annotated[
        float,
        Field(ge=0, le=1.0, description=descriptions["error"]),
    ]
    is_null_equal_null: Annotated[
        bool,
        Field(description=descriptions["is_null_equal_null"]),
    ]
    # Non-negative integer.
    threads: Annotated[
        int,
        Field(ge=0, description=descriptions["threads"]),
    ]
    # Strictly positive integer.
    mem_limit: Annotated[
        int,
        Field(gt=0, description=descriptions["mem_limit"]),
    ]
|
||
|
||
# Union of the AIND configs, discriminated by the value of ``algo_name``.
_AindConfigDiscriminator = Field(discriminator="algo_name")
OneOfAindAlgoConfig = Annotated[MindConfig | SpiderConfig, _AindConfigDiscriminator]
11 changes: 11 additions & 0 deletions
11
internal/domain/task/value_objects/aind/algo_descriptions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Help texts keyed by configuration field name.
descriptions = {
    "max_arity": "Maximum arity of the inclusion dependency (IND).",
    "sample_size": "Size of table sample for IND profiling.",
    "ignore_constant_cols": (
        "Ignore INDs containing columns with only one value "
        "for improved performance."
    ),
    "hll_accuracy": (
        "HyperLogLog approximation accuracy. "
        "Closer to 0 means higher accuracy and memory usage."
    ),
    "ignore_null_cols": "Ignore INDs containing columns filled only with NULLs.",
    "threads": "Number of threads to use. If 0, use all available threads.",
    "error": "Error threshold for approximate IND algorithms.",
    "is_null_equal_null": (
        "Specify whether two NULL values should be treated as equal."
    ),
    "mem_limit": "Memory limit in MBs for the algorithm.",
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from enum import StrEnum, auto | ||
|
||
|
||
class AindAlgoName(StrEnum): | ||
Mind = auto() | ||
Spider = auto() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from internal.domain.task.value_objects.ind.algo_result import IndAlgoResult, IndModel | ||
|
||
|
||
# The AIND result types are plain aliases of the IND ones (same objects, new names).
AindModel = IndModel
AindAlgoResult = IndAlgoResult
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from typing import Literal | ||
|
||
from pydantic import BaseModel | ||
|
||
from internal.domain.task.value_objects.primitive_name import PrimitiveName | ||
from internal.domain.task.value_objects.ind.algo_config import OneOfIndAlgoConfig | ||
from internal.domain.task.value_objects.ind.algo_result import ( # noqa: F401 | ||
IndAlgoResult, | ||
IndModel, | ||
) | ||
from internal.domain.task.value_objects.ind.algo_name import IndAlgoName # noqa: F401 | ||
|
||
|
||
# Common base for the IND task models: pins ``primitive_name`` to the single
# literal value ``PrimitiveName.ind``.
class BaseIndTaskModel(BaseModel):
    primitive_name: Literal[PrimitiveName.ind]
|
||
|
||
# Input model for an IND task: the primitive tag from the base class plus one
# of the algorithm configurations covered by ``OneOfIndAlgoConfig``.
class IndTaskConfig(BaseIndTaskModel):
    config: OneOfIndAlgoConfig
|
||
|
||
# Output model for an IND task: the primitive tag from the base class plus the
# algorithm result payload.
class IndTaskResult(BaseIndTaskModel):
    result: IndAlgoResult
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from typing import Literal, Annotated | ||
from pydantic import Field | ||
from internal.domain.common import OptionalModel | ||
from internal.domain.task.value_objects.ind.algo_name import IndAlgoName | ||
from internal.domain.task.value_objects.ind.algo_descriptions import descriptions | ||
|
||
|
||
# Base for every IND algorithm configuration.
# NOTE(review): presumably OptionalModel makes all fields optional except the
# ones named in ``__non_optional_fields__`` - confirm in internal.domain.common.
class BaseIndConfig(OptionalModel):
    __non_optional_fields__ = {"algo_name"}
|
||
|
||
# Configuration selected when ``algo_name`` is ``IndAlgoName.Faida``.
class FaidaConfig(BaseIndConfig):
    # Discriminator value for the config union.
    algo_name: Literal[IndAlgoName.Faida]

    # Strictly positive integer.
    max_arity: Annotated[
        int,
        Field(gt=0, description=descriptions["max_arity"]),
    ]
    # Strictly positive integer.
    sample_size: Annotated[
        int,
        Field(gt=0, description=descriptions["sample_size"]),
    ]
    ignore_constant_cols: Annotated[
        bool,
        Field(description=descriptions["ignore_constant_cols"]),
    ]
    # Strictly positive float; no upper bound is enforced here.
    hll_accuracy: Annotated[
        float,
        Field(gt=0, description=descriptions["hll_accuracy"]),
    ]
    ignore_null_cols: Annotated[
        bool,
        Field(description=descriptions["ignore_null_cols"]),
    ]
    # Non-negative integer.
    threads: Annotated[
        int,
        Field(ge=0, description=descriptions["threads"]),
    ]
|
||
|
||
# Configuration selected when ``algo_name`` is ``IndAlgoName.Mind``.
class MindConfig(BaseIndConfig):
    # Discriminator value for the config union.
    algo_name: Literal[IndAlgoName.Mind]

    # Strictly positive integer.
    max_arity: Annotated[
        int,
        Field(gt=0, description=descriptions["max_arity"]),
    ]
    # Float in the closed interval [0, 1].
    error: Annotated[
        float,
        Field(ge=0, le=1.0, description=descriptions["error"]),
    ]
|
||
|
||
# Configuration selected when ``algo_name`` is ``IndAlgoName.Spider``.
class SpiderConfig(BaseIndConfig):
    # Discriminator value for the config union.
    algo_name: Literal[IndAlgoName.Spider]

    # Float in the closed interval [0, 1].
    error: Annotated[
        float,
        Field(ge=0, le=1.0, description=descriptions["error"]),
    ]
    is_null_equal_null: Annotated[
        bool,
        Field(description=descriptions["is_null_equal_null"]),
    ]
    # Non-negative integer.
    threads: Annotated[
        int,
        Field(ge=0, description=descriptions["threads"]),
    ]
    # Strictly positive integer.
    mem_limit: Annotated[
        int,
        Field(gt=0, description=descriptions["mem_limit"]),
    ]
|
||
|
||
# Union of the IND configs, discriminated by the value of ``algo_name``.
_IndConfigDiscriminator = Field(discriminator="algo_name")
OneOfIndAlgoConfig = Annotated[
    FaidaConfig | MindConfig | SpiderConfig,
    _IndConfigDiscriminator,
]
11 changes: 11 additions & 0 deletions
11
internal/domain/task/value_objects/ind/algo_descriptions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Help texts keyed by configuration field name.
descriptions = {
    "max_arity": "Maximum arity of the inclusion dependency (IND).",
    "sample_size": "Size of table sample for IND profiling.",
    "ignore_constant_cols": (
        "Ignore INDs containing columns with only one value "
        "for improved performance."
    ),
    "hll_accuracy": (
        "HyperLogLog approximation accuracy. "
        "Closer to 0 means higher accuracy and memory usage."
    ),
    "ignore_null_cols": "Ignore INDs containing columns filled only with NULLs.",
    "threads": "Number of threads to use. If 0, use all available threads.",
    "error": "Error threshold for approximate IND algorithms.",
    "is_null_equal_null": (
        "Specify whether two NULL values should be treated as equal."
    ),
    "mem_limit": "Memory limit in MBs for the algorithm.",
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from enum import StrEnum, auto | ||
|
||
|
||
class IndAlgoName(StrEnum): | ||
Faida = auto() | ||
Mind = auto() | ||
Spider = auto() |
Oops, something went wrong.