diff --git a/algorithm_store.json b/algorithm_store.json index 7dc17bd..d6d95e5 100644 --- a/algorithm_store.json +++ b/algorithm_store.json @@ -219,6 +219,16 @@ "description": "" } ] + }, + { + "name": "metadata", + "display_name": "Collect run metadata", + "standalone": true, + "description": "Collect run metadata", + "step_type": "federated_compute", + "ui_visualizations": [], + "arguments": [], + "databases": [] } ] } diff --git a/pyproject.toml b/pyproject.toml index 7308928..cdc0ba8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" requires-python = ">=3.13" dependencies = [ "pandas", + "pytest", "vantage6-algorithm-tools==5.0.0a36", ] authors = [ @@ -32,5 +33,8 @@ Issues = "https://github.com/vantage6/v6-session-basics/issues" [tool.hatch.build.targets.wheel] packages = ["v6-session-basics"] +[tool.setuptools.package-data] +"v6-session-basics" = ["data/*.csv"] + [tool.uv] dev-dependencies = [] diff --git a/test/test_metadata.py b/test/test_metadata.py new file mode 100644 index 0000000..8f95eb5 --- /dev/null +++ b/test/test_metadata.py @@ -0,0 +1,57 @@ +from importlib.resources import files + +from vantage6.mock.mock_network import MockNetwork, MockUserClient +import pytest + + +@pytest.fixture +def mock_client() -> MockUserClient: + test_data = files("v6-session-basics").joinpath("data/test_data.csv") + mock_network = MockNetwork( + module_name="v6-session-basics", + datasets=[ + { + "test_data_1": { + "database": test_data, + "db_type": "csv", + }, + }, + { + "test_data_1": { + "database": test_data, + "db_type": "csv", + }, + } + ] + ) + return MockUserClient(mock_network) + +def test_metadata_function(mock_client: MockUserClient): + """Test the metadata function""" + # Get organizations + orgs = mock_client.organization.list() + org_ids = [org["id"] for org in orgs] + + # Create task + task = mock_client.task.create( + method="metadata", organizations=org_ids, arguments={} + ) + + # Wait for results + results = mock_client.wait_for_results(task.get("id")) + + # Assertions + assert results is not None + assert len(results) == 2 # Two organizations + for result in results: + assert "task_id" in result + assert "node_id" in result + assert "collaboration_id" in result + assert "organization_id" in result + assert "temporary_directory" in result + assert "output_file" in result + assert "input_file" in result + assert "token" in result + assert "action" in result + + print(results) diff --git a/test/test_read_csv.py b/test/test_read_csv.py new file mode 100644 index 0000000..14bb495 --- /dev/null +++ b/test/test_read_csv.py @@ -0,0 +1,54 @@ +from importlib.resources import files +import pandas as pd +import pytest + +from vantage6.mock.mock_network import MockNetwork, MockUserClient + + +@pytest.fixture +def mock_client() -> MockUserClient: + test_data = files("v6-session-basics").joinpath("data/test_data.csv") + mock_network = MockNetwork( + module_name="v6-session-basics", + datasets=[ + { + "test_data_1": { + "database": test_data, + "db_type": "csv", + }, + }, + { + "test_data_1": { + "database": test_data, + "db_type": "csv", + }, + } + ] + ) + return MockUserClient(mock_network) + + +def test_read_csv_function(mock_client: MockUserClient): + """Test the read_csv function""" + # Get organizations + orgs = mock_client.organization.list() + org_ids = [org["id"] for org in orgs] + + # Create task + mock_client.dataframe.create( + method="read_csv", + organizations=org_ids, + arguments={}, + action="data_extraction", + label="test_data_1", + name="my_dataframe_by_frank" + ) + + # A data extraction job should create a dataframe on each node, lets check if this + # is the case. Note that in the mock network we store the dataframes in the Python + # session as pandas dataframes while in the real network we store them at disk as + # parquet files. + for node in mock_client.network.nodes: + assert len(node.dataframes) == 1 + assert "my_dataframe_by_frank" in node.dataframes + assert isinstance(node.dataframes["my_dataframe_by_frank"], pd.DataFrame) \ No newline at end of file diff --git a/test/test_sleep.py b/test/test_sleep.py new file mode 100644 index 0000000..b1d3961 --- /dev/null +++ b/test/test_sleep.py @@ -0,0 +1,49 @@ +from importlib.resources import files + +from vantage6.mock.mock_network import MockNetwork, MockUserClient +import pytest + + +@pytest.fixture +def mock_client() -> MockUserClient: + test_data = files("v6-session-basics").joinpath("data/test_data.csv") + mock_network = MockNetwork( + module_name="v6-session-basics", + datasets=[ + { + "test_data_1": { + "database": test_data, + "db_type": "csv", + }, + }, + { + "test_data_1": { + "database": test_data, + "db_type": "csv", + }, + } + ] + ) + return mock_network.user_client + +def test_sleep_function(mock_client: MockUserClient): + """Test the metadata function""" + # Get organizations + orgs = mock_client.organization.list() + org_ids = [org["id"] for org in orgs] + + # Note that the tasks here are run in sequence, thus sleeping for 1 seconds will + # be multiplied by the number of organizations. + task = mock_client.task.create( + method="sleep", organizations=org_ids, arguments={"seconds": 1} + ) + + # Wait for results + results = mock_client.wait_for_results(task.get("id")) + + # Assertions + assert results is not None + assert len(results) == 2 # Two organizations + for result in results: + assert "sleep" in result + assert result["sleep"] == "done" \ No newline at end of file diff --git a/test/test_data.csv b/v6-session-basics/data/test_data.csv similarity index 100% rename from test/test_data.csv rename to v6-session-basics/data/test_data.csv diff --git a/v6-session-basics/partial.py b/v6-session-basics/partial.py index b4f059d..d028e0b 100644 --- a/v6-session-basics/partial.py +++ b/v6-session-basics/partial.py @@ -15,9 +15,24 @@ federated, preprocessing, ) +from vantage6.algorithm.decorator.metadata import metadata, RunMetaData from vantage6.algorithm.decorator.data import dataframe, dataframes from vantage6.common import info +@metadata +def metadata(metadata: RunMetaData) -> dict: + return { + "task_id": str(metadata.task_id), + "node_id": str(metadata.node_id), + "collaboration_id": str(metadata.collaboration_id), + "organization_id": str(metadata.organization_id), + "temporary_directory": str(metadata.temporary_directory), + "output_file": str(metadata.output_file), + "input_file": str(metadata.input_file), + "token": metadata.token, + "action": str(metadata.action), + } + @data_extraction def read_csv(connection_details: dict) -> dict: