
Commit

remove PyTorchDistributedDecorator & 2.2.5+zg1.1 (#88)
* remove PyTorchDistributedDecorator

* 2.2.5+zg1

* 2.2.5+zg1.1

Co-authored-by: Taleb Zeghmi <[email protected]>
talebzeghmi authored Jun 10, 2021
1 parent 7b89959 commit 6101ef0
Showing 6 changed files with 8 additions and 86 deletions.
4 changes: 1 addition & 3 deletions metaflow/plugins/__init__.py
@@ -58,8 +58,7 @@ def _merge_lists(base, overrides, attr):
from .aws.step_functions.step_functions_decorator import StepFunctionsInternalDecorator
from .conda.conda_step_decorator import CondaStepDecorator
from .kfp.kfp_decorator import KfpInternalDecorator
-from .kfp.pytorch_distributed_decorator import PyTorchDistributedDecorator
from .kfp.accelerator_decorator import AcceleratorDecorator

STEP_DECORATORS = _merge_lists([CatchDecorator,
TimeoutDecorator,
@@ -69,7 +68,6 @@ def _merge_lists(base, overrides, attr):
BatchDecorator,
StepFunctionsInternalDecorator,
CondaStepDecorator,
-PyTorchDistributedDecorator,
AcceleratorDecorator,
KfpInternalDecorator], ext_plugins.STEP_DECORATORS, 'name')

25 changes: 3 additions & 22 deletions metaflow/plugins/kfp/kfp.py
@@ -49,7 +49,6 @@
from .kfp_exit_handler import exit_handler
from .kfp_foreach_splits import graph_to_task_ids
from .kfp_get_workflow_uid import get_workflow_uid
-from .pytorch_distributed_decorator import PyTorchDistributedDecorator
from .accelerator_decorator import AcceleratorDecorator
from ..aws.batch.batch_decorator import BatchDecorator
from ..aws.step_functions.schedule_decorator import ScheduleDecorator
@@ -74,7 +73,6 @@ def __init__(
total_retries: int,
resource_requirements: Dict[str, str],
kfp_decorator: KfpInternalDecorator,
-pytorch_distributed_decorator: PyTorchDistributedDecorator,
accelerator_decorator: AcceleratorDecorator,
environment_decorator: EnvironmentDecorator,
):
@@ -88,7 +86,6 @@ def __init__(
if kfp_decorator
else None
)
-self.pytorch_distributed_decorator = pytorch_distributed_decorator
self.accelerator_decorator = accelerator_decorator
self.environment_decorator = environment_decorator

@@ -391,14 +388,6 @@ def build_kfp_component(node: DAGNode, task_id: str) -> KfpComponent:
),
None, # default
),
-pytorch_distributed_decorator=next(
-    (
-        deco
-        for deco in node.decorators
-        if isinstance(deco, PyTorchDistributedDecorator)
-    ),
-    None,  # default
-),
accelerator_decorator=next(
(
deco
@@ -586,17 +575,9 @@ def _set_container_resources(
container_op.container.set_ephemeral_storage_limit(
resource_requirements["local_storage_limit"]
)
-if (
-    kfp_component.pytorch_distributed_decorator
-    or "volume" in resource_requirements
-):
-    if kfp_component.pytorch_distributed_decorator:
-        print("This is now deprecated!")
-        mode = [VOLUME_MODE_RWM]
-        volume_dir = "/opt/pytorch_shared/"
-    else:
-        mode = resource_requirements["volume_mode"]
-        volume_dir = resource_requirements["volume_dir"]
+if "volume" in resource_requirements:
+    mode = resource_requirements["volume_mode"]
+    volume_dir = resource_requirements["volume_dir"]

volume = KubeflowPipelines._create_volume(
step_name=kfp_component.name,
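Note on the kfp.py change above: after this commit, _set_container_resources creates a volume only when the step's @resources decorator asked for one, and both the access mode and mount directory come straight from those requirements rather than from a PyTorch-specific special case. A condensed, hypothetical sketch of the resulting gating (the helper name volume_settings is invented for illustration; the real code continues into KubeflowPipelines._create_volume as shown above):

    from typing import Dict, Optional, Tuple

    def volume_settings(resource_requirements: Dict[str, str]) -> Optional[Tuple[str, str]]:
        # A volume is attached only when @resources requested one; the old
        # forced ReadWriteMany mode and /opt/pytorch_shared/ path are gone.
        if "volume" not in resource_requirements:
            return None  # nothing to create or mount
        return (
            resource_requirements["volume_mode"],  # e.g. "ReadWriteMany"
            resource_requirements["volume_dir"],   # e.g. "/opt/metaflow_volume"
        )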
56 changes: 0 additions & 56 deletions metaflow/plugins/kfp/pytorch_distributed_decorator.py

This file was deleted.

5 changes: 2 additions & 3 deletions metaflow/tutorials/10-pytorch/hello_pytorch.py
@@ -1,4 +1,4 @@
-from metaflow import FlowSpec, Parameter, step, pytorch_distributed, resources
+from metaflow import FlowSpec, Parameter, step, resources

from models.train import train_model
from models.evaluate import evaluate_model
@@ -36,8 +36,7 @@ def start(self):
print(f"ranks: {self.ranks}")
self.next(self.train, foreach="ranks")

-@resources(cpu=1, cpu_limit=2, gpu="1", memory="2G", memory_limit="5G")
-@pytorch_distributed
+@resources(cpu=1, cpu_limit=2, gpu="1", memory="2G", memory_limit="5G", volume="10G")
@step
def train(self):
"""
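The tutorial edit above is the migration pattern for flows that used @pytorch_distributed: the shared scratch space is now requested directly through @resources via the volume argument. A minimal, hypothetical flow skeleton showing the new shape (training and evaluation logic elided; the volume size is illustrative, and the volume/gpu arguments follow this fork's @resources as used in the tutorial, not vanilla Metaflow):

    from metaflow import FlowSpec, Parameter, step, resources

    class HelloDistributedFlow(FlowSpec):
        world_size = Parameter("world_size", default=2)

        @step
        def start(self):
            self.ranks = list(range(self.world_size))
            self.next(self.train, foreach="ranks")

        # The shared volume replaces @pytorch_distributed: every fanned-out task
        # mounts the same volume, which the ranks use to rendezvous (see train.py).
        @resources(cpu=1, gpu="1", memory="2G", volume="10G")
        @step
        def train(self):
            self.rank = self.input  # the foreach value is this task's rank
            self.next(self.join)

        @step
        def join(self, inputs):
            self.next(self.end)

        @step
        def end(self):
            pass

    if __name__ == "__main__":
        HelloDistributedFlow()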
2 changes: 1 addition & 1 deletion metaflow/tutorials/10-pytorch/models/train.py
@@ -93,7 +93,7 @@ def train_model(
print("Using distributed PyTorch with {} backend".format(pytorch_backend))
dist.init_process_group(
backend=pytorch_backend,
-init_method="file:///opt/pytorch_shared/sharedfile",
+init_method="file:///opt/metaflow_volume/sharedfile",
world_size=world_size,
rank=rank,
)
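The new init_method works because every rank's container mounts the same volume at /opt/metaflow_volume, so PyTorch's file:// rendezvous sees a single shared file. A stripped-down sketch of that rendezvous (paths as in the diff; the backend choice and error handling are illustrative):

    import torch.distributed as dist

    def init_distributed(rank: int, world_size: int, backend: str = "gloo") -> None:
        # file:// rendezvous: all ranks point at one file on the shared,
        # ReadWriteMany volume mounted into every task's container.
        dist.init_process_group(
            backend=backend,
            init_method="file:///opt/metaflow_volume/sharedfile",
            world_size=world_size,
            rank=rank,
        )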
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

from setuptools import setup, find_packages

-version = '2.2.5'
+version = '2.2.5+zg1.1'

# TODO: once this branch is merged or in pip use, remove this
os.system(
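The '+zg1.1' suffix is a PEP 440 local version identifier: the public version stays 2.2.5 while the suffix marks this as a fork-specific build. A quick check with the packaging library (assumed to be installed; not part of this repo):

    from packaging.version import Version

    v = Version("2.2.5+zg1.1")
    print(v.public)              # 2.2.5
    print(v.local)               # zg1.1
    print(v > Version("2.2.5"))  # True: a local version sorts after its public base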
