Commit 06afc42
Merge pull request #182 from Modalities/177-packaging-dependency-not-correctly-installed-during-test-pipeline-execution

bugfix: removed packaging from pyproject
fromm-m committed Jul 11, 2024
2 parents 66c5bfa + e78b264 commit 06afc42
Showing 20 changed files with 108 additions and 61 deletions.

4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
@@ -25,8 +25,8 @@ jobs:
sed -i 's/"flash-attn"/#"flash-attn"/g' pyproject.toml
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[tests]
python -m pip install --upgrade pip setuptools wheel
python -m pip install -e .[tests]
- name: Run tests
run: |
pytest
2 changes: 2 additions & 0 deletions .gitignore
@@ -97,6 +97,7 @@ celerybeat-schedule
# Environments
.env
.venv
modalities_env/
env/
venv/
ENV/
@@ -152,6 +153,7 @@ tags

checkpoints
data
data-temp/
docs/source/generated
docs/source/api
pyenv*
7 changes: 2 additions & 5 deletions pyproject.toml
@@ -4,9 +4,9 @@ version = "0.1.0"
requires-python = ">=3.10,<3.12"
description = "Modalities, a python framework for distributed and reproducible foundation model training."
dependencies = [
"packaging",
"numpy<2.0",
"torch>=2.0",
"torch>=2.3",
"packaging",
"tqdm",
"pyyaml",
"transformers",
@@ -22,10 +22,7 @@ dependencies = [
"class_resolver",
"wandb",
"einops>=0.7.0",
"mamba-ssm",
"flash-attn", # install this directly via `pip install flash-attn --no-build-isolation`
"mamba-ssm",
"causal-conv1d>=1.2.0",
]

[project.optional-dependencies]
19 changes: 12 additions & 7 deletions src/modalities/activation_checkpointing.py
@@ -9,22 +9,27 @@
)
from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP


import torch
from typing import List

from modalities.util import get_module_class_from_name

def is_module_to_apply_activation_checkpointing(submodule: torch.nn.Module, activation_checkpointing_modules: List[type]) -> bool:

def is_module_to_apply_activation_checkpointing(
submodule: torch.nn.Module, activation_checkpointing_modules: List[type]
) -> bool:
return isinstance(submodule, tuple(activation_checkpointing_modules))


def apply_activation_checkpointing_inplace(model: torch.nn.Module, activation_checkpointing_modules: List[str]):
activation_checkpointing_module_types = [get_module_class_from_name(model, m) for m in activation_checkpointing_modules]
activation_checkpointing_module_types = [
get_module_class_from_name(model, m) for m in activation_checkpointing_modules
]
if not isinstance(model, FSDP):
raise ValueError("activation checkpointing can only be applied to FSDP wrapped models!")
non_reentrant_wrapper = partial(checkpoint_wrapper, checkpoint_impl=CheckpointImpl.NO_REENTRANT, debug=False)

apply_activation_checkpointing(
model, checkpoint_wrapper_fn=non_reentrant_wrapper, check_fn=lambda submodule: is_module_to_apply_activation_checkpointing(submodule, activation_checkpointing_module_types)
model,
checkpoint_wrapper_fn=non_reentrant_wrapper,
check_fn=lambda submodule: is_module_to_apply_activation_checkpointing(
submodule, activation_checkpointing_module_types
),
)
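Note: the reformatted helper above expects a model that is already wrapped in FSDP and a list of module class names to checkpoint. A minimal usage sketch, assuming a distributed process group has already been initialized (e.g. via torchrun); the toy Block class stands in for a real transformer block and is not taken from this commit:

    import torch.nn as nn
    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

    from modalities.activation_checkpointing import apply_activation_checkpointing_inplace

    # Toy stand-in for a transformer block; in practice this would be e.g. a GPT-2 block.
    class Block(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(8, 8)

        def forward(self, x):
            return self.linear(x)

    # The model must already be FSDP-wrapped; a plain nn.Module raises ValueError.
    model = FSDP(nn.Sequential(Block(), Block()))
    apply_activation_checkpointing_inplace(
        model=model,
        # class names of submodules to checkpoint, resolved via get_module_class_from_name
        activation_checkpointing_modules=["Block"],
    )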
3 changes: 1 addition & 2 deletions src/modalities/checkpointing/checkpoint_conversion.py
@@ -1,11 +1,10 @@
from pathlib import Path

from modalities.config.config import load_app_config_dict
from modalities.models.huggingface_adapters.hf_adapter import HFModelAdapterConfig, HFModelAdapter
from modalities.models.huggingface_adapters.hf_adapter import HFModelAdapter, HFModelAdapterConfig


class CheckpointConversion:

def __init__(
self,
config_file_path: Path,
6 changes: 4 additions & 2 deletions src/modalities/config/config.py
@@ -1,5 +1,5 @@
from functools import partial
import os
from functools import partial
from pathlib import Path
from typing import Annotated, Callable, Dict, List, Literal, Optional, Tuple

@@ -370,7 +370,9 @@ def node_env_resolver_fun(var_name: str) -> int:
return os.cpu_count()

OmegaConf.register_new_resolver("cuda_env", cuda_env_resolver_fun, replace=True)
OmegaConf.register_new_resolver("modalities_env", partial(modalities_env_resolver_fun, config_file_path=config_file_path), replace=True)
OmegaConf.register_new_resolver(
"modalities_env", partial(modalities_env_resolver_fun, config_file_path=config_file_path), replace=True
)
OmegaConf.register_new_resolver("node_env", node_env_resolver_fun, replace=True)

cfg = OmegaConf.load(config_file_path)
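Note: the reformatted call above registers an OmegaConf resolver whose config_file_path argument is pre-bound with functools.partial. A standalone sketch of that pattern; the resolver name my_env and its toy behaviour are made up for illustration and are not what Modalities registers:

    from functools import partial

    from omegaconf import OmegaConf

    def my_resolver_fun(var_name: str, config_file_path: str) -> str:
        # Toy resolver: prefix the requested variable with the config file location.
        return f"{config_file_path}:{var_name}"

    OmegaConf.register_new_resolver(
        "my_env", partial(my_resolver_fun, config_file_path="configs/example.yaml"), replace=True
    )

    cfg = OmegaConf.create({"run_dir": "${my_env:run_id}"})
    print(cfg.run_dir)  # -> "configs/example.yaml:run_id"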
2 changes: 1 addition & 1 deletion src/modalities/logging_broker/subscriber.py
@@ -12,6 +12,6 @@ class MessageSubscriberIF(ABC, Generic[T]):
@abstractmethod
def consume_message(self, message: Message[T]):
raise NotImplementedError

def consume_dict(self, message_dict: Dict[str, Any]):
raise NotImplementedError
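Note: as the interface above shows, a concrete subscriber only has to implement consume_message; consume_dict keeps raising NotImplementedError unless overridden. A minimal sketch with a made-up PrintSubscriber, assuming Message exposes its payload attribute:

    from typing import Any, Dict

    from modalities.batch import EvaluationResultBatch
    from modalities.logging_broker.messages import Message
    from modalities.logging_broker.subscriber import MessageSubscriberIF

    class PrintSubscriber(MessageSubscriberIF[EvaluationResultBatch]):
        """Toy subscriber that prints every received payload."""

        def consume_message(self, message: Message[EvaluationResultBatch]):
            print(message.payload)

        def consume_dict(self, message_dict: Dict[str, Any]):
            print(message_dict)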
@@ -1,11 +1,11 @@
from pathlib import Path

import rich
import wandb
import yaml
from rich.console import Group
from rich.panel import Panel

import wandb
from modalities.batch import EvaluationResultBatch
from modalities.config.config import WandbMode
from modalities.logging_broker.messages import Message
1 change: 0 additions & 1 deletion src/modalities/models/huggingface/huggingface_model.py
@@ -8,7 +8,6 @@
from modalities.config.lookup_enum import LookupEnum
from modalities.models.model import NNModel


# Huggingface Model dependencies
#
# ModuleUtilsMixin
12 changes: 7 additions & 5 deletions src/modalities/models/huggingface_adapters/hf_adapter.py
@@ -1,15 +1,15 @@
import json
from dataclasses import dataclass
from pathlib import PosixPath
from typing import Any, Dict, Optional, Tuple, Union, List
from typing import Any, Dict, List, Optional, Tuple, Union

import torch
from transformers import PreTrainedModel, PretrainedConfig
from transformers import PretrainedConfig, PreTrainedModel
from transformers.utils import ModelOutput

from modalities.exceptions import ConfigError
from modalities.models.model import NNModel
from modalities.models.utils import get_model_from_config, ModelTypeEnum
from modalities.models.utils import ModelTypeEnum, get_model_from_config


class HFModelAdapterConfig(PretrainedConfig):
@@ -28,7 +28,7 @@ def to_json_string(self, use_diff: bool = True) -> str:
return json.dumps(json_dict)

def _convert_posixpath_to_str(
self, data_to_be_formatted: Union[Dict[str, Any], List[Any], PosixPath, Any]
self, data_to_be_formatted: Union[Dict[str, Any], List[Any], PosixPath, Any]
) -> Union[Dict[str, Any], List[Any], PosixPath, Any]:
"""
Recursively iterate and convert PosixPath values to strings.
@@ -47,7 +47,9 @@ def _convert_posixpath_to_str(
class HFModelAdapter(PreTrainedModel):
config_class = HFModelAdapterConfig

def __init__(self, config: HFModelAdapterConfig, prediction_key: str, load_checkpoint: bool = False, *inputs, **kwargs):
def __init__(
self, config: HFModelAdapterConfig, prediction_key: str, load_checkpoint: bool = False, *inputs, **kwargs
):
super().__init__(config, *inputs, **kwargs)
self.prediction_key = prediction_key
if load_checkpoint:
4 changes: 2 additions & 2 deletions src/modalities/models/mamba/ops/selective_scan_interface.py
@@ -7,12 +7,12 @@

try:
import causal_conv1d_cuda
import selective_scan_cuda
from causal_conv1d import causal_conv1d_fn
except ImportError:
causal_conv1d_fn = None
causal_conv1d_cuda = None

import selective_scan_cuda
selective_scan_cuda = None


class SelectiveScanFn(torch.autograd.Function):
10 changes: 8 additions & 2 deletions src/modalities/models/utils.py
@@ -1,28 +1,34 @@
from enum import Enum
from typing import Dict

from pydantic import BaseModel
from enum import Enum

from modalities.config.component_factory import ComponentFactory
from modalities.config.pydanctic_if_types import PydanticPytorchModuleType
from modalities.registry.components import COMPONENTS
from modalities.registry.registry import Registry


class ModelTypeEnum(Enum):
MODEL = "model"
CHECKPOINTED_MODEL = "checkpointed_model"



def get_model_from_config(config: Dict, model_type: ModelTypeEnum):
registry = Registry(COMPONENTS)
component_factory = ComponentFactory(registry=registry)

# create the pydantic config for the component factory dynamically based on model_type
if model_type.value == "model":

class PydanticConfig(BaseModel):
model: PydanticPytorchModuleType

elif model_type.value == "checkpointed_model":

class PydanticConfig(BaseModel):
checkpointed_model: PydanticPytorchModuleType

else:
raise NotImplementedError()
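Note: a rough usage sketch of the helper reformatted above; the config path is a placeholder, and the YAML is assumed to define a top-level model component (or checkpointed_model for the other enum value):

    from pathlib import Path

    from modalities.config.config import load_app_config_dict
    from modalities.models.utils import ModelTypeEnum, get_model_from_config

    # Hypothetical config file with a top-level "model" component definition.
    config_dict = load_app_config_dict(Path("configs/example_gpt2_config.yaml"))
    model = get_model_from_config(config=config_dict, model_type=ModelTypeEnum.MODEL)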

4 changes: 1 addition & 3 deletions src/modalities/tokenization/tokenizer_wrapper.py
@@ -35,9 +35,7 @@ def __init__(
# also see here for the truncation and padding options and their effects:
# https://huggingface.co/docs/transformers/pad_truncation#padding-and-truncation

self.tokenizer = AutoTokenizer.from_pretrained(
pretrained_model_name_or_path=pretrained_model_name_or_path
)
self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=pretrained_model_name_or_path)
if special_tokens is not None:
# TODO check if we always want to set
# replace_additional_special_tokens=False
24 changes: 19 additions & 5 deletions tests/checkpointing/test_checkpoint_conversion.py
@@ -1,16 +1,16 @@
import os
from pathlib import Path, PosixPath
from pathlib import Path

import pytest
import torch
from transformers import AutoModelForCausalLM, AutoConfig
from transformers import AutoConfig, AutoModelForCausalLM

from modalities.checkpointing.checkpoint_conversion import CheckpointConversion
from modalities.config.component_factory import ComponentFactory
from modalities.config.config import load_app_config_dict
from modalities.models.huggingface_adapters.hf_adapter import HFModelAdapter, HFModelAdapterConfig
from modalities.models.model import NNModel
from modalities.models.utils import get_model_from_config, ModelTypeEnum
from modalities.models.utils import ModelTypeEnum, get_model_from_config
from modalities.registry.components import COMPONENTS
from modalities.registry.registry import Registry
from tests.conftest import _ROOT_DIR
@@ -56,6 +56,10 @@ def initialized_model(set_env, config_dict: dict) -> NNModel:
return get_model_from_config(config=config_dict, model_type=ModelTypeEnum.MODEL)


@pytest.mark.skipif(
"RANK" not in os.environ or torch.cuda.device_count() < 2,
reason="This e2e test requires 2 GPUs and a torchrun distributed environment.",
)
@pytest.fixture()
def checkpoint_conversion(tmp_path: Path, initialized_model: NNModel, config_file_path: Path) -> CheckpointConversion:
model_file_path = tmp_path / "pytorch_model.bin"
@@ -81,20 +85,22 @@ def pytorch_model(checkpoint_conversion: CheckpointConversion) -> NNModel:
def hf_model(checkpoint_conversion: CheckpointConversion, prediction_key: str) -> NNModel:
return checkpoint_conversion.convert_pytorch_to_hf_checkpoint(prediction_key=prediction_key)


@pytest.fixture()
def prediction_key() -> str:
return "logits"


@pytest.fixture()
def hf_model_from_checkpoint(
checkpoint_conversion: CheckpointConversion, pytorch_model: NNModel, device: str, prediction_key: str
checkpoint_conversion: CheckpointConversion, pytorch_model: NNModel, device: str, prediction_key: str
) -> NNModel:
AutoConfig.register(model_type="modalities", config=HFModelAdapterConfig)
AutoModelForCausalLM.register(config_class=HFModelAdapterConfig, model_class=HFModelAdapter)
hf_model_from_checkpoint = AutoModelForCausalLM.from_pretrained(
pretrained_model_name_or_path=checkpoint_conversion.output_hf_checkpoint_dir,
torch_dtype=pytorch_model.lm_head.weight.dtype,
prediction_key=prediction_key
prediction_key=prediction_key,
)
hf_model_from_checkpoint = hf_model_from_checkpoint.to(device)
return hf_model_from_checkpoint
@@ -107,6 +113,10 @@ def test_tensor(device: str, size: int = 10) -> torch.Tensor:
return test_tensor


@pytest.mark.skipif(
"RANK" not in os.environ or torch.cuda.device_count() < 2,
reason="This e2e test requires 2 GPUs and a torchrun distributed environment.",
)
def test_models_before_and_after_conversion_produce_same_output(
device: str,
pytorch_model: NNModel,
@@ -131,6 +141,10 @@ def put_model_to_eval_mode(model: NNModel, device: str) -> NNModel:
return model


@pytest.mark.skipif(
"RANK" not in os.environ or torch.cuda.device_count() < 2,
reason="This e2e test requires 2 GPUs and a torchrun distributed environment.",
)
def test_models_before_and_after_conversion_are_equal(
pytorch_model: NNModel,
hf_model: NNModel,
2 changes: 1 addition & 1 deletion tests/models/coca/test_coca.py
@@ -61,7 +61,7 @@ def test_e2e_coca_training_run_without_checkpoint(monkeypatch):

main = Main(dummy_config_path)
main.config_dict = config_dict

with CudaEnv(process_group_backend=ProcessGroupBackendType.nccl):
components = main.build_components(components_model_type=TrainingComponentsInstantiationModel)
main.run(components)
21 changes: 18 additions & 3 deletions tests/models/mamba/test_mamba_block.py
@@ -1,18 +1,26 @@
import os

import pytest
import torch

from modalities.models.mamba.utils.generation import InferenceParams


@pytest.mark.skipif(not torch.cuda.is_available(), reason="We need cuda to run Mamba.")
@pytest.mark.skipif(
"RANK" not in os.environ or torch.cuda.device_count() < 2,
reason="This e2e test requires 2 GPUs and a torchrun distributed environment.",
)
def test_mamba_block_forward(batch_size, sequence_length, d_model, d_state, d_conv, expand, mamba_block):
x = torch.randn(batch_size, sequence_length, d_model).to("cuda")
mamba_block = mamba_block.to("cuda")
y = mamba_block(x)
assert y.shape == x.shape


@pytest.mark.skipif(not torch.cuda.is_available(), reason="We need cuda to run Mamba.")
@pytest.mark.skipif(
"RANK" not in os.environ or torch.cuda.device_count() < 2,
reason="This e2e test requires 2 GPUs and a torchrun distributed environment.",
)
def test_block_forward(hidden_states, block):
block = block.to("cuda")
hidden_states = hidden_states.to("cuda")
@@ -38,7 +46,10 @@ def test_get_states_from_cache(
assert (ssm_state == computed_ssm_state).all()


@pytest.mark.skipif(not torch.cuda.is_available(), reason="We need cuda to run Mamba.")
@pytest.mark.skipif(
"RANK" not in os.environ or torch.cuda.device_count() < 2,
reason="This e2e test requires 2 GPUs and a torchrun distributed environment.",
)
def test_step(conv_state, ssm_state, mamba_block, hidden_states):
device = "cuda"
mamba_block = mamba_block.to(device)
@@ -58,6 +69,10 @@ def test_step(conv_state, ssm_state, mamba_block, hidden_states):
assert (computed_ssm_state != ssm_state).any()


@pytest.mark.skipif(
"RANK" not in os.environ or torch.cuda.device_count() < 2,
reason="This e2e test requires 2 GPUs and a torchrun distributed environment.",
)
def test_allocate_inference_cache(mamba_block, batch_size, sequence_length, conv_state, ssm_state):
device = "cuda"
mamba_block.to(device)