added support for using multiple losses and metrics in evaluator #8

Closed
Changes from 31 commits (44 commits total)
7ef1362
feat(evaluator): added support for using multiple losses and metrics …
BlueCrescent Jan 15, 2024
69134ad
chore: Merge branch 'main' into feat/multiple_losses_and_metrics_in_e…
le1nux Jan 19, 2024
af47020
chore: Merge branch 'main' into feat/multiple_losses_and_metrics_in_e…
le1nux Jan 21, 2024
abd2268
fix: num_samples added to throughput_aggregator were not of type tensor
le1nux Jan 21, 2024
3519217
feat: started implementation of Aggregator and StatefulMetrics
le1nux Jan 23, 2024
48e4a89
feat: towards generic measure evaluation
le1nux Jan 29, 2024
cd15bbd
feat(evaluation): Adding generic evaluation measure for training
Jan 29, 2024
317978b
feat(evaluation): various updates
BlueCrescent Feb 5, 2024
2678d05
Merge remote-tracking branch 'origin/main' into feat/multiple_losses_…
BlueCrescent Feb 5, 2024
efd97e6
refactor(evaluation): integration of evaluators in training
Feb 5, 2024
5034018
refactor(evaluation): adaptions to loss function; using conftest.py f…
Feb 5, 2024
c2da315
test(evaluation): added comparison implementation for perplexity test
BlueCrescent Feb 5, 2024
09f1078
feat(config): added first version of evaluation losses in config
BlueCrescent Feb 5, 2024
c6b0a1b
fix(config): added some missing code for reading the new eval measure…
BlueCrescent Feb 6, 2024
66a9971
feat(evaluation): add factory for ThroughputAggregator, finish perple…
Feb 12, 2024
00f0598
fix(evaluation): Fixed perplexity and finalized batch size two test.
BlueCrescent Feb 16, 2024
59e8996
fix: Minor fixes and reverted changes to Reducer.
BlueCrescent Feb 19, 2024
9af55e4
test(evaluation): Added multiple dataloadres to evaluator tests.
BlueCrescent Feb 19, 2024
6215257
fix(loss): Reverted change to cross entropy reduction default back to…
BlueCrescent Feb 19, 2024
bbf4d4f
fix(evaluation): Fixed parameter for throughput aggregator in Evaluator.
BlueCrescent Feb 19, 2024
a113152
fix(evaluation): Bug fix (mutable default arg) and minor refactoring.
BlueCrescent Feb 19, 2024
d898e84
test(evaluation): Added tests for ThroughputAggregator.
BlueCrescent Feb 19, 2024
7ea5ed9
Merge branch 'main' into feat/multiple_losses_and_metrics_in_evaluator
BlueCrescent Feb 19, 2024
c24cf26
feat(config): Added validation_measure_factories to all config files.
BlueCrescent Feb 19, 2024
547c0d0
refactor: Ran isort.
BlueCrescent Feb 19, 2024
066f092
Merge remote-tracking branch 'remotes/origin/main' into feat/multiple…
BlueCrescent Mar 4, 2024
fa004be
fix(trainer): Fixed errors from merging.
BlueCrescent Mar 4, 2024
67e77ee
feat(config): Adapted eval measure configs to new config scheme.
BlueCrescent Mar 4, 2024
bb0bc62
fix(config): Fixed configs for eval measures.
BlueCrescent Mar 4, 2024
eb5e792
fix(evaluation): data_loader.sampler_batch_size to data_loader.batch_…
BlueCrescent Mar 4, 2024
cc76bdf
chore(merge): Merge remote-tracking branch 'origin/main' into feat/mu…
BlueCrescent Mar 11, 2024
4e91766
refactor(evaluation): Renamed AggregativeMeasure to AggregatedMeasure.
BlueCrescent Mar 15, 2024
e12dcd7
refactor(evaluation): Renamed batch_result to result_batch.
BlueCrescent Mar 15, 2024
a8cd8b2
refactor(evaluation): Explicit reduce_operation parameter.
BlueCrescent Mar 15, 2024
27b0fa8
refactor(evaluation): Fixed typos.
BlueCrescent Mar 15, 2024
c886473
refactor(evaluation): In AggregatedMeasure, renamed compute() to aggr…
BlueCrescent Mar 15, 2024
fdd4fa2
refactor(evaluation): Turned _extract_num_samples into a static metho…
BlueCrescent Mar 15, 2024
82f5458
docs(config): Improved code comment.
BlueCrescent Mar 15, 2024
356c4da
refactor(utilities): Removed unused imports.
BlueCrescent Mar 15, 2024
dba5680
refactor(utilities): Removed method only used by tests.
BlueCrescent Mar 15, 2024
381caff
refactor(evaluation): Adapted evaluator tests to previous renaming of…
BlueCrescent Mar 15, 2024
362ff84
feat(evaluation): Changed ThroughputAggregationContext to be usable i…
BlueCrescent Mar 15, 2024
00b8c95
chore(merge): Merge remote-tracking branch 'origin/main' into feat/mu…
BlueCrescent Mar 18, 2024
b955e98
chore(merge): Merge remote-tracking branch 'origin/main' into feat/mu…
BlueCrescent Mar 18, 2024
6 changes: 6 additions & 0 deletions config_files/config.yaml
Member: config is outdated w.r.t. component_key and variant_key

@@ -173,3 +173,9 @@ loss:
  config:
    target_key: ${data.target_key}
    prediction_key: ${model.config.prediction_key}

+validation_measure_factories:
+  - type_hint: AggregativeCLMCrossEntropyLossFactory
+    config:
+      target_key: ${data.target_key}
+      prediction_key: ${model.config.prediction_key}
7 changes: 7 additions & 0 deletions config_files/config_example_hf_meditron_7B_instruction.yaml
@@ -159,6 +159,13 @@ loss_fn:
    target_key: ${settings.referencing_keys.target_key}
    prediction_key: ${settings.referencing_keys.prediction_key}

+evaluation_measures:
+  - component_key: eval_measures
+    variant_key: clm_cross_entropy_loss
+    config:
+      target_key: ${data.target_key}
+      prediction_key: ${model.config.prediction_key}

# scheduler:
#   type_hint: StepLR
#   config:
9 changes: 8 additions & 1 deletion config_files/config_example_mem_map_dataset.yaml
@@ -177,6 +177,13 @@ loss_fn:
    target_key: ${settings.referencing_keys.target_key}
    prediction_key: ${settings.referencing_keys.prediction_key}

+evaluation_measures:
+  - component_key: eval_measures
+    variant_key: clm_cross_entropy_loss
+    config:
+      target_key: ${data.target_key}
+      prediction_key: ${model.config.prediction_key}

optimizer:
  component_key: optimizer
  variant_key: adam_w
Expand Down Expand Up @@ -209,4 +216,4 @@ evaluation_subscriber:
    project: modalities
    mode: ONLINE
    experiment_id: ${settings.experiment_id}
-    directory: "."
+    directory: "."
28 changes: 18 additions & 10 deletions config_files/config_example_openGPTx_dataset.yaml
Member: config is outdated w.r.t. component_key and variant_key

@@ -2,6 +2,11 @@ modalities_setup:
  run_mode: FROM_SCRATCH
  settings:
    global_num_seen_samples: 0

+wandb:
+  project_name: modalities
+  mode: ONLINE

data:
  sample_key: "input_ids"
  target_key: "target_ids"
@@ -22,7 +27,7 @@ data:
      sampler:
        type_hint: DistributedSampler
        config:
-          rank: ${training.local_rank}
+          rank: ${training.global_rank}
          num_replicas: ${training.world_size}
          shuffle: true
      dataset:
@@ -53,7 +58,7 @@ data:
      sampler:
        type_hint: DistributedSampler
        config:
-          rank: ${training.local_rank}
+          rank: ${training.global_rank}
          num_replicas: ${training.world_size}
          shuffle: false
      dataset:
@@ -82,7 +87,7 @@ data:
      sampler:
        type_hint: DistributedSampler
        config:
-          rank: ${training.local_rank}
+          rank: ${training.global_rank}
          num_replicas: ${training.world_size}
          shuffle: false
      dataset:
@@ -98,11 +103,6 @@ data:
      sample_key: ${data.sample_key}
      target_key: ${data.target_key}

-wandb:
-  project_name: modalities
-  mode: ONLINE

training:
  process_group_backend: "nccl"
  global_num_training_samples: 2048
@@ -114,7 +114,8 @@ training:
  local_train_micro_batch_size: ${data.train_dataloader.config.batch_sampler.config.batch_size}
  global_num_seen_samples: ${modalities_setup.settings.global_num_seen_samples}
  gradient_acc_step: 1
-  do_apply_activation_checkpointing: True
+  do_apply_activation_checkpointing: false


checkpointing:
  checkpointing_strategy:
@@ -175,4 +176,11 @@ loss:
  type_hint: CLMCrossEntropyLoss
  config:
    target_key: ${data.target_key}
-    prediction_key: ${model.config.prediction_key}
+    prediction_key: ${model.config.prediction_key}

+evaluation_measures:
+  - component_key: eval_measures
+    variant_key: clm_cross_entropy_loss
+    config:
+      target_key: ${data.target_key}
+      prediction_key: ${model.config.prediction_key}
7 changes: 7 additions & 0 deletions config_files/config_lorem_ipsum.yaml
@@ -170,6 +170,13 @@ loss_fn:
    target_key: target_ids
    prediction_key: logits

+evaluation_measures:
+  - component_key: eval_measures
+    variant_key: clm_cross_entropy_loss
+    config:
+      target_key: target_ids
+      prediction_key: logits

wrapped_model:
  component_key: model
  variant_key: fsdp_wrapped
2 changes: 2 additions & 0 deletions src/modalities/__main__.py
@@ -243,6 +243,8 @@ def run(self):
            local_rank=components.settings.cuda_env.local_rank,
            batch_progress_publisher=batch_processed_publisher,
            evaluation_result_publisher=evaluation_result_publisher,
+            loss_factories=components.evaluation_measures,
+            metric_factories=[],
Member: do we really need metric_factories or could we just have a more generic evaluation_measure_factories?

Collaborator (Author): I agree that it would probably be better to have just one evaluation_measure_factories and use it for both metrics and losses (and whatever other measures we want to compute). The reason for splitting metrics and losses was that the output object already had that distinction.

As a comparison, Hugging Face's Trainer also has only one field for eval measures. One of its uses there is deciding the best checkpoint(s): you select one of the eval measures you're logging (via an additional parameter) and set a "higher is better" flag, which essentially differentiates between losses and metrics/scores.
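For reference, a minimal sketch of that Hugging Face pattern (a single compute_metrics field plus best-checkpoint selection flags); the accuracy metric and the argument values here are illustrative assumptions, not part of this PR:

```python
import numpy as np
from transformers import Trainer, TrainingArguments

def compute_metrics(eval_pred):
    # One callable returns all eval measures as a flat dict.
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}

args = TrainingArguments(
    output_dir="out",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",  # pick one of the logged measures
    greater_is_better=True,            # False would suit a loss
)
# trainer = Trainer(model=model, args=args, compute_metrics=compute_metrics, ...)
```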

        )

        # Gym
3 changes: 3 additions & 0 deletions src/modalities/config/config.py
@@ -19,6 +19,7 @@
from modalities.checkpointing.checkpointing_strategies import CheckpointingStrategyIF
from modalities.config.lookup_enum import LookupEnum
from modalities.dataloader.dataloader import LLMDataLoader
+from modalities.evaluation.measure import AggregativeMeasureFactory
from modalities.logging_broker.subscriber import MessageSubscriberIF
from modalities.loss_functions import Loss
from modalities.models.gpt2.collator import CollateFnIF
@@ -60,6 +61,7 @@ def __get_pydantic_core_schema__(
PydanticLLMDataLoaderIFType = Annotated[LLMDataLoader, PydanticThirdPartyTypeIF(LLMDataLoader)]
PydanticOptimizerIFType = Annotated[Optimizer, PydanticThirdPartyTypeIF(Optimizer)]
PydanticLossIFType = Annotated[Loss, PydanticThirdPartyTypeIF(Loss)]
+PydanticMeasureFactoryIFType = Annotated[AggregativeMeasureFactory, PydanticThirdPartyTypeIF(AggregativeMeasureFactory)]
PydanticMessageSubscriberIFType = Annotated[MessageSubscriberIF, PydanticThirdPartyTypeIF(MessageSubscriberIF)]


@@ -306,6 +308,7 @@ class ComponentsModel(BaseModel):
    wrapped_model: PydanticModelIFType
    optimizer: PydanticOptimizerIFType
    loss_fn: PydanticLossIFType
+    evaluation_measures: List[PydanticMeasureFactoryIFType]
    train_dataloader: PydanticLLMDataLoaderIFType
    eval_dataloaders: List[PydanticLLMDataLoaderIFType]
    batch_progress_subscriber: PydanticMessageSubscriberIFType
Empty file.
34 changes: 34 additions & 0 deletions src/modalities/evaluation/aggregator.py
@@ -0,0 +1,34 @@
from __future__ import annotations

from typing import Dict, Generic, Hashable, Optional, TypeVar

import torch
import torch.distributed as dist

from modalities.running_env.fsdp.reducer import Reducer

KeyType = TypeVar("KeyType", bound=Hashable)


class Aggregator(Generic[KeyType]):

    def __init__(self, initial_values: Optional[Dict[KeyType, torch.Tensor]] = None) -> None:
        self._key_to_value = initial_values if initial_values else {}

    def add_values(self, value_dict: Dict[KeyType, torch.Tensor]):
        for key, value in value_dict.items():
            self.add_value(key, value)

    def add_value(self, key: KeyType, value: torch.Tensor):
        if key not in self._key_to_value:
            self._key_to_value[key] = value
        else:
            self._key_to_value[key] += value

    def get_all_reduced_value(
        self, key: KeyType, reduce_operation: dist.ReduceOp.RedOpType = dist.ReduceOp.SUM
    ) -> torch.Tensor:
        # we clone the value so that we can always resync the value without side-effects
        cloned_value = self._key_to_value[key].clone()
        value = Reducer.reduce(tensor=cloned_value, operation=reduce_operation)
Member: Since we have the hierarchical instantiation up and running now, we should pass in the reducer via the constructor. We can think of different reducers, e.g., a torch distributed reducer, which reduces the tensors across ranks. Another reducer for single-GPU training without FSDP (which is still a todo) could just call torch.mean(). What do you think?

Collaborator (Author): Yes, I agree that this makes sense. But I would probably postpone such a change until we actually have a second Reducer.

        return value
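For illustration, a minimal sketch of the constructor-injected reducer idea from this thread; ReducerIF, DistAllReducer, and LocalReducer are hypothetical names, not part of this PR:

```python
from typing import Protocol

import torch
import torch.distributed as dist


class ReducerIF(Protocol):
    def reduce(self, tensor: torch.Tensor, operation: dist.ReduceOp.RedOpType) -> torch.Tensor:
        ...


class DistAllReducer:
    # FSDP / multi-GPU case: reduce the tensor across all ranks.
    def reduce(self, tensor: torch.Tensor, operation: dist.ReduceOp.RedOpType) -> torch.Tensor:
        dist.all_reduce(tensor, op=operation)
        return tensor


class LocalReducer:
    # Single-GPU case: nothing to synchronize, so the tensor passes through
    # (the review above suggests e.g. torch.mean() here, depending on the measure).
    def reduce(self, tensor: torch.Tensor, operation: dist.ReduceOp.RedOpType) -> torch.Tensor:
        return tensor


# The Aggregator would then take `reducer: ReducerIF` in its constructor and call
# `self._reducer.reduce(...)` instead of the static `Reducer.reduce(...)`.
```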
51 changes: 51 additions & 0 deletions src/modalities/evaluation/clm_cross_entropy_loss.py
@@ -0,0 +1,51 @@
from __future__ import annotations

from enum import Enum
from typing import Dict

import torch
import torch.distributed as dist

from modalities.batch import InferenceResultBatch
from modalities.evaluation.measure import AggregativeMeasure, AggregativeMeasureFactory
from modalities.loss_functions import CLMCrossEntropyLoss


class LossKeys(Enum):
    CLM_CROSS_ENTROPY = "clm_cross_entropy"
    NUM_SAMPLES = "num_samples"
Comment on lines +14 to +16
Member: previously, we always defined those in the config yaml. I'd suggest doing that here as well, for consistency.

Collaborator (Author): In this case, these keys are more like internals of the class. Rather than making them configurable, we should probably turn this enum into an inner class.
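A hedged sketch of that suggestion (not part of this PR; the surrounding diff still uses the module-level LossKeys):

```python
from enum import Enum

import torch.distributed as dist

from modalities.evaluation.measure import AggregativeMeasure
from modalities.loss_functions import CLMCrossEntropyLoss


class AggregativeCLMCrossEntropyLoss(AggregativeMeasure):
    class Keys(Enum):
        # Former module-level LossKeys, now an implementation detail of the class.
        CLM_CROSS_ENTROPY = "clm_cross_entropy"
        NUM_SAMPLES = "num_samples"

    def __init__(self, target_key: str, prediction_key: str, local_rank: int) -> None:
        super().__init__(
            aggregate_keys=list(self.Keys),
            reduce_ops={k: dist.ReduceOp.SUM for k in self.Keys},
            tag="CLMCrossEntropyLoss",
            local_rank=local_rank,
        )
        self._loss = CLMCrossEntropyLoss(target_key=target_key, prediction_key=prediction_key, reduction="sum")
```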



class AggregativeCLMCrossEntropyLoss(AggregativeMeasure[LossKeys]):

    def __init__(self, target_key: str, prediction_key: str, local_rank: int) -> None:
        super().__init__(
            aggregate_keys=list(LossKeys),
            reduce_ops={k: dist.ReduceOp.SUM for k in LossKeys},
            tag="CLMCrossEntropyLoss",
            local_rank=local_rank,
        )
        self._loss = CLMCrossEntropyLoss(target_key=target_key, prediction_key=prediction_key, reduction="sum")

    def _postprocess_result_batch(self, batch_result: InferenceResultBatch) -> Dict[LossKeys, torch.Tensor]:
        loss = self._loss(batch_result)
        return {
            LossKeys.CLM_CROSS_ENTROPY: loss,
            LossKeys.NUM_SAMPLES: torch.tensor(len(batch_result)),
        }

    def _calc_measure(self, values: Dict[LossKeys, torch.Tensor]) -> torch.Tensor:
        return values[LossKeys.CLM_CROSS_ENTROPY] / values[LossKeys.NUM_SAMPLES]


class AggregativeCLMCrossEntropyLossFactory(AggregativeMeasureFactory[LossKeys]):
Member: What do we need the factory for?

Collaborator (Author): The reason for using factories here is that the AggregatedMeasure objects should only exist in one context. Since they are stateful objects, only the context that constructs them should be using them. In order to still parameterize which measure to use, the context is given the required factory.

    def __init__(self, target_key: str, prediction_key: str) -> None:
        self._target_key = target_key
        self._prediction_key = prediction_key

    def create(self, local_rank: int) -> AggregativeMeasure:
        return AggregativeCLMCrossEntropyLoss(
            target_key=self._target_key,
            prediction_key=self._prediction_key,
            local_rank=local_rank,
        )
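To make the lifecycle described above concrete, here is a hedged sketch of how an evaluation context might consume such factories; evaluate_split and its loop are illustrative assumptions, not the PR's actual Evaluator code:

```python
from typing import List

from modalities.evaluation.measure import AggregativeMeasure, AggregativeMeasureFactory


def evaluate_split(measure_factories: List[AggregativeMeasureFactory], result_batches, local_rank: int):
    # Fresh, stateful measure objects per evaluation run; the factories stay stateless.
    measures: List[AggregativeMeasure] = [f.create(local_rank=local_rank) for f in measure_factories]
    for batch_result in result_batches:
        for measure in measures:
            measure.add(batch_result)
    # compute() all-reduces the aggregated values across ranks before the final calculation.
    return {m.tag: m.compute() for m in measures}
```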
63 changes: 63 additions & 0 deletions src/modalities/evaluation/measure.py
@@ -0,0 +1,63 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Dict, Generic, List

import torch
import torch.distributed as dist

from modalities.batch import InferenceResultBatch
from modalities.evaluation.aggregator import Aggregator, KeyType


class AggregativeMeasureFactory(Generic[KeyType]):
    def create(self, local_rank: int) -> AggregativeMeasure:
        raise NotImplementedError


class AggregativeMeasure(Generic[KeyType], ABC):
    def __init__(
        self,
        aggregate_keys: List[KeyType],
        reduce_ops: Dict[KeyType, dist.ReduceOp.RedOpType],
        tag: str,
        local_rank: int,
    ) -> None:
        if torch.cuda.is_available():
            self._device = torch.device(local_rank)
        else:
            self._device = "cpu"
        self._aggregator = Aggregator[KeyType](
            initial_values={k: torch.zeros(1).to(self._device) for k in aggregate_keys}
        )
        self._aggregate_keys = aggregate_keys
        self._reduce_ops = reduce_ops
        self._tag = tag

    @property
    def tag(self) -> str:
        return self._tag

    def add(self, batch_result: InferenceResultBatch) -> None:
        res = self._postprocess_result_batch(batch_result)

        for key, value in res.items():
            self._aggregator.add_value(key, value.to(self._device))

    def compute(self) -> torch.Tensor:
        synced_vals: Dict[KeyType, torch.Tensor] = {}
        for key in self._aggregate_keys:
            synced_vals[key] = self._aggregator.get_all_reduced_value(
                key,
                self._reduce_ops[key],
            )

        return self._calc_measure(synced_vals)

    @abstractmethod
    def _postprocess_result_batch(self, batch_result: InferenceResultBatch) -> Dict[KeyType, torch.Tensor]:
        raise NotImplementedError

    @abstractmethod
    def _calc_measure(self, values: Dict[KeyType, torch.Tensor]) -> torch.Tensor:
        raise NotImplementedError
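As a usage illustration of this interface, a hedged sketch of a hypothetical additional measure (token-level accuracy, not part of this PR) built on the two abstract hooks; get_predictions is assumed to mirror the get_targets accessor used elsewhere in the PR:

```python
from enum import Enum
from typing import Dict

import torch
import torch.distributed as dist

from modalities.batch import InferenceResultBatch
from modalities.evaluation.measure import AggregativeMeasure


class AccuracyKeys(Enum):
    NUM_CORRECT = "num_correct"
    NUM_TOKENS = "num_tokens"


class AggregativeTokenAccuracy(AggregativeMeasure[AccuracyKeys]):
    def __init__(self, target_key: str, prediction_key: str, local_rank: int) -> None:
        super().__init__(
            aggregate_keys=list(AccuracyKeys),
            reduce_ops={k: dist.ReduceOp.SUM for k in AccuracyKeys},
            tag="TokenAccuracy",
            local_rank=local_rank,
        )
        self._target_key = target_key
        self._prediction_key = prediction_key

    def _postprocess_result_batch(self, batch_result: InferenceResultBatch) -> Dict[AccuracyKeys, torch.Tensor]:
        targets = batch_result.get_targets(self._target_key)
        logits = batch_result.get_predictions(self._prediction_key)  # assumed accessor
        predictions = logits.argmax(dim=-1)
        return {
            AccuracyKeys.NUM_CORRECT: (predictions == targets).sum(),
            AccuracyKeys.NUM_TOKENS: torch.tensor(targets.numel()),
        }

    def _calc_measure(self, values: Dict[AccuracyKeys, torch.Tensor]) -> torch.Tensor:
        return values[AccuracyKeys.NUM_CORRECT] / values[AccuracyKeys.NUM_TOKENS]
```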
54 changes: 54 additions & 0 deletions src/modalities/evaluation/perplexity.py
@@ -0,0 +1,54 @@
from __future__ import annotations

from enum import Enum
from typing import Dict

import torch
import torch.distributed as dist

from modalities.batch import InferenceResultBatch
from modalities.evaluation.measure import AggregativeMeasure, AggregativeMeasureFactory
from modalities.loss_functions import CLMCrossEntropyLoss


class PerplexityKeys(Enum):
    PERPLEXITY = "loss"
    NUM_SAMPLES = "num_samples"


class AggregativePerplexity(AggregativeMeasure[PerplexityKeys]):
    def __init__(self, target_key: str, prediction_key: str, local_rank: int) -> None:
        super().__init__(
            aggregate_keys=list(PerplexityKeys),
            reduce_ops={k: dist.ReduceOp.SUM for k in PerplexityKeys},
            tag="Perplexity",
            local_rank=local_rank,
        )
        self._target_key = target_key
        self._loss = CLMCrossEntropyLoss(target_key=target_key, prediction_key=prediction_key, reduction="none")

    def _postprocess_result_batch(self, batch_result: InferenceResultBatch) -> Dict[PerplexityKeys, torch.Tensor]:
        loss = self._loss(batch_result)  # shape: (batch_size * seq_len)
        batch_size, seq_len = batch_result.get_targets(self._target_key).shape
        loss = loss.view(batch_size, seq_len)  # shape: (batch_size, seq_len)
        perplexity = torch.exp(loss.sum(-1) / seq_len)
        return {
            PerplexityKeys.PERPLEXITY: perplexity.sum(),
            PerplexityKeys.NUM_SAMPLES: torch.tensor(len(batch_result)),
        }

    def _calc_measure(self, values: Dict[PerplexityKeys, torch.Tensor]) -> torch.Tensor:
        return values[PerplexityKeys.PERPLEXITY] / values[PerplexityKeys.NUM_SAMPLES]


class AggregativePerplexityFactory(AggregativeMeasureFactory[PerplexityKeys]):
    def __init__(self, target_key: str, prediction_key: str) -> None:
        self._target_key = target_key
        self._prediction_key = prediction_key

    def create(self, local_rank: int) -> AggregativeMeasure[PerplexityKeys]:
Member: as mentioned earlier, I think the factories are overkill.

        return AggregativePerplexity(
            target_key=self._target_key,
            prediction_key=self._prediction_key,
            local_rank=local_rank,
        )
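The computation above is the per-sample perplexity exp(mean of token cross-entropies over the sequence), summed per batch and divided by the sample count after the distributed reduction. A tiny toy check of the formula (numbers are illustrative, not from the PR):

```python
import torch

# Per-sample perplexity = exp(mean over sequence positions of token cross-entropy).
token_losses = torch.tensor([[0.5, 1.0, 1.5],    # sample 1: mean loss 1.0
                             [2.0, 2.0, 2.0]])   # sample 2: mean loss 2.0
per_sample_ppl = torch.exp(token_losses.mean(dim=-1))
print(per_sample_ppl)         # tensor([2.7183, 7.3891]), i.e. exp(1.0) and exp(2.0)
print(per_sample_ppl.mean())  # what _calc_measure yields: summed ppl / num_samples
```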