polish(pu): polish comments in qmix

puyuan1996 · puyuan1996 · commit d18225635c7b · 2024-11-25T15:49:38.000+08:00
diff --git a/ding/model/template/qmix.py b/ding/model/template/qmix.py
@@ -167,9 +167,9 @@ def _get_global_obs_shape_type(self, global_obs_shape: Union[int, List[int]]) ->
         Overview:
             Determine the type of global observation shape.
         Arguments:
-            - global_obs_shape (:obj:`int` or :obj:`List[int]`): The global observation state.
+            - global_obs_shape (Union[:obj:`int`, :obj:`List[int]`]): The shape of the global observation state.
         Returns:
-            - str: 'flat' for 1D observation or 'image' for 3D observation.
+            - (:obj:`str`): 'flat' for 1D observation or 'image' for 3D observation.
         """
         if isinstance(global_obs_shape, int) or (isinstance(global_obs_shape, list) and len(global_obs_shape) == 1):
             return "flat"
@@ -211,8 +211,14 @@ def forward(self, data: dict, single_step: bool = True) -> dict:
         agent_state, global_state, prev_state = data['obs']['agent_state'], data['obs']['global_state'], data[
             'prev_state']
         action = data.get('action', None)
+        # If single_step is True, add a new dimension at the front of agent_state.
+        # This is necessary to maintain the expected input shape for the model,
+        # which requires a time step dimension even when processing a single step.
         if single_step:
             agent_state = agent_state.unsqueeze(0)
+        # If single_step is True and global_state has 2 dimensions, add a new dimension at the front of global_state.
+        # This gives global_state the same leading time step dimension as agent_state,
+        # allowing for consistent processing in the forward computation.
         if single_step and len(global_state.shape) == 2:
             global_state = global_state.unsqueeze(0)
         T, B, A = agent_state.shape[:3]
diff --git a/ding/model/template/tests/test_qmix.py b/ding/model/template/tests/test_qmix.py
@@ -43,3 +43,34 @@ def test_qmix():
         is_differentiable(loss, qmix_model)
         data.pop('action')
         output = qmix_model(data, single_step=False)
+
+
+@pytest.mark.unittest
+def test_qmix_process_global_state():
+    # Test the behavior of the _process_global_state method with different global_obs_shape types
+    agent_num, obs_dim, global_obs_dim, action_dim = 4, 32, 32 * 4, 9
+    embedding_dim = 64
+
+    # Case 1: Test "flat" type global_obs_shape
+    global_obs_shape = global_obs_dim  # Flat global_obs_shape
+    qmix_model_flat = QMix(agent_num, obs_dim, global_obs_shape, action_dim, [64, 128, embedding_dim], mixer=True)
+
+    # Simulate input for the "flat" type global_state
+    batch_size, time_steps = 3, 8
+    global_state_flat = torch.randn(batch_size, time_steps, global_obs_dim)
+    processed_flat = qmix_model_flat._process_global_state(global_state_flat)
+
+    # Ensure the flat output keeps the input shape [batch_size, time_steps, global_obs_dim]
+    assert processed_flat.shape == (batch_size, time_steps, global_obs_dim)
+
+    # Case 2: Test "image" type global_obs_shape
+    global_obs_shape = [3, 64, 64]  # Image-shaped global_obs_shape (C, H, W)
+    qmix_model_image = QMix(agent_num, obs_dim, global_obs_shape, action_dim, [64, 128, embedding_dim], mixer=True)
+
+    # Simulate input for the "image" type global_state
+    C, H, W = global_obs_shape
+    global_state_image = torch.randn(batch_size, time_steps, C, H, W)
+    processed_image = qmix_model_image._process_global_state(global_state_image)
+
+    # Ensure the output shape is correct [batch_size, time_steps, embedding_dim]
+    assert processed_image.shape == (batch_size, time_steps, embedding_dim)
diff --git a/dizoo/petting_zoo/config/ptz_pistonball_qmix_config.py b/dizoo/petting_zoo/config/ptz_pistonball_qmix_config.py
@@ -3,6 +3,7 @@
 n_pistons = 20
 collector_env_num = 8
 evaluator_env_num = 8
+max_env_step = 3e6
 
 main_config = dict(
     exp_name=f'data_pistonball/ptz_pistonball_n{n_pistons}_qmix_seed0',
@@ -18,7 +19,6 @@
         n_evaluator_episode=evaluator_env_num,
         stop_value=1e6,
         manager=dict(shared_memory=False,),
-        max_env_step=3e6,
     ),
     policy=dict(
         cuda=True,
@@ -76,4 +76,4 @@
 if __name__ == '__main__':
     # or you can enter `ding -m serial -c ptz_pistonball_qmix_config.py -s 0`
     from ding.entry import serial_pipeline
-    serial_pipeline((main_config, create_config), seed=0, max_env_step=main_config.env.max_env_step)
+    serial_pipeline((main_config, create_config), seed=0, max_env_step=max_env_step)
diff --git a/setup.py b/setup.py
@@ -75,7 +75,7 @@
         'responses',  # interaction
         'URLObject',  # interaction
         'pynng',  # parallel
-        'sniffio', # parallel
+        'sniffio',  # parallel
         'redis',  # parallel
         'mpire>=2.3.5',  # parallel
     ],