@@ -95,6 +95,7 @@ def forward(self, x):


class DecisionTransformer(nn.Module):
+
    def __init__(
        self,
        state_dim,
@@ -121,7 +122,7 @@ def __init__(
        self.embed_ln = nn.LayerNorm(h_dim)
        self.embed_timestep = nn.Embedding(max_timestep, h_dim)
        self.drop = nn.Dropout(drop_p)
-
+
        self.pos_emb = nn.Parameter(torch.zeros(1, input_seq_len + 1, self.h_dim))
        self.global_pos_emb = nn.Parameter(torch.zeros(1, max_timestep + 1, self.h_dim))

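The two positional tables above play different roles: `pos_emb` is a learned embedding over the local token window (the interleaved return/state/action sequence), while `global_pos_emb` is indexed by the absolute environment timestep. A standalone shape check, with made-up sizes rather than the repo's config:

```python
import torch
import torch.nn as nn

h_dim, input_seq_len, max_timestep = 8, 30, 100  # toy sizes, not the real config

# Local table: one row per token in the interleaved (r, s, a) window, plus one spare
pos_emb = nn.Parameter(torch.zeros(1, input_seq_len + 1, h_dim))
# Global table: one row per absolute environment timestep
global_pos_emb = nn.Parameter(torch.zeros(1, max_timestep + 1, h_dim))

print(pos_emb.shape)         # torch.Size([1, 31, 8])
print(global_pos_emb.shape)  # torch.Size([1, 101, 8])
```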
@@ -140,9 +141,11 @@ def __init__(
            # discrete actions
            self.embed_action = torch.nn.Embedding(act_dim, h_dim)
            use_action_tanh = False  # False for discrete actions
-            self.predict_action = nn.Sequential(*([nn.Linear(h_dim, act_dim)] + ([nn.Tanh()] if use_action_tanh else [])))
+            self.predict_action = nn.Sequential(
+                *([nn.Linear(h_dim, act_dim)] + ([nn.Tanh()] if use_action_tanh else []))
+            )
        else:
-            blocks = [Block(h_dim, input_seq_len + 1, n_heads, drop_p) for _ in range(n_blocks)]
+            blocks = [Block(h_dim, input_seq_len + 1, n_heads, drop_p) for _ in range(n_blocks)]
            self.state_encoder = state_encoder
            self.embed_rtg = nn.Sequential(nn.Linear(1, h_dim), nn.Tanh())
            self.head = nn.Linear(h_dim, act_dim, bias=False)
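The list-splat construction of `predict_action` appends a `Tanh` only when actions are continuous; for discrete actions the head stays a bare `Linear` producing logits. A quick standalone check of the idiom:

```python
import torch.nn as nn

h_dim, act_dim = 8, 4  # toy sizes

for use_action_tanh in (True, False):
    # Tanh is appended only when use_action_tanh is True (continuous actions)
    head = nn.Sequential(*([nn.Linear(h_dim, act_dim)] + ([nn.Tanh()] if use_action_tanh else [])))
    print(use_action_tanh, list(head))  # Linear followed by Tanh, then Linear alone
```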
@@ -161,9 +164,8 @@ def forward(self, timesteps, states, actions, returns_to_go, tar=None):

        # stack rtg, states and actions and reshape sequence as
        # (r_0, s_0, a_0, r_1, s_1, a_1, r_2, s_2, a_2 ...)
-        t_p = torch.stack(
-            (returns_embeddings, state_embeddings, action_embeddings), dim=1
-        ).permute(0, 2, 1, 3).reshape(B, 3 * T, self.h_dim)
+        t_p = torch.stack((returns_embeddings, state_embeddings, action_embeddings),
+                          dim=1).permute(0, 2, 1, 3).reshape(B, 3 * T, self.h_dim)
        h = self.embed_ln(t_p)
        # transformer and prediction
        h = self.transformer(h)
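To see why the stack/permute/reshape chain produces the (r_0, s_0, a_0, r_1, s_1, a_1, ...) order the comment promises, here is a small self-contained check with toy tensors:

```python
import torch

B, T, h_dim = 2, 3, 4  # toy sizes
r = torch.full((B, T, h_dim), 0.0)  # stand-in returns-to-go embeddings
s = torch.full((B, T, h_dim), 1.0)  # stand-in state embeddings
a = torch.full((B, T, h_dim), 2.0)  # stand-in action embeddings

# stack -> (B, 3, T, h); permute -> (B, T, 3, h); reshape -> (B, 3T, h)
t_p = torch.stack((r, s, a), dim=1).permute(0, 2, 1, 3).reshape(B, 3 * T, h_dim)

# Each consecutive triple along the sequence axis is (r_t, s_t, a_t)
print(t_p[0, :, 0])  # tensor([0., 1., 2., 0., 1., 2., 0., 1., 2.])
```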
@@ -183,20 +185,24 @@ def forward(self, timesteps, states, actions, returns_to_go, tar=None):
            state_embeddings = self.state_encoder(
                states.reshape(-1, 4, 84, 84).type(torch.float32).contiguous()
            )  # (batch * block_size, h_dim)
-            state_embeddings = state_embeddings.reshape(
-                B, T, self.h_dim
-            )  # (batch, block_size, h_dim)
+            state_embeddings = state_embeddings.reshape(B, T, self.h_dim)  # (batch, block_size, h_dim)
            returns_embeddings = self.embed_rtg(returns_to_go.type(torch.float32))
            action_embeddings = self.embed_action(actions.type(torch.long).squeeze(-1))  # (batch, block_size, h_dim)

-            token_embeddings = torch.zeros((B, T * 3 - int(tar is None), self.h_dim), dtype=torch.float32, device=state_embeddings.device)
-            token_embeddings[:,::3,:] = returns_embeddings
-            token_embeddings[:,1::3,:] = state_embeddings
-            token_embeddings[:,2::3,:] = action_embeddings[:,-T + int(tar is None):,:]
-
-            all_global_pos_emb = torch.repeat_interleave(self.global_pos_emb, B, dim=0)  # batch_size, traj_length, h_dim
+            token_embeddings = torch.zeros(
+                (B, T * 3 - int(tar is None), self.h_dim), dtype=torch.float32, device=state_embeddings.device
+            )
+            token_embeddings[:, ::3, :] = returns_embeddings
+            token_embeddings[:, 1::3, :] = state_embeddings
+            token_embeddings[:, 2::3, :] = action_embeddings[:, -T + int(tar is None):, :]
+
+            all_global_pos_emb = torch.repeat_interleave(
+                self.global_pos_emb, B, dim=0
+            )  # batch_size, traj_length, h_dim

-            position_embeddings = torch.gather(all_global_pos_emb, 1, torch.repeat_interleave(timesteps, self.h_dim, dim=-1)) + self.pos_emb[:, :token_embeddings.shape[1], :]
+            position_embeddings = torch.gather(
+                all_global_pos_emb, 1, torch.repeat_interleave(timesteps, self.h_dim, dim=-1)
+            ) + self.pos_emb[:, :token_embeddings.shape[1], :]

            t_p = token_embeddings + position_embeddings

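The strided writes fill the return/state/action slots of the interleaved sequence, and the `gather` call then looks up one global positional row per sample from its absolute timestep. A sketch of the gather step alone, assuming `timesteps` arrives with shape `(B, 1, 1)` (my reading of the calling code, not confirmed by this hunk):

```python
import torch

B, h_dim, max_timestep = 2, 8, 100  # toy sizes
global_pos_emb = torch.randn(1, max_timestep + 1, h_dim)
timesteps = torch.tensor([[[3]], [[42]]])  # (B, 1, 1), one absolute timestep per sample

# Broadcast the table across the batch, then pick row timesteps[b] for each sample b
all_global_pos_emb = torch.repeat_interleave(global_pos_emb, B, dim=0)  # (B, max_timestep+1, h_dim)
index = torch.repeat_interleave(timesteps, h_dim, dim=-1)               # (B, 1, h_dim)
picked = torch.gather(all_global_pos_emb, 1, index)                     # (B, 1, h_dim)

assert torch.equal(picked[0, 0], global_pos_emb[0, 3])
assert torch.equal(picked[1, 0], global_pos_emb[0, 42])
```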
@@ -207,7 +213,7 @@ def forward(self, timesteps, states, actions, returns_to_go, tar=None):

            return_preds = None
            state_preds = None
-            action_preds = logits[:, 1::3, :] # only keep predictions from state_embeddings
+            action_preds = logits[:, 1::3, :]  # only keep predictions from state_embeddings

        return state_preds, action_preds, return_preds

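In the interleaved layout, positions 1, 4, 7, ... of the transformer output are the state tokens, and the next action is read off each state token's output; the `1::3` slice in the hunk above keeps exactly those rows. A toy illustration:

```python
import torch

B, T, act_dim = 1, 3, 5  # toy sizes
# Fake transformer output over the interleaved (r_0, s_0, a_0, r_1, ...) sequence,
# with each row tagged by its sequence position
logits = torch.arange(3 * T).float().view(B, 3 * T, 1).expand(B, 3 * T, act_dim)

action_preds = logits[:, 1::3, :]  # rows 1, 4, 7 -> outputs of s_0, s_1, s_2
print(action_preds[0, :, 0])       # tensor([1., 4., 7.])
```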
@@ -227,7 +233,7 @@ def configure_optimizers(self, weight_decay, learning_rate, betas=(0.9, 0.95)):
        blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
        for mn, m in self.named_modules():
            for pn, p in m.named_parameters():
-                fpn = '%s.%s' % (mn, pn) if mn else pn # full param name
+                fpn = '%s.%s' % (mn, pn) if mn else pn  # full param name

                if pn.endswith('bias'):
                    # all biases will not be decayed
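This loop follows the minGPT `configure_optimizers` pattern: build a fully qualified name for every parameter, then sort parameters into decay/no-decay buckets. A condensed standalone sketch of that split (the whitelist/blacklist handling is my reconstruction of the pattern, only partially visible in this hunk):

```python
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 4), nn.LayerNorm(4))
decay, no_decay = set(), set()
whitelist = (nn.Linear,)                  # weights that get weight decay
blacklist = (nn.LayerNorm, nn.Embedding)  # weights that never get decay

for mn, m in model.named_modules():
    for pn, p in m.named_parameters():
        fpn = '%s.%s' % (mn, pn) if mn else pn  # full param name
        if pn.endswith('bias'):
            no_decay.add(fpn)  # all biases skip decay
        elif pn.endswith('weight') and isinstance(m, whitelist):
            decay.add(fpn)
        elif pn.endswith('weight') and isinstance(m, blacklist):
            no_decay.add(fpn)

print(sorted(decay))     # ['0.weight']
print(sorted(no_decay))  # ['0.bias', '1.bias', '1.weight']
```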
@@ -253,8 +259,14 @@ def configure_optimizers(self, weight_decay, learning_rate, betas=(0.9, 0.95)):

        # create the pytorch optimizer object
        optim_groups = [
-            {"params": [param_dict[pn] for pn in sorted(list(decay))], "weight_decay": weight_decay},
-            {"params": [param_dict[pn] for pn in sorted(list(no_decay))], "weight_decay": 0.0},
+            {
+                "params": [param_dict[pn] for pn in sorted(list(decay))],
+                "weight_decay": weight_decay
+            },
+            {
+                "params": [param_dict[pn] for pn in sorted(list(no_decay))],
+                "weight_decay": 0.0
+            },
        ]
        optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas)
-        return optimizer
+        return optimizer
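The two parameter groups hand `AdamW` different `weight_decay` values, so decay applies only to the whitelisted weights. A minimal usage sketch with hypothetical parameters:

```python
import torch

w = torch.nn.Parameter(torch.randn(4, 4))  # stands in for a decayed weight
b = torch.nn.Parameter(torch.zeros(4))     # stands in for a bias / no-decay param

optim_groups = [
    {"params": [w], "weight_decay": 0.1},
    {"params": [b], "weight_decay": 0.0},
]
optimizer = torch.optim.AdamW(optim_groups, lr=3e-4, betas=(0.9, 0.95))
print([g["weight_decay"] for g in optimizer.param_groups])  # [0.1, 0.0]
```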