10 changes: 10 additions & 0 deletions DeepSeek-OCR-master/DeepSeek-OCR-vllm/deepencoder/clip_sdpa.py
@@ -61,6 +61,16 @@ def forward(self, x: torch.Tensor):


def get_abs_pos(abs_pos, tgt_size):
"""

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indentation

Resize absolute positional embeddings to target size if necessary.

Args:
abs_pos (torch.Tensor): [L, C] absolute positional embeddings.
tgt_size (int): M, the target number of positions (assumed to form a square grid).

Returns:
torch.Tensor: resized absolute positional embeddings of shape [M, C].
"""
# abs_pos: L, C
# tgt_size: M
# return: M, C
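For reference, a minimal sketch of the resizing the new docstring describes, written against the [L, C] in / [M, C] out shapes from the inline comments above; the function name and the bicubic interpolation mode are assumptions for illustration, not taken verbatim from the patched file:

import math

import torch
import torch.nn.functional as F

def get_abs_pos_sketch(abs_pos: torch.Tensor, tgt_size: int) -> torch.Tensor:
    # abs_pos: [L, C] embeddings for an S x S grid (L = S * S).
    # tgt_size: M, the target token count for a T x T grid (M = T * T).
    src_size = int(math.sqrt(abs_pos.size(0)))
    dst_size = int(math.sqrt(tgt_size))
    if src_size == dst_size:
        return abs_pos
    # [L, C] -> [1, C, S, S], interpolate to [1, C, T, T], flatten to [M, C].
    grid = abs_pos.reshape(1, src_size, src_size, -1).permute(0, 3, 1, 2)
    grid = F.interpolate(grid.float(), size=(dst_size, dst_size),
                         mode="bicubic", align_corners=False)
    return grid.permute(0, 2, 3, 1).reshape(-1, abs_pos.size(1)).to(abs_pos.dtype)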
18 changes: 16 additions & 2 deletions DeepSeek-OCR-master/DeepSeek-OCR-vllm/deepencoder/sam_vary_sdpa.py
@@ -41,6 +41,20 @@ def get_abs_pos(abs_pos, tgt_size):


class MLPBlock(nn.Module):
"""
Simple MLP block with two linear layers and activation.

Args:
embedding_dim (int): Input and output feature dimension.
mlp_dim (int): Hidden layer dimension.
act (nn.Module): Activation function class (default nn.GELU).

Forward Args:
x (torch.Tensor): Input tensor of shape [B, N, embedding_dim].

Forward Returns:
torch.Tensor: Output tensor of same shape [B, N, embedding_dim].
"""
def __init__(
self,
embedding_dim: int,
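Before the next hunk, a self-contained sketch of the interface the MLPBlock docstring above documents (two linear layers with an activation in between); the class name is illustrative, and details of the patched class may differ:

from typing import Type

import torch
from torch import nn

class MLPBlockSketch(nn.Module):
    def __init__(self, embedding_dim: int, mlp_dim: int,
                 act: Type[nn.Module] = nn.GELU) -> None:
        super().__init__()
        self.lin1 = nn.Linear(embedding_dim, mlp_dim)   # expand
        self.lin2 = nn.Linear(mlp_dim, embedding_dim)   # project back
        self.act = act()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # [B, N, embedding_dim] -> [B, N, mlp_dim] -> [B, N, embedding_dim]
        return self.lin2(self.act(self.lin1(x)))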
@@ -359,7 +373,7 @@ def window_unpartition(
hw (Tuple): original height and width (H, W) before padding.

Returns:
-    x: unpartitioned sequences with [B, H, W, C].
+    torch.Tensor: Reconstructed tensor of shape [B, H, W, C].
"""
Hp, Wp = pad_hw
H, W = hw
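The Returns fix above is easier to check against a sketch of the whole operation: window sequences are folded back into the padded grid, then the padding is cropped away. Shapes follow the docstring; the body is an illustrative reconstruction, not the patched code:

from typing import Tuple

import torch

def window_unpartition_sketch(windows: torch.Tensor, window_size: int,
                              pad_hw: Tuple[int, int],
                              hw: Tuple[int, int]) -> torch.Tensor:
    Hp, Wp = pad_hw  # padded size, each a multiple of window_size
    H, W = hw        # original size before padding
    B = windows.shape[0] // (Hp * Wp // window_size // window_size)
    # [B * num_windows, window_size, window_size, C] -> [B, Hp, Wp, C]
    x = windows.view(B, Hp // window_size, Wp // window_size,
                     window_size, window_size, -1)
    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1)
    return x[:, :H, :W, :]  # crop padding -> [B, H, W, C]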
@@ -525,4 +539,4 @@ def _build_sam(
# tob
image_encoder.load_state_dict({k[30:]: v for k, v in state_dict.items() if 'vision_tower_high' in k}, strict=True)
print(checkpoint)
-return image_encoder
\ No newline at end of file
+return image_encoder
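Finally, the load_state_dict call above selects checkpoint keys containing 'vision_tower_high' and slices a fixed 30-character prefix off each key. A hedged sketch of the same idea with an explicit prefix; the exact prefix string in the checkpoint is an assumption here, hence the placeholder rather than a hard-coded slice:

def strip_prefix(state_dict: dict, prefix: str) -> dict:
    # Keep only the keys under `prefix` and drop it, so the sub-module
    # can load its weights with strict=True.
    return {k[len(prefix):]: v for k, v in state_dict.items()
            if k.startswith(prefix)}

# Hypothetical usage; the patched code slices k[30:] instead of using a
# named prefix, and the real prefix may differ:
# image_encoder.load_state_dict(
#     strip_prefix(state_dict, "<vision_tower_high prefix>"), strict=True)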