Feature(MInference): add supported models
iofu728 committed Jul 2, 2024
1 parent 882dcc6 commit 2c48613
Showing 3 changed files with 29 additions and 0 deletions.
19 changes: 19 additions & 0 deletions README.md
@@ -43,6 +43,25 @@ To get started with MInference, simply install it using pip:
pip install minference
```

### Supported Models

In general, *MInference* **supports any decoding LLM**, including LLaMA-style and Phi models.
We have adapted nearly all of the open-source long-context LLMs currently available.
If your model is not on the supported list, feel free to open an issue, or follow [the guide](./experiments/) to generate the sparse heads config manually.

You can get the complete list of supported LLMs by running:
```python
from minference import get_support_models
get_support_models()
```
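The returned list can be used to gate model loading before applying MInference. A minimal self-contained sketch of that pattern — the hard-coded `SUPPORTED` list and the `is_supported` helper below are illustrative, not part of the library:

```python
# Hypothetical guard around model loading. SUPPORTED mirrors what
# get_support_models() would return; it is hard-coded here so the
# sketch runs without the library installed.
SUPPORTED = [
    "gradientai/Llama-3-8B-Instruct-262k",
    "THUDM/glm-4-9b-chat-1m",
    "01-ai/Yi-9B-200K",
    "microsoft/Phi-3-mini-128k-instruct",
    "Qwen/Qwen2-7B-Instruct",
]

def is_supported(model_name: str) -> bool:
    """Return True if a sparse-head pattern config ships for this model."""
    return model_name in SUPPORTED

print(is_supported("Qwen/Qwen2-7B-Instruct"))  # prints: True
```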

Currently, we support the following LLMs:
- LLaMA-3: [gradientai/Llama-3-8B-Instruct-262k](https://huggingface.co/gradientai/Llama-3-8B-Instruct-262k), [gradientai/Llama-3-8B-Instruct-Gradient-1048k](https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k), [gradientai/Llama-3-8B-Instruct-Gradient-4194k](https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-4194k)
- GLM-4: [THUDM/glm-4-9b-chat-1m](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
- Yi: [01-ai/Yi-9B-200K](https://huggingface.co/01-ai/Yi-9B-200K)
- Phi-3: [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct)
- Qwen2: [Qwen/Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct)

### How to use MInference

for HF,
3 changes: 3 additions & 0 deletions minference/__init__.py
@@ -1,6 +1,8 @@
# Copyright (c) 2024 Microsoft
# Licensed under The MIT License [see LICENSE for details]

from .configs.model2path import get_support_models

# flake8: noqa
from .minference_configuration import MInferenceConfig
from .models_patch import MInference
@@ -25,4 +27,5 @@
"vertical_slash_sparse_attention",
"block_sparse_attention",
"streaming_forward",
"get_support_models",
]
7 changes: 7 additions & 0 deletions minference/configs/model2path.py
@@ -11,6 +11,9 @@
"gradientai/Llama-3-8B-Instruct-Gradient-1048k": os.path.join(
BASE_DIR, "Llama_3_8B_Instruct_262k_kv_out_v32_fit_o_best_pattern.json"
),
"gradientai/Llama-3-8B-Instruct-Gradient-4194k": os.path.join(
BASE_DIR, "Llama_3_8B_Instruct_262k_kv_out_v32_fit_o_best_pattern.json"
),
"01-ai/Yi-9B-200K": os.path.join(
BASE_DIR, "Yi_9B_200k_kv_out_v32_fit_o_best_pattern.json"
),
@@ -24,3 +27,7 @@
BASE_DIR, "GLM_4_9B_1M_instruct_kv_out_v32_fit_o_best_pattern.json"
),
}


def get_support_models():
return list(MODEL2PATH.keys())
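The whole mechanism is a registry plus an accessor: `MODEL2PATH` maps a Hugging Face model id to the JSON file holding its sparse-head pattern, and `get_support_models` simply exposes the registry keys. A self-contained sketch of the same shape, with an abbreviated registry and an illustrative `BASE_DIR` and Qwen2 filename:

```python
import os

BASE_DIR = "configs"  # illustrative; the real BASE_DIR is defined in model2path.py

# Maps a Hugging Face model id to the JSON file with its sparse-head pattern.
# Only two entries are reproduced here; the Qwen2 filename is illustrative.
MODEL2PATH = {
    "01-ai/Yi-9B-200K": os.path.join(
        BASE_DIR, "Yi_9B_200k_kv_out_v32_fit_o_best_pattern.json"
    ),
    "Qwen/Qwen2-7B-Instruct": os.path.join(
        BASE_DIR, "Qwen2_7B_Instruct_pattern.json"
    ),
}

def get_support_models():
    """List every model id that has a registered sparse-pattern config."""
    return list(MODEL2PATH.keys())
```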
