FEAT: code-llama (#402)
UranusSeven authored Aug 25, 2023
1 parent 6c60377 commit 8fa1630
Showing 4 changed files with 193 additions and 0 deletions.
50 changes: 50 additions & 0 deletions doc/source/models/builtin/code-llama-python.rst
@@ -0,0 +1,50 @@
.. _models_builtin_code_llama_python:


=================
Code-Llama-Python
=================

- **Context Length:** 100000
- **Model Name:** code-llama-python
- **Languages:** en
- **Abilities:** generate

Specifications
^^^^^^^^^^^^^^

Model Spec 1 (pytorch, 7 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 7
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** TheBloke/CodeLlama-7B-Python-fp16

.. note::

4-bit quantization is not supported on macOS.

Model Spec 2 (pytorch, 13 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 13
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** TheBloke/CodeLlama-13B-Python-fp16

.. note::

4-bit quantization is not supported on macOS.

Model Spec 3 (pytorch, 34 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 34
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** TheBloke/CodeLlama-34B-Python-fp16

.. note::

4-bit quantization is not supported on macOS.
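Taken together, the three specs above map a parameter count to a single pytorch-format Hugging Face repository. A minimal sketch of that lookup (the dictionary and helper name are illustrative, not part of the Xinference API):

```python
# Mapping distilled from the spec table above:
# parameter count (in billions) -> Hugging Face model ID.
CODE_LLAMA_PYTHON_SPECS = {
    7: "TheBloke/CodeLlama-7B-Python-fp16",
    13: "TheBloke/CodeLlama-13B-Python-fp16",
    34: "TheBloke/CodeLlama-34B-Python-fp16",
}


def model_id_for(size_in_billions: int) -> str:
    """Return the pytorch-format model ID for a given size, or raise."""
    try:
        return CODE_LLAMA_PYTHON_SPECS[size_in_billions]
    except KeyError:
        raise ValueError(
            f"no code-llama-python spec for {size_in_billions}B; "
            f"choose from {sorted(CODE_LLAMA_PYTHON_SPECS)}"
        )
```

All three sizes share the same quantization options (4-bit, 8-bit, none), so only the size needs to vary when selecting a spec.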
49 changes: 49 additions & 0 deletions doc/source/models/builtin/code-llama.rst
@@ -0,0 +1,49 @@
.. _models_builtin_code_llama:

==========
Code-Llama
==========

- **Context Length:** 100000
- **Model Name:** code-llama
- **Languages:** en
- **Abilities:** generate

Specifications
^^^^^^^^^^^^^^

Model Spec 1 (pytorch, 7 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 7
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** TheBloke/CodeLlama-7B-fp16

.. note::

4-bit quantization is not supported on macOS.

Model Spec 2 (pytorch, 13 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 13
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** TheBloke/CodeLlama-13B-fp16

.. note::

4-bit quantization is not supported on macOS.

Model Spec 3 (pytorch, 34 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 34
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** TheBloke/CodeLlama-34B-fp16

.. note::

4-bit quantization is not supported on macOS.
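The note repeated under each spec restricts quantization by platform: 4-bit is unavailable on macOS. A small sketch of that filtering logic (the helper is hypothetical, written only to make the constraint concrete):

```python
import platform

# Per the notes above, 4-bit quantization is not supported on macOS.
UNSUPPORTED_ON_MACOS = {"4-bit"}


def available_quantizations(spec_quantizations, system=None):
    """Filter a spec's quantization list by operating system.

    `system` defaults to the current platform; pass "Darwin" or
    "Linux" explicitly to inspect another platform's options.
    """
    system = system or platform.system()
    if system == "Darwin":
        return [q for q in spec_quantizations if q not in UNSUPPORTED_ON_MACOS]
    return list(spec_quantizations)
```

On Linux all three options listed in the specs remain selectable; on macOS only 8-bit and unquantized weights do.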
2 changes: 2 additions & 0 deletions doc/source/models/builtin/index.rst
@@ -41,6 +41,8 @@ Code Generation Models
++++++++++++++++++++++
- :ref:`Starcoder <models_builtin_starcoder>`
- :ref:`StarCoderPlus <models_builtin_starcoderplus>`
- :ref:`Code-Llama <models_builtin_code_llama>`
- :ref:`Code-Llama-Python <models_builtin_code_llama_python>`


Code Assistant Models
92 changes: 92 additions & 0 deletions xinference/model/llm/llm_family.json
@@ -1253,5 +1253,97 @@
],
"intra_message_sep": "\n\n### "
}
},
{
"version": 1,
"context_length": 100000,
"model_name": "code-llama",
"model_lang": [
"en"
],
"model_ability": [
"generate"
],
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 7,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "TheBloke/CodeLlama-7B-fp16",
"model_revision": "ce09049eb9140a19cf78051cb5d849607b6fa8ec"
},
{
"model_format": "pytorch",
"model_size_in_billions": 13,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "TheBloke/CodeLlama-13B-fp16",
"model_revision": "d67ca1183da991d0d97927bdaaf35599556dfd76"
},
{
"model_format": "pytorch",
"model_size_in_billions": 34,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "TheBloke/CodeLlama-34B-fp16",
"model_revision": "f91d0cf7fc338cdc726f9c72d5ea15fe51bb16e9"
}
]
},
{
"version": 1,
"context_length": 100000,
"model_name": "code-llama-python",
"model_lang": [
"en"
],
"model_ability": [
"generate"
],
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 7,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "TheBloke/CodeLlama-7B-Python-fp16",
"model_revision": "d51c51e625bc24b9a7a0616e82681b4859e2cfe4"
},
{
"model_format": "pytorch",
"model_size_in_billions": 13,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "TheBloke/CodeLlama-13B-Python-fp16",
"model_revision": "442282f4207442b828953a72c51a919c332cba5c"
},
{
"model_format": "pytorch",
"model_size_in_billions": 34,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "TheBloke/CodeLlama-34B-Python-fp16",
"model_revision": "875f9d97fb6c9619d8867887dd1d80918ff0f593"
}
]
}
]
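Both entries added to ``llm_family.json`` follow the same schema: a top-level family record with ``model_name``, ``context_length``, ``model_lang``, and ``model_ability``, plus one ``model_specs`` object per size. A minimal sketch of reading and sanity-checking one such entry with the standard library:

```python
import json

# A single family entry, copied verbatim (abridged to one spec) from the
# diff above.
entry = json.loads("""
{
  "version": 1,
  "context_length": 100000,
  "model_name": "code-llama",
  "model_lang": ["en"],
  "model_ability": ["generate"],
  "model_specs": [
    {
      "model_format": "pytorch",
      "model_size_in_billions": 7,
      "quantizations": ["4-bit", "8-bit", "none"],
      "model_id": "TheBloke/CodeLlama-7B-fp16",
      "model_revision": "ce09049eb9140a19cf78051cb5d849607b6fa8ec"
    }
  ]
}
""")

# Shape checks mirroring the fields every spec in this file carries.
assert entry["model_name"] == "code-llama"
assert entry["context_length"] == 100000
assert "generate" in entry["model_ability"]
for spec in entry["model_specs"]:
    assert spec["model_format"] == "pytorch"
    assert set(spec["quantizations"]) == {"4-bit", "8-bit", "none"}
```

The ``model_revision`` field pins each spec to a specific Hugging Face commit, so downloads are reproducible even if the upstream repository changes.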
