diff --git a/doc/source/models/builtin/code-llama-python.rst b/doc/source/models/builtin/code-llama-python.rst new file mode 100644 index 0000000000..50c51ce251 --- /dev/null +++ b/doc/source/models/builtin/code-llama-python.rst @@ -0,0 +1,50 @@ +.. _models_builtin_code_llama_python: + + +================= +Code-Llama-Python +================= + +- **Context Length:** 100000 +- **Model Name:** code-llama-python +- **Languages:** en +- **Abilities:** generate + +Specifications +^^^^^^^^^^^^^^ + +Model Spec 1 (pytorch, 7 Billion) ++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 7 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** TheBloke/CodeLlama-7B-Python-fp16 + +.. note:: + + 4-bit quantization is not supported on macOS. + +Model Spec 2 (pytorch, 13 Billion) +++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 13 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** TheBloke/CodeLlama-13B-Python-fp16 + +.. note:: + + 4-bit quantization is not supported on macOS. + +Model Spec 3 (pytorch, 34 Billion) +++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 34 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** TheBloke/CodeLlama-34B-Python-fp16 + +.. note:: + + 4-bit quantization is not supported on macOS. diff --git a/doc/source/models/builtin/code-llama.rst b/doc/source/models/builtin/code-llama.rst new file mode 100644 index 0000000000..f4aff27d29 --- /dev/null +++ b/doc/source/models/builtin/code-llama.rst @@ -0,0 +1,49 @@ +..
_models_builtin_code_llama: + +========== +Code-Llama +========== + +- **Context Length:** 100000 +- **Model Name:** code-llama +- **Languages:** en +- **Abilities:** generate + +Specifications +^^^^^^^^^^^^^^ + +Model Spec 1 (pytorch, 7 Billion) ++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 7 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** TheBloke/CodeLlama-7B-fp16 + +.. note:: + + 4-bit quantization is not supported on macOS. + +Model Spec 2 (pytorch, 13 Billion) +++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 13 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** TheBloke/CodeLlama-13B-fp16 + +.. note:: + + 4-bit quantization is not supported on macOS. + +Model Spec 3 (pytorch, 34 Billion) +++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 34 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** TheBloke/CodeLlama-34B-fp16 + +.. note:: + + 4-bit quantization is not supported on macOS. 
diff --git a/doc/source/models/builtin/index.rst b/doc/source/models/builtin/index.rst index aa180b45fd..d2ddb96f81 100644 --- a/doc/source/models/builtin/index.rst +++ b/doc/source/models/builtin/index.rst @@ -41,6 +41,8 @@ Code Generation Models ++++++++++++++++++++++ - :ref:`Starcoder ` - :ref:`StarCoderPlus ` +- :ref:`Code-Llama ` +- :ref:`Code-Llama-Python ` Code Assistant Models diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index d9bd66a800..19d3f1ee30 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -1253,5 +1253,97 @@ ], "intra_message_sep": "\n\n### " } + }, + { + "version": 1, + "context_length": 100000, + "model_name": "code-llama", + "model_lang": [ + "en" + ], + "model_ability": [ + "generate" + ], + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "TheBloke/CodeLlama-7B-fp16", + "model_revision": "ce09049eb9140a19cf78051cb5d849607b6fa8ec" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 13, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "TheBloke/CodeLlama-13B-fp16", + "model_revision": "d67ca1183da991d0d97927bdaaf35599556dfd76" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 34, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "TheBloke/CodeLlama-34B-fp16", + "model_revision": "f91d0cf7fc338cdc726f9c72d5ea15fe51bb16e9" + } + ] + }, + { + "version": 1, + "context_length": 100000, + "model_name": "code-llama-python", + "model_lang": [ + "en" + ], + "model_ability": [ + "generate" + ], + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "TheBloke/CodeLlama-7B-Python-fp16", + "model_revision": "d51c51e625bc24b9a7a0616e82681b4859e2cfe4" + }, + { + "model_format": "pytorch", + 
"model_size_in_billions": 13, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "TheBloke/CodeLlama-13B-Python-fp16", + "model_revision": "442282f4207442b828953a72c51a919c332cba5c" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 34, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "TheBloke/CodeLlama-34B-Python-fp16", + "model_revision": "875f9d97fb6c9619d8867887dd1d80918ff0f593" + } + ] } ]