You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
22:19:21-672140 INFO Loading "TheBloke_Luban-Marcoroni-13B-v3-GPTQ"
22:19:21-682114 ERROR Failed to load the model.
Traceback (most recent call last):
File "e:\text-generation-webui\modules\ui_model_menu.py", line 232, in load_model_wrapper
shared.model, shared.tokenizer = load_model(selected_model, loader)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "e:\text-generation-webui\modules\models.py", line 93, in load_model
output = load_func_map[loader](model_name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "e:\text-generation-webui\modules\models.py", line 313, in ExLlamav2_HF_loader
from modules.exllamav2_hf import Exllamav2HF
File "e:\text-generation-webui\modules\exllamav2_hf.py", line 7, in<module>
from exllamav2 import (
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\exllamav2\__init__.py", line 3, in<module>
from exllamav2.model import ExLlamaV2
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\exllamav2\model.py", line 35, in<module>
from exllamav2.config import ExLlamaV2Config
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\exllamav2\config.py", line 5, in<module>
from exllamav2.stloader import STFile, cleanup_stfiles
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\exllamav2\stloader.py", line 5, in<module>
from exllamav2.ext import none_tensor, exllamav2_ext as ext_c
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\exllamav2\ext.py", line 291, in<module>
ext_c = exllamav2_ext
^^^^^^^^^^^^^
NameError: name 'exllamav2_ext' is not defined
System Info
win10
update webui 10/24
same with
22:24:22-231318 INFO Loading "TheBloke_Lelantos-Maid-DPO-7B-GPTQ"
22:24:22-241559 ERROR Failed to load the model.
Traceback (most recent call last):
File "e:\text-generation-webui\modules\ui_model_menu.py", line 232, in load_model_wrapper
shared.model, shared.tokenizer = load_model(selected_model, loader)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "e:\text-generation-webui\modules\models.py", line 93, in load_model
output = load_func_maploader
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "e:\text-generation-webui\modules\models.py", line 320, in AutoGPTQ_loader
import modules.AutoGPTQ_loader
File "e:\text-generation-webui\modules\AutoGPTQ_loader.py", line 4, in
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq_init_.py", line 1, in
from .modeling import AutoGPTQForCausalLM, BaseQuantizeConfig
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq\modeling_init_.py", line 2, in
from .auto import GPTQ_CAUSAL_LM_MODEL_MAP, AutoGPTQForCausalLM
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq\modeling\auto.py", line 9, in
from .decilm import DeciLMGPTQForCausalLM
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq\modeling\decilm.py", line 8, in
from ..nn_modules.fused_llama_attn import FusedLlamaAttentionForQuantizedModel
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq\nn_modules\fused_llama_attn.py", line 6, in
from transformers.models.llama.modeling_llama import (
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\transformers\models\llama\modeling_llama.py", line 32, in
from ...modeling_flash_attention_utils import flash_attention_forward
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\transformers\modeling_flash_attention_utils.py", line 27, in
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\flash_attn_init.py", line 3, in
from flash_attn.flash_attn_interface import (
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\flash_attn\flash_attn_interface.py", line 10, in
import flash_attn_2_cuda as flash_attn_cuda
ImportError: DLL load failed while importing flash_attn_2_cuda: Die angegebene Prozedur wurde nicht gefunden.
The text was updated successfully, but these errors were encountered:
Ошибка NameError: name 'exllamav2_ext' is not defined указывает на проблему с инициализацией модуля exllamav2_ext в пакете exllamav2. Эта ошибка обычно возникает из-за отсутствия компиляции или неправильной установки расширений, необходимых для работы ExLlamaV2
!!!!ПОСЛЕ КОМАНДЫ pip install flash-attn --no-cache-dir НАЧНЕТСЯ КОМПИЛЯЦИЯ ФАЙЛОВ!!!! компиляция может занять до 2 часов, зависит от вашей системы, не пугайтесь большой нагрузки на RAM и процессор, компиляция это ресурсозатратный процесс. После завершения повторите свою попытку запуска, если ничего не работает следуйте инструкции ниже.
pip install --upgrade exllamav2
Перейдите в директорию exllamav2/ext ----> python setup.py install
Describe the bug
see log
Is there an existing issue for this?
Reproduction
10 GPTQ models that worked last year no longer load today.
GGUF models still work.
Screenshot
No response
Logs
System Info
same with
22:24:22-231318 INFO Loading "TheBloke_Lelantos-Maid-DPO-7B-GPTQ"
22:24:22-241559 ERROR Failed to load the model.
Traceback (most recent call last):
File "e:\text-generation-webui\modules\ui_model_menu.py", line 232, in load_model_wrapper
shared.model, shared.tokenizer = load_model(selected_model, loader)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "e:\text-generation-webui\modules\models.py", line 93, in load_model
output = load_func_maploader
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "e:\text-generation-webui\modules\models.py", line 320, in AutoGPTQ_loader
import modules.AutoGPTQ_loader
File "e:\text-generation-webui\modules\AutoGPTQ_loader.py", line 4, in
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq_init_.py", line 1, in
from .modeling import AutoGPTQForCausalLM, BaseQuantizeConfig
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq\modeling_init_.py", line 2, in
from .auto import GPTQ_CAUSAL_LM_MODEL_MAP, AutoGPTQForCausalLM
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq\modeling\auto.py", line 9, in
from .decilm import DeciLMGPTQForCausalLM
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq\modeling\decilm.py", line 8, in
from ..nn_modules.fused_llama_attn import FusedLlamaAttentionForQuantizedModel
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\auto_gptq\nn_modules\fused_llama_attn.py", line 6, in
from transformers.models.llama.modeling_llama import (
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\transformers\models\llama\modeling_llama.py", line 32, in
from ...modeling_flash_attention_utils import flash_attention_forward
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\transformers\modeling_flash_attention_utils.py", line 27, in
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\flash_attn_init.py", line 3, in
from flash_attn.flash_attn_interface import (
File "e:\text-generation-webui\installer_files\env\Lib\site-packages\flash_attn\flash_attn_interface.py", line 10, in
import flash_attn_2_cuda as flash_attn_cuda
ImportError: DLL load failed while importing flash_attn_2_cuda: Die angegebene Prozedur wurde nicht gefunden.
The text was updated successfully, but these errors were encountered: