Commit a64dde1

doc: disable TE by default
1 parent 7643c64

2 files changed: +13 −10 lines

ChatTTS/model/gpt.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -91,7 +91,7 @@ def __init__(
             ],
         )
 
-    def from_pretrained(self, file_path: str):
+    def from_pretrained(self, file_path: str, experimental=False):
         if self.is_vllm and platform.system().lower() == "linux":
             from safetensors.torch import save_file
 
@@ -134,12 +134,12 @@ def from_pretrained(self, file_path: str):
         self.load_state_dict(torch.load(file_path, weights_only=True, mmap=True))
 
         if (
-            "cuda" in str(self.device_gpt) and platform.system().lower() == "linux"
+            experimental and "cuda" in str(self.device_gpt) and platform.system().lower() == "linux"
         ):  # is TELlamaModel
             try:
                 from .cuda import TELlamaModel
 
-                self.logger.info("Linux with CUDA, try NVIDIA accelerated TELlamaModel")
+                self.logger.warning("Linux with CUDA, try NVIDIA accelerated TELlamaModel because experimental is enabled")
                 state_dict = self.gpt.state_dict()
                 vanilla = TELlamaModel.from_state_dict(state_dict, self.llama_config)
                 # Force mem release. Taken from huggingface code
```
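
For context, a minimal usage sketch of the changed signature. The `gpt` object and the checkpoint path below are illustrative assumptions, not part of this commit; only the `from_pretrained(file_path, experimental=...)` signature comes from the diff above.

```python
# Sketch only: assumes `gpt` is an already-constructed ChatTTS.model.gpt.GPT
# instance and "asset/GPT.pt" is a placeholder checkpoint path.

# Default after this commit: plain weight loading; the TELlamaModel
# conversion is skipped even on Linux with CUDA.
gpt.from_pretrained("asset/GPT.pt")

# Opt-in: on Linux with CUDA this attempts the experimental NVIDIA
# TransformerEngine path (TELlamaModel) inside a try block and logs a
# warning that the experimental path is enabled.
gpt.from_pretrained("asset/GPT.pt", experimental=True)
```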

README.md

Lines changed: 10 additions & 7 deletions

````diff
@@ -107,25 +107,28 @@ pip install safetensors vllm==0.2.7 torchaudio
 ```
 
 #### Unrecommended Optional: Install TransformerEngine if using NVIDIA GPU (Linux only)
-> [!Note]
-> The installation process is very slow.
-
 > [!Warning]
+> DO NOT INSTALL!
 > The adaptation of TransformerEngine is currently under development and CANNOT run properly now.
-> Only install it on developing purpose.
+> Only install it on developing purpose. See more details on at #672 #676
+
+> [!Note]
+> The installation process is very slow.
 
 ```bash
 pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
 ```
 
 #### Unrecommended Optional: Install FlashAttention-2 (mainly NVIDIA GPU)
-> [!Note]
-> See supported devices at the [Hugging Face Doc](https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2).
-
 > [!Warning]
+> DO NOT INSTALL!
 > Currently the FlashAttention-2 will slow down the generating speed according to [this issue](https://github.com/huggingface/transformers/issues/26990).
 > Only install it on developing purpose.
 
+> [!Note]
+> See supported devices at the [Hugging Face Doc](https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2).
+
+
 ```bash
 pip install flash-attn --no-build-isolation
 ```
````
