-
Notifications
You must be signed in to change notification settings - Fork 729
fix(generation): add timeout, progress fallback, and VRAM pre-flight … #671
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,16 @@ | ||
| """Execution helper for ``generate_music`` service invocation with progress tracking.""" | ||
|
|
||
| import os | ||
| import threading | ||
| from typing import Any, Dict, List, Optional, Sequence | ||
|
|
||
| from loguru import logger | ||
|
|
||
| # Maximum wall-clock seconds to wait for service_generate before declaring a hang. | ||
| # Generous default: most generations finish in 30-120s, but large batches on slow | ||
| # GPUs can take several minutes. Override via ACESTEP_GENERATION_TIMEOUT env var. | ||
| _DEFAULT_GENERATION_TIMEOUT = int(os.environ.get("ACESTEP_GENERATION_TIMEOUT", "600")) | ||
|
Comment on lines
+9
to
+12
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unsafe env-var parsing — non-numeric values crash at import time. The PR description promises safe parsing with fallback to 600 s, but this bare `int()` call raises an unhandled `ValueError` at import time on any non-numeric value. 🐛 Proposed fix: safe parsing with validation-# Maximum wall-clock seconds to wait for service_generate before declaring a hang.
-# Generous default: most generations finish in 30-120s, but large batches on slow
-# GPUs can take several minutes. Override via ACESTEP_GENERATION_TIMEOUT env var.
-_DEFAULT_GENERATION_TIMEOUT = int(os.environ.get("ACESTEP_GENERATION_TIMEOUT", "600"))
+# Maximum wall-clock seconds to wait for service_generate before declaring a hang.
+# Generous default: most generations finish in 30-120s, but large batches on slow
+# GPUs can take several minutes. Override via ACESTEP_GENERATION_TIMEOUT env var.
+_DEFAULT_GENERATION_TIMEOUT: int = 600
+
+_raw_timeout = os.environ.get("ACESTEP_GENERATION_TIMEOUT")
+if _raw_timeout is not None:
+ try:
+ _parsed = int(_raw_timeout)
+ if _parsed > 0:
+ _DEFAULT_GENERATION_TIMEOUT = _parsed
+ else:
+ logger.warning(
+ f"ACESTEP_GENERATION_TIMEOUT={_raw_timeout!r} is <= 0; "
+ f"falling back to {_DEFAULT_GENERATION_TIMEOUT}s."
+ )
+ except ValueError:
+ logger.warning(
+ f"ACESTEP_GENERATION_TIMEOUT={_raw_timeout!r} is not a valid integer; "
+ f"falling back to {_DEFAULT_GENERATION_TIMEOUT}s."
+ )🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| class GenerateMusicExecuteMixin: | ||
| """Run service generation under diffusion progress estimation lifecycle.""" | ||
|
|
@@ -25,12 +34,55 @@ def _run_generate_music_service_with_progress( | |
| shift: float, | ||
| infer_method: str, | ||
| ) -> Dict[str, Any]: | ||
| """Invoke ``service_generate`` while maintaining background progress estimation.""" | ||
| """Invoke ``service_generate`` while maintaining background progress estimation. | ||
|
|
||
| Wraps the synchronous CUDA call in a monitored thread so that a hung | ||
| diffusion loop becomes a recoverable ``TimeoutError`` instead of a | ||
| permanent UI freeze. | ||
| """ | ||
| infer_steps_for_progress = len(timesteps) if timesteps else inference_steps | ||
| progress_desc = f"Generating music (batch size: {actual_batch_size})..." | ||
| progress(0.52, desc=progress_desc) | ||
| stop_event = None | ||
| progress_thread = None | ||
|
|
||
| # --- Timeout-wrapped service_generate --- | ||
| # Run the actual CUDA work in a child thread so we can join() with a | ||
| # deadline. If it exceeds the timeout the calling thread unblocks and | ||
| # raises TimeoutError, which propagates to generate_music()'s | ||
| # try/except and becomes a clean error payload for the UI. | ||
| _result: Dict[str, Any] = {} | ||
| _error: Dict[str, BaseException] = {} | ||
|
|
||
| def _service_target(): | ||
| try: | ||
| _result["outputs"] = self.service_generate( | ||
| captions=service_inputs["captions_batch"], | ||
| lyrics=service_inputs["lyrics_batch"], | ||
| metas=service_inputs["metas_batch"], | ||
| vocal_languages=service_inputs["vocal_languages_batch"], | ||
| refer_audios=refer_audios, | ||
| target_wavs=service_inputs["target_wavs_tensor"], | ||
| infer_steps=inference_steps, | ||
| guidance_scale=guidance_scale, | ||
| seed=actual_seed_list, | ||
| repainting_start=service_inputs["repainting_start_batch"], | ||
| repainting_end=service_inputs["repainting_end_batch"], | ||
| instructions=service_inputs["instructions_batch"], | ||
| audio_cover_strength=audio_cover_strength, | ||
| cover_noise_strength=cover_noise_strength, | ||
| use_adg=use_adg, | ||
| cfg_interval_start=cfg_interval_start, | ||
| cfg_interval_end=cfg_interval_end, | ||
| shift=shift, | ||
| infer_method=infer_method, | ||
| audio_code_hints=service_inputs["audio_code_hints_batch"], | ||
| return_intermediate=service_inputs["should_return_intermediate"], | ||
| timesteps=timesteps, | ||
| ) | ||
| except Exception as exc: | ||
| _error["exc"] = exc | ||
|
Comment on lines
+83
to
+84
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If Use 🐛 Proposed fix- except Exception as exc:
+ except BaseException as exc:
_error["exc"] = exc🧰 Tools🪛 Ruff (0.15.1)[warning] 83-83: Do not catch blind exception: (BLE001) 🤖 Prompt for AI Agents |
||
|
|
||
| try: | ||
| stop_event, progress_thread = self._start_diffusion_progress_estimator( | ||
| progress=progress, | ||
|
|
@@ -41,33 +93,34 @@ def _run_generate_music_service_with_progress( | |
| duration_sec=audio_duration if audio_duration and audio_duration > 0 else None, | ||
| desc=progress_desc, | ||
| ) | ||
| outputs = self.service_generate( | ||
| captions=service_inputs["captions_batch"], | ||
| lyrics=service_inputs["lyrics_batch"], | ||
| metas=service_inputs["metas_batch"], | ||
| vocal_languages=service_inputs["vocal_languages_batch"], | ||
| refer_audios=refer_audios, | ||
| target_wavs=service_inputs["target_wavs_tensor"], | ||
| infer_steps=inference_steps, | ||
| guidance_scale=guidance_scale, | ||
| seed=actual_seed_list, | ||
| repainting_start=service_inputs["repainting_start_batch"], | ||
| repainting_end=service_inputs["repainting_end_batch"], | ||
| instructions=service_inputs["instructions_batch"], | ||
| audio_cover_strength=audio_cover_strength, | ||
| cover_noise_strength=cover_noise_strength, | ||
| use_adg=use_adg, | ||
| cfg_interval_start=cfg_interval_start, | ||
| cfg_interval_end=cfg_interval_end, | ||
| shift=shift, | ||
| infer_method=infer_method, | ||
| audio_code_hints=service_inputs["audio_code_hints_batch"], | ||
| return_intermediate=service_inputs["should_return_intermediate"], | ||
| timesteps=timesteps, | ||
|
|
||
| gen_thread = threading.Thread( | ||
| target=_service_target, | ||
| name="service-generate", | ||
| daemon=True, | ||
| ) | ||
| gen_thread.start() | ||
| gen_thread.join(timeout=_DEFAULT_GENERATION_TIMEOUT) | ||
|
|
||
| if gen_thread.is_alive(): | ||
| logger.error( | ||
| f"[generate_music] service_generate exceeded {_DEFAULT_GENERATION_TIMEOUT}s " | ||
| f"timeout (batch={actual_batch_size}, steps={inference_steps}, " | ||
| f"duration={audio_duration}s). The CUDA operation may still be " | ||
| f"running in the background." | ||
| ) | ||
| raise TimeoutError( | ||
| f"Music generation timed out after {_DEFAULT_GENERATION_TIMEOUT} seconds. " | ||
| f"This usually means the GPU ran out of VRAM or the diffusion loop " | ||
| f"stalled. Try reducing batch size, duration, or inference steps." | ||
| ) | ||
| if "exc" in _error: | ||
| raise _error["exc"] | ||
|
|
||
| finally: | ||
| if stop_event is not None: | ||
| stop_event.set() | ||
| if progress_thread is not None: | ||
| progress_thread.join(timeout=1.0) | ||
| return {"outputs": outputs, "infer_steps_for_progress": infer_steps_for_progress} | ||
|
|
||
| return {"outputs": _result["outputs"], "infer_steps_for_progress": infer_steps_for_progress} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Missing defensive guard on `_result["outputs"]`. The PR description mentions a defensive guard here, but the code directly accesses `_result["outputs"]`, which raises an opaque `KeyError` if the worker thread exits without storing a result or an exception. 🐛 Proposed fix- return {"outputs": _result["outputs"], "infer_steps_for_progress": infer_steps_for_progress}
+ if "outputs" not in _result:
+ raise RuntimeError(
+ "service_generate thread exited without producing outputs or raising an exception."
+ )
+ return {"outputs": _result["outputs"], "infer_steps_for_progress": infer_steps_for_progress}🤖 Prompt for AI Agents |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`get_effective_free_vram_gb()` returning `0.0` on internal error will falsely block generation. When CUDA is available but
`get_effective_free_vram_gb()` hits an unexpected internal exception, it returns `0.0` (per its own `except` clause). Since `0.0 < needed_gb` is always true, this will surface a "not enough VRAM" error even though the GPU may have plenty of memory. Consider guarding against this — e.g., treat
`free_gb <= 0.0` as "unable to query VRAM" and let the generation attempt proceed: Proposed fix
🤖 Prompt for AI Agents