Skip to content

The model doesn't quite work well it seems #17

@codename0og

Description

@codename0og

Yo ~ Currently doing some tests in terms of integrating LavaSR in my rvc-based fork ( voice conversion stuff. )

Original output from inference ( In this particular case, doing tests on 32khz models. (( from available 24, 32, 40 and 48)) )
Image

After LavaSR v2 ( input sr set to 16khz )
Image

After LavaSR v2 ( input sr set to 32khz )
Image

The resulting audio quality is... pretty bad, to put it lightly.
Too much of the vibrance and "aspiration" quality is gone.
Is there something I don't understand? ( Quite possible ~ ) or is it by design? ( and my case simply seems unfortunate? )

Here's how I currently handle it in my inference code:

    @staticmethod
    def apply_lavasr(
        audio: np.ndarray,
        current_sr: int,
    ):
        """
        Args:
            audio:      Audio as a float32 NumPy array (mono).
            current_sr: Sample rate of `audio` (the model's target SR).

        Returns:
            Tuple (enhanced_numpy_array, 48000).
        """
        try:
            from LavaSR.model import LavaEnhance2

        try:
            import tempfile
            import torch as _torch
            from huggingface_hub import snapshot_download

            # Resolve / download model to rvc/models/enhancement/LavaSR
            _here = os.getcwd()
            local_model_dir = os.path.join(_here, "rvc", "models", "enhancement", "LavaSR")
            os.makedirs(local_model_dir, exist_ok=True)

            if not os.path.isdir(os.path.join(local_model_dir, "enhancer_v2")):
                print("[LavaSR] Model not found locally – downloading from HuggingFace...")
                print(f"[LavaSR] Saving to: {local_model_dir}")
                snapshot_download("YatharthS/LavaSR", local_dir=local_model_dir)
                print("[LavaSR] Download complete.")
            else:
                print(f"[LavaSR] Using cached model at: {local_model_dir}")

            # Write inference output to a temp WAV
            tmp_fd, tmp_path = tempfile.mkstemp(suffix=".wav")
            os.close(tmp_fd)
            sf.write(tmp_path, audio, current_sr)

            # Load model
            device = "cuda" if _torch.cuda.is_available() else "cpu"
            print(f"[LavaSR] Loading LavaSR v2 on {device}...")
            lava_model = LavaEnhance2(local_model_dir, device)

            # Run inference
            print(f"[LavaSR] Enhancing: {current_sr} Hz → 48000 Hz ")
            input_audio, _ = lava_model.load_audio(tmp_path, input_sr=32000)
            enhanced = lava_model.enhance(input_audio, denoise=False, batch=False)
            enhanced_np = enhanced.cpu().numpy().squeeze()

            # Cleanup
            os.remove(tmp_path)
            del lava_model
            if _torch.cuda.is_available():
                _torch.cuda.empty_cache()

            print("[LavaSR] Enhancement complete. Output SR: 48000 Hz")
            return enhanced_np, 48000

        except Exception as error:
            print(f"[LavaSR] Enhancement failed: {error}")
            import traceback as _tb
            print(_tb.format_exc())
            return audio, current_sr

.......

            converted_chunks = []
            for c in chunks:
                audio_opt = self.vc.pipeline(
                    model=self.hubert_model,
                    net_g=self.net_g,
                    sid=sid,
                    audio=c,
                    pitch=pitch,
                    f0_method=f0_method,
                    file_index=file_index,
                    index_rate=index_rate,
                    pitch_guidance=self.use_f0,
                    filter_radius=filter_radius,
                    volume_envelope=volume_envelope,
                    version=self.version,
                    protect=protect,
                    f0_autotune=f0_autotune,
                    f0_autotune_strength=f0_autotune_strength,
                    f0_file=f0_file,
                    seed=seed,
                    loaded_index=self.loaded_index,
                )
                converted_chunks.append(audio_opt)
                if split_audio:
                    print(f"Converted audio chunk {len(converted_chunks)}")

            if split_audio:
                audio_opt = merge_audio(chunks, converted_chunks, intervals, 16000, self.tgt_sr)
            else:
                audio_opt = converted_chunks[0]

            if clean_audio:
                cleaned_audio = self.remove_audio_noise(
                    audio_opt, self.tgt_sr, clean_strength
                )
                if cleaned_audio is not None:
                    audio_opt = cleaned_audio

            if post_process:
                audio_opt = self.post_process_audio(
                    audio_input=audio_opt,
                    sample_rate=self.tgt_sr,
                    **kwargs,
                )

            output_sr = self.tgt_sr
            if lavasr_enhance:
                audio_opt, output_sr = self.apply_lavasr(
                    audio_opt, self.tgt_sr
                )
                # output_sr is now 48000 — keep it as-is.

            sf.write(audio_output_path, audio_opt, output_sr, format="WAV")
            output_path_format = audio_output_path.replace(
                ".wav", f".{export_format.lower()}"
            )
            audio_output_path = self.convert_audio_format(
                audio_output_path, output_path_format, export_format
            )

            elapsed_time = time.time() - start_time
            print(
                f"Conversion completed! Result available in: '{audio_output_path}'. Time taken: {elapsed_time:.2f} seconds."
            )

Thanks in advance!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions