-
Notifications
You must be signed in to change notification settings - Fork 5
Add microphone/audio device input #53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
richinsley
wants to merge
1
commit into
pygfx:main
Choose a base branch
from
richinsley:audio-input-feature
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| { | ||
| "Shader": { | ||
| "ver": "0.1", | ||
| "info": { | ||
| "id": "llSGDh", | ||
| "date": "1428482235", | ||
| "viewed": 6992, | ||
| "name": "Input - Microphone", | ||
| "username": "iq", | ||
| "description": "Testing the Sound Input (microphone).", | ||
| "likes": 44, | ||
| "published": 3, | ||
| "flags": 4, | ||
| "usePreview": 1, | ||
| "tags": [ | ||
| "2d", | ||
| "sound", | ||
| "microphone" | ||
| ], | ||
| "hasliked": 0 | ||
| }, | ||
| "renderpass": [ | ||
| { | ||
| "inputs": [ | ||
| { | ||
| "id": 32, | ||
| "src": "/presets/mic.png", | ||
| "ctype": "mic", | ||
| "channel": 0, | ||
| "sampler": { | ||
| "filter": "linear", | ||
| "wrap": "clamp", | ||
| "vflip": "false", | ||
| "srgb": "false", | ||
| "internal": "byte" | ||
| }, | ||
| "published": 1 | ||
| } | ||
| ], | ||
| "outputs": [], | ||
| "code": "// Created by inigo quilez - iq/2015\n// License Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.\n\n\n// See also:\n//\n// Input - Keyboard : https://www.shadertoy.com/view/lsXGzf\n// Input - Microphone : https://www.shadertoy.com/view/llSGDh\n// Input - Mouse : https://www.shadertoy.com/view/Mss3zH\n// Input - Sound : https://www.shadertoy.com/view/Xds3Rr\n// Input - SoundCloud : https://www.shadertoy.com/view/MsdGzn\n// Input - Time : https://www.shadertoy.com/view/lsXGz8\n// Input - TimeDelta : https://www.shadertoy.com/view/lsKGWV\n// Inout - 3D Texture : https://www.shadertoy.com/view/4llcR4\n\n\nvoid mainImage( out vec4 fragColor, in vec2 fragCoord )\n{\n // create pixel coordinates\n\tvec2 uv = fragCoord.xy / iResolution.xy;\n\n\t// first texture row is frequency data\n\tfloat fft = textureLod( iChannel0, vec2(uv.x,0.25), 0.0 ).x; \n\t \n // second texture row is the sound wave\n\tfloat wave = textureLod( iChannel0, vec2(uv.x,0.75), 0.0 ).x;\n\t\n\t// convert frequency to colors\n\tvec3 col = vec3(1.0)*fft;\n\n // add wave form on top\t\n\tcol += 1.0 - smoothstep( 0.0, 0.01, abs(wave - uv.y) );\n\n col = pow( col, vec3(1.0,0.5,2.0) );\n\n\t// output final color\n\tfragColor = vec4(col,1.0);\n}", | ||
| "name": "Image", | ||
| "description": "", | ||
| "type": "image" | ||
| } | ||
| ] | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| import argparse | ||
| from wgpu_shadertoy import Shadertoy | ||
| from wgpu_shadertoy import MicrophoneAudioDevice | ||
| from pathlib import Path | ||
| import sounddevice as sd | ||
| import sys | ||
|
|
||
# Set up command line argument parsing
parser = argparse.ArgumentParser(description='Run a Shadertoy shader with an audio input device.')
parser.add_argument('--from_id', type=str, default="llSGDh",
                    help='Shadertoy ID (default: llSGDh) https://www.shadertoy.com/view/llSGDh by iq CC-BY-NC-SA-3.0')
parser.add_argument('--list-audio-devices', action='store_true',
                    help='List all available audio input devices and exit')
parser.add_argument('--device-index', type=int, default=None,
                    help='Audio device index to use (default: system default)')

# The default shader ships next to this script so the example works offline.
json_path = Path(Path(__file__).parent, "shader_llSGDh.json")

if __name__ == "__main__":
    # Parse the command line arguments
    args = parser.parse_args()

    # If list-audio-devices flag is present, list devices and exit
    if args.list_audio_devices:
        print("Available audio input devices:")
        # query_devices() reports every device, including output-only ones;
        # keep only those that can actually capture audio. The printed index
        # is the value expected by --device-index.
        for index, info in enumerate(sd.query_devices()):
            if info["max_input_channels"] > 0:
                print(f"{index}: {info['name']} ({info['max_input_channels']} in)")
        sys.exit(0)

    # Use the provided ID or the default one
    shader_id = args.from_id

    # Use the device index from command line if provided
    # if device_index is None, sounddevice will use the system default device
    device_index = args.device_index

    # Create microphone audio device with specified device index (or None for default)
    # We could use a NoiseAudioDevice or NullAudioDevice for testing without a mic:
    #   audio_device = NoiseAudioDevice(rate=44100)
    audio_device = MicrophoneAudioDevice(device_index=device_index, sample_rate=44100, buffer_duration_seconds=2.0)
    audio_device.start()

    # The stream is released in `finally`, so a failure while loading or
    # showing the shader does not leave the input device open.
    try:
        # shadertoy source: https://www.shadertoy.com/view/llSGDh by iq CC-BY-NC-SA-3.0
        if shader_id == "llSGDh":
            # Load the shader from JSON file
            shader = Shadertoy.from_json(json_path, use_cache=True, audio_device=audio_device)
        else:
            # Load the shader from Shadertoy by ID
            shader = Shadertoy.from_id(shader_id, use_cache=True, audio_device=audio_device)

        shader.show()
    finally:
        audio_device.stop()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| from .inputs import ShadertoyChannel, ShadertoyChannelBuffer, ShadertoyChannelTexture | ||
| from .passes import BufferRenderPass, ImageRenderPass | ||
| from .shadertoy import Shadertoy | ||
| from .audio_devices import AudioDevice, FIFOPushAudioDevice, NullAudioDevice, NoiseAudioDevice, MicrophoneAudioDevice | ||
|
|
||
| __version__ = "0.2.0" | ||
| version_info = tuple(map(int, __version__.split("."))) # noqa | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,230 @@ | ||
| import numpy as np | ||
| from abc import ABC, abstractmethod | ||
| from collections import deque | ||
| import sounddevice as sd | ||
| from typing import Optional | ||
|
|
||
class AudioDevice(ABC):
    """
    Abstract Base Class for audio input devices used by ShadertoyChannelMusic.

    Subclasses must implement `get_samples` and `get_rate`; `start`, `stop`,
    `is_ready` and `gain` have do-nothing / neutral defaults.

    A device can be used as a context manager: entering the `with` block calls
    `start()` and leaving it (normally or through an exception) calls `stop()`.
    """

    @abstractmethod
    def get_samples(self, num_samples: int) -> np.ndarray:
        """
        Retrieve the most recent audio samples.

        Args:
            num_samples (int): The number of samples required.

        Returns:
            np.ndarray: A 1D numpy array of float32 audio samples (ideally [-1, 1]),
                        of length `num_samples`. If fewer samples are available than
                        requested, implementations should pad appropriately (e.g., with zeros).
        """

    @abstractmethod
    def get_rate(self) -> int:
        """
        Returns the sample rate of the audio device in Hz.
        """

    def start(self) -> None:
        """
        Start the audio device (e.g., open microphone stream).
        Optional: Base implementation does nothing.
        """

    def stop(self) -> None:
        """
        Stop the audio device (e.g., close microphone stream).
        Optional: Base implementation does nothing.
        """

    def is_ready(self) -> bool:
        """
        Check if the device is ready or has sufficient data.
        Optional: Base implementation assumes always ready.
        """
        return True

    def gain(self) -> float:
        """
        Returns the gain factor for the audio device.
        Optional: Base implementation returns 1.0 (no gain).
        """
        return 1.0

    def __enter__(self):
        """Start the device and return it, so `with device as d:` works."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> bool:
        """Stop the device; returns False so exceptions from the block propagate."""
        self.stop()
        return False
|
|
||
class FIFOPushAudioDevice(AudioDevice):
    """
    An AudioDevice implementation using an internal FIFO buffer (deque).

    Samples are added using the `push_samples` method. The deque is created
    with `maxlen=max_buffer_samples`, so once it is full the oldest samples
    are dropped as new ones arrive.
    """

    # Number of buffered samples required before the device reports ready
    # (512 is the typical fft_input_size).
    _READY_SAMPLES = 512

    def __init__(self, rate: int, max_buffer_samples: int, gain: float = 0.6):
        """
        Args:
            rate (int): Sample rate in Hz reported by `get_rate`.
            max_buffer_samples (int): Capacity of the FIFO buffer, in samples.
            gain (float): Gain factor reported by `gain`.
        """
        self._gain = gain
        self._rate = rate
        self._buffer = deque(maxlen=max_buffer_samples)

    def get_rate(self) -> int:
        """Returns the sample rate given at construction, in Hz."""
        return self._rate

    def push_samples(self, new_samples: np.ndarray):
        """
        Appends new audio samples to the internal buffer.

        Any array-like is accepted and converted to float32. Multi-dimensional
        input (e.g. a `(frames, 1)` block) is flattened so that the buffer
        only ever holds scalar samples.
        """
        samples = np.asarray(new_samples, dtype=np.float32).ravel()
        self._buffer.extend(samples)

    def get_samples(self, num_samples: int) -> np.ndarray:
        """
        Returns the most recent `num_samples` from the buffer as float32.

        If fewer samples are buffered, the result is padded with leading
        zeros so the newest sample is always last.

        Raises:
            ValueError: If `num_samples` is negative.
        """
        if num_samples < 0:
            raise ValueError(f"num_samples must be non-negative, got {num_samples}")

        # Explicit dtype keeps the result float32 even when the deque is empty.
        current_buffer = np.array(self._buffer, dtype=np.float32)
        available_samples = current_buffer.size

        if available_samples >= num_samples:
            # Slice from an explicit start index: `[-num_samples:]` would
            # return the whole buffer when num_samples == 0.
            return current_buffer[available_samples - num_samples:]

        # Not enough samples, return what we have padded with leading zeros
        padded_samples = np.zeros(num_samples, dtype=np.float32)
        if available_samples > 0:
            padded_samples[-available_samples:] = current_buffer
        return padded_samples

    def is_ready(self) -> bool:
        """
        Returns True once enough samples are buffered for a typical request.

        The threshold is capped at the buffer capacity; otherwise a buffer
        smaller than the threshold could never become ready.
        """
        capacity = self._buffer.maxlen
        if capacity is None:
            required = self._READY_SAMPLES
        else:
            required = min(self._READY_SAMPLES, capacity)
        return len(self._buffer) >= required

    def gain(self) -> float:
        """Returns the gain factor given at construction."""
        return self._gain
|
|
||
class NullAudioDevice(AudioDevice):
    """Silent audio source: every request yields an all-zero buffer."""

    def __init__(self, rate: int = 44100):
        # Only the nominal sample rate is stored; no audio is ever captured.
        self._rate = rate

    def get_rate(self) -> int:
        """Report the sample rate (Hz) supplied at construction."""
        return self._rate

    def get_samples(self, num_samples: int) -> np.ndarray:
        """Produce `num_samples` float32 zeros."""
        silence = np.zeros(shape=num_samples, dtype=np.float32)
        return silence
|
|
||
class NoiseAudioDevice(AudioDevice):
    """Synthetic audio source: every request yields fresh uniform noise."""

    def __init__(self, rate: int = 44100):
        # Only the nominal sample rate is stored; samples are generated on demand.
        self._rate = rate

    def get_rate(self) -> int:
        """Report the sample rate (Hz) supplied at construction."""
        return self._rate

    def get_samples(self, num_samples: int) -> np.ndarray:
        """Draw `num_samples` values uniformly from [-1, 1) and return them as float32."""
        noise = np.random.uniform(low=-1, high=1, size=num_samples)
        return noise.astype(np.float32)
|
|
||
class MicrophoneAudioDevice(FIFOPushAudioDevice):
    """
    An AudioDevice implementation that reads live audio from a system input device
    using the `sounddevice` library. Provides single-channel (mono) float32 samples.
    """

    def __init__(self,
                 sample_rate: int = 44100,
                 buffer_duration_seconds: float = 5.0,
                 chunk_size: int = 1024,
                 device_index: Optional[int] = None,
                 gain: float = 0.6):
        """
        Initializes the MicrophoneAudioDevice.

        Args:
            sample_rate (int): The desired sample rate in Hz.
            buffer_duration_seconds (float): The duration of the internal FIFO buffer
                                             in seconds. Determines the maximum amount
                                             of recent audio history stored.
            chunk_size (int): The number of samples to read from the audio device
                              in each callback chunk. Affects latency and overhead.
            device_index (int | None): The index of the input audio device to use.
                                       If None, the default system input device is used.
                                       Use `sounddevice.query_devices()` to list devices.
            gain (float): The gain factor to apply to the audio fft data.
        """
        max_samples = int(sample_rate * buffer_duration_seconds)
        super().__init__(rate=sample_rate, max_buffer_samples=max_samples, gain=gain)

        self._chunk_size = chunk_size
        self._device_index = device_index
        self._stream = None  # sd.InputStream while a stream is open, else None

    def _audio_callback(self, indata: np.ndarray, frames: int, time, status: sd.CallbackFlags):
        """
        This function is called by sounddevice in a separate thread
        whenever new audio data is available.
        """
        if status:
            print(f"Warning: Sounddevice callback status: {status}")
            if status.input_underflow:
                print("Warning: Input underflow detected!")
            if status.input_overflow:
                print("Warning: Input overflow detected! "
                      "Buffer might be too small or processing too slow.")

        # indata comes in as float32 (due to dtype='float32' in stream)
        # It might be multi-channel, but we requested channels=1,
        # so it should have shape (frames, 1). We need 1D.
        if indata.shape[1] != 1:
            # This shouldn't happen if channels=1 works, but as a fallback:
            mono_samples = indata.mean(axis=1).astype(np.float32)
        else:
            mono_samples = indata[:, 0]  # Extract the single channel, makes it 1D

        # Push the mono samples into the deque buffer (managed by parent class).
        # NOTE(review): this relies on deque.extend() being safe to call from the
        # callback thread while the render thread reads the buffer - confirm.
        self.push_samples(mono_samples)

    def start(self):
        """
        Starts the audio stream from the microphone.

        Does nothing (apart from a warning) if a stream is already running.

        Raises:
            Exception: Whatever `sounddevice` raised while opening or starting
                the stream; it is logged and re-raised unchanged.
        """
        if self._stream is not None:
            if self._stream.active:
                print("Warning: Stream already running. Call stop() first if you want to restart.")
                return
            # A previous stream object exists but is no longer running:
            # release it before opening a new one so it is not leaked.
            self.stop()

        stream = None
        try:
            stream = sd.InputStream(
                samplerate=self.get_rate(),
                blocksize=self._chunk_size,
                device=self._device_index,
                channels=1,  # Request mono directly
                dtype=np.float32,  # Request 32-bit float
                callback=self._audio_callback,
                latency='low'  # Or 'high' for potentially more stable streaming
            )
            stream.start()
        except Exception as e:
            print(f"Error: Failed to start audio stream: {e}")
            if stream is not None:
                # The stream was opened but could not be started; close it
                # so the device handle is released.
                stream.close()
            self._stream = None  # Ensure stream is None if start failed
            # Bare raise keeps the original traceback for the caller
            raise
        self._stream = stream

    def stop(self):
        """Stops and closes the audio stream, if one is open."""
        if self._stream is None:
            return

        try:
            if self._stream.active:
                self._stream.stop()
            self._stream.close()
        except Exception as e:
            print(f"Error: stopping audio stream: {e}")
        finally:
            # Ensure stream is marked as stopped regardless of errors
            self._stream = None

    def is_ready(self) -> bool:
        """Check if the stream is active and the buffer has sufficient data."""
        parent_ready = super().is_ready()
        stream_active = self._stream is not None and self._stream.active
        # TODO - allow getting samples
        # even if stream just stopped but buffer is full.
        return stream_active and parent_ready
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this functionality is already supported by the CLI, we should just have a simple example that has the shader code as a string and shows how the channel class can be set up manually.