
Astroport #42

Open · wants to merge 33 commits into base: main

33 commits
8dd779c
record and post treatments
May 28, 2024
0f239db
record and post treatments
May 28, 2024
e84f568
record a visio room into mp4 or wav file
papiche May 28, 2024
996c3ec
Merge branch 'steveseguin:main' into main
papiche May 29, 2024
d4a2610
Merge branch 'steveseguin:main' into main
papiche May 30, 2024
1a51733
record.py http://0.0.0.0:8000
papiche May 30, 2024
dfea305
run me
papiche May 30, 2024
cf8b185
generate
papiche May 30, 2024
cea5a3c
import random
papiche May 30, 2024
125188c
wait for "DATA CHANNEL: CLOSE" in publish.py's logs
papiche May 30, 2024
a774c53
generate_record_id
papiche May 30, 2024
aeb6654
DATA CHANNEL: CLOSE detection
papiche May 30, 2024
929c658
stt/ record.py whisper trasncriptions output directory
papiche May 30, 2024
b633b99
stt
papiche May 30, 2024
1ad10cb
stt
papiche May 30, 2024
a28d176
what is happening
papiche May 30, 2024
0c427d8
bug
papiche May 30, 2024
54ca8d6
last stable
papiche May 30, 2024
e3623db
stop recording file
papiche May 30, 2024
3073901
record audio.ts, but does not detect DATA CHANNEL: CLOSE to engage wh…
papiche May 30, 2024
a9b8068
record & transcribe microApp
Jun 8, 2024
963814a
Record transcribe and translate Vdo.Ninja Room http://localhost:8000
Jun 8, 2024
e2adf1c
WebRTC vdo.ninja record to whisper & IA service
Jun 9, 2024
ac2477e
record you
Jun 11, 2024
3ef8345
UX². manual & automatic audio to text through whisper
Jun 13, 2024
7d72476
record.py + README
Jun 13, 2024
9849114
https://vdo.ninja/?push=myRecord&room=myRoom&password=false&effects&r…
Jun 13, 2024
f2df5eb
record.py 1.0
Jun 13, 2024
9240a5e
typo
Jun 13, 2024
f3bef07
typo²
Jun 13, 2024
135ba01
trying medium whisper model needs 6GB VRAM
Jun 17, 2024
55afb02
setup systemd record.py --host 0.0.0.0 --port 9000
Jun 18, 2024
2ca7887
Merge branch 'main' into astroport
steveseguin Jun 18, 2024
4 changes: 4 additions & 0 deletions .gitignore
@@ -127,3 +127,7 @@ dmypy.json

# Pyre type checker
.pyre/

# record.py whisper transcriptions output directory
*_audio.ts
stt/
107 changes: 107 additions & 0 deletions README.md
@@ -390,6 +390,113 @@ If using a virtual MIDI device on the remote viewer's computer, such as `loopMID
Please note, the raspberry_ninja publish.py script can both send and receive MIDI commands over a single peer connection, which is a bit different from how video and audio currently work. It also differs from the current browser-to-browser setup, where a sender never requests MIDI data; the raspberry_ninja code, by contrast, allows the sender to both send and receive MIDI data.

midi demo video: https://youtu.be/Gry9UFtOTmQ

---

## `record.py`

The `record.py` microservice handles audio recording and transcription using Whisper AI. It captures the audio of a VDO.Ninja room, transcribes it, and manages the underlying recording processes.

### Features

- **Automatic Transcription**: Transcribe audio recordings using Whisper AI.
- **Process Monitoring**: Automatically monitor and manage recording processes to ensure they do not exceed a specified duration.

### Prerequisites

Ensure you have the following installed on your system:

- Python 3.x
- Whisper AI model
- Required Python packages (install via `pip`)

### Installation

**Install dependencies**:

```bash
sudo apt-get update
sudo apt-get install -y python3-pip ffmpeg  # Whisper relies on ffmpeg to decode recordings
pip3 install openai-whisper  # Whisper is published on PyPI as "openai-whisper"
pip3 install fastapi uvicorn  # serve the HTTP endpoints (uvicorn is the assumed ASGI server)
```

### Usage

#### Starting the FastAPI Server

To start the FastAPI server, use the following command:

```bash
python3 record.py --host 0.0.0.0 --port 8000
```

This will start the server on the specified host and port.

#### Starting an Audio Recording

To start an audio recording via the FastAPI endpoint, use the following `curl` command:

```bash
curl -X POST -F "room=myRoom" -F "record=myRecord" http://localhost:8000/rec
```

Replace `myRoom` and `myRecord` with your desired room name and record ID.
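
For orientation, below is a minimal sketch of what such an endpoint could look like. This is an assumption inferred from the `curl` example above (form fields `room` and `record`) and from publish.py's `--record`/`--room` flags, not the literal `record.py` implementation:

```python
# Hypothetical sketch of the /rec endpoint; the field names come from the
# curl example above, while the publish.py invocation is assumed.
import subprocess

from fastapi import FastAPI, Form

app = FastAPI()

@app.post("/rec")
async def start_recording(room: str = Form(...), record: str = Form(...)):
    # Spawn publish.py in record mode for the given room. The child PID is
    # what the /stop endpoint later expects as process_pid.
    proc = subprocess.Popen(
        ["python3", "publish.py", "--record", record, "--room", room]
    )
    return {"room": room, "record": record, "process_pid": proc.pid}
```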

#### Stopping an Audio Recording

To stop an audio recording and trigger transcription, use the following `curl` command:

```bash
curl -X POST -F "record=myRecord" -F "process_pid=<PROCESS_PID>" -F "language=en" http://localhost:8000/stop
```

Replace `myRecord` with your record ID and `<PROCESS_PID>` with the process ID of the recording process.
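
Assuming `/rec` returns the spawned process ID in its JSON response (as in the sketch above), a full start/stop round trip from the shell could look like this:

```bash
# Hypothetical round trip; assumes the /rec response is JSON containing process_pid.
PID=$(curl -s -X POST -F "room=myRoom" -F "record=myRecord" http://localhost:8000/rec \
  | python3 -c "import json,sys; print(json.load(sys.stdin)['process_pid'])")

# ...speak into the room, then stop the recording and trigger transcription:
curl -X POST -F "record=myRecord" -F "process_pid=$PID" -F "language=en" http://localhost:8000/stop
```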

#### Command-Line Usage

You can also start and stop recordings directly from the command line.

**Start Recording:**

```bash
python3 record.py --room myRoom --record myRecord
```

Join room [https://vdo.ninja/?push=myRecord&room=myRoom&password=false&effects&record](https://vdo.ninja/?push=myRecord&room=myRoom&password=false&effects&record)

**Stop Recording:**

```bash
python3 record.py --stop --pid <PROCESS_PID> --record myRecord --language en
```

### Process Monitoring

The `record.py` script includes a process monitoring feature that automatically kills any recording process that exceeds one hour of execution time. This ensures that long-running processes do not consume system resources indefinitely.
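
The exact mechanism is not shown here, but a minimal sketch of such a watchdog, assuming a simple polling loop and `SIGTERM`, might look like this:

```python
# Hedged sketch of a one-hour watchdog; the real record.py may differ.
import os
import signal
import time

MAX_RECORDING_SECONDS = 3600  # kill recordings that run longer than one hour

def watch(pid: int, started_at: float) -> None:
    """Poll a recording process and terminate it once it exceeds the limit."""
    while True:
        try:
            os.kill(pid, 0)  # signal 0 only checks that the process still exists
        except ProcessLookupError:
            return  # recording already finished on its own
        if time.time() - started_at > MAX_RECORDING_SECONDS:
            os.kill(pid, signal.SIGTERM)  # ask the recorder to shut down
            return
        time.sleep(10)
```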

### Transcription

After stopping the recording, the audio file is automatically transcribed using Whisper AI. The transcription is saved to a text file in the `stt/` directory.
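
This corresponds to the standard openai-whisper Python API; a minimal sketch (file names are illustrative) is:

```python
# Minimal transcription sketch using openai-whisper; paths are illustrative.
import os

import whisper

model = whisper.load_model("medium")  # the commit history mentions the medium model (~6 GB VRAM)
result = model.transcribe("myRecord_audio.ts", language="en")  # ffmpeg decodes the .ts container

os.makedirs("stt", exist_ok=True)
with open("stt/myRecord.txt", "w") as f:
    f.write(result["text"])
```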


### Auto-Starting on Boot

To configure the `record.py` script to start automatically on boot, follow the instructions in the Raspberry Pi folder for setting up auto-start scripts.
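
The commit history also sets `record.py` up as a systemd service listening on port 9000. A sketch of such a unit file follows; the paths and user are assumptions to adapt to your install:

```ini
# Hypothetical /etc/systemd/system/record.service; adjust paths and user.
[Unit]
Description=raspberry_ninja record.py transcription service
After=network-online.target

[Service]
Type=simple
User=pi
WorkingDirectory=/home/pi/raspberry_ninja
ExecStart=/usr/bin/python3 record.py --host 0.0.0.0 --port 9000
Restart=on-failure

[Install]
WantedBy=multi-user.target
```

Enable it with `sudo systemctl enable --now record.service`.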


### Note:

70 changes: 37 additions & 33 deletions publish.py
@@ -25,7 +25,7 @@
    from multiprocessing import shared_memory
except Exception as e:
    pass



gi.require_version('Gst', '1.0')
@@ -424,6 +424,10 @@ def on_data_channel_open(channel):

def on_data_channel_close(channel):
    printc('DATA CHANNEL: CLOSE', "F44")
    if self.save_file:
        # Stop the pipeline and set it to NULL state
        self.pipe.set_state(Gst.State.NULL)
        print("RECORDING TO DISK STOPPED")

def on_data_channel_message(channel, msg_raw):
    try:
@@ -646,51 +650,51 @@ def on_incoming_stream( _, pad):
        name = caps.to_string()

        print(name)

        if "video" in name:
            if self.novideo:
                printc('Ignoring incoming video track', "F88")
                out = Gst.parse_bin_from_description("queue ! fakesink", True)

                self.pipe.add(out)
                out.sync_state_with_parent()
                sink = out.get_static_pad('sink')
                pad.link(sink)
                return;

            if self.ndiout:
                print("NDI OUT")
                if "VP8" in name:
                    out = Gst.parse_bin_from_description("queue ! rtpvp8depay ! decodebin ! videoconvert ! queue ! video/x-raw,format=UYVY ! ndisinkcombiner name=mux1 ! ndisink ndi-name='" + self.streamin + "'", True)
                elif "H264" in name:
                    out = Gst.parse_bin_from_description("queue ! rtph264depay ! h264parse ! queue max-size-buffers=0 max-size-time=0 ! decodebin ! queue max-size-buffers=0 max-size-time=0 ! videoconvert ! queue max-size-buffers=0 max-size-time=0 ! video/x-raw,format=UYVY ! ndisinkcombiner name=mux1 ! queue ! ndisink ndi-name='" + self.streamin + "'", True)

                self.pipe.add(out)
                out.sync_state_with_parent()
                sink = out.get_static_pad('sink')
                pad.link(sink)
            elif self.view:
                print("DISPLAY OUTPUT MODE BEING SETUP")

                outsink = "autovideosink"
                if check_drm_displays():
                    printc('\nThere is at least one connected display.',"00F")
                else:
                    printc('\n ! No connected displays found. Will try to use glimagesink instead of autovideosink',"F60")
                    outsink = "glimagesink sync=true"

                if "VP8" in name:
                    out = Gst.parse_bin_from_description(
                        "queue ! rtpvp8depay ! decodebin ! queue max-size-buffers=0 max-size-time=0 ! videoconvert ! video/x-raw,format=RGB ! queue max-size-buffers=0 max-size-time=0 ! "+outsink, True)
                elif "H264" in name:
                    out = Gst.parse_bin_from_description(
                        "queue ! rtph264depay ! h264parse ! openh264dec ! queue max-size-buffers=0 max-size-time=0 ! videoconvert ! video/x-raw,format=RGB ! queue max-size-buffers=0 max-size-time=0 ! "+outsink, True)

                self.pipe.add(out)
                out.sync_state_with_parent()
                sink = out.get_static_pad('sink')
                pad.link(sink)
                pad.link(sink)

            elif self.fdsink:
                print("FD SINK OUT")
                if "VP8" in name:
@@ -699,24 +703,24 @@ def on_incoming_stream( _, pad):
elif "H264" in name:
out = Gst.parse_bin_from_description(
"queue ! rtph264depay ! h264parse ! openh264dec ! videoconvert ! video/x-raw,format=BGR ! queue max-size-buffers=0 max-size-time=0 ! fdsink", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
pad.link(sink)

elif self.framebuffer: ## send raw data to ffmpeg or something I guess, using the stdout?
print("APP SINK OUT")
if "VP8" in name:
out = Gst.parse_bin_from_description("queue ! rtpvp8depay ! queue max-size-buffers=0 max-size-time=0 ! decodebin ! videoconvert ! video/x-raw,format=BGR ! queue max-size-buffers=2 leaky=downstream ! appsink name=appsink", True)
elif "H264" in name:
out = Gst.parse_bin_from_description("queue ! rtph264depay ! h264parse ! queue max-size-buffers=0 max-size-time=0 ! openh264dec ! videoconvert ! video/x-raw,format=BGR ! queue max-size-buffers=2 leaky=downstream ! appsink name=appsink", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
pad.link(sink)

else:
printc('VIDEO record setup', "88F")
if self.pipe.get_by_name('filesink'):
@@ -725,7 +729,7 @@ def on_incoming_stream( _, pad):
out = Gst.parse_bin_from_description("queue ! rtpvp8depay", True)
elif "H264" in name:
out = Gst.parse_bin_from_description("queue ! rtph264depay ! h264parse", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
@@ -736,7 +740,7 @@ def on_incoming_stream( _, pad):
out = Gst.parse_bin_from_description("queue ! rtpvp8depay ! mpegtsmux name=mux1 ! filesink name=filesinkvideo sync=false location="+self.streamin+"_"+str(int(time.time()))+"_video.ts", True)
elif "H264" in name:
out = Gst.parse_bin_from_description("queue ! rtph264depay ! h264parse ! mpegtsmux name=mux1 ! queue ! filesink name=filesinkvideo sync=true location="+self.streamin+"_"+str(int(time.time()))+"_video.ts", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
@@ -757,19 +761,19 @@ def on_incoming_stream( _, pad):
elif "audio" in name:
if self.noaudio:
printc('Ignoring incoming audio track', "F88")

out = Gst.parse_bin_from_description("queue ! fakesink", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
pad.link(sink)
return;

if self.ndiout:
# if "OPUS" in name:
out = Gst.parse_bin_from_description("queue ! rtpopusdepay ! opusparse ! opusdec ! audioconvert ! audioresample ! audio/x-raw,format=S16LE,rate=48000 ! ndisink name=ndi-audio ndi-name='" + self.streamin + "'", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
@@ -778,41 +782,41 @@ def on_incoming_stream( _, pad):
# if "OPUS" in name:
print("decode and play out the incoming audio")
out = Gst.parse_bin_from_description("queue ! rtpopusdepay ! opusparse ! opusdec ! audioconvert ! audioresample ! audio/x-raw,format=S16LE,rate=48000 ! autoaudiosink", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
pad.link(sink)
elif self.fdsink:
#if "OPUS" in name:
out = Gst.parse_bin_from_description("queue ! rtpopusdepay ! opusparse ! opusdec ! audioconvert ! audioresample ! audio/x-raw,format=S16LE,rate=48000 ! fdsink", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
pad.link(sink)

elif self.framebuffer:
out = Gst.parse_bin_from_description("queue ! fakesink", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
pad.link(sink)

else:

if self.pipe.get_by_name('filesink'):
print("Audio being added after video")
if "OPUS" in name:
out = Gst.parse_bin_from_description("queue rtpopusdepay ! opusparse ! audio/x-opus,channel-mapping-family=0,channels=2,rate=48000", True)

self.pipe.add(out)
out.sync_state_with_parent()
sink = out.get_static_pad('sink')
out.link(self.pipe.get_by_name('filesink'))
pad.link(sink)

else:
print("audio being saved...")
if "OPUS" in name:
@@ -822,7 +826,7 @@ def on_incoming_stream( _, pad):
                    out.sync_state_with_parent()
                    sink = out.get_static_pad('sink')
                    pad.link(sink)

            print("success audio?")

    except Exception as E:
@@ -1121,7 +1125,7 @@ async def loop(self):
                if 'vector' in msg:
                    print("Try with --password false (here) and &password=false (sender side) instead, as encryption isn't supported it seems with your setup")
                    continue

                if 'from' in msg:
                    if self.puuid==None:
                        self.puuid = str(random.randint(10000000,99999999))
@@ -1146,7 +1150,7 @@ async def loop(self):
                    else:
                        msg = json.dumps({"request":"seed","streamID":self.stream_id+self.hashcode}) ## we're just going to publish a stream
                        printwout("seed start")

                    continue

                if UUID not in self.clients:
@@ -1348,7 +1352,7 @@ async def main():
    parser.add_argument('--audio-pipeline', type=str, default=None, help='Custom GStreamer audio source pipeline')
    parser.add_argument('--timestamp', action='store_true', help='Add a timestamp to the video output, if possible')
    parser.add_argument('--clockstamp', action='store_true', help='Add a clock overlay to the video output, if possible')

    args = parser.parse_args()

    Gst.init(None)
@@ -1404,7 +1408,7 @@ async def main():
        args.streamin = args.record
    elif args.view:
        args.streamin = args.view

    elif args.fdsink:
        args.streamin = args.fdsink
    elif args.framebuffer: