diff --git a/packages/llm/local_llm/Dockerfile b/packages/llm/local_llm/Dockerfile index 72c6a371c..c2480786f 100644 --- a/packages/llm/local_llm/Dockerfile +++ b/packages/llm/local_llm/Dockerfile @@ -1,7 +1,7 @@ #--- # name: local_llm # group: llm -# depends: [mlc, jetson-utils, riva-client:python] +# depends: [mlc, riva-client:python, jetson-inference, torch2trt] # requires: '>=34.1.0' # docs: docs.md #--- diff --git a/packages/llm/local_llm/agent.py b/packages/llm/local_llm/agent.py index 990b22218..4a0c97152 100644 --- a/packages/llm/local_llm/agent.py +++ b/packages/llm/local_llm/agent.py @@ -35,7 +35,7 @@ def __call__(self, input, channel=0, **kwargs): """ Operator overload for process() """ - return self.process[channel](input, **kwargs) + return self.process(input, channel, **kwargs) def start(self): """ diff --git a/packages/llm/local_llm/agents/chat.py b/packages/llm/local_llm/agents/chat.py index a3aa86405..db236e2d6 100644 --- a/packages/llm/local_llm/agents/chat.py +++ b/packages/llm/local_llm/agents/chat.py @@ -66,8 +66,6 @@ def print_input_prompt(self): if __name__ == "__main__": - from local_llm.utils import ArgParser - parser = ArgParser() parser.add_argument("-it", "--interactive", action="store_true", help="enable interactive user input from the terminal") args = parser.parse_args() diff --git a/packages/llm/local_llm/agents/mp_chat.py b/packages/llm/local_llm/agents/mp_chat.py new file mode 100644 index 000000000..43d3bbdab --- /dev/null +++ b/packages/llm/local_llm/agents/mp_chat.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +from local_llm import Agent, Pipeline + +from local_llm.plugins import UserPrompt, ChatQuery, PrintStream, ProcessProxy +from local_llm.utils import ArgParser + + +class MultiprocessChat(Agent): + """ + Test of running a LLM and chat session in a subprocess. + """ + def __init__(self, **kwargs): + super().__init__() + + self.pipeline = Pipeline([ + UserPrompt(interactive=False, **kwargs), # interactive=False if kwargs.get('prompt') else True + ProcessProxy((lambda **kwargs: ChatQuery(**kwargs)), **kwargs), + PrintStream(color='green') + ]) + + +if __name__ == "__main__": + parser = ArgParser() + args = parser.parse_args() + + agent = MultiprocessChat(**vars(args)).run() \ No newline at end of file diff --git a/packages/llm/local_llm/agents/mp_test.py b/packages/llm/local_llm/agents/mp_test.py new file mode 100644 index 000000000..0d2d63d30 --- /dev/null +++ b/packages/llm/local_llm/agents/mp_test.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +import time + +from local_llm import Agent, Pipeline + +from local_llm.plugins import PrintStream, ProcessProxy +from local_llm.utils import ArgParser + + +class MultiprocessTest(Agent): + """ + This is a test of the ProcessProxy plugin for running pipelines and plugins in their own processes. 
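The corrected `Agent.__call__` above forwards directly to `process(input, channel)`, which is what lets agents like `mp_test.py` be driven by simply calling `agent(...)` in a loop. Below is a minimal single-process sketch of that pattern, assuming the `Pipeline`/`PrintStream`/`ArgParser` API exactly as it appears in this diff (it mirrors `mp_test.py` without the `ProcessProxy` subprocess):

```python
#!/usr/bin/env python3
# Minimal sketch, assuming the local_llm Pipeline/Plugin API used in this diff.
import time

from local_llm import Agent, Pipeline
from local_llm.plugins import PrintStream
from local_llm.utils import ArgParser


class PrintChain(Agent):
    """
    Chains two PrintStream plugins so the same text is echoed twice,
    like mp_test.py but without ProcessProxy/subprocesses.
    """
    def __init__(self, **kwargs):
        super().__init__()

        self.pipeline = Pipeline([
            PrintStream(color='green', relay=True, **kwargs),  # relays its input downstream
            PrintStream(color='blue', **kwargs),
        ])


if __name__ == "__main__":
    args = ArgParser().parse_args()
    agent = PrintChain(**vars(args)).start()

    while True:
        agent("hello from the parent process")  # Agent.__call__ -> process(input, channel=0)
        time.sleep(1.0)
```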
+ """ + def __init__(self, **kwargs): + super().__init__() + + self.pipeline = Pipeline([ + ProcessProxy((lambda **kwargs: PrintStream(**kwargs)), color='green', relay=True, **kwargs), + PrintStream(color='blue', **kwargs), + ]) + + +if __name__ == "__main__": + parser = ArgParser() + args = parser.parse_args() + + agent = MultiprocessTest(**vars(args)).start() + + while True: + agent("INSERT MESSAGE HERE") + time.sleep(1.0) \ No newline at end of file diff --git a/packages/llm/local_llm/agents/mp_video.py b/packages/llm/local_llm/agents/mp_video.py new file mode 100644 index 000000000..93bd922c1 --- /dev/null +++ b/packages/llm/local_llm/agents/mp_video.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +import logging + +from local_llm import Agent + +from local_llm.plugins import VideoSource, VideoOutput, ProcessProxy +from local_llm.utils import ArgParser + + +class MultiprocessVideo(Agent): + """ + Test of running a video stream across processes. + """ + def __init__(self, **kwargs): + super().__init__() + + self.video_source = VideoSource(return_tensors='np', **kwargs) + self.video_output = ProcessProxy((lambda **kwargs: VideoOutput(**kwargs)), **kwargs) + + self.video_source.add(self.on_video, threaded=False) + self.video_source.add(self.video_output) + + self.pipeline = [self.video_source] + + def on_video(self, image): + logging.debug(f"captured {image.shape} ({image.dtype}) frame from {self.video_source.resource}") + + +if __name__ == "__main__": + from local_llm.utils import ArgParser + + parser = ArgParser(extras=['video_input', 'video_output', 'log']) + args = parser.parse_args() + + agent = MultiprocessVideo(**vars(args)).run() \ No newline at end of file diff --git a/packages/llm/local_llm/agents/video_query.py b/packages/llm/local_llm/agents/video_query.py index 9d4c25ed1..f0e3b9e2a 100644 --- a/packages/llm/local_llm/agents/video_query.py +++ b/packages/llm/local_llm/agents/video_query.py @@ -1,56 +1,116 @@ #!/usr/bin/env python3 -from local_llm import Agent, Pipeline, ChatTemplates +import time +import threading -from local_llm.plugins import UserPrompt, ChatQuery, PrintStream +from local_llm import Agent + +from local_llm.plugins import VideoSource, VideoOutput, ChatQuery, PrintStream, ProcessProxy from local_llm.utils import ArgParser, print_table from termcolor import cprint +from jetson_utils import cudaFont, cudaMemcpy, cudaToNumpy, cudaDeviceSynchronize + -# -# TODO max-new-tokens and other generation args... -# TODO custom arg parser -# -class ChatAgent(Agent): +class VideoQuery(Agent): """ - Agent for two-turn multimodal chat. + Perpetual always-on closed-loop visual agent that applies prompts to a video stream. 
""" - def __init__(self, model="meta-llama/Llama-2-7b-chat-hf", interactive=True, **kwargs): + def __init__(self, model="liuhaotian/llava-v1.5-13b", **kwargs): super().__init__() + + # load model in another process for smooth streaming + self.llm = ProcessProxy((lambda **kwargs: ChatQuery(model, drop_inputs=True, **kwargs)), **kwargs) + self.llm.add(PrintStream(color='green', relay=True).add(self.on_eos)) + self.llm.start() + + # test / warm-up query + self.warmup = True + self.last_text = "" + self.eos = False - """ - # Equivalent to: - self.pipeline = UserPrompt(interactive=interactive, **kwargs).add( - LLMQuery(model, **kwargs).add( - PrintStream(relay=True).add(self.on_eos) - )) - """ - self.pipeline = Pipeline([ - UserPrompt(interactive=interactive, **kwargs), - ChatQuery(model, **kwargs), - PrintStream(relay=True), - self.on_eos - ]) + self.llm("What is 2+2?") - self.model = self.pipeline[ChatQuery].model - self.interactive = interactive + while self.warmup: + time.sleep(0.25) + + # create video streams + self.video_source = VideoSource(**kwargs) + self.video_output = VideoOutput(**kwargs) - self.print_input_prompt() - - def on_eos(self, input): - if input.endswith(''): - print_table(self.model.stats) - self.print_input_prompt() + self.video_source.add(self.on_video, threaded=False) + self.video_output.start() + + self.font = cudaFont() - def print_input_prompt(self): - if self.interactive: - cprint('>> PROMPT: ', 'blue', end='', flush=True) + # setup prompts + self.prompt = 0 + self.prompts = kwargs.get('prompt') + if not self.prompts: + self.prompts = [ + 'Describe the image concisely.', + 'How many fingers is the person holding up?', + 'What does the text in the image say?', + 'There is a question asked in the image. What is the answer?', + ] -if __name__ == "__main__": - from local_llm.utils import ArgParser + self.keyboard_thread = threading.Thread(target=self.poll_keyboard) + self.keyboard_thread.start() + + # entry node + self.pipeline = [self.video_source] + + def on_video(self, image): + np_image = cudaToNumpy(image) + cudaDeviceSynchronize() + + self.llm([ + 'reset', + np_image, + self.prompts[self.prompt], + ]) + + text = self.last_text.replace('\n', '').replace('', '').strip() - parser = ArgParser() - parser.add_argument("-it", "--interactive", action="store_true", help="enable interactive user input from the terminal") + if text: + self.font.OverlayText(image, text=text, x=5, y=42, color=self.font.White, background=self.font.Gray40) + + self.font.OverlayText(image, text=self.prompts[self.prompt], x=5, y=5, color=(120,215,21), background=self.font.Gray40) + self.video_output(image) + + def on_eos(self, text): + if self.eos: + self.last_text = text # new query response + self.eos = False + elif not self.warmup: # don't view warmup response + self.last_text = self.last_text + text + + if text.endswith(''): + #print_table(self.llm.model.stats) + self.warmup = False + self.eos = True + + def poll_keyboard(self): + while True: + try: + key = input().strip() #getch.getch() + + if key == 'd' or key == 'l': + self.prompt = (self.prompt + 1) % len(self.prompts) + elif key == 'a' or key == 'j': + self.prompt = self.prompt - 1 + if self.prompt < 0: + self.prompt = len(self.prompts) - 1 + + num = int(key) + + if num > 0 and num <= len(self.prompts): + self.prompt = num - 1 + except Exception as err: + continue + +if __name__ == "__main__": + parser = ArgParser(extras=ArgParser.Defaults+['video_input', 'video_output']) args = parser.parse_args() - - agent = 
ChatAgent(**vars(args)).run() \ No newline at end of file + agent = VideoQuery(**vars(args)).run() + \ No newline at end of file diff --git a/packages/llm/local_llm/agents/video_stream.py b/packages/llm/local_llm/agents/video_stream.py new file mode 100644 index 000000000..79f168cf9 --- /dev/null +++ b/packages/llm/local_llm/agents/video_stream.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +import logging + +from local_llm import Agent + +from local_llm.plugins import VideoSource, VideoOutput +from local_llm.utils import ArgParser + + +class VideoStream(Agent): + """ + Relay, view, or test a video stream. Use the --video-input and --video-output arguments + to set the video source and output protocols used from: + + https://github.com/dusty-nv/jetson-inference/blob/master/docs/aux-streaming.md + + For example, this will capture a V4L2 camera and serve it via WebRTC with H.264 encoding: + + python3 -m local_llm.agents.video_stream \ + --video-input /dev/video0 \ + --video-output webrtc://@:8554/output + + It's also used as a basic test of video streaming before using more complex agents that rely on it. + """ + def __init__(self, video_input=None, video_output=None, **kwargs): + super().__init__() + + self.video_source = VideoSource(video_input, **kwargs) + self.video_output = VideoOutput(video_output, **kwargs) + + self.video_source.add(self.on_video, threaded=False) + self.video_source.add(self.video_output) + + self.pipeline = [self.video_source] + + def on_video(self, image): + logging.debug(f"captured {image.width}x{image.height} frame from {self.video_source.resource}") + + +if __name__ == "__main__": + parser = ArgParser(extras=['video_input', 'video_output', 'log']) + args = parser.parse_args() + + agent = VideoStream(**vars(args)).run() \ No newline at end of file diff --git a/packages/llm/local_llm/agents/voice_chat.py b/packages/llm/local_llm/agents/voice_chat.py index 07381b5da..52c36ac9b 100644 --- a/packages/llm/local_llm/agents/voice_chat.py +++ b/packages/llm/local_llm/agents/voice_chat.py @@ -78,8 +78,6 @@ def on_eos(self, text): if __name__ == "__main__": - from local_llm.utils import ArgParser - parser = ArgParser(extras=ArgParser.Defaults+['asr', 'tts', 'audio_output']) args = parser.parse_args() diff --git a/packages/llm/local_llm/agents/web_chat.py b/packages/llm/local_llm/agents/web_chat.py index feed079d6..f22d492e1 100644 --- a/packages/llm/local_llm/agents/web_chat.py +++ b/packages/llm/local_llm/agents/web_chat.py @@ -112,8 +112,6 @@ def start(self): if __name__ == "__main__": - from local_llm.utils import ArgParser - parser = ArgParser(extras=ArgParser.Defaults+['asr', 'tts', 'audio_output', 'web']) args = parser.parse_args() diff --git a/packages/llm/local_llm/history.py b/packages/llm/local_llm/history.py index 4b5c1f506..d172cde3a 100644 --- a/packages/llm/local_llm/history.py +++ b/packages/llm/local_llm/history.py @@ -285,7 +285,7 @@ def embed_chat(self, use_cache=True): cached = embed_key in entry and use_cache if logging.getLogger().isEnabledFor(logging.DEBUG) and not cached: - logging.debug(f"processing chat entry {i} role='{entry.role}' template='{role_template}' open_user_prompt={open_user_prompt} cached={'true' if cached else 'false'} {key}='{entry[key]}'".replace('\n', '\\n')) + logging.debug(f"processing chat entry {i} role='{entry.role}' template='{role_template}' open_user_prompt={open_user_prompt} cached={'true' if cached else 'false'} {key}='{entry[key] if isinstance(entry[key], str) else type(entry[key])}'".replace('\n', '\\n')) if use_cache: if 
embed_key not in entry: # TODO and entry.role != 'bot' -- only compute bot embeddings when needed diff --git a/packages/llm/local_llm/local_llm.py b/packages/llm/local_llm/local_llm.py index 28c97e592..5e0571d37 100644 --- a/packages/llm/local_llm/local_llm.py +++ b/packages/llm/local_llm/local_llm.py @@ -76,9 +76,11 @@ def from_pretrained(model, api=None, **kwargs): else: raise ValueError(f"invalid API: {api}") + # moved CLIP to after LLM is loaded because of MLC CUDA errors when running in subprocess + model.init_vision() model.config.load_time = time.perf_counter() - load_begin - print_table(model.config) + return model def generate(self, inputs, streaming=True, **kwargs): @@ -144,7 +146,8 @@ def __init__(self, model_path, **kwargs): self.model_path = model_path self.model_config = AutoConfig.from_pretrained(model_path) - self.init_vision(**kwargs) + # moved CLIP to after LLM is loaded because of MLC CUDA errors when running in subprocess + #self.init_vision(**kwargs) def init_vision(self, **kwargs): """ diff --git a/packages/llm/local_llm/models/mlc.py b/packages/llm/local_llm/models/mlc.py index 6b6c94572..a6c90c44e 100644 --- a/packages/llm/local_llm/models/mlc.py +++ b/packages/llm/local_llm/models/mlc.py @@ -285,13 +285,9 @@ def _generate(self, stream): # decode until EOS or max_new_tokens time_begin_decode = time.perf_counter() - + while True: - #time_begin_sample = time.perf_counter() token = self._sample(output[0], do_sample, temperature, top_p, repetition_penalty) - #sample_time = (time.perf_counter() - time_begin_sample) * 1000 - #print(f"SAMPLE_TIME: {sample_time:.2f} SAMPLE_RATE: {1000/sample_time:.2f}") - stream.output_tokens.append(token) stream.event.set() @@ -306,15 +302,11 @@ def _generate(self, stream): stream.kv_cache.num_tokens += 1 self.stats.output_tokens += 1 - - #time_begin_decode2 = time.perf_counter() + output = self._decode( tvm.nd.array(np.array([[stream.output_tokens[-1]]], dtype=np.int32), self.device), tvm.runtime.ShapeTuple([stream.kv_cache.num_tokens]), stream.kv_cache, self.params ) - #decode_time = (time.perf_counter() - time_begin_decode2) * 1000 - #print(f"DECODE_TIME: {decode_time:.2f} DECODE_RATE: {1000/decode_time:.2f}") - #time.sleep(0.225) time_end_decode = time.perf_counter() diff --git a/packages/llm/local_llm/plugin.py b/packages/llm/local_llm/plugin.py index eee863b9d..d719c3e24 100644 --- a/packages/llm/local_llm/plugin.py +++ b/packages/llm/local_llm/plugin.py @@ -127,6 +127,7 @@ def input(self, input): TODO: multiple input channels? 
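The `drop_inputs` path handled in `Plugin.input()` below (and used by `VideoQuery` via `ChatQuery(..., drop_inputs=True)`) amounts to a keep-only-the-latest queue, so a slow consumer such as the LLM always works on the freshest frame instead of a growing backlog. An illustrative, standalone sketch of that policy (not the library's implementation):

```python
# Illustrative sketch of the "drop_inputs" policy: discard any backlog, keep the newest item.
import queue

def put_latest(q: queue.Queue, item):
    """Clear everything already queued, then enqueue the newest input."""
    while True:
        try:
            q.get_nowait()   # equivalent of clear_inputs()
        except queue.Empty:
            break
    q.put(item)

q = queue.Queue()
for frame_id in range(5):
    put_latest(q, frame_id)

print(q.get())   # -> 4, only the newest input survives
```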
""" if self.threaded: + #self.start() # thread may not be started if plugin only called from a callback if self.drop_inputs: self.clear_inputs() self.input_queue.put(input) diff --git a/packages/llm/local_llm/plugins/__init__.py b/packages/llm/local_llm/plugins/__init__.py index ce2de01b1..ec65a9fdb 100644 --- a/packages/llm/local_llm/plugins/__init__.py +++ b/packages/llm/local_llm/plugins/__init__.py @@ -5,6 +5,7 @@ from .print_stream import PrintStream from .user_prompt import UserPrompt from .rate_limit import RateLimit +from .process_proxy import ProcessProxy from .audio import AudioOutputDevice, AudioOutputFile from .video import VideoSource, VideoOutput diff --git a/packages/llm/local_llm/plugins/chat_query.py b/packages/llm/local_llm/plugins/chat_query.py index 240d4a7b5..b08da189c 100644 --- a/packages/llm/local_llm/plugins/chat_query.py +++ b/packages/llm/local_llm/plugins/chat_query.py @@ -2,7 +2,7 @@ import logging from local_llm import Plugin, LocalLM, ChatHistory -from local_llm.utils import print_table +from local_llm.utils import ImageTypes, print_table class ChatQuery(Plugin): @@ -75,6 +75,10 @@ def __init__(self, model="meta-llama/Llama-2-7b-chat-hf", **kwargs): self.temperature = kwargs.get('temperature', 0.7) self.top_p = kwargs.get('top_p', 0.95) + #warmup_query = '2+2 is ' + #logging.debug(f"Warming up LLM with query '{warmup_query}'") + #logging.debug(f"Warmup response: '{self.model.generate(warmup_query, streaming=False)}'") + def process(self, input, **kwargs): """ Generate the reply to a prompt or the latest ChatHistory. @@ -105,13 +109,13 @@ def process(self, input, **kwargs): return # add prompt to chat history - if isinstance(input, str) or isinstance(input, dict): + if isinstance(input, str) or isinstance(input, dict) or isinstance(input, ImageTypes): self.chat_history.append(role='user', msg=input) chat_history = self.chat_history elif isinstance(input, ChatHistory): chat_history = input else: - raise TypeError(f"LLMQuery plugin expects inputs of type str, dict, or ChatHistory (was {type(input)})") + raise TypeError(f"LLMQuery plugin expects inputs of type str, dict, image, or ChatHistory (was {type(input)})") # images should be followed by text prompts if 'image' in chat_history[-1] and 'text' not in chat_history[-1]: diff --git a/packages/llm/local_llm/plugins/process_proxy.py b/packages/llm/local_llm/plugins/process_proxy.py new file mode 100644 index 000000000..c1366fe94 --- /dev/null +++ b/packages/llm/local_llm/plugins/process_proxy.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +import os +import time +import pickle +import logging +import threading +import multiprocessing as mp + +from local_llm import Plugin + + +class ProcessProxy(Plugin): + """ + Proxy wrapper for running a plugin in a subprocess + """ + def __init__(self, plugin_factory, **kwargs): + """ + Parameters: + plugin_factory (callable) -- Factory function that returns a plugin instance. + This will be called from the new process to create it. 
+        """
+        self.data_parent, self.data_child = mp.Pipe(duplex=True)
+        self.control_parent, self.control_child = mp.Pipe(duplex=True)
+
+        self.subprocess = mp.Process(target=self.run_process, args=(plugin_factory, kwargs))
+        self.subprocess.start()
+
+        init_msg = self.control_parent.recv()
+
+        if init_msg['status'] != 'initialized':
+            raise RuntimeError(f"subprocess has an invalid initialization status ({init_msg['status']})")
+
+        super().__init__(output_channels=init_msg['output_channels'], **kwargs)
+        logging.info(f"ProcessProxy initialized, output_channels={self.output_channels}")
+
+    def input(self, input):
+        #time_begin = time.perf_counter()
+        #input_type = type(input)
+        input = pickle.dumps(input, protocol=-1)
+        #logging.debug(f"ProcessProxy time to pickle {input_type} - {(time.perf_counter()-time_begin)*1000} ms")
+        self.data_parent.send_bytes(input)
+
+    def start(self):
+        self.control_parent.send('start')
+        self.assert_message(self.control_parent.recv(), 'started')
+        return super().start()
+
+    def run(self):
+        while True:
+            output, channel = pickle.loads(self.data_parent.recv_bytes())
+            #logging.debug(f"parent process received {type(output)} (channel={channel})")
+            self.output(output, channel)
+
+    def run_process(self, factory, kwargs):
+        logging.debug(f"subprocess {os.getpid()} started")
+
+        try:
+            # create an instance of the plugin
+            plugin = factory(**kwargs)
+        except Exception as error:
+            self.control_child.send({'status': str(type(error))})
+            raise error
+
+        self.control_child.send({
+            'status': 'initialized',
+            'output_channels': plugin.output_channels,
+        })
+
+        # forward outputs back to parent process
+        for i in range(plugin.output_channels):
+            plugin.add(OutputProxy(self.data_child, i), i)
+
+        # start the plugin processing
+        self.assert_message(self.control_child.recv(), 'start')
+
+        try:
+            plugin.start()
+        except Exception as error:
+            self.control_child.send(str(type(error)))
+            raise error
+
+        # start a thread to receive control messages from the parent
+        control_thread = threading.Thread(target=self.run_control)
+        control_thread.start()
+
+        self.control_child.send('started')
+
+        # receive inputs from the parent to process
+        while True:
+            #time_begin = time.perf_counter()
+            input = self.data_child.recv_bytes()
+            input = pickle.loads(input)
+            #logging.debug(f"subprocess received {str(type(input))} input (depickle={(time.perf_counter()-time_begin)*1000} ms)")
+            plugin(input)
+
+    def run_control(self):
+        while True:
+            msg = self.control_child.recv()
+            logging.debug(f"subprocess received control msg from parent process ('{msg}')")
+
+    def assert_message(self, msg, expected, verbose=True):
+        if msg != expected:
+            raise RuntimeError(f"received unexpected cross-process message '{msg}' (expected '{expected}')")
+        elif verbose:
+            logging.debug(f"received cross-process message '{msg}'")
+
+
+class OutputProxy(Plugin):
+    def __init__(self, pipe, channel, **kwargs):
+        super().__init__(threaded=False)
+        self.pipe = pipe
+        self.channel = channel
+        self.enabled = True
+
+    def process(self, input, **kwargs):
+        #logging.debug(f"subprocess sending {type(input)} {input} (channel={self.channel})")
+        try:
+            if self.enabled:
+                self.pipe.send_bytes(pickle.dumps((input, self.channel), protocol=-1)) #self.pipe.send((input, self.channel))
+        except ValueError as error:
+            if 'pickle' in str(error):
+                logging.info(f"subprocess output could not be pickled ({type(input)}), disabling channel {self.channel}")
+                self.enabled = False
+
\ No newline at end of file
diff --git 
a/packages/llm/local_llm/plugins/video.py b/packages/llm/local_llm/plugins/video.py index d47fbc097..6d4f5b919 100644 --- a/packages/llm/local_llm/plugins/video.py +++ b/packages/llm/local_llm/plugins/video.py @@ -5,7 +5,7 @@ from local_llm import Plugin from local_llm.utils import cuda_image -from jetson_utils import videoSource, videoOutput, cudaMemcpy +from jetson_utils import videoSource, videoOutput, cudaDeviceSynchronize, cudaToNumpy class VideoSource(Plugin): @@ -16,7 +16,7 @@ class VideoSource(Plugin): def __init__(self, video_input='/dev/video0', video_input_width=None, video_input_height=None, video_input_codec=None, video_input_framerate=None, - **kwargs): + video_input_save=None, return_tensors='cuda', **kwargs): """ Parameters: @@ -24,6 +24,7 @@ def __init__(self, video_input='/dev/video0', input_width (int) -- the disired width in pixels (default uses stream's resolution) input_height (int) -- the disired height in pixels (default uses stream's resolution) input_codec (str) -- force a particular codec ('h264', 'h265', 'vp8', 'vp9', 'mjpeg', ect) + return_tensors (str) -- the object datatype of the image to output ('np', 'pt', 'cuda') """ super().__init__(**kwargs) @@ -41,20 +42,32 @@ def __init__(self, video_input='/dev/video0', if video_input_framerate: options['framerate'] = video_input_framerate + if video_input_save: + options['save'] = video_input_save + self.stream = videoSource(video_input, options=options) - self.resource = video_input - + self.resource = video_input # self.stream.GetOptions().resource['string'] + self.return_tensors = return_tensors + def run(self): """ Capture images from the video source as long as it's streaming """ while True: image = self.stream.Capture(format='rgb8', timeout=-1) - + if image is None: continue + + if self.return_tensors == 'pt': + image = torch.as_tensor(image, device='cuda') + elif self.return_tensors == 'np': + image = cudaToNumpy(image) + cudaDeviceSynchronize() + elif self.return_tensors != 'cuda': + raise ValueError(f"return_tensors should be 'np', 'pt', or 'cuda' (was '{self.return_tensors}')") - self.output(cudaMemcpy(image)) + self.output(image) class VideoOutput(Plugin): @@ -62,7 +75,7 @@ class VideoOutput(Plugin): Saves images to a compressed video or directory of individual images, the display, or a network stream. 
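The new `return_tensors` option decides how `VideoSource.run()` hands frames downstream: `'cuda'` keeps the raw `cudaImage`, `'np'` maps it to a numpy array (with a `cudaDeviceSynchronize()` so the capture has finished before the host reads it), and `'pt'` wraps it as a CUDA tensor via `__cuda_array_interface__` (this branch assumes `torch` is imported in `video.py`). A condensed sketch of the same logic, assuming `jetson_utils` and a CUDA-enabled PyTorch build:

```python
# Sketch of the return_tensors conversion added to VideoSource.run(); not the plugin itself.
import torch
from jetson_utils import videoSource, cudaToNumpy, cudaDeviceSynchronize

def capture_as(stream, return_tensors='cuda'):
    image = stream.Capture(format='rgb8', timeout=-1)   # cudaImage, or None on timeout

    if image is None or return_tensors == 'cuda':
        return image
    if return_tensors == 'np':
        array = cudaToNumpy(image)   # zero-copy mapping of the CUDA buffer
        cudaDeviceSynchronize()      # make sure the frame is fully written
        return array
    if return_tensors == 'pt':
        # cudaImage exposes __cuda_array_interface__, so the tensor stays on the GPU
        return torch.as_tensor(image, device='cuda')
    raise ValueError(f"return_tensors should be 'np', 'pt', or 'cuda' (was '{return_tensors}')")

if __name__ == "__main__":
    stream = videoSource('/dev/video0', options={'width': 1280, 'height': 720})
    frame = capture_as(stream, return_tensors='np')
    print(None if frame is None else frame.shape)
```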
https://github.com/dusty-nv/jetson-inference/blob/master/docs/aux-streaming.md """ - def __init__(self, video_output=None, video_output_codec=None, video_output_bitrate=None, **kwargs): + def __init__(self, video_output=None, video_output_codec=None, video_output_bitrate=None, video_output_save=None, **kwargs): """ Parameters: @@ -80,8 +93,14 @@ def __init__(self, video_output=None, video_output_codec=None, video_output_bitr if video_output_bitrate: options['bitrate'] = video_output_bitrate + if video_output_save: + options['save'] = video_output_save + + if video_output is None: + video_output = '' + self.stream = videoOutput(video_output, options=options) - self.resource = output + self.resource = video_output def process(self, input, **kwargs): """ diff --git a/packages/llm/local_llm/requirements.txt b/packages/llm/local_llm/requirements.txt index 6ed0b2055..4ceb674a4 100644 --- a/packages/llm/local_llm/requirements.txt +++ b/packages/llm/local_llm/requirements.txt @@ -2,4 +2,5 @@ websockets termcolor tabulate flask -tqdm \ No newline at end of file +tqdm +getch \ No newline at end of file diff --git a/packages/llm/local_llm/utils/args.py b/packages/llm/local_llm/utils/args.py index 6d71b33f6..cbfa29a40 100644 --- a/packages/llm/local_llm/utils/args.py +++ b/packages/llm/local_llm/utils/args.py @@ -5,6 +5,7 @@ import logging from .log import LogFormatter +from .prompts import DefaultChatPrompts, DefaultCompletionPrompts class ArgParser(argparse.ArgumentParser): @@ -61,12 +62,14 @@ def __init__(self, extras=Defaults, **kwargs): self.add_argument("--video-input-height", type=int, default=None, help="manually set the resolution of the video input") self.add_argument("--video-input-codec", type=str, default=None, choices=['h264', 'h265', 'vp8', 'vp9', 'mjpeg'], help="manually set the input video codec to use") self.add_argument("--video-input-framerate", type=int, default=None, help="set the desired framerate of input video") + self.add_argument("--video-input-save", type=str, default=None, help="path to video file to save the incoming video feed to") if 'video_output' in extras: self.add_argument("--video-output", type=str, default=None, help="display, stream URL, file/dir path") self.add_argument("--video-output-codec", type=str, default=None, choices=['h264', 'h265', 'vp8', 'vp9', 'mjpeg'], help="set the output video codec to use") self.add_argument("--video-output-bitrate", type=int, default=None, help="set the output bitrate to use") - + self.add_argument("--video-output-save", type=str, default=None, help="path to video file to save the outgoing video stream to") + # AUDIO if 'audio_input' not in extras and 'asr' in extras: extras += ['audio_input'] diff --git a/packages/llm/local_llm/utils/image.py b/packages/llm/local_llm/utils/image.py index 054b61707..422133535 100644 --- a/packages/llm/local_llm/utils/image.py +++ b/packages/llm/local_llm/utils/image.py @@ -18,6 +18,18 @@ def is_image(image): """ return isinstance(image, ImageTypes) + +def image_size(image): + """ + Returns the dimensions of the image as a tuple (height, width, channels) + """ + if isinstance(image, (cudaImage, np.ndarray, torch.Tensor)): + return image.shape + elif isinstance(image, PIL.Image.Image): + return image.size + else: + raise TypeError(f"expected an image of type {ImageTypes} (was {type(image)})") + def load_image(path): """ @@ -65,6 +77,17 @@ def cuda_image(image): format=torch_image_format(input) ) + +def torch_image(image): + """ + Convert the image to a type that is compatible with PyTorch 
(torch.Tensor, ndarray, PIL.Image) + """ + if isinstance(image, cudaImage): + return torch.as_tensor(image, device='cuda') + elif is_image(image): + return image + raise TypeError(f"expected an image of type {ImageTypes} (was {type(image)})") + def torch_image_format(tensor): """ diff --git a/packages/llm/local_llm/vision/clip_hf.py b/packages/llm/local_llm/vision/clip_hf.py index 2d13618df..a37479bee 100644 --- a/packages/llm/local_llm/vision/clip_hf.py +++ b/packages/llm/local_llm/vision/clip_hf.py @@ -6,7 +6,7 @@ import logging from transformers import CLIPImageProcessor, CLIPVisionModel -from ..utils import AttributeDict, load_image, download_model, print_table +from ..utils import AttributeDict, load_image, torch_image, image_size, download_model, print_table _clip_model_cache = dict(image={}, text={}) @@ -56,11 +56,11 @@ def __call__(self, image, crop=False, hidden_state=None, return_tensors='pt', ** """ if isinstance(image, str): image = load_image(image) - + else: + image = torch_image(image) + time_begin_pre = time.perf_counter() - - image_size = image.size - + if not crop: image = image.resize(self.config.input_shape, PIL.Image.BILINEAR) # PIL.Image.BICUBIC @@ -85,7 +85,7 @@ def __call__(self, image, crop=False, hidden_state=None, return_tensors='pt', ** self.stats.clip_rate = 1.0 / self.stats.clip_time self.stats.preprocess_time = time_begin_enc - time_begin_pre self.stats.encode_time = time_end_enc - time_begin_enc - self.stats.input_shape = f"{image_size[0]}x{image_size[1]} -> {self.model.config.image_size}x{self.model.config.image_size}" + self.stats.input_shape = f"{image_size(image)} -> {self.model.config.image_size}x{self.model.config.image_size}" self.stats.output_shape = self.config.output_shape if return_tensors == 'pt': diff --git a/packages/llm/mlc/Dockerfile b/packages/llm/mlc/Dockerfile index a20f63ff2..18e126a83 100644 --- a/packages/llm/mlc/Dockerfile +++ b/packages/llm/mlc/Dockerfile @@ -18,12 +18,13 @@ ARG MLC_PATCH ARG CUDAARCHS ARG TORCH_CUDA_ARCH_LIST +ARG LLVM_VERSION # install LLVM the upstream way instead of apt because of: # https://discourse.llvm.org/t/llvm-15-0-7-missing-libpolly-a/67942 RUN wget https://apt.llvm.org/llvm.sh && \ chmod +x llvm.sh && \ - ./llvm.sh all && \ + ./llvm.sh ${LLVM_VERSION} all && \ ln -s /usr/bin/llvm-config-* /usr/bin/llvm-config # could NOT find zstd (missing: zstd_LIBRARY zstd_INCLUDE_DIR) diff --git a/packages/llm/mlc/config.py b/packages/llm/mlc/config.py index f79102853..58ee536c0 100644 --- a/packages/llm/mlc/config.py +++ b/packages/llm/mlc/config.py @@ -11,7 +11,7 @@ 'TORCH_CUDA_ARCH_LIST': ';'.join([f'{x/10:.1f}' for x in CUDA_ARCHITECTURES]) } -def mlc(version, patch, tag=None, requires=None, default=False): +def mlc(version, patch, llvm=17, tag=None, requires=None, default=False): pkg = copy.deepcopy(package) if default: @@ -28,6 +28,7 @@ def mlc(version, patch, tag=None, requires=None, default=False): pkg['build_args'].update({ 'MLC_VERSION': version, 'MLC_PATCH': patch, + 'LLVM_VERSION': llvm, }) pkg['notes'] = f"[{repo}](https://github.com/{repo}/tree/{version}) commit SHA [`{version}`](https://github.com/{repo}/tree/{version})" @@ -40,7 +41,7 @@ def mlc(version, patch, tag=None, requires=None, default=False): #default_dev=(L4T_VERSION.major >= 36) package = [ - mlc(latest_sha, 'patches/51fb0f4.diff', tag='dev'), #, default=default_dev), - mlc('9bf5723', 'patches/9bf5723.diff', requires='==35.*'), # 10/20/2023 - mlc('51fb0f4', 'patches/51fb0f4.diff', default=True), # 12/15/2023 + mlc(latest_sha, 
'patches/51fb0f4.diff', llvm=18, tag='dev'), #, default=default_dev), + mlc('9bf5723', 'patches/9bf5723.diff', llvm=17, requires='==35.*'), # 10/20/2023 + mlc('51fb0f4', 'patches/51fb0f4.diff', llvm=17, default=True), # 12/15/2023 ] \ No newline at end of file
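The `llvm` parameter added to `mlc()` ends up as the `LLVM_VERSION` build-arg consumed by `./llvm.sh ${LLVM_VERSION} all` in the Dockerfile, so each MLC variant can pin its own LLVM release. A simplified sketch of how that argument flows through the package config (the real jetson-containers package dict carries more fields than shown here):

```python
# Simplified sketch of config.py's llvm -> LLVM_VERSION plumbing.
import copy

package = {'name': 'mlc', 'build_args': {}}

def mlc(version, patch, llvm=17, tag=None, requires=None, default=False):
    pkg = copy.deepcopy(package)

    if tag:
        pkg['name'] = f"mlc:{tag}"
    if requires:
        pkg['requires'] = requires

    pkg['build_args'] = {
        'MLC_VERSION': version,
        'MLC_PATCH': patch,
        'LLVM_VERSION': llvm,   # consumed by: RUN ./llvm.sh ${LLVM_VERSION} all
    }
    return pkg

if __name__ == "__main__":
    for pkg in [mlc('51fb0f4', 'patches/51fb0f4.diff', llvm=17, default=True),
                mlc('9bf5723', 'patches/9bf5723.diff', llvm=17, requires='==35.*')]:
        print(pkg['name'], pkg['build_args'])
```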