forked from dusty-nv/jetson-containers
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
updated local_llm and MLC containers
- Loading branch information
Showing
24 changed files
with
447 additions
and
84 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/env python3
from local_llm import Agent, Pipeline

from local_llm.plugins import UserPrompt, ChatQuery, PrintStream, ProcessProxy
from local_llm.utils import ArgParser


class MultiprocessChat(Agent):
    """
    Smoke test that runs an LLM chat session with the model stage in a subprocess.

    The user prompt feeds a ChatQuery plugin hosted behind a ProcessProxy,
    and the streamed reply is echoed to the terminal in green.
    """
    def __init__(self, **kwargs):
        super().__init__()

        # NOTE(review): interactive is hard-coded False; presumably it should be
        # False only when a --prompt was supplied — confirm against ArgParser usage.
        prompt_source = UserPrompt(interactive=False, **kwargs)
        chat_worker = ProcessProxy((lambda **kwargs: ChatQuery(**kwargs)), **kwargs)
        printer = PrintStream(color='green')

        self.pipeline = Pipeline([prompt_source, chat_worker, printer])


if __name__ == "__main__":
    args = ArgParser().parse_args()
    agent = MultiprocessChat(**vars(args)).run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/usr/bin/env python3
import time

from local_llm import Agent, Pipeline

from local_llm.plugins import PrintStream, ProcessProxy
from local_llm.utils import ArgParser


class MultiprocessTest(Agent):
    """
    Exercises the ProcessProxy plugin by relaying text through a subprocess.

    The first PrintStream runs in its own process (relay=True forwards its
    input downstream); the second prints in the parent process, so each
    message should appear once in green and once in blue.
    """
    def __init__(self, **kwargs):
        super().__init__()

        remote_printer = ProcessProxy(
            (lambda **kwargs: PrintStream(**kwargs)),
            color='green', relay=True, **kwargs
        )
        local_printer = PrintStream(color='blue', **kwargs)

        self.pipeline = Pipeline([remote_printer, local_printer])


if __name__ == "__main__":
    args = ArgParser().parse_args()

    agent = MultiprocessTest(**vars(args)).start()

    # pump a test message through the pipeline once a second, forever
    while True:
        agent("INSERT MESSAGE HERE")
        time.sleep(1.0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/usr/bin/env python3
import logging

from local_llm import Agent

from local_llm.plugins import VideoSource, VideoOutput, ProcessProxy
from local_llm.utils import ArgParser


class MultiprocessVideo(Agent):
    """
    Test of streaming a video feed across process boundaries.

    Frames are captured in this process and handed to a VideoOutput plugin
    that runs in a subprocess via ProcessProxy.
    """
    def __init__(self, **kwargs):
        super().__init__()

        # return_tensors='np' so frames can be serialized across the process boundary
        self.video_source = VideoSource(return_tensors='np', **kwargs)
        self.video_output = ProcessProxy((lambda **kwargs: VideoOutput(**kwargs)), **kwargs)

        # threaded=False keeps the debug callback synchronous with capture
        self.video_source.add(self.on_video, threaded=False)
        self.video_source.add(self.video_output)

        self.pipeline = [self.video_source]

    def on_video(self, image):
        """Log each captured frame's shape/dtype for debugging."""
        logging.debug(f"captured {image.shape} ({image.dtype}) frame from {self.video_source.resource}")


if __name__ == "__main__":
    # fix: removed redundant re-import of ArgParser (already imported at module scope)
    parser = ArgParser(extras=['video_input', 'video_output', 'log'])
    args = parser.parse_args()

    agent = MultiprocessVideo(**vars(args)).run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,56 +1,116 @@ | ||
#!/usr/bin/env python3
import time
import threading

from local_llm import Agent

from local_llm.plugins import VideoSource, VideoOutput, ChatQuery, PrintStream, ProcessProxy
from local_llm.utils import ArgParser, print_table

from termcolor import cprint
from jetson_utils import cudaFont, cudaMemcpy, cudaToNumpy, cudaDeviceSynchronize


class VideoQuery(Agent):
    """
    Perpetual always-on closed-loop visual agent that applies prompts to a video stream.

    A multimodal ChatQuery model runs in a subprocess (for smooth streaming);
    each captured frame is sent with the currently-selected prompt, and the
    model's latest answer is overlaid on the outgoing video. Keyboard input
    on the terminal cycles between prompts (see poll_keyboard).
    """
    def __init__(self, model="liuhaotian/llava-v1.5-13b", **kwargs):
        super().__init__()

        # load model in another process for smooth streaming
        # (drop_inputs=True lets the LLM skip frames it can't keep up with)
        self.llm = ProcessProxy((lambda **kwargs: ChatQuery(model, drop_inputs=True, **kwargs)), **kwargs)
        self.llm.add(PrintStream(color='green', relay=True).add(self.on_eos))
        self.llm.start()

        # state for accumulating streamed output; True until the warm-up reply finishes
        self.warmup = True
        self.last_text = ""
        self.eos = False

        # test / warm-up query — blocks below until the model answers
        self.llm("What is 2+2?")

        while self.warmup:
            time.sleep(0.25)

        # create video streams
        self.video_source = VideoSource(**kwargs)
        self.video_output = VideoOutput(**kwargs)

        # threaded=False so frames are processed on the capture thread
        self.video_source.add(self.on_video, threaded=False)
        self.video_output.start()

        self.font = cudaFont()

        # setup prompts (user-supplied via --prompt, else defaults)
        self.prompt = 0
        self.prompts = kwargs.get('prompt')

        if not self.prompts:
            self.prompts = [
                'Describe the image concisely.',
                'How many fingers is the person holding up?',
                'What does the text in the image say?',
                'There is a question asked in the image. What is the answer?',
            ]

        # background thread that lets the user switch prompts from the terminal
        self.keyboard_thread = threading.Thread(target=self.poll_keyboard)
        self.keyboard_thread.start()

        # entry node
        self.pipeline = [self.video_source]

    def on_video(self, image):
        """Send the frame + current prompt to the LLM and overlay the latest reply."""
        np_image = cudaToNumpy(image)
        cudaDeviceSynchronize()

        # 'reset' clears the chat history so each frame is an independent query
        self.llm([
            'reset',
            np_image,
            self.prompts[self.prompt],
        ])

        text = self.last_text.replace('\n', '').replace('</s>', '').strip()

        if text:
            self.font.OverlayText(image, text=text, x=5, y=42, color=self.font.White, background=self.font.Gray40)

        self.font.OverlayText(image, text=self.prompts[self.prompt], x=5, y=5, color=(120,215,21), background=self.font.Gray40)
        self.video_output(image)

    def on_eos(self, text):
        """Accumulate streamed model output; detect end-of-sequence tokens."""
        if self.eos:
            self.last_text = text  # new query response
            self.eos = False
        elif not self.warmup:  # don't view warmup response
            self.last_text = self.last_text + text

        if text.endswith('</s>'):
            #print_table(self.llm.model.stats)
            self.warmup = False
            self.eos = True

    def poll_keyboard(self):
        """Cycle prompts from terminal keys: d/l next, a/j previous, 1-9 direct select."""
        while True:
            try:
                key = input().strip()  #getch.getch()

                if key == 'd' or key == 'l':
                    self.prompt = (self.prompt + 1) % len(self.prompts)
                elif key == 'a' or key == 'j':
                    self.prompt = self.prompt - 1
                    if self.prompt < 0:
                        self.prompt = len(self.prompts) - 1
                elif key.isdigit():
                    # fix: previously int(key) raised ValueError for every letter key
                    # and relied on the blanket except for control flow
                    num = int(key)
                    if num > 0 and num <= len(self.prompts):
                        self.prompt = num - 1
            except Exception:
                continue


if __name__ == "__main__":
    parser = ArgParser(extras=ArgParser.Defaults+['video_input', 'video_output'])
    args = parser.parse_args()

    agent = VideoQuery(**vars(args)).run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/usr/bin/env python3
import logging

from local_llm import Agent

from local_llm.plugins import VideoSource, VideoOutput
from local_llm.utils import ArgParser


class VideoStream(Agent):
    """
    Relay, view, or test a video stream. Use the --video-input and --video-output arguments
    to set the video source and output protocols used from:
    https://github.com/dusty-nv/jetson-inference/blob/master/docs/aux-streaming.md
    For example, this will capture a V4L2 camera and serve it via WebRTC with H.264 encoding:
    python3 -m local_llm.agents.video_stream \
    --video-input /dev/video0 \
    --video-output webrtc://@:8554/output
    It's also used as a basic test of video streaming before using more complex agents that rely on it.
    """
    def __init__(self, video_input=None, video_output=None, **kwargs):
        super().__init__()

        source = VideoSource(video_input, **kwargs)
        sink = VideoOutput(video_output, **kwargs)

        # debug callback runs synchronously on the capture thread, then frames relay to the sink
        source.add(self.on_video, threaded=False)
        source.add(sink)

        self.video_source = source
        self.video_output = sink
        self.pipeline = [source]

    def on_video(self, image):
        """Log the dimensions of every captured frame for debugging."""
        logging.debug(f"captured {image.width}x{image.height} frame from {self.video_source.resource}")


if __name__ == "__main__":
    args = ArgParser(extras=['video_input', 'video_output', 'log']).parse_args()
    agent = VideoStream(**vars(args)).run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.