Commit

updated local_llm and MLC containers
dusty-nv committed Jan 27, 2024
1 parent a408d40 commit dcdb996
Showing 24 changed files with 447 additions and 84 deletions.
2 changes: 1 addition & 1 deletion packages/llm/local_llm/Dockerfile
@@ -1,7 +1,7 @@
#---
# name: local_llm
# group: llm
# depends: [mlc, jetson-utils, riva-client:python]
# depends: [mlc, riva-client:python, jetson-inference, torch2trt]
# requires: '>=34.1.0'
# docs: docs.md
#---
2 changes: 1 addition & 1 deletion packages/llm/local_llm/agent.py
@@ -35,7 +35,7 @@ def __call__(self, input, channel=0, **kwargs):
        """
        Operator overload for process()
        """
        return self.process[channel](input, **kwargs)
        return self.process(input, channel, **kwargs)

    def start(self):
        """
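The __call__ change above forwards the channel index into process() rather than indexing a per-channel callable. A minimal illustrative sketch of the new dispatch (not the actual local_llm.Agent class, names are hypothetical):

#!/usr/bin/env python3
# Sketch of the dispatch style used by the updated Agent.__call__():
# the channel index is passed as an argument to a single process() method.
class TinyAgent:
    def process(self, input, channel=0):
        return f"channel {channel} got: {input}"

    def __call__(self, input, channel=0, **kwargs):
        return self.process(input, channel, **kwargs)

if __name__ == "__main__":
    agent = TinyAgent()
    print(agent("hello"))             # -> channel 0 got: hello
    print(agent("hello", channel=1))  # -> channel 1 got: hello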
2 changes: 0 additions & 2 deletions packages/llm/local_llm/agents/chat.py
@@ -66,8 +66,6 @@ def print_input_prompt(self):


if __name__ == "__main__":
    from local_llm.utils import ArgParser

    parser = ArgParser()
    parser.add_argument("-it", "--interactive", action="store_true", help="enable interactive user input from the terminal")
    args = parser.parse_args()
26 changes: 26 additions & 0 deletions packages/llm/local_llm/agents/mp_chat.py
@@ -0,0 +1,26 @@
#!/usr/bin/env python3
from local_llm import Agent, Pipeline

from local_llm.plugins import UserPrompt, ChatQuery, PrintStream, ProcessProxy
from local_llm.utils import ArgParser


class MultiprocessChat(Agent):
"""
Test of running a LLM and chat session in a subprocess.
"""
def __init__(self, **kwargs):
super().__init__()

self.pipeline = Pipeline([
UserPrompt(interactive=False, **kwargs), # interactive=False if kwargs.get('prompt') else True
ProcessProxy((lambda **kwargs: ChatQuery(**kwargs)), **kwargs),
PrintStream(color='green')
])


if __name__ == "__main__":
parser = ArgParser()
args = parser.parse_args()

agent = MultiprocessChat(**vars(args)).run()
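For context, ProcessProxy takes a plugin factory (here a lambda returning a ChatQuery) so the plugin is constructed and run inside its own process while the parent relays inputs and outputs. A minimal sketch of that general pattern, assuming nothing about the actual local_llm ProcessProxy implementation (all names below are hypothetical):

#!/usr/bin/env python3
# Hedged sketch of a process-proxy pattern: the parent sends inputs over a queue,
# a worker process builds the plugin from a factory and relays its outputs back.
import multiprocessing as mp

def _worker(factory, inputs, outputs):
    plugin = factory()                    # construct the plugin inside the subprocess
    for item in iter(inputs.get, None):   # None acts as the shutdown sentinel
        outputs.put(plugin(item))

def make_plugin():
    # stand-in factory for something like (lambda **kwargs: ChatQuery(**kwargs))
    return str.upper

class SimpleProcessProxy:
    def __init__(self, factory):
        self.inputs = mp.Queue()
        self.outputs = mp.Queue()
        self.proc = mp.Process(target=_worker, args=(factory, self.inputs, self.outputs), daemon=True)
        self.proc.start()

    def __call__(self, item):
        self.inputs.put(item)
        return self.outputs.get()

    def stop(self):
        self.inputs.put(None)
        self.proc.join()

if __name__ == "__main__":
    proxy = SimpleProcessProxy(make_plugin)
    print(proxy("hello from the parent process"))   # -> HELLO FROM THE PARENT PROCESS
    proxy.stop()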
31 changes: 31 additions & 0 deletions packages/llm/local_llm/agents/mp_test.py
@@ -0,0 +1,31 @@
#!/usr/bin/env python3
import time

from local_llm import Agent, Pipeline

from local_llm.plugins import PrintStream, ProcessProxy
from local_llm.utils import ArgParser


class MultiprocessTest(Agent):
"""
This is a test of the ProcessProxy plugin for running pipelines and plugins in their own processes.
"""
def __init__(self, **kwargs):
super().__init__()

self.pipeline = Pipeline([
ProcessProxy((lambda **kwargs: PrintStream(**kwargs)), color='green', relay=True, **kwargs),
PrintStream(color='blue', **kwargs),
])


if __name__ == "__main__":
parser = ArgParser()
args = parser.parse_args()

agent = MultiprocessTest(**vars(args)).start()

while True:
agent("INSERT MESSAGE HERE")
time.sleep(1.0)
35 changes: 35 additions & 0 deletions packages/llm/local_llm/agents/mp_video.py
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
import logging

from local_llm import Agent

from local_llm.plugins import VideoSource, VideoOutput, ProcessProxy
from local_llm.utils import ArgParser


class MultiprocessVideo(Agent):
"""
Test of running a video stream across processes.
"""
def __init__(self, **kwargs):
super().__init__()

self.video_source = VideoSource(return_tensors='np', **kwargs)
self.video_output = ProcessProxy((lambda **kwargs: VideoOutput(**kwargs)), **kwargs)

self.video_source.add(self.on_video, threaded=False)
self.video_source.add(self.video_output)

self.pipeline = [self.video_source]

def on_video(self, image):
logging.debug(f"captured {image.shape} ({image.dtype}) frame from {self.video_source.resource}")


if __name__ == "__main__":
from local_llm.utils import ArgParser

parser = ArgParser(extras=['video_input', 'video_output', 'log'])
args = parser.parse_args()

agent = MultiprocessVideo(**vars(args)).run()
138 changes: 99 additions & 39 deletions packages/llm/local_llm/agents/video_query.py
@@ -1,56 +1,116 @@
#!/usr/bin/env python3
from local_llm import Agent, Pipeline, ChatTemplates
import time
import threading

from local_llm.plugins import UserPrompt, ChatQuery, PrintStream
from local_llm import Agent

from local_llm.plugins import VideoSource, VideoOutput, ChatQuery, PrintStream, ProcessProxy
from local_llm.utils import ArgParser, print_table

from termcolor import cprint
from jetson_utils import cudaFont, cudaMemcpy, cudaToNumpy, cudaDeviceSynchronize


#
# TODO max-new-tokens and other generation args...
# TODO custom arg parser
#
class ChatAgent(Agent):
class VideoQuery(Agent):
"""
Agent for two-turn multimodal chat.
Perpetual always-on closed-loop visual agent that applies prompts to a video stream.
"""
def __init__(self, model="meta-llama/Llama-2-7b-chat-hf", interactive=True, **kwargs):
def __init__(self, model="liuhaotian/llava-v1.5-13b", **kwargs):
super().__init__()

# load model in another process for smooth streaming
self.llm = ProcessProxy((lambda **kwargs: ChatQuery(model, drop_inputs=True, **kwargs)), **kwargs)
self.llm.add(PrintStream(color='green', relay=True).add(self.on_eos))
self.llm.start()

# test / warm-up query
self.warmup = True
self.last_text = ""
self.eos = False

"""
# Equivalent to:
self.pipeline = UserPrompt(interactive=interactive, **kwargs).add(
LLMQuery(model, **kwargs).add(
PrintStream(relay=True).add(self.on_eos)
))
"""
self.pipeline = Pipeline([
UserPrompt(interactive=interactive, **kwargs),
ChatQuery(model, **kwargs),
PrintStream(relay=True),
self.on_eos
])
self.llm("What is 2+2?")

self.model = self.pipeline[ChatQuery].model
self.interactive = interactive
while self.warmup:
time.sleep(0.25)

# create video streams
self.video_source = VideoSource(**kwargs)
self.video_output = VideoOutput(**kwargs)

self.print_input_prompt()

def on_eos(self, input):
if input.endswith('</s>'):
print_table(self.model.stats)
self.print_input_prompt()
self.video_source.add(self.on_video, threaded=False)
self.video_output.start()

self.font = cudaFont()

def print_input_prompt(self):
if self.interactive:
cprint('>> PROMPT: ', 'blue', end='', flush=True)
# setup prompts
self.prompt = 0
self.prompts = kwargs.get('prompt')

if not self.prompts:
self.prompts = [
'Describe the image concisely.',
'How many fingers is the person holding up?',
'What does the text in the image say?',
'There is a question asked in the image. What is the answer?',
]

if __name__ == "__main__":
from local_llm.utils import ArgParser
self.keyboard_thread = threading.Thread(target=self.poll_keyboard)
self.keyboard_thread.start()

# entry node
self.pipeline = [self.video_source]

def on_video(self, image):
np_image = cudaToNumpy(image)
cudaDeviceSynchronize()

self.llm([
'reset',
np_image,
self.prompts[self.prompt],
])

text = self.last_text.replace('\n', '').replace('</s>', '').strip()

parser = ArgParser()
parser.add_argument("-it", "--interactive", action="store_true", help="enable interactive user input from the terminal")
if text:
self.font.OverlayText(image, text=text, x=5, y=42, color=self.font.White, background=self.font.Gray40)

self.font.OverlayText(image, text=self.prompts[self.prompt], x=5, y=5, color=(120,215,21), background=self.font.Gray40)
self.video_output(image)

def on_eos(self, text):
if self.eos:
self.last_text = text # new query response
self.eos = False
elif not self.warmup: # don't view warmup response
self.last_text = self.last_text + text

if text.endswith('</s>'):
#print_table(self.llm.model.stats)
self.warmup = False
self.eos = True

def poll_keyboard(self):
while True:
try:
key = input().strip() #getch.getch()

if key == 'd' or key == 'l':
self.prompt = (self.prompt + 1) % len(self.prompts)
elif key == 'a' or key == 'j':
self.prompt = self.prompt - 1
if self.prompt < 0:
self.prompt = len(self.prompts) - 1

num = int(key)

if num > 0 and num <= len(self.prompts):
self.prompt = num - 1
except Exception as err:
continue

if __name__ == "__main__":
parser = ArgParser(extras=ArgParser.Defaults+['video_input', 'video_output'])
args = parser.parse_args()

agent = ChatAgent(**vars(args)).run()
agent = VideoQuery(**vars(args)).run()
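The new on_eos() callback accumulates streamed tokens and uses the '</s>' end-of-sequence marker to latch a completed reply for the video overlay. A standalone hedged sketch of that accumulation logic (illustrative only, the warmup handling is omitted and the class name is hypothetical):

#!/usr/bin/env python3
# Sketch of stream accumulation as done in VideoQuery.on_eos(): chunks are appended
# until '</s>' arrives, then the full reply is latched for the overlay text.
class StreamAccumulator:
    def __init__(self):
        self.last_text = ""   # latest in-progress or completed reply
        self.eos = False      # True once the previous reply has finished

    def on_token(self, text):
        if self.eos:
            self.last_text = text        # first chunk of a new reply resets the buffer
            self.eos = False
        else:
            self.last_text += text       # otherwise keep appending

        if text.endswith('</s>'):        # end-of-sequence -> reply is complete
            self.eos = True

    def overlay_text(self):
        # strip newlines and the EOS marker before drawing, as on_video() does
        return self.last_text.replace('\n', '').replace('</s>', '').strip()

if __name__ == "__main__":
    acc = StreamAccumulator()
    for chunk in ["The person is ", "holding up ", "three fingers.</s>"]:
        acc.on_token(chunk)
    print(acc.overlay_text())   # -> The person is holding up three fingers.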

44 changes: 44 additions & 0 deletions packages/llm/local_llm/agents/video_stream.py
@@ -0,0 +1,44 @@
#!/usr/bin/env python3
import logging

from local_llm import Agent

from local_llm.plugins import VideoSource, VideoOutput
from local_llm.utils import ArgParser


class VideoStream(Agent):
"""
Relay, view, or test a video stream. Use the --video-input and --video-output arguments
to set the video source and output protocols used from:
https://github.com/dusty-nv/jetson-inference/blob/master/docs/aux-streaming.md
For example, this will capture a V4L2 camera and serve it via WebRTC with H.264 encoding:
python3 -m local_llm.agents.video_stream \
--video-input /dev/video0 \
--video-output webrtc://@:8554/output
It's also used as a basic test of video streaming before using more complex agents that rely on it.
"""
def __init__(self, video_input=None, video_output=None, **kwargs):
super().__init__()

self.video_source = VideoSource(video_input, **kwargs)
self.video_output = VideoOutput(video_output, **kwargs)

self.video_source.add(self.on_video, threaded=False)
self.video_source.add(self.video_output)

self.pipeline = [self.video_source]

def on_video(self, image):
logging.debug(f"captured {image.width}x{image.height} frame from {self.video_source.resource}")


if __name__ == "__main__":
parser = ArgParser(extras=['video_input', 'video_output', 'log'])
args = parser.parse_args()

agent = VideoStream(**vars(args)).run()
2 changes: 0 additions & 2 deletions packages/llm/local_llm/agents/voice_chat.py
@@ -78,8 +78,6 @@ def on_eos(self, text):


if __name__ == "__main__":
    from local_llm.utils import ArgParser

    parser = ArgParser(extras=ArgParser.Defaults+['asr', 'tts', 'audio_output'])
    args = parser.parse_args()

2 changes: 0 additions & 2 deletions packages/llm/local_llm/agents/web_chat.py
@@ -112,8 +112,6 @@ def start(self):


if __name__ == "__main__":
    from local_llm.utils import ArgParser

    parser = ArgParser(extras=ArgParser.Defaults+['asr', 'tts', 'audio_output', 'web'])
    args = parser.parse_args()

2 changes: 1 addition & 1 deletion packages/llm/local_llm/history.py
@@ -285,7 +285,7 @@ def embed_chat(self, use_cache=True):
            cached = embed_key in entry and use_cache

            if logging.getLogger().isEnabledFor(logging.DEBUG) and not cached:
                logging.debug(f"processing chat entry {i} role='{entry.role}' template='{role_template}' open_user_prompt={open_user_prompt} cached={'true' if cached else 'false'} {key}='{entry[key]}'".replace('\n', '\\n'))
                logging.debug(f"processing chat entry {i} role='{entry.role}' template='{role_template}' open_user_prompt={open_user_prompt} cached={'true' if cached else 'false'} {key}='{entry[key] if isinstance(entry[key], str) else type(entry[key])}'".replace('\n', '\\n'))

            if use_cache:
                if embed_key not in entry: # TODO and entry.role != 'bot' -- only compute bot embeddings when needed
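The history.py change keeps debug logs readable when a chat entry holds a non-text input (such as an image tensor) by logging its type instead of its value. A tiny illustration of that guard (hypothetical values, not the actual ChatHistory code):

#!/usr/bin/env python3
# Log strings verbatim, but only the type of non-string entries, mirroring the
# guarded f-string added in embed_chat().
import logging
logging.basicConfig(level=logging.DEBUG)

def describe(value):
    return value if isinstance(value, str) else type(value)

logging.debug(f"entry text={describe('hello')}")        # logs: entry text=hello
logging.debug(f"entry image={describe(bytearray(8))}")  # logs: entry image=<class 'bytearray'>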
7 changes: 5 additions & 2 deletions packages/llm/local_llm/local_llm.py
@@ -76,9 +76,11 @@ def from_pretrained(model, api=None, **kwargs):
        else:
            raise ValueError(f"invalid API: {api}")

        # moved CLIP to after LLM is loaded because of MLC CUDA errors when running in subprocess
        model.init_vision()
        model.config.load_time = time.perf_counter() - load_begin

        print_table(model.config)

        return model

    def generate(self, inputs, streaming=True, **kwargs):
@@ -144,7 +146,8 @@ def __init__(self, model_path, **kwargs):
        self.model_path = model_path
        self.model_config = AutoConfig.from_pretrained(model_path)

        self.init_vision(**kwargs)
        # moved CLIP to after LLM is loaded because of MLC CUDA errors when running in subprocess
        #self.init_vision(**kwargs)

    def init_vision(self, **kwargs):
        """
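As the inline comments explain, constructing the CLIP vision encoder before the MLC LLM was loaded triggered CUDA errors when the model runs in a subprocess, so vision init now happens in from_pretrained() after the LLM backend is up. A generic hedged sketch of that deferred-initialization ordering (hypothetical class and attribute names, not the actual local_llm classes):

#!/usr/bin/env python3
# Sketch of deferred initialization: the heavyweight secondary component is built only
# after the primary backend has finished loading, rather than inside __init__().
import time

class FakeLLM:
    def __init__(self, model_path):
        self.model_path = model_path
        self.vision = None              # no longer initialized here

    def init_vision(self):
        # stands in for loading a CLIP/vision encoder once the LLM backend exists
        self.vision = f"vision-encoder-for-{self.model_path}"

    @staticmethod
    def from_pretrained(model_path):
        begin = time.perf_counter()
        model = FakeLLM(model_path)     # 1) load the LLM backend first
        model.init_vision()             # 2) then bring up the vision encoder
        model.load_time = time.perf_counter() - begin
        return model

if __name__ == "__main__":
    m = FakeLLM.from_pretrained("liuhaotian/llava-v1.5-13b")
    print(m.vision, f"loaded in {m.load_time:.4f}s")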