diff --git a/bazel/python/requirements.txt b/bazel/python/requirements.txt
index 8a1dd67..e8c7f01 100644
--- a/bazel/python/requirements.txt
+++ b/bazel/python/requirements.txt
@@ -3,3 +3,4 @@ Werkzeug==2.2.2
 Flask==2.0.2
 kubernetes==27.2.0
 depyf
+vllm
diff --git a/experimental/vllm_example/BUILD b/experimental/vllm_example/BUILD
new file mode 100644
index 0000000..f108a80
--- /dev/null
+++ b/experimental/vllm_example/BUILD
@@ -0,0 +1,11 @@
+load("@pip//:requirements.bzl", "requirement")
+load("@rules_python//python:defs.bzl", "py_binary", "py_library")
+
+py_binary(
+    name = "simple_inference",
+    srcs = ["simple_inference.py"],
+    main = "simple_inference.py",
+    deps = [
+        requirement("vllm"),
+    ],
+)
diff --git a/experimental/vllm_example/README.md b/experimental/vllm_example/README.md
new file mode 100644
index 0000000..d9ecfe2
--- /dev/null
+++ b/experimental/vllm_example/README.md
@@ -0,0 +1,4 @@
+## vLLM
+
+### References
+- [vLLM](https://github.com/vllm-project/vllm)
\ No newline at end of file
diff --git a/experimental/vllm_example/simple_inference.py b/experimental/vllm_example/simple_inference.py
new file mode 100644
index 0000000..9b758fa
--- /dev/null
+++ b/experimental/vllm_example/simple_inference.py
@@ -0,0 +1,22 @@
+from vllm import LLM, SamplingParams
+
+# Sample prompts.
+prompts = [
+    "Hello, my name is",
+    "The president of the United States is",
+    "The capital of France is",
+    "The future of AI is",
+]
+# Create a sampling params object.
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+
+# Create an LLM.
+llm = LLM(model="facebook/opt-125m")
+# Generate texts from the prompts. The output is a list of RequestOutput objects
+# that contain the prompt, generated text, and other information.
+outputs = llm.generate(prompts, sampling_params)
+# Print the outputs.
+for output in outputs:
+    prompt = output.prompt
+    generated_text = output.outputs[0].text
+    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
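
Usage note: assuming the `@pip` hub repository is regenerated after the `requirements.txt` change (the exact pip-lock workflow for this workspace is not shown in the patch), the example should be runnable with the standard Bazel invocation for the target defined above, `bazel run //experimental/vllm_example:simple_inference`. On first run vLLM downloads the `facebook/opt-125m` weights from the Hugging Face Hub, so the command needs network access and a GPU-capable environment supported by the installed vLLM build.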