[Dev] Add vllm example
lshmouse committed Oct 9, 2024
1 parent b85a994 commit a87b020
Showing 9 changed files with 178 additions and 0 deletions.
3 changes: 3 additions & 0 deletions bazel/python/requirements.txt
@@ -3,3 +3,6 @@ Werkzeug==2.2.2
Flask==2.0.2
kubernetes==27.2.0
depyf
vllm
ray
fastapi
11 changes: 11 additions & 0 deletions experimental/ray_example/BUILD
@@ -0,0 +1,11 @@
load("@pip//:requirements.bzl", "requirement")
load("@rules_python//python:defs.bzl", "py_binary", "py_library")

py_binary(
    name = "ray_demo",
    srcs = ["ray_demo.py"],
    main = "ray_demo.py",
    deps = [
        requirement("ray"),
    ],
)
39 changes: 39 additions & 0 deletions experimental/ray_example/README.md
@@ -0,0 +1,39 @@
## Ray

### ray.remote internals
The Python decorator form
```
@ray.remote
def f(x):
return x + 1
```
is equivalent to
```
f = ray.remote(f)
```
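
The decorator also accepts arguments; as a minimal sketch (the `num_cpus` value here is purely illustrative), the parameterized form expands the same way, which is where the `options` handled in the internals below come from:
```
@ray.remote(num_cpus=2)
def g(x):
    return x + 1

# roughly equivalent to
def g(x):
    return x + 1

g = ray.remote(num_cpus=2)(g)
```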

Internal implementation:
- @ray.remote
- ray/__init__.py: from ray._private.worker import remote
- ray/_private/worker.py: def remote(function, *args, **kwargs), which builds a ray.remote_function.RemoteFunction via _make_remote (shown below)
```
def _make_remote(function_or_class, options):
    if not function_or_class.__module__:
        function_or_class.__module__ = "global"
    if inspect.isfunction(function_or_class) or is_cython(function_or_class):
        ray_option_utils.validate_task_options(options, in_options=False)
        return ray.remote_function.RemoteFunction(
            Language.PYTHON,
            function_or_class,
            None,
            options,
        )
```
- python/ray/remote_function.py: RemoteFunction
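
A minimal usage sketch of the resulting RemoteFunction (assuming a local `ray.init()`; values are illustrative): calling `.remote()` submits the task and returns an ObjectRef, while `.options()` applies per-call overrides of the task options validated above.
```
import ray

ray.init()

@ray.remote
def f(x):
    return x + 1

ref = f.remote(1)    # submits the task, returns an ObjectRef immediately
print(ray.get(ref))  # blocks until the worker finishes -> 2

# per-call overrides go through RemoteFunction.options()
print(ray.get(f.options(num_cpus=1).remote(2)))  # -> 3
```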

https://tddg.github.io/ds5110-cs5501-spring24/assets/docs/lec6b-ray-internals.pdf

### References
- [Ray documentation](https://docs.ray.io/en/latest/)
- [ray-educational-materials](https://github.com/ray-project/ray-educational-materials)
9 changes: 9 additions & 0 deletions experimental/ray_example/ray_demo.py
@@ -0,0 +1,9 @@
import ray
ray.init()

@ray.remote
def f(x):
    return x * x

futures = [f.remote(i) for i in range(4)]
print(ray.get(futures))
31 changes: 31 additions & 0 deletions experimental/vllm_example/BUILD
@@ -0,0 +1,31 @@
load("@pip//:requirements.bzl", "requirement")
load("@rules_python//python:defs.bzl", "py_binary", "py_library")

py_binary(
    name = "vllm_batch",
    srcs = ["vllm_batch.py"],
    main = "vllm_batch.py",
    deps = [
        requirement("vllm"),
    ],
)

py_binary(
    name = "vllm_local",
    srcs = ["vllm_local.py"],
    main = "vllm_local.py",
    deps = [
        requirement("vllm"),
    ],
)

py_binary(
    name = "vllm_on_ray",
    srcs = ["vllm_on_ray.py"],
    main = "vllm_on_ray.py",
    deps = [
        requirement("vllm"),
        requirement("ray"),
        requirement("fastapi"),
    ],
)
34 changes: 34 additions & 0 deletions experimental/vllm_example/README.md
@@ -0,0 +1,34 @@
## vLLM

### Setup
```
pip install vllm
```

### Example
Local:
```
python vllm_local.py
```
Ray cluster:
```
ray start --head
python vllm_on_ray.py
```
Test:
```
curl "http://localhost:8000/inference?query=Who%20are%20you"
```
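
A client-side sketch of the same test in Python (the `requests` package is an assumption here; it is not in requirements.txt):
```
import requests

# Call the /inference endpoint served by vllm_on_ray.py on the default port 8000.
resp = requests.get(
    "http://localhost:8000/inference",
    params={"query": "Who are you"},
)
print(resp.status_code)
print(resp.text)
```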

### TODO
- Deploy vLLM on k8s with a Helm chart
- vLLM on a remote Ray cluster
- Ray cluster internals

### References
- https://zhuanlan.zhihu.com/p/710614883
- https://docs.ray.io/en/latest/serve/tutorials/vllm-example.html
- https://docs.vllm.ai/en/latest/
- https://github.com/vllm-project/vllm/issues/1363
- http://kubeagi.k8s.com.cn/docs/Concepts/models
- https://github.com/skypilot-org/skypilot
22 changes: 22 additions & 0 deletions experimental/vllm_example/vllm_batch.py
@@ -0,0 +1,22 @@
from vllm import LLM, SamplingParams

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM.
llm = LLM(model="facebook/opt-125m")
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
5 changes: 5 additions & 0 deletions experimental/vllm_example/vllm_local.py
@@ -0,0 +1,5 @@
from vllm import LLM

llm = LLM("qwen/qwen2-0.5B")
# generate() returns a list of RequestOutput objects, one per prompt.
output = llm.generate("Who are you?")
print(output)
24 changes: 24 additions & 0 deletions experimental/vllm_example/vllm_on_ray.py
@@ -0,0 +1,24 @@
from fastapi import FastAPI
from ray import serve
from vllm import LLM

app = FastAPI()

@serve.deployment(num_replicas=1, ray_actor_options={"num_gpus": 1})
@serve.ingress(app)
class FastAPIDeployment:
    def __init__(self):
        self.llm = LLM("qwen/qwen2-0.5B")

    @app.get("/inference")
    def model_inference(self, query: str) -> str:
        print("query: %s" % query)
        output = self.llm.generate(query)
        return str(output)

    @app.get("/hello")
    def hello(self) -> str:
        return "hello"


serve.run(FastAPIDeployment.bind(), route_prefix="/", name="qwen2_model_service")
