Skip to content

Commit 6df1e40

Browse files
committed
feat: Media processing in the frontend - 1st pass
Signed-off-by: Alexandre Milesi <[email protected]>
1 parent 4c4130e commit 6df1e40

File tree

17 files changed

+619
-12
lines changed

17 files changed

+619
-12
lines changed

Cargo.lock

Lines changed: 59 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/bindings/python/Cargo.lock

Lines changed: 66 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/bindings/python/rust/lib.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use dynamo_llm::{self as llm_rs};
3030
use dynamo_llm::{entrypoint::RouterConfig, kv_router::KvRouterConfig};
3131

3232
use crate::llm::local_model::ModelRuntimeConfig;
33+
use crate::llm::preprocessor::MediaDecoder;
3334

3435
#[pyclass(eq, eq_int)]
3536
#[derive(Clone, Debug, PartialEq)]
@@ -115,6 +116,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
115116
m.add_class::<llm::kv::WorkerMetricsPublisher>()?;
116117
m.add_class::<llm::model_card::ModelDeploymentCard>()?;
117118
m.add_class::<llm::local_model::ModelRuntimeConfig>()?;
119+
m.add_class::<llm::preprocessor::MediaDecoder>()?;
118120
m.add_class::<llm::preprocessor::OAIChatPreprocessor>()?;
119121
m.add_class::<llm::backend::Backend>()?;
120122
m.add_class::<llm::kv::OverlapScores>()?;
@@ -175,7 +177,7 @@ fn log_message(level: &str, message: &str, module: &str, file: &str, line: u32)
175177
}
176178

177179
#[pyfunction]
178-
#[pyo3(signature = (model_input, model_type, endpoint, model_path, model_name=None, context_length=None, kv_cache_block_size=None, router_mode=None, migration_limit=0, runtime_config=None, user_data=None, custom_template_path=None))]
180+
#[pyo3(signature = (model_input, model_type, endpoint, model_path, model_name=None, context_length=None, kv_cache_block_size=None, router_mode=None, migration_limit=0, runtime_config=None, user_data=None, custom_template_path=None, media_decoder=None))]
179181
#[allow(clippy::too_many_arguments)]
180182
fn register_llm<'p>(
181183
py: Python<'p>,
@@ -191,6 +193,7 @@ fn register_llm<'p>(
191193
runtime_config: Option<ModelRuntimeConfig>,
192194
user_data: Option<&Bound<'p, PyDict>>,
193195
custom_template_path: Option<&str>,
196+
media_decoder: Option<MediaDecoder>,
194197
) -> PyResult<Bound<'p, PyAny>> {
195198
let model_input = match model_input {
196199
ModelInput::Text => llm_rs::model_type::ModelInput::Text,
@@ -236,7 +239,8 @@ fn register_llm<'p>(
236239
.migration_limit(Some(migration_limit))
237240
.runtime_config(runtime_config.unwrap_or_default().inner)
238241
.user_data(user_data_json)
239-
.custom_template_path(custom_template_path_owned);
242+
.custom_template_path(custom_template_path_owned)
243+
.media_decoder(media_decoder.map(|m| m.inner));
240244
// Download from HF, load the ModelDeploymentCard
241245
let mut local_model = builder.build().await.map_err(to_pyerr)?;
242246
// Advertise ourself on etcd so ingress can find us

lib/bindings/python/rust/llm/preprocessor.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::llm::model_card::ModelDeploymentCard;
66

77
use llm_rs::{
88
preprocessor::OpenAIPreprocessor,
9+
preprocessor::media::{ImageDecoder as RsImageDecoder, VideoDecoder as RsVideoDecoder, MediaDecoder as RsMediaDecoder},
910
protocols::common::llm_backend::{BackendOutput, PreprocessedRequest},
1011
types::{
1112
Annotated,
@@ -74,3 +75,35 @@ impl OAIChatPreprocessor {
7475
})
7576
}
7677
}
78+
79+
#[pyclass]
80+
#[derive(Clone)]
81+
pub struct MediaDecoder {
82+
pub(crate) inner: RsMediaDecoder,
83+
}
84+
85+
#[pymethods]
86+
impl MediaDecoder {
87+
#[new]
88+
fn new() -> Self {
89+
Self {
90+
inner: RsMediaDecoder::default(),
91+
}
92+
}
93+
94+
fn set_image_decoder(&mut self, image_decoder: &Bound<'_, PyDict>) -> PyResult<()> {
95+
let image_decoder = pythonize::depythonize(image_decoder).map_err(|err| {
96+
PyErr::new::<PyException, _>(format!("Failed to parse image_decoder: {}", err))
97+
})?;
98+
self.inner.image_decoder = image_decoder;
99+
Ok(())
100+
}
101+
102+
fn set_video_decoder(&mut self, video_decoder: &Bound<'_, PyDict>) -> PyResult<()> {
103+
let video_decoder = pythonize::depythonize(video_decoder).map_err(|err| {
104+
PyErr::new::<PyException, _>(format!("Failed to parse video_decoder: {}", err))
105+
})?;
106+
self.inner.video_decoder = video_decoder;
107+
Ok(())
108+
}
109+
}

lib/bindings/python/src/dynamo/_core.pyi

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,12 @@ class ModelRuntimeConfig:
488488
"""
489489
...
490490

491+
class MediaDecoder:
492+
"""
493+
Media decoding configuration for the OAI preprocessor
494+
"""
495+
...
496+
491497
class OAIChatPreprocessor:
492498
"""
493499
A preprocessor for OpenAI chat completions

lib/bindings/python/src/dynamo/llm/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from dynamo._core import KvStats as KvStats
3131
from dynamo._core import ModelInput as ModelInput
3232
from dynamo._core import ModelRuntimeConfig as ModelRuntimeConfig
33+
from dynamo._core import MediaDecoder as MediaDecoder
3334
from dynamo._core import ModelType as ModelType
3435
from dynamo._core import OverlapScores as OverlapScores
3536
from dynamo._core import RadixTree as RadixTree

lib/llm/Cargo.toml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ readme.workspace = true
1313
description = "Dynamo LLM Library"
1414

1515
[features]
16-
default = []
16+
default = ["media-loading"]
1717
# todo(ops): get this working in CI as a default.
1818
# default = ["block-manager", "testing-full"]
1919

@@ -24,6 +24,7 @@ testing-etcd = []
2424
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:ndarray", "dep:nix"]
2525
cuda = ["dep:cudarc"]
2626
integration = ["dynamo-runtime/integration"]
27+
media-loading = ["dep:ndarray", "dep:video-rs", "dep:image", "dep:reqwest", "dep:base64", "dep:tokio-rayon"]
2728

2829
[[bench]]
2930
name = "tokenizer"
@@ -137,6 +138,13 @@ minijinja = { version = "2.10.2", features = ["loader"] }
137138
minijinja-contrib = { version = "2.10.2", features = ["pycompat"] }
138139
json-five = { version = "0.3" }
139140

141+
# media loading in the preprocessor
142+
video-rs = { version = "0.10", features = ["ndarray"], optional = true }
143+
image = { version = "0.25", optional = true }
144+
reqwest = { workspace = true, optional = true }
145+
base64 = { version = "0.22", optional = true }
146+
tokio-rayon = {version = "2", optional = true }
147+
140148
# Publishers
141149
zeromq = "0.4.1"
142150
rmp-serde = "1.3"

lib/llm/src/engines.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,12 @@ impl
148148
dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
149149
prompt,
150150
) => prompt,
151-
_ => anyhow::bail!("Invalid request content field, expected Content::Text"),
151+
dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Array(
152+
parts,
153+
) => parts
154+
.iter()
155+
.map(|part| format!("{:?}", part))
156+
.collect::<String>(),
152157
}
153158
}
154159
_ => anyhow::bail!("Invalid request type, expected User message"),

0 commit comments

Comments (0)