Skip to content

Commit 6df1e40

Browse files
committed
feat: Media processing in the frontend - 1st pass
Signed-off-by: Alexandre Milesi <[email protected]>
1 parent 4c4130e commit 6df1e40

File tree

17 files changed

+619
-12
lines changed

17 files changed

+619
-12
lines changed

Cargo.lock

Lines changed: 59 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/bindings/python/Cargo.lock

Lines changed: 66 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/bindings/python/rust/lib.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use dynamo_llm::{self as llm_rs};
3030
use dynamo_llm::{entrypoint::RouterConfig, kv_router::KvRouterConfig};
3131

3232
use crate::llm::local_model::ModelRuntimeConfig;
33+
use crate::llm::preprocessor::MediaDecoder;
3334

3435
#[pyclass(eq, eq_int)]
3536
#[derive(Clone, Debug, PartialEq)]
@@ -115,6 +116,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
115116
m.add_class::<llm::kv::WorkerMetricsPublisher>()?;
116117
m.add_class::<llm::model_card::ModelDeploymentCard>()?;
117118
m.add_class::<llm::local_model::ModelRuntimeConfig>()?;
119+
m.add_class::<llm::preprocessor::MediaDecoder>()?;
118120
m.add_class::<llm::preprocessor::OAIChatPreprocessor>()?;
119121
m.add_class::<llm::backend::Backend>()?;
120122
m.add_class::<llm::kv::OverlapScores>()?;
@@ -175,7 +177,7 @@ fn log_message(level: &str, message: &str, module: &str, file: &str, line: u32)
175177
}
176178

177179
#[pyfunction]
178-
#[pyo3(signature = (model_input, model_type, endpoint, model_path, model_name=None, context_length=None, kv_cache_block_size=None, router_mode=None, migration_limit=0, runtime_config=None, user_data=None, custom_template_path=None))]
180+
#[pyo3(signature = (model_input, model_type, endpoint, model_path, model_name=None, context_length=None, kv_cache_block_size=None, router_mode=None, migration_limit=0, runtime_config=None, user_data=None, custom_template_path=None, media_decoder=None))]
179181
#[allow(clippy::too_many_arguments)]
180182
fn register_llm<'p>(
181183
py: Python<'p>,
@@ -191,6 +193,7 @@ fn register_llm<'p>(
191193
runtime_config: Option<ModelRuntimeConfig>,
192194
user_data: Option<&Bound<'p, PyDict>>,
193195
custom_template_path: Option<&str>,
196+
media_decoder: Option<MediaDecoder>,
194197
) -> PyResult<Bound<'p, PyAny>> {
195198
let model_input = match model_input {
196199
ModelInput::Text => llm_rs::model_type::ModelInput::Text,
@@ -236,7 +239,8 @@ fn register_llm<'p>(
236239
.migration_limit(Some(migration_limit))
237240
.runtime_config(runtime_config.unwrap_or_default().inner)
238241
.user_data(user_data_json)
239-
.custom_template_path(custom_template_path_owned);
242+
.custom_template_path(custom_template_path_owned)
243+
.media_decoder(media_decoder.map(|m| m.inner));
240244
// Download from HF, load the ModelDeploymentCard
241245
let mut local_model = builder.build().await.map_err(to_pyerr)?;
242246
// Advertise ourself on etcd so ingress can find us

lib/bindings/python/rust/llm/preprocessor.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::llm::model_card::ModelDeploymentCard;
66

77
use llm_rs::{
88
preprocessor::OpenAIPreprocessor,
9+
preprocessor::media::{ImageDecoder as RsImageDecoder, VideoDecoder as RsVideoDecoder, MediaDecoder as RsMediaDecoder},
910
protocols::common::llm_backend::{BackendOutput, PreprocessedRequest},
1011
types::{
1112
Annotated,
@@ -74,3 +75,35 @@ impl OAIChatPreprocessor {
7475
})
7576
}
7677
}
78+
79+
#[pyclass]
80+
#[derive(Clone)]
81+
pub struct MediaDecoder {
82+
pub(crate) inner: RsMediaDecoder,
83+
}
84+
85+
#[pymethods]
86+
impl MediaDecoder {
87+
#[new]
88+
fn new() -> Self {
89+
Self {
90+
inner: RsMediaDecoder::default(),
91+
}
92+
}
93+
94+
fn set_image_decoder(&mut self, image_decoder: &Bound<'_, PyDict>) -> PyResult<()> {
95+
let image_decoder = pythonize::depythonize(image_decoder).map_err(|err| {
96+
PyErr::new::<PyException, _>(format!("Failed to parse image_decoder: {}", err))
97+
})?;
98+
self.inner.image_decoder = image_decoder;
99+
Ok(())
100+
}
101+
102+
fn set_video_decoder(&mut self, video_decoder: &Bound<'_, PyDict>) -> PyResult<()> {
103+
let video_decoder = pythonize::depythonize(video_decoder).map_err(|err| {
104+
PyErr::new::<PyException, _>(format!("Failed to parse video_decoder: {}", err))
105+
})?;
106+
self.inner.video_decoder = video_decoder;
107+
Ok(())
108+
}
109+
}

lib/bindings/python/src/dynamo/_core.pyi

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,12 @@ class ModelRuntimeConfig:
488488
"""
489489
...
490490

491+
class MediaDecoder:
492+
"""
493+
Media decoding configuration for the OAI preprocessor
494+
"""
495+
...
496+
491497
class OAIChatPreprocessor:
492498
"""
493499
A preprocessor for OpenAI chat completions

lib/bindings/python/src/dynamo/llm/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from dynamo._core import KvStats as KvStats
3131
from dynamo._core import ModelInput as ModelInput
3232
from dynamo._core import ModelRuntimeConfig as ModelRuntimeConfig
33+
from dynamo._core import MediaDecoder as MediaDecoder
3334
from dynamo._core import ModelType as ModelType
3435
from dynamo._core import OverlapScores as OverlapScores
3536
from dynamo._core import RadixTree as RadixTree

lib/llm/Cargo.toml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ readme.workspace = true
1313
description = "Dynamo LLM Library"
1414

1515
[features]
16-
default = []
16+
default = ["media-loading"]
1717
# todo(ops): get this working in CI as a default.
1818
# default = ["block-manager", "testing-full"]
1919

@@ -24,6 +24,7 @@ testing-etcd = []
2424
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:ndarray", "dep:nix"]
2525
cuda = ["dep:cudarc"]
2626
integration = ["dynamo-runtime/integration"]
27+
media-loading = ["dep:ndarray", "dep:video-rs", "dep:image", "dep:reqwest", "dep:base64", "dep:tokio-rayon"]
2728

2829
[[bench]]
2930
name = "tokenizer"
@@ -137,6 +138,13 @@ minijinja = { version = "2.10.2", features = ["loader"] }
137138
minijinja-contrib = { version = "2.10.2", features = ["pycompat"] }
138139
json-five = { version = "0.3" }
139140

141+
# media loading in the preprocessor
142+
video-rs = { version = "0.10", features = ["ndarray"], optional = true }
143+
image = { version = "0.25", optional = true }
144+
reqwest = { workspace = true, optional = true }
145+
base64 = { version = "0.22", optional = true }
146+
tokio-rayon = {version = "2", optional = true }
147+
140148
# Publishers
141149
zeromq = "0.4.1"
142150
rmp-serde = "1.3"

lib/llm/src/engines.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,12 @@ impl
148148
dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
149149
prompt,
150150
) => prompt,
151-
_ => anyhow::bail!("Invalid request content field, expected Content::Text"),
151+
dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Array(
152+
parts,
153+
) => parts
154+
.iter()
155+
.map(|part| format!("{:?}", part))
156+
.collect::<String>(),
152157
}
153158
}
154159
_ => anyhow::bail!("Invalid request type, expected User message"),

0 commit comments

Comments (0)