Changes from 53 commits
Commits (73)
2689205
Add QwenSpeechSummarization
eric-mccann-pro Dec 12, 2025
f01498a
Runs main() in container... does not run main during build for testin…
eric-mccann-pro Dec 12, 2025
98bc842
Logger won't log. Deal with it later
eric-mccann-pro Dec 12, 2025
98bb794
Fix format strings
eric-mccann-pro Dec 12, 2025
e29c34a
Add primary_topic and other_topics to output
eric-mccann-pro Dec 12, 2025
0604c07
Make sure we download the tokenizer giblets during docker build
eric-mccann-pro Dec 12, 2025
97ae53f
Mock an LLM generator's events stream. Run pytest if RUN_TESTS is true
eric-mccann-pro Dec 15, 2025
f04d5b0
Use releasable descriptor
eric-mccann-pro Dec 16, 2025
86a7ab4
Readme
eric-mccann-pro Dec 16, 2025
50cb5f7
Change default RUN_TEST to false
eric-mccann-pro Dec 16, 2025
e006afd
Parameterize VLLM_MODEL and VLLM_URI at container scope, as they're e…
eric-mccann-pro Dec 16, 2025
b0b1c15
+x
eric-mccann-pro Dec 16, 2025
198f3ec
Include served-model-name param in the entrypoint, not the CMD
eric-mccann-pro Dec 16, 2025
0908932
Make sure tokenizer pull step has VLLM_MODEL defined in env if overriden
eric-mccann-pro Dec 16, 2025
c20c3d2
License blocks
eric-mccann-pro Dec 16, 2025
ebbecd7
Make exception text less useless when there are no FF tracks
eric-mccann-pro Dec 16, 2025
5aea1b7
Fix typo
eric-mccann-pro Dec 16, 2025
68c8456
Fix another typo
eric-mccann-pro Dec 16, 2025
ae4f6f0
Fix default in descriptor
eric-mccann-pro Dec 16, 2025
de6f2d3
Make speaker id optional
eric-mccann-pro Dec 16, 2025
cc151c6
input_cleanup: be cool
eric-mccann-pro Dec 16, 2025
16a367c
again
eric-mccann-pro Dec 16, 2025
7d231e5
Change summary and print the final summary after it comes back from t…
eric-mccann-pro Dec 16, 2025
3c04189
Print number of results from component video track func when called b…
eric-mccann-pro Dec 16, 2025
47ca541
Actually return results. duh
eric-mccann-pro Dec 16, 2025
dbed34c
Set an ImageLocation for video tracks
eric-mccann-pro Dec 17, 2025
bb5d333
Define CLASSIFIERS_FILE and ENABLED_CLASSIFIERS in the json, now that…
eric-mccann-pro Dec 17, 2025
6948569
Gate some of the output behind debug parameter
eric-mccann-pro Dec 17, 2025
82f37b6
Provide Items of Interest instruction
eric-mccann-pro Dec 17, 2025
9e47148
Remove businesses from entities list
eric-mccann-pro Dec 17, 2025
ed36524
Parameterization and documentation
eric-mccann-pro Dec 17, 2025
17e8c54
Switch propertiesKeys instead of defaultValues
eric-mccann-pro Dec 17, 2025
8f299b8
Remove partial word from README.md
eric-mccann-pro Dec 17, 2025
6fc5a37
PROMPT_TEMPLATE is a property
eric-mccann-pro Dec 17, 2025
f3500db
Fix a typo and mention VLLM_URI
eric-mccann-pro Dec 17, 2025
64d62bb
Don't mention VRAM
eric-mccann-pro Dec 17, 2025
247ca37
Make sample classifiers match readme AND put ticks around properties+…
eric-mccann-pro Dec 17, 2025
d18a7af
Switch to defaults for the properties that have a default
eric-mccann-pro Dec 17, 2025
c192dca
Output => tracks
eric-mccann-pro Dec 17, 2025
c32d4bb
justification => reasoning
eric-mccann-pro Dec 17, 2025
60e7fa3
Specific Items of Interest appendage is never empty if present
eric-mccann-pro Dec 17, 2025
9eb3a39
reasonining
eric-mccann-pro Dec 17, 2025
eb4fccc
Use classifier confidence for detection confidence
eric-mccann-pro Dec 18, 2025
fc5dc70
Use FakeClass for all of the manual openai-api client mock buildout
eric-mccann-pro Dec 18, 2025
d838d0b
Make sure the tracks are ordered in accordance with their index
eric-mccann-pro Dec 18, 2025
ceb6801
Validate schema, close clients between calls (prevents deadlock)
eric-mccann-pro Dec 18, 2025
67e680f
Fix path to vllm-entrypoint.sh.
jrobble Dec 24, 2025
3097d3f
Disable XET for hf download and fix deprecation warning
eric-mccann-pro Jan 2, 2026
7318613
Perform download in separate stage
eric-mccann-pro Jan 2, 2026
84e170c
Fix max-model-length parameter name
eric-mccann-pro Jan 15, 2026
8f5f61d
Merge branch 'develop' into feat/qwen-speech-summarization
jrobble Jan 15, 2026
177671b
Update versions to 10.0.
jrobble Jan 15, 2026
9202086
Fix JSONArgsRecommended warning.
jrobble Jan 22, 2026
04f7e1a
Fix how Whisper is returning duplicate tracks for videos.
jrobble Jan 22, 2026
541fac1
Wait up to two minutes for vllm to be healthy for each call to summarize
eric-mccann-pro Jan 22, 2026
e6506bd
Merge remote-tracking branch 'origin/feat/qwen-speech-summarization' …
eric-mccann-pro Jan 22, 2026
2a14fe1
Use algorithm prop.
jrobble Jan 22, 2026
e3d6327
Fix test.
jrobble Jan 23, 2026
08d5531
Fix test round 2.
jrobble Jan 23, 2026
f7fa93c
Fix bug.
jrobble Jan 23, 2026
e3e9c0d
Use local_files_only=True.
jrobble Jan 23, 2026
48acdaa
Download autotokenizer in Dockerfile.
jrobble Jan 23, 2026
f38fc8a
Fix syntax.
jrobble Jan 23, 2026
31f1b68
Proper quotes.
jrobble Jan 23, 2026
4710b35
Use import.
jrobble Jan 23, 2026
c5d9d52
Bug fix.
jrobble Jan 23, 2026
319d1a7
Use HF_HUB_OFFLINE.
jrobble Jan 23, 2026
073003b
Use HF_HUB_OFFLINE before import.
jrobble Jan 24, 2026
e0be4ec
Merge remote-tracking branch 'origin/jrobble/qwen-speech-summarizatio…
eric-mccann-pro Jan 26, 2026
15404ee
Filter out low confidence classifiers
eric-mccann-pro Jan 26, 2026
79ffed8
Add classifier_confidence_minimum to descriptor
eric-mccann-pro Jan 26, 2026
05e12ee
Add requests to setup.cfg
eric-mccann-pro Jan 26, 2026
b935b2e
descriptor: true ==> "TRUE"
eric-mccann-pro Jan 26, 2026
65 changes: 65 additions & 0 deletions python/QwenSpeechSummarization/Dockerfile
@@ -0,0 +1,65 @@
# syntax=docker/dockerfile:1.2

#############################################################################
# NOTICE #
# #
# This software (or technical data) was produced for the U.S. Government #
# under contract, and is subject to the Rights in Data-General Clause #
# 52.227-14, Alt. IV (DEC 2007). #
# #
# Copyright 2025 The MITRE Corporation. All Rights Reserved. #
#############################################################################

#############################################################################
# Copyright 2025 The MITRE Corporation #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#############################################################################

ARG BUILD_REGISTRY
ARG BUILD_TAG=latest
FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG}

ARG RUN_TESTS=false
RUN set -x; DEPS="transformers>=4.51.0 accelerate pydantic openai jinja2"; \
if [ "${RUN_TESTS,,}" == true ]; then DEPS="$DEPS pytest"; fi; \
pip3 install --no-cache-dir $DEPS

ARG VLLM_MODEL="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8"
ENV VLLM_MODEL="${VLLM_MODEL}"

### Defaults for runtime container-wide tunables

# MAX_MODEL_LEN should match vllm container env
ENV MAX_MODEL_LEN=45000

# UPPER BOUND for splitting of input into chunks for summary of summaries agglomeration
ENV INPUT_TOKEN_CHUNK_SIZE=10000

# OVERLAP between chunks if the whole input does not fit into 1 chunk
ENV INPUT_CHUNK_TOKEN_OVERLAP=500

### END runtime container tunables

RUN --mount=target=.,readwrite \
install-component.sh; \
# make sure the tokenizer is available offline
/opt/mpf/plugin-venv/bin/python3 -c 'from qwen_speech_summarization_component.qwen_speech_summarization_component import QwenSpeechSummaryComponent; QwenSpeechSummaryComponent()'; \
if [ "${RUN_TESTS,,}" == true ]; then pytest qwen_speech_summarization_component; fi

LABEL org.label-schema.license="Apache 2.0" \
org.label-schema.name="OpenMPF Qwen Speech Summarization" \
org.label-schema.schema-version="1.0" \
org.label-schema.url="https://openmpf.github.io" \
org.label-schema.vcs-url="https://github.com/openmpf/openmpf-components" \
org.label-schema.vendor="MITRE"
58 changes: 58 additions & 0 deletions python/QwenSpeechSummarization/Dockerfile.vllm
@@ -0,0 +1,58 @@
#############################################################################
# NOTICE #
# #
# This software (or technical data) was produced for the U.S. Government #
# under contract, and is subject to the Rights in Data-General Clause #
# 52.227-14, Alt. IV (DEC 2007). #
# #
# Copyright 2025 The MITRE Corporation. All Rights Reserved. #
#############################################################################

#############################################################################
# Copyright 2025 The MITRE Corporation #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#############################################################################

FROM ubuntu:20.04 AS download_model

RUN --mount=type=tmpfs,target=/var/cache/apt \
--mount=type=tmpfs,target=/var/lib/apt/lists \
--mount=type=tmpfs,target=/tmp \
apt-get update && apt-get install --no-install-recommends -y curl ca-certificates python3-venv python3-pip python3-certifi python3-urllib3 && \
pip install huggingface_hub[cli]

ARG VLLM_MODEL="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8"
ENV VLLM_MODEL="${VLLM_MODEL}"
RUN HF_HUB_DISABLE_XET=1 hf download ${VLLM_MODEL}


FROM vllm/vllm-openai:latest
ARG VLLM_MODEL="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8"
ENV VLLM_MODEL="${VLLM_MODEL}"

USER root
RUN mkdir -p /root/.cache
COPY --chown=root:root --from=download_model /root/.cache/huggingface /root/.cache/huggingface

# default value
ENV MAX_MODEL_LEN=45000

COPY --chown=root:root vllm-entrypoint.sh /usr/bin/

ENTRYPOINT ["/usr/bin/vllm-entrypoint.sh"]

CMD [ \
"--host", "0.0.0.0",\
"--port", "11434"\
]
54 changes: 54 additions & 0 deletions python/QwenSpeechSummarization/README.md
@@ -0,0 +1,54 @@
# Overview

This folder contains source code for the OpenMPF Qwen speech summarization component.

This component requires a base image with Python 3.10+ and an `mpf_component_api` that supports `mpf.AllVideoTracksJob`.

We have tested Qwen/Qwen3-30B-A3B-Instruct-2507 on an 80GB card and Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 on a 40GB card. Both seem quite viable.

If you are daring, any OpenAI-compatible API could be substituted for vLLM and any model could replace Qwen3-30B, but these scenarios are untested and your mileage may vary.

In either case, the component assumes anonymous access to the OpenAI-API-compatible endpoint that performs the summarization.
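
To sanity-check that the endpoint is reachable, you can list the served models. This is just a quick sketch; the URL assumes the default port from `Dockerfile.vllm` and a host where the serving container is reachable as `vllm`:

```bash
# List the models served by the OpenAI-compatible endpoint (no API key required):
curl http://vllm:11434/v1/models
```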

# Inputs

- `classifiers.json`: contains definitions of subjects of interest. Each classifier is scored with a 0-1 confidence: low if the input does not include the defined subject, high if it does. For example:

```json
[
{
"Classifier": "Major League Baseball",
"Definition": "discussions regarding major league baseball teams, professional baseball players, and baseball stadiums",
"Items of Interest": "Baseball fields, baseball teams, baseball players, baseballs, baseball bats, baseball hats"
}
]
```

# Properties

- `CLASSIFIERS_FILE`: when set to an absolute path (with a valid `classifiers.json` mounted in a volume so that the file exists at that path), replaces the default `classifiers.json`. See the mount sketch after this list.
- `CLASSIFIERS_LIST`: either `ALL`, or a comma-separated list of the `Classifier` field values of the defined classifiers.
- `PROMPT_TEMPLATE`: if set, replaces the packaged `templates/prompt.jinja` with a template read from this location. It must include self-recursive summarization instructions and the Jinja placeholders `{{ classifiers }}` and `{{ input }}`.
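
As a rough sketch of how `CLASSIFIERS_FILE` fits together with a volume mount (the host path, mount point, and image tag below are placeholders; in a full OpenMPF deployment the component container is normally managed by Docker Compose rather than started by hand):

```bash
# Mount a custom classifiers file into the component container (hypothetical paths and tag):
docker run --rm \
    -v /host/path/my-classifiers.json:/opt/classifiers/classifiers.json:ro \
    openmpf_qwen_speech_summarization:latest

# Then submit the job with the property:
#   CLASSIFIERS_FILE=/opt/classifiers/classifiers.json
```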

# Docker build-args

- `VLLM_MODEL`: when building `Dockerfile.vllm` (which downloads the model during the Docker build), this is the only model that your `qwen_speech_summarization_component` will be able to use. See the build sketch below.

NOTE: if you have an internet connection at runtime, you may use the image `vllm/vllm-openai:latest` directly in lieu of building `Dockerfile.vllm`. We do not support this arrangement, but it is possible with the right command on the Docker service.
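
A minimal build sketch (image tags are placeholders; `BUILD_REGISTRY` and `BUILD_TAG` follow whatever your OpenMPF build normally uses):

```bash
# Build the component image (optionally running the unit tests during the build):
docker build . \
    --build-arg RUN_TESTS=true \
    -t openmpf_qwen_speech_summarization:latest

# Build the vLLM serving image with the model baked in at build time:
docker build . -f Dockerfile.vllm \
    --build-arg VLLM_MODEL="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8" \
    -t openmpf_qwen_vllm:latest
```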

# Environment variables

- `VLLM_MODEL`: must match the model name being served by vLLM, or be available at whichever OpenAI-API-compatible API you choose to talk to.
- `VLLM_URI`: the `base_url` of the OpenAI-API-compatible API providing access to your model. If your vLLM service is named `vllm`, this would be `http://vllm:11434/v1` (see the runtime sketch after this list).
- `MAX_MODEL_LEN`: should be defined on both the Qwen container AND the vLLM container. It is the maximum combined input+output token count you can use without erroring. We have tried 45000 for the -FP8 model on a 40GB card and 120000 for the non-quantized model on an 80GB card.
- `INPUT_TOKEN_CHUNK_SIZE`: should be about 20%-30% of your `MAX_MODEL_LEN`. It is the token size that your input will be split into during chunking before making a series of calls to the LLM.
- `INPUT_CHUNK_TOKEN_OVERLAP`: should be small and constant. If it is too small, there will be little or no overlap between chunks, which could negatively impact results on very large input tracks.
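
A minimal runtime sketch, assuming both containers share a Docker network on which the serving container is reachable as `vllm`, and using placeholder image tags and network name:

```bash
# Serve the model; MAX_MODEL_LEN here must match the value on the component container:
docker run --gpus all --network mpf_net --name vllm \
    -e MAX_MODEL_LEN=45000 \
    openmpf_qwen_vllm:latest

# Run the component, pointing it at the vLLM endpoint:
docker run --network mpf_net \
    -e VLLM_MODEL="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8" \
    -e VLLM_URI="http://vllm:11434/v1" \
    -e MAX_MODEL_LEN=45000 \
    -e INPUT_TOKEN_CHUNK_SIZE=10000 \
    -e INPUT_CHUNK_TOKEN_OVERLAP=500 \
    openmpf_qwen_speech_summarization:latest
```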

# Outputs

A list of `mpf.VideoTrack` objects (or `mpf.AudioTrack` objects, once supported).

`Track[0]` will always contain the overall summary of the input, including primary/other topics and entities.

Tracks 1 through n contain the confidence, reasoning, and name for each classifier in the intersection of the enabled classifiers and the classifiers defined in `classifiers.json`.
@@ -0,0 +1,80 @@
{
"componentName": "QwenSpeechSummarization",
"componentVersion": "10.0",
"middlewareVersion": "10.0",
"sourceLanguage": "python",
"batchLibrary": "QwenSpeechSummarization",
"environmentVariables": [],
"algorithm": {
"name": "QWENSPEECHSUMMARIZATION",
"description": "Uses Qwen3 to summarize speech",
"actionType": "DETECTION",
"trackType": "TEXT",
"requiresCollection": {
"states": []
},
"providesCollection": {
"states": [
"DETECTION",
"DETECTION_TEXT",
"DETECTION_TEXT_QWEN_SPEECH_SUMMARIZATION"
],
"properties": [
{
"name": "CLASSIFIERS_LIST",
"description": "Comma-separated list of classifiers to include in the summary output.",
"type": "STRING",
"defaultValue": "ALL"
},
{
"name": "CLASSIFIERS_FILE",
"description": "The package-relative OR absolute filename of the classifiers json file",
"type": "STRING",
"defaultValue": "classifiers.json"
},
{
"name": "ENABLE_DEBUG",
"description": "If true, each detection will include extra debug output.",
"type": "BOOLEAN",
"defaultValue": "FALSE"
},
{
"name": "PROMPT_TEMPLATE",
"description": "If set, will override the default, tested prompt template with one read from a different file",
"type": "STRING",
"defaultValue": ""
}
]
}
},
"actions": [
{
"name": "QWEN SPEECH SUMMARIZATION (WITH FF REGION) ACTION",
"description": "Performs Qwen summarization Video|Audio tracks.",
"algorithm": "QWENSPEECHSUMMARIZATION",
"properties": [
{"name": "FEED_FORWARD_ALL_TRACKS", "value": true},
{"name": "FEED_FORWARD_TYPE", "value": "REGION"}
]
}
],
"tasks": [
{
"name": "QWEN SPEECH SUMMARIZATION (WITH FF REGION) TASK",
"description": "Performs Qwen summarization Video|Audio tracks.",
"actions": [
"QWEN SPEECH SUMMARIZATION (WITH FF REGION) ACTION"
]
}
],
"pipelines": [
{
"name": "WHISPER SPEECH DETECTION WITH QWEN SUMMARIZATION PIPELINE",
"description": "Runs Whisper speech detection on audio or video and summarizes the transcript using QWEN.",
"tasks": [
"WHISPER SPEECH DETECTION TASK",
"QWEN SPEECH SUMMARIZATION (WITH FF REGION) TASK"
]
}
]
}
29 changes: 29 additions & 0 deletions python/QwenSpeechSummarization/pyproject.toml
@@ -0,0 +1,29 @@
#############################################################################
# NOTICE #
# #
# This software (or technical data) was produced for the U.S. Government #
# under contract, and is subject to the Rights in Data-General Clause #
# 52.227-14, Alt. IV (DEC 2007). #
# #
# Copyright 2025 The MITRE Corporation. All Rights Reserved. #
#############################################################################

#############################################################################
# Copyright 2025 The MITRE Corporation #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#############################################################################

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
@@ -0,0 +1,25 @@
#############################################################################
# NOTICE #
# #
# This software (or technical data) was produced for the U.S. Government #
# under contract, and is subject to the Rights in Data-General Clause #
# 52.227-14, Alt. IV (DEC 2007). #
# #
# Copyright 2025 The MITRE Corporation. All Rights Reserved. #
#############################################################################

#############################################################################
# Copyright 2025 The MITRE Corporation #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#############################################################################
@@ -0,0 +1,7 @@
[
{
"Classifier": "Major League Baseball",
"Definition": "discussions regarding major league baseball teams, professional baseball players, and baseball stadiums",
"Items of Interest": "Baseball fields, baseball teams, baseball players, baseballs, baseball bats, baseball hats"
}
]
@@ -0,0 +1,25 @@
#############################################################################
# NOTICE #
# #
# This software (or technical data) was produced for the U.S. Government #
# under contract, and is subject to the Rights in Data-General Clause #
# 52.227-14, Alt. IV (DEC 2007). #
# #
# Copyright 2025 The MITRE Corporation. All Rights Reserved. #
#############################################################################

#############################################################################
# Copyright 2025 The MITRE Corporation #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#############################################################################