Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/docker-compose-reg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ services:
- ./start_stream.sh:/home/pipeline-server/src/start_stream.sh

ClientGst:
image: iotgdevcloud/dlstreamer:latest
image: intel/pipeline-runner-asc:latest
deploy:
mode: replicated
replicas: ${PIPELINE_COUNT:-1}
Expand All @@ -46,13 +46,20 @@ services:
- RTSP_SERVER=${RTSP_SERVER}
- RTSP_PATH=${RTSP_PATH}
- RENDER_MODE=${RENDER_MODE}
# Latency and inference configuration - allow shell overrides
- LOW_LATENCY=${LOW_LATENCY:-0}
- MEDIUM_LATENCY=${MEDIUM_LATENCY:-0}
- INFERENCE_INTERVAL=${INFERENCE_INTERVAL:-1}
- BATCH_SIZE_DETECT=${BATCH_SIZE_DETECT:-1}
- BATCH_SIZE_CLASSIFY=${BATCH_SIZE_CLASSIFY:-1}
volumes:
- ${RESULTS_DIR:-../results}:/tmp/results
- ../performance-tools/sample-media:/home/pipeline-server/sample-media
- ~/.Xauthority:/home/dlstreamer/.Xauthority
- /tmp/.X11-unix:/tmp/.X11-unix
- ~/.cl-cache:/home/pipeline-server/.cl-cache
- ./res/:/home/pipeline-server/envs
- ./pipelines:/home/pipeline-server/pipelines
- ./extensions:/home/pipeline-server/extensions
- ${RETAIL_USE_CASE_ROOT:-}/models:/home/pipeline-server/models
restart: on-failure
restart: on-failure
12 changes: 11 additions & 1 deletion src/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ services:
- ./start_stream.sh:/home/pipeline-server/src/start_stream.sh

ClientGst:
# Note: Latency benchmarks were run using a locally-built image (pipeline-runner-asc:latest)
# based on DLStreamer 2025.0.1-ubuntu24 with Intel NPU drivers for Lunar Lake.
# Build with: make build (uses Dockerfile in this repo)
image: dlstreamer:dev
deploy:
mode: replicated
Expand All @@ -46,13 +49,20 @@ services:
- RTSP_SERVER=${RTSP_SERVER}
- RTSP_PATH=${RTSP_PATH}
- RENDER_MODE=${RENDER_MODE}
# Latency and inference configuration - allow shell overrides
- LOW_LATENCY=${LOW_LATENCY:-0}
- MEDIUM_LATENCY=${MEDIUM_LATENCY:-0}
- INFERENCE_INTERVAL=${INFERENCE_INTERVAL:-1}
- BATCH_SIZE_DETECT=${BATCH_SIZE_DETECT:-1}
- BATCH_SIZE_CLASSIFY=${BATCH_SIZE_CLASSIFY:-1}
volumes:
- ${RESULTS_DIR:-../results}:/tmp/results
- ../performance-tools/sample-media:/home/pipeline-server/sample-media
- ~/.Xauthority:/home/dlstreamer/.Xauthority
- /tmp/.X11-unix:/tmp/.X11-unix
- ~/.cl-cache:/home/pipeline-server/.cl-cache
- ./res/:/home/pipeline-server/envs
- ./pipelines:/home/pipeline-server/pipelines
- ./extensions:/home/pipeline-server/extensions
- ${RETAIL_USE_CASE_ROOT:-}/models:/home/pipeline-server/models
restart: on-failure
restart: on-failure
78 changes: 60 additions & 18 deletions src/pipelines/obj_detection_age_prediction.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,48 @@ OBJECT_DETECTION_DEVICE="${OBJECT_DETECTION_DEVICE:=$DEVICE}"
OBJECT_CLASSIFICATION_DEVICE="${OBJECT_CLASSIFICATION_DEVICE:=$CLASSIFICATION_DEVICE}"
FACE_DETECTION_DEVICE="${FACE_DETECTION_DEVICE:=$DEVICE}"
AGE_CLASSIFICATION_DEVICE="${AGE_CLASSIFICATION_DEVICE:=$CLASSIFICATION_DEVICE}"
# Support INT8 models for NPU compatibility
FACE_DETECTION_MODEL="${FACE_DETECTION_MODEL:=/home/pipeline-server/models/face_detection/FP16/face-detection-retail-0004.xml}"
AGE_PREDICTION_MODEL="${AGE_PREDICTION_MODEL:=/home/pipeline-server/models/age_prediction/FP16/age-gender-recognition-retail-0013.xml}"
PRE_PROCESS="${PRE_PROCESS:=""}"
# Separate inference options for object detection and face detection pipelines
# Use default only if variable is unset (not if it's empty string)
if [ -z "${FACE_DETECTION_OPTIONS+x}" ]; then
FACE_DETECTION_OPTIONS="$DETECTION_OPTIONS"
fi
if [ -z "${AGE_CLASSIFICATION_OPTIONS+x}" ]; then
AGE_CLASSIFICATION_OPTIONS="$CLASSIFICATION_OPTIONS"
fi

# Queue optimization for low latency
# Set LOW_LATENCY=1 to reduce queue sizes and minimize end-to-end latency (aggressive)
# Set MEDIUM_LATENCY=1 for production-realistic settings (balanced latency vs robustness)
# Set DROP_OLD_FRAMES=1 to always process most recent frames (drops old frames when queue is full)
if [ "$LOW_LATENCY" == "1" ]; then
if [ "$DROP_OLD_FRAMES" == "1" ]; then
QUEUE_PARAMS="max-size-buffers=3 max-size-time=100000000 leaky=downstream"
echo "LOW-LATENCY MODE + DROP OLD FRAMES: Always processing most recent frames (max-size-buffers=3, leaky=downstream)"
else
QUEUE_PARAMS="max-size-buffers=3 max-size-time=100000000"
echo "LOW-LATENCY MODE: Queue sizes optimized (max-size-buffers=3, max-size-time=0.1s)"
fi
elif [ "$MEDIUM_LATENCY" == "1" ]; then
if [ "$DROP_OLD_FRAMES" == "1" ]; then
QUEUE_PARAMS="max-size-buffers=10 max-size-time=500000000 leaky=downstream"
echo "MEDIUM-LATENCY MODE + DROP OLD FRAMES: Always processing most recent frames (max-size-buffers=10, max-size-time=0.5s, leaky=downstream)"
else
QUEUE_PARAMS="max-size-buffers=10 max-size-time=500000000"
echo "MEDIUM-LATENCY MODE: Production-realistic queue sizes (max-size-buffers=10, max-size-time=0.5s)"
fi
else
QUEUE_PARAMS=""
echo "STANDARD MODE: Using default queue sizes"
fi

# Inference interval optimization
# Set INFERENCE_INTERVAL to control frame processing (default=3, 1=every frame)
INFERENCE_INTERVAL="${INFERENCE_INTERVAL:-3}"
echo "INFERENCE INTERVAL: Processing every ${INFERENCE_INTERVAL} frame(s)"

if [ "$RENDER_MODE" == "1" ]; then
OUTPUT="gvawatermark ! videoconvert ! fpsdisplaysink video-sink=autovideosink text-overlay=false signal-fps-measurements=true name=obj_fps_sink"
Expand All @@ -35,23 +76,23 @@ fi
echo "Running object detection pipeline on $DEVICE with detection batch size = $BATCH_SIZE_DETECT and classification batch size = $BATCH_SIZE_CLASSIFY"
echo "Running age prediction pipeline on $AGE_PREDICTION_VIDEO"

gstLaunchCmd="GST_DEBUG=\"GST_TRACER:7\" GST_TRACERS='latency_tracer(flags=pipeline)' gst-launch-1.0 --verbose \
gstLaunchCmd="GST_DEBUG=\"GST_TRACER:7\" GST_TRACERS='latency_tracer(flags=pipeline+element)' gst-launch-1.0 --verbose \
$inputsrc_oc1 ! $DECODE \
! queue \
! queue $QUEUE_PARAMS \
! gvadetect batch-size=$BATCH_SIZE_DETECT \
model-instance-id=odmodel \
name=object_detection \
model=/home/pipeline-server/models/object_detection/yolo11n/INT8/yolo11n.xml \
threshold=0.5 \
inference-interval=3 \
inference-interval=$INFERENCE_INTERVAL \
scale-method=fast \
device=$OBJECT_DETECTION_DEVICE \
$PRE_PROCESS $DETECTION_OPTIONS \
! queue \
! queue $QUEUE_PARAMS \
! gvatrack \
name=object_tracking \
tracking-type=zero-term-imageless \
! queue \
! queue $QUEUE_PARAMS \
! gvaclassify batch-size=$BATCH_SIZE_CLASSIFY \
model-instance-id=classifier \
labels=/home/pipeline-server/models/object_classification/efficientnet-b0/INT8/imagenet_2012.txt \
Expand All @@ -62,45 +103,46 @@ gstLaunchCmd="GST_DEBUG=\"GST_TRACER:7\" GST_TRACERS='latency_tracer(flags=pipel
inference-region=1 \
object-class=object \
reclassify-interval=1 \

$CLASSIFICATION_PRE_PROCESS $CLASSIFICATION_OPTIONS \
! gvametaconvert \
! tee name=t_obj \
t_obj. ! queue ! $OUTPUT \
t_obj. ! queue ! gvametapublish name=obj_destination file-format=json-lines file-path=/tmp/results/rs_obj\$cid.jsonl ! fakesink sync=false async=false \
t_obj. ! queue $QUEUE_PARAMS ! $OUTPUT \
t_obj. ! queue $QUEUE_PARAMS ! gvametapublish name=obj_destination file-format=json-lines file-path=/tmp/results/rs_obj\$cid.jsonl ! fakesink sync=false async=false \
\
$inputsrc_ap1 ! $DECODE \
! queue \
! queue $QUEUE_PARAMS \
! gvadetect batch-size=$BATCH_SIZE_DETECT \
model-instance-id=facemodel \
name=face_detection \
model=/home/pipeline-server/models/face_detection/FP16/face-detection-retail-0004.xml \
model=$FACE_DETECTION_MODEL \
model-proc=/home/pipeline-server/models/face_detection/face-detection-retail-0004.json \
inference-interval=3 \
inference-interval=$INFERENCE_INTERVAL \
scale-method=fast \
inference-region=full-frame \
threshold=0.5 \
device=$FACE_DETECTION_DEVICE \
$PRE_PROCESS $DETECTION_OPTIONS \
! queue \
$PRE_PROCESS $FACE_DETECTION_OPTIONS \
! queue $QUEUE_PARAMS \
! gvatrack \
name=face_tracking \
tracking-type=zero-term-imageless \
! queue \
! queue $QUEUE_PARAMS \
! gvaclassify batch-size=$BATCH_SIZE_CLASSIFY \
model-instance-id=age_classifier \
model=/home/pipeline-server/models/age_prediction/FP16/age-gender-recognition-retail-0013.xml \
model=$AGE_PREDICTION_MODEL \
model-proc=/home/pipeline-server/models/age_prediction/age-gender-recognition-retail-0013.json \
device=$AGE_CLASSIFICATION_DEVICE \
name=age_classification \
inference-region=roi-list \
object-class=face \
reclassify-interval=1 \
! queue \
$AGE_CLASSIFICATION_OPTIONS \
! queue $QUEUE_PARAMS \
! gvametaconvert \
! tee name=t \
t. ! queue ! $AGE_OUTPUT \
t. ! queue ! gvametapublish name=destination file-format=json-lines file-path=/tmp/results/rs_age\$cid.jsonl ! fakesink sync=false async=false \
t. ! queue $QUEUE_PARAMS ! $AGE_OUTPUT \
t. ! queue $QUEUE_PARAMS ! gvametapublish name=destination file-format=json-lines file-path=/tmp/results/rs_age\$cid.jsonl ! fakesink sync=false async=false \
2>&1 | tee /tmp/results/gst-launch_\$cid.log \
| (stdbuf -oL awk '
BEGIN {
Expand Down
20 changes: 20 additions & 0 deletions src/res/npu-gpu-flip.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Environment profile: NPU/GPU split for the retail pipeline.
# Routes the heavy object-detection model to the NPU and keeps the lighter
# classification / age-prediction models on the GPU.
# NOTE(review): the ${VAR:-default} expansions below (BATCH_SIZE_*) only work
# if this file is sourced by a shell; a docker-compose `env_file:` parser would
# pass the literal text through unexpanded — confirm how the consumer loads it.
# VA-API hardware decode chain; double quotes inside single quotes keep the
# caps string "video/x-raw(memory:VAMemory)" intact when shell-sourced.
DECODE='h264parse ! vah264dec ! vapostproc ! "video/x-raw(memory:VAMemory)"'
OCR_DEVICE=GPU
# Zero-copy VA surface sharing between decode and inference (GPU path).
PRE_PROCESS=pre-process-backend=va-surface-sharing
# Object detection pipeline on NPU (testing heavy workload on NPU)
DEVICE=NPU
OBJECT_DETECTION_DEVICE=NPU
OBJECT_CLASSIFICATION_DEVICE=GPU
# Age prediction pipeline on GPU (lighter models back to GPU)
FACE_DETECTION_DEVICE=GPU
AGE_CLASSIFICATION_DEVICE=GPU
CLASSIFICATION_DEVICE=GPU
CLASSIFICATION_PRE_PROCESS=pre-process-backend=va-surface-sharing
# Batch sizes default to 1 but honour values already exported in the
# environment (requires shell sourcing — see note above).
BATCH_SIZE_DETECT=${BATCH_SIZE_DETECT:-1}
BATCH_SIZE_CLASSIFY=${BATCH_SIZE_CLASSIFY:-1}
# NPU doesn't support GPU-specific options - leave empty for object detection
DETECTION_OPTIONS=""
# GPU classification options for object classification and age pipeline
# (presumably consumed as gvaclassify/gvadetect element properties — verify
# against the pipeline scripts that expand these variables).
CLASSIFICATION_OPTIONS="ie-config=GPU_THROUGHPUT_STREAMS=2 nireq=2 reclassify-interval=1"
FACE_DETECTION_OPTIONS="ie-config=GPU_THROUGHPUT_STREAMS=2 nireq=2"
AGE_CLASSIFICATION_OPTIONS="ie-config=GPU_THROUGHPUT_STREAMS=2 nireq=2 reclassify-interval=1"
Loading