Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import gradio as gr
import torch
import matplotlib.pyplot as plt
from tempfile import NamedTemporaryFile
from decord import VideoReader, cpu
from holmesvau.holmesvau_utils import load_model, generate
import os
import numpy as np
from PIL import Image

# Checkpoint locations handed to load_model() below.
mllm_path = "HolmesVAU-2B"
sampler_path = "./holmesvau/ATS/anomaly_scorer.pth"

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Loaded once at import time; analyze_video() reads these module-level objects.
model, tokenizer, generation_config, sampler = load_model(
    mllm_path, sampler_path, device
)

def save_keyframes_to_folder(vr, idx_list, folder_path="keyframes"):
    """Write the selected video frames to ``folder_path`` as PNG files.

    Args:
        vr: Indexable video reader; ``vr[idx].asnumpy()`` must yield an array
            that ``PIL.Image.fromarray`` accepts.
        idx_list: Frame indices to export, in the order they should be saved.
        folder_path: Destination directory; created if it does not exist.

    Returns:
        List of written file paths. Files are named ``frame_<k>.png`` where
        ``k`` is the position of the index inside ``idx_list`` (not the frame
        index itself).
    """
    os.makedirs(folder_path, exist_ok=True)

    def _export(position, frame_idx):
        # Decode one frame, save it, and hand back the path it was saved to.
        target = os.path.join(folder_path, f"frame_{position}.png")
        pixels = vr[frame_idx].asnumpy()
        Image.fromarray(pixels).save(target)
        return target

    return [
        _export(position, frame_idx)
        for position, frame_idx in enumerate(idx_list)
    ]

def _fallback_plot_path(candidate="error_plot.png"):
    """Return ``candidate`` if that file exists on disk, otherwise ``None``."""
    return candidate if os.path.exists(candidate) else None


def _render_score_plot(anomaly_score, frame_indices, plot_path):
    """Save the anomaly-score figure (or a flat placeholder) to ``plot_path``."""
    has_scores = anomaly_score is not None and len(anomaly_score) > 0
    fig = plt.figure(figsize=(8, 2))
    try:
        if has_scores:
            plt.plot(anomaly_score, label='Anomaly Score')
            # NOTE(review): frame indices are divided by 16 to land on the
            # "Snippet Index" axis -- presumably 16 frames per snippet; confirm
            # against the sampler.
            marker_xs = [idx / 16 for idx in frame_indices]
            plt.vlines(marker_xs, 0, 1, colors='r', linestyle="--", linewidth=1)
            plt.title("Anomaly Score Curve")
        else:
            print("[INFO] Uniform Sampling detected - generating flat anomaly plot.")
            plt.plot(np.zeros(len(frame_indices)), label='Uniform Sampled')
            marker_xs = list(range(len(frame_indices)))
            plt.vlines(marker_xs, 0, 1, colors='gray', linestyle="--", linewidth=1)
            plt.title("Uniform Sampling (No Anomaly Score)")
        plt.ylim(0, 1)
        plt.xlabel('Snippet Index')
        plt.ylabel('Anomaly Score')
        plt.tight_layout()
        plt.savefig(plot_path)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)


def analyze_video(video_file, prompt):
    """Run HolmesVAU on a video and produce the artefacts shown in the UI.

    The input is copied to a temporary ``.mp4`` file, passed to ``generate``
    together with the module-level model objects, and the frames selected by
    ``generate`` are exported as PNG keyframes. A score plot is written to
    ``anomaly_plot.png``.

    Args:
        video_file: Path of the video to analyse.
        prompt: Text prompt forwarded to ``generate``.

    Returns:
        Tuple ``(prediction_text, plot_path, keyframe_files)``.
        ``plot_path`` is ``"anomaly_plot.png"`` on success; if plotting fails
        it is ``"error_plot.png"`` when that file exists, else ``None``.
        On any other failure the tuple is
        ``("[ERROR] <message>", <fallback plot path or None>, [])``.
        This function never raises for ``Exception`` subclasses.
    """
    tmp_path = None
    try:
        with NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp_path = tmp.name
            with open(video_file, "rb") as src:
                # Copy in 1 MiB chunks so large uploads are not held in memory.
                for chunk in iter(lambda: src.read(1 << 20), b""):
                    tmp.write(chunk)

        pred, _history, frame_indices, anomaly_score = generate(
            tmp_path, prompt, model, tokenizer, generation_config, sampler,
            select_frames=12, use_ATS=True
        )

        vr = VideoReader(tmp_path, ctx=cpu(0), num_threads=1)
        try:
            keyframe_files = save_keyframes_to_folder(vr, frame_indices, "keyframes")
        finally:
            # Drop the reader before the temp file is removed below
            # (presumably it keeps the file open -- relevant on Windows).
            del vr

        plot_path = "anomaly_plot.png"
        try:
            _render_score_plot(anomaly_score, frame_indices, plot_path)
        except Exception as e:
            # A broken plot should not discard the prediction and keyframes.
            print(f"[ERROR] Plotting failed: {e}")
            plot_path = _fallback_plot_path()

        return str(pred), plot_path, keyframe_files
    except Exception as e:
        return f"[ERROR] {str(e)}", _fallback_plot_path(), []
    finally:
        # delete=False means nobody else cleans this copy up.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass

# Gradio UI: upload a video, run inference, then browse the sampled keyframes.
with gr.Blocks(title="HolmesVAU Video Anomaly Detector") as demo:
    gr.Markdown("## 📹 HolmesVAU - Video Anomaly Detection")

    with gr.Row():
        video_input = gr.Video(label="🎥 Upload a video")
        prompt_input = gr.Textbox(
            label="💬 Prompt",
            value="Could you specify the anomaly events present in the video?",
        )

    with gr.Row():
        submit_btn = gr.Button("🚀 Run Inference")

    with gr.Row():
        output_text = gr.Textbox(label="🧠 HolmesVAU Prediction")
        output_plot_img = gr.Image(label="📈 Anomaly Score Plot")

    # Keyframe browser stays hidden until a run has produced keyframes.
    with gr.Row():
        frame_slider = gr.Slider(
            label="🖼️ Keyframe Index", minimum=0, maximum=11, step=1, visible=False
        )
    with gr.Row():
        keyframe_img = gr.Image(label="🔍 Selected Keyframe", visible=False)

    # Keyframe file paths from the most recent run.
    keyframe_files_state = gr.State([])

    def update_keyframe(idx, keyframe_files):
        """Return the keyframe path at slider position ``idx``, or ``None``.

        ``None`` is returned when there are no keyframes, when ``idx`` is
        ``None``, or when ``idx`` is out of range.
        """
        if idx is None or not keyframe_files:
            return None
        position = int(idx)  # the slider value may arrive as a float
        if 0 <= position < len(keyframe_files):
            return keyframe_files[position]
        return None

    def run_and_init_slider(video, prompt):
        """Run ``analyze_video`` and configure the keyframe browser.

        Returns a 5-tuple matching the ``outputs`` of ``submit_btn.click``:
        prediction text, plot path, keyframe list (stored in state), slider
        update and keyframe-image update.
        """
        pred, plot_path, keyframe_files = analyze_video(video, prompt)

        if not keyframe_files:
            # analyze_video returns [] on failure; keep the browser hidden so
            # the error text in ``pred`` is shown instead of an IndexError.
            return (
                pred,
                plot_path,
                [],
                gr.update(visible=False),
                gr.update(visible=False, value=None),
            )

        return (
            pred,
            plot_path,
            keyframe_files,
            # Reset to the first keyframe so a stale position from an earlier
            # run cannot exceed the new maximum.
            gr.update(visible=True, maximum=len(keyframe_files) - 1, value=0),
            gr.update(visible=True, value=keyframe_files[0]),
        )

    submit_btn.click(
        fn=run_and_init_slider,
        inputs=[video_input, prompt_input],
        outputs=[
            output_text,
            output_plot_img,
            keyframe_files_state,
            frame_slider,
            keyframe_img,
        ],
    )

    frame_slider.change(
        fn=update_keyframe,
        inputs=[frame_slider, keyframe_files_state],
        outputs=keyframe_img,
    )

if __name__ == "__main__":
    # Launch the Blocks app only when this file is run as a script.
    demo.launch()
Binary file added examples/holmes_vau_demo.mp4
Binary file not shown.
2 changes: 1 addition & 1 deletion holmesvau/ATS/Temporal_Sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Temporal_Sampler():
def __init__(self, ckpt_path, device):
    """Build the anomaly scorer and load its weights onto ``device``.

    Args:
        ckpt_path: Path to the scorer checkpoint passed to ``torch.load``.
        device: Device the scorer module is moved to and the checkpoint
            tensors are mapped to.
    """
    self.device = device
    self.anomaly_scorer = URDMU().to(device)
    # Load exactly once, with map_location, so a checkpoint saved on a CUDA
    # device can also be restored on a CPU-only host.
    state_dict = torch.load(ckpt_path, map_location=self.device)
    self.anomaly_scorer.load_state_dict(state_dict)
    self.tau = 0.1

def get_anomaly_scores(self, pixel_values, model):
Expand Down
5 changes: 4 additions & 1 deletion holmesvau/holmesvau_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ def generate(video_path, prompt, model, tokenizer, generation_config, sampler, d
# anomaly-focused sampling
anomaly_score, sampled_idxs = sampler.density_aware_sample(pixel_values, model, select_frames)
sparse_pixel_values = pixel_values[sampled_idxs]
frame_indices, num_patches_list = [dense_frame_indices[i] for i in sampled_idxs], [num_patches_list[i] for i in sampled_idxs]
# frame_indices, num_patches_list = [dense_frame_indices[i] for i in sampled_idxs], [num_patches_list[i] for i in sampled_idxs]
frame_indices = [dense_frame_indices[int(i)] for i in sampled_idxs]
num_patches_list = [num_patches_list[int(i)] for i in sampled_idxs]

print('Sampled frames: ', frame_indices)
else:
# uniform sampling
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ torchvision==0.16.2
tqdm
transformers==4.37.2
yacs
pydantic==2.10.6