-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_videoinfo.py
More file actions
182 lines (152 loc) · 5.58 KB
/
Copy pathget_videoinfo.py
File metadata and controls
182 lines (152 loc) · 5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
"""
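Build a catalog of ALL raw video files (no date filtering).

Scans every top-level day directory (named YYYYMMDD or YYYY-MM-DD) under the
video root and walks it recursively, e.g.:
    <VideoDirLocal>/<YYYYMMDD>/cam-*/**/*.mp4
The file extensions to include can be changed with --exts (default: .mp4).

Saves Parquet to (unless --outfile is given):
    <ResultDirLocal>/bbb_fileinfo/video_info_all.parquet

Columns:
    - file_name
    - full_path
    - starttime (UTC, from parse_video_fname)
    - endtime   (UTC, from parse_video_fname)
    - cam       (parsed; extra column for convenience)

Files whose names cannot be parsed are still listed; their starttime,
endtime and cam are left empty.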
"""
import os
import sys
import argparse
from datetime import datetime, timezone
from typing import Iterable, Tuple, Optional
try:
import pandas as pd
except Exception as e:
print(f"ERROR: pandas is required: {e}", file=sys.stderr)
sys.exit(1)
# bb_hpc settings + filename parser
try:
from bb_hpc import settings
from bb_binary.parsing import parse_video_fname # cam, start_dt, end_dt
except Exception as e:
print(f"ERROR: could not import required modules: {e}", file=sys.stderr)
sys.exit(1)
# -------------------------------
# Helpers
# -------------------------------
def _get_local_paths() -> Tuple[str, str]:
    """
    Return (video_root_local, resultdir_local) from bb_hpc.settings.

    Each value is looked up under its snake_case attribute first
    ('videodir_local', 'resultdir_local') and then under its CamelCase
    attribute ('VideoDirLocal', 'ResultDirLocal'); the first non-empty value
    wins.

    Exits the process with status 2, after printing an error to stderr, when
    the video root is unset or is not an existing directory, or when the
    result directory is unset. The result directory itself is not required
    to exist yet.
    """
    def first_set(*names: str) -> str:
        # First non-empty attribute among the accepted spellings, else "".
        for name in names:
            value = getattr(settings, name, "")
            if value:
                return value
        return ""

    video_root = first_set("videodir_local", "VideoDirLocal")
    resultdir = first_set("resultdir_local", "ResultDirLocal")

    if not video_root:
        print("ERROR: videodir_local is not set in bb_hpc.settings.", file=sys.stderr)
        sys.exit(2)
    if not os.path.isdir(video_root):
        print(f"ERROR: videodir_local path does not exist: {video_root}", file=sys.stderr)
        sys.exit(2)
    if not resultdir:
        print("ERROR: resultdir_local is not set in bb_hpc.settings.", file=sys.stderr)
        sys.exit(2)
    return video_root, resultdir
def _iter_all_day_dirs(video_root: str) -> Iterable[str]:
    """
    Yield paths to top-level day directories under video_root.
    (No date filtering; we scan them all.)

    A directory qualifies when its name is either 8 digits (YYYYMMDD) or
    10 characters of the form DDDD-DD-DD (YYYY-MM-DD). Only the shape of the
    name is checked, not whether it is a real calendar date. Each yielded
    path is video_root joined with the directory name.

    The primary pass uses os.scandir and does not follow symlinks. If that
    raises PermissionError, a best-effort os.listdir pass is attempted;
    directories already yielded by the first pass are not yielded again.
    """
    def is_valid_dir(name: str) -> bool:
        # Accept YYYYMMDD (8 digits) or YYYY-MM-DD (hyphens at pos 4 and 7).
        if len(name) == 8:
            return name.isdigit()
        if len(name) == 10 and name[4] == "-" and name[7] == "-":
            # Slice fixed positions instead of split('-'): a name with an
            # extra hyphen (e.g. "2020-01-0-") would split into four parts
            # and break a three-way unpack.
            return name[:4].isdigit() and name[5:7].isdigit() and name[8:].isdigit()
        return False

    already_yielded = set()
    try:
        with os.scandir(video_root) as it:
            for e in it:
                if e.is_dir(follow_symlinks=False) and is_valid_dir(e.name):
                    already_yielded.add(e.path)
                    yield e.path
    except PermissionError:
        # best-effort; skip anything the scandir pass emitted before failing
        for name in os.listdir(video_root):
            p = os.path.join(video_root, name)
            if p not in already_yielded and os.path.isdir(p) and is_valid_dir(name):
                yield p
def _iter_videos_in_day(day_dir: str, exts: Tuple[str, ...]) -> Iterable[str]:
    """
    Recursively list the files below one day directory whose names end with
    any of the given extensions.

    The comparison is case-insensitive on both sides: the extensions and the
    file names are lower-cased before matching. Each yielded path is the
    walked directory joined with the file name.
    """
    suffixes = tuple(ext.lower() for ext in exts)
    for dirpath, _subdirs, filenames in os.walk(day_dir):
        yield from (
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.lower().endswith(suffixes)
        )
# -------------------------------
# Core
# -------------------------------
def build_video_info_df(video_root: str, exts: Tuple[str, ...]) -> "pd.DataFrame":
    """
    Scan every day directory under video_root and build the video catalog.

    Parameters:
        video_root: directory that contains the per-day directories.
        exts: file extensions to include (matched case-insensitively).

    Returns:
        A DataFrame with the columns file_name, full_path, starttime,
        endtime and cam, one row per matching file. The last three come from
        parse_video_fname applied to the file name.

    Files whose names cannot be parsed are kept in the catalog with
    starttime, endtime and cam set to None. They are no longer dropped
    silently from view: their count and the first offending path are
    reported on stderr once the scan finishes.
    """
    rows = []
    unparsed = []
    n_days = 0
    n_files = 0
    for day_dir in _iter_all_day_dirs(video_root):
        n_days += 1
        for path in _iter_videos_in_day(day_dir, exts):
            n_files += 1
            fn = os.path.basename(path)
            try:
                # Parse via bb_binary utility; returns (cam, start_dt, end_dt)
                cam, start, end = parse_video_fname(fn)
            except Exception:
                # The parser's exception types are not visible here, so stay
                # best-effort: keep the row, leave the parsed fields empty,
                # and remember the file for the summary below.
                cam = start = end = None
                unparsed.append(path)
            rows.append({
                "file_name": fn,
                "full_path": path,
                "starttime": start,
                "endtime": end,
                "cam": cam,
            })
    print(f"[scan] days={n_days}, files_seen={n_files}, files_kept={len(rows)}")
    if unparsed:
        print(
            f"[scan] WARNING: {len(unparsed)} file name(s) could not be parsed; "
            f"starttime/endtime/cam left empty (first: {unparsed[0]})",
            file=sys.stderr,
        )
    cols = ["file_name", "full_path", "starttime", "endtime", "cam"]
    return pd.DataFrame(rows, columns=cols)
# -------------------------------
# CLI
# -------------------------------
def parse_args():
    """
    Parse the command line and return the resulting argparse namespace.

    Options:
        --exts     one or more file extensions to include; defaults to
                   [".mp4"].
        --outfile  explicit path for the parquet output; defaults to None.
    """
    parser = argparse.ArgumentParser(
        description="Create catalog of ALL raw videos under VideoDirLocal."
    )
    option_specs = (
        ("--exts", {
            "nargs": "+",
            "default": [".mp4"],
            "help": "File extensions to include (case-insensitive). Default: .mp4",
        }),
        ("--outfile", {
            "default": None,
            "help": "Optional explicit parquet path. Default: <RESULTDIR_LOCAL>/bbb_fileinfo/video_info_all.parquet",
        }),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
def main():
    """
    Command-line entry point: scan all videos and write the catalog.

    Reads the local video and result directories from the settings, creates
    <resultdir>/bbb_fileinfo, builds the catalog DataFrame for the requested
    extensions, and writes it as Parquet to --outfile if given, otherwise to
    <resultdir>/bbb_fileinfo/video_info_all.parquet.
    """
    args = parse_args()
    video_root, resultdir = _get_local_paths()
    cache_dir = os.path.join(resultdir, "bbb_fileinfo")
    os.makedirs(cache_dir, exist_ok=True)

    print(f"[config] VideoDirLocal = {video_root}")
    print(f"[config] ResultDirLocal= {resultdir}")
    print(f"[config] cache_dir = {cache_dir}")
    print(f"[config] exts = {args.exts}")

    df = build_video_info_df(video_root, tuple(args.exts))
    print(f"[videoinfo] collected {len(df)} files")

    out_path = args.outfile or os.path.join(cache_dir, "video_info_all.parquet")
    # A bare filename such as "--outfile catalog.parquet" has an empty
    # dirname, and os.makedirs("") raises FileNotFoundError; only create the
    # parent when there is one to create.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_parquet(out_path, index=False)
    print(f"[videoinfo] wrote {len(df)} rows -> {out_path}")
    print("✅ Done.")


if __name__ == "__main__":
    main()