Skip to content
This repository was archived by the owner on Mar 3, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image bundling Python 3, ffmpeg and the pinned Python dependencies of the app.
FROM ubuntu:18.04

WORKDIR /usr/src/app

# /opt/venv/bin is put first on PATH so that, once the venv exists, every
# `python3` / `pip` invocation below (and inside the container) uses the venv.
# NOTE(review): PIP_NO_CACHE_DIR="false" looks inverted; presumably it is meant
# to disable pip's cache (older pip releases interpret "false" that way) —
# verify against the pip version in use before changing the value.
ENV LANG="C.UTF-8" LC_ALL="C.UTF-8" PATH="/opt/venv/bin:$PATH" PIP_NO_CACHE_DIR="false"

# System packages; the apt lists are removed in the same layer to keep it small.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python3 python3-pip python3-venv ffmpeg && \
    rm -rf /var/lib/apt/lists/*

# Create the venv and pin the tooling. This step does not read requirements.txt,
# so it runs before the COPY below and stays cached when the requirements change.
RUN python3 -m venv /opt/venv && \
    python3 -m pip install pip==19.2.3 pip-tools==4.0.0

# Copy only the lock file first so the dependency layer is rebuilt only when
# requirements.txt changes, not on every source edit.
COPY requirements.txt .

# Make the venv match the pinned requirements.txt in the working directory.
RUN python3 -m piptools sync

# Application sources go last; they change most often.
COPY . .
24 changes: 24 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# User-overridable settings, e.g. `make run datadir=/tmp`.

# Name (tag) of the Docker image that is built and run.
dockerimage ?= showmax/kinetics-downloader
# Dockerfile handed to `docker build -f`.
dockerfile ?= Dockerfile
# Host directory mounted at /usr/src/app by the `run` target.
# $(CURDIR) is make's built-in absolute working directory; unlike
# $(shell pwd) under `?=`, it does not fork a shell on every expansion.
srcdir ?= $(CURDIR)
# Host directory mounted at /data by the `run` target.
datadir ?= $(CURDIR)

# Build the Docker image from $(dockerfile), using the current directory as
# the build context. Expansions are quoted so values with spaces survive.
install:
	@docker build -t "$(dockerimage)" -f "$(dockerfile)" .

# Short alias for `install`.
i: install


# Rebuild the image from scratch: --pull passes the "pull base image" flag and
# --no-cache disables reuse of cached layers.
update:
	@docker build --pull --no-cache -t "$(dockerimage)" -f "$(dockerfile)" .

# Short alias for `update`.
u: update


# Start an interactive, throw-away (--rm) container with a bash entrypoint.
# $(srcdir) is mounted at /usr/src/app and $(datadir) at /data; both host
# paths are quoted so directories containing spaces are passed as one argument.
run:
	@docker run -it --rm --ipc="host" -v "$(srcdir)":/usr/src/app -v "$(datadir)":/data --entrypoint=/bin/bash "$(dockerimage)"

# Short alias for `run`.
r: run


# These targets are commands, not files; declaring them phony keeps a file
# named e.g. `install` or `run` from making the target look up to date.
.PHONY: install i run r update u
22 changes: 14 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
# Download DeepMind's Kinetics

Download all videos from DeepMind's [Kinetics dataset](https://deepmind.com/research/open-source/open-source-datasets/kinetics/).
Download all videos from DeepMind's [Kinetics-700 dataset](https://deepmind.com/research/open-source/open-source-datasets/kinetics/).
Moreover, you can use this library to extract **frames** and **sound tracks** from videos, generate metadata for training,
and pack all sound tracks into a single **tfrecords** file for faster reading.

## Requirements
Another Kinetics downloader can be found in the [ActivityNet repository](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics).

* Python >= 3.4
* youtube-dl
* ffmpeg
* gzip

Required Python packages are listed in **requirements.txt**.

## Usage

We package up the app and all its requirements in a self-contained Docker image. Use

    make

to build the Docker image and

    make run datadir=/tmp

to run the container and mount the host's `/tmp` directory at `/data` inside the container.

**WARNING:** Before you start any download from YouTube, please make sure that you have read the [YouTube Terms Of Service](https://www.youtube.com/static?template=terms) and that you comply with them. In particular, check section 5.H.

**Download all videos**:
Expand Down Expand Up @@ -161,6 +165,7 @@ underscores (e.g. blowing glass => blowing_glass).
## Contributors

* [Ondrej Biza](https://github.com/ondrejba)
* [Daniel J H](https://github.com/daniel-j-h)

## Acknowledgements

Expand All @@ -169,3 +174,4 @@ The sound to tfrecords script is based on [this tutorial](http://warmspringwinds
## References

* [[1] The Kinetics Human Action Video Dataset - W.Kay et al. (2017)](https://arxiv.org/abs/1705.06950)
* [[2] A Short Note on the Kinetics-700 Human Action Dataset - Joao Carreira et al. (2019)](https://arxiv.org/abs/1907.06987)
3 changes: 3 additions & 0 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Direct (unpinned) dependencies of the project.
# NOTE(review): requirements.txt appears to be compiled from this file with
# `pip-compile --generate-hashes`; regenerate it after editing — confirm workflow.
numpy
opencv-contrib-python-headless
youtube-dl
54 changes: 54 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#
# This file is autogenerated by pip-compile
# To update, run:
#
# pip-compile --generate-hashes
#
numpy==1.17.2 \
--hash=sha256:05dbfe72684cc14b92568de1bc1f41e5f62b00f714afc9adee42f6311738091f \
--hash=sha256:0d82cb7271a577529d07bbb05cb58675f2deb09772175fab96dc8de025d8ac05 \
--hash=sha256:10132aa1fef99adc85a905d82e8497a580f83739837d7cbd234649f2e9b9dc58 \
--hash=sha256:12322df2e21f033a60c80319c25011194cd2a21294cc66fee0908aeae2c27832 \
--hash=sha256:16f19b3aa775dddc9814e02a46b8e6ae6a54ed8cf143962b4e53f0471dbd7b16 \
--hash=sha256:3d0b0989dd2d066db006158de7220802899a1e5c8cf622abe2d0bd158fd01c2c \
--hash=sha256:438a3f0e7b681642898fd7993d38e2bf140a2d1eafaf3e89bb626db7f50db355 \
--hash=sha256:5fd214f482ab53f2cea57414c5fb3e58895b17df6e6f5bca5be6a0bb6aea23bb \
--hash=sha256:73615d3edc84dd7c4aeb212fa3748fb83217e00d201875a47327f55363cef2df \
--hash=sha256:7bd355ad7496f4ce1d235e9814ec81ee3d28308d591c067ce92e49f745ba2c2f \
--hash=sha256:7d077f2976b8f3de08a0dcf5d72083f4af5411e8fddacd662aae27baa2601196 \
--hash=sha256:a4092682778dc48093e8bda8d26ee8360153e2047826f95a3f5eae09f0ae3abf \
--hash=sha256:b458de8624c9f6034af492372eb2fee41a8e605f03f4732f43fc099e227858b2 \
--hash=sha256:e70fc8ff03a961f13363c2c95ef8285e0cf6a720f8271836f852cc0fa64e97c8 \
--hash=sha256:ee8e9d7cad5fe6dde50ede0d2e978d81eafeaa6233fb0b8719f60214cf226578 \
--hash=sha256:f4a4f6aba148858a5a5d546a99280f71f5ee6ec8182a7d195af1a914195b21a2
opencv-contrib-python-headless==4.1.1.26 \
--hash=sha256:083c1d0dce23b86c627ad8c7eddc93b19431431ea7413be78673950e8a67966d \
--hash=sha256:08db29152b2a124445e233ec90786a93150e565cdc83f37208e6ccdee87493a4 \
--hash=sha256:1545a6d521d2de1294949a9b25ac2117dae617d87cddb8415b6518c5d3f21240 \
--hash=sha256:33e75d168439c77fcea6fd983d132f4ea6fe6f873e4dde4586278965c36a8680 \
--hash=sha256:3fd6edd50bff5e50c95799e717c096d2262b3967013a24badfdd809660a0fa19 \
--hash=sha256:4a1f9c199e0c98b19bae8e03a490f0b613cfb119cd1611364cb3b6bc03c5c05d \
--hash=sha256:4eb771b366307d8259be8cbd2ad744d477ed7bd3667f767951e8fe392c526e92 \
--hash=sha256:5072d82175c41f9cc5df504ad78c9807e12e0a358a63a1b791c8cc9c0501b173 \
--hash=sha256:5576f2884bfe33c73280c4f3d76cee4c71337f5504aacf6631ae5694a9718ee5 \
--hash=sha256:58627c73e703a306391c102d79c94827ce5e5f401aef4f98ca864b4b8cb57841 \
--hash=sha256:6e21b9dd145a04b73c2e1b5ab39b65b58fb42710bb18e07223bc70685748d5e6 \
--hash=sha256:763b62ab72761c0ce3b78a0f985f3bfdcd067d573719da297d5826824acd29f5 \
--hash=sha256:82981868a3ce8fb6b1f8c332deb86779567fcacc98026d18d3fac11503305760 \
--hash=sha256:972787b61efcaf0c2d833e83601bc06511d8afe17c444e643f6e4b237d564157 \
--hash=sha256:a135288b970165ffe9c4ad571c11d9f140b0e9ef53d6cb49d275ff309715df55 \
--hash=sha256:ab8f0d900f1a0a88a7135050876da15fd8d0e023224e8100839cb3946980afbc \
--hash=sha256:b03886e5eb1b84126b8ba38fed118b272941044c1b0d15f04550130509c1d6cd \
--hash=sha256:b3a4cb11fe8f389278143d732b1bc1237e2ed4b373896a39018ac3d2ebb31069 \
--hash=sha256:b65e99712fbf927237a7a9207ce4166c93c6bec21a7203bd08b08ef9b937501e \
--hash=sha256:cbd9e52c1de91a40e294b3b96fb4c3758c133e4cc92b985beaeea9bde3bea3c5 \
--hash=sha256:d713c76569f44768fd4843bc9dcb227cba30407a2365b042b88e32d23a1ecc55 \
--hash=sha256:db33f3cdd5d59b8a6ab8dea544ef6ecbef2a448e579ac1ff3523074eedc86f05 \
--hash=sha256:defdcc4ca6b86f9032e61258e6846ce786c64f586b10a70feda7a1218a1e3378 \
--hash=sha256:e3f8a7c03ab35c98d402f2758ab88fcb9f9003404eec8085096136976536adc7 \
--hash=sha256:eca35aca76e7e1debd051083399bbf8319dfdb47ca13df56b0d8acb5c2215a22 \
--hash=sha256:f9d57c94410e91af940f331aa9351065ba9d470d05646b8fd289da1170051bd2 \
--hash=sha256:ffec278ef8c6a0341b656dd967c2109c861e39106b0067583756575c54c4caf2
youtube-dl==2019.9.12.1 \
--hash=sha256:6a42947dc0d9f26881260caf7d03dab0e53b2448e141a20566b57b6b388a5153 \
--hash=sha256:d61dd64e81a4cc026726b25981faf8ef8453363598483d51f7dc6f6d5580a78f