diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..48979c3
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,38 @@
+# Image for the Kinetics downloader: Ubuntu 18.04 with Python 3, ffmpeg and the
+# packages pinned in requirements.txt installed into a virtualenv at /opt/venv.
+FROM ubuntu:18.04
+
+WORKDIR /usr/src/app
+
+# /opt/venv/bin comes first on PATH so that, once the virtualenv exists,
+# "python3" and "pip" resolve to it in later build steps and at run time.
+# NOTE(review): PIP_NO_CACHE_DIR="false" reads as inverted; the pinned pip is
+# understood to treat any boolean value here as "disable the cache" -- confirm
+# against the pip documentation before changing it.
+ENV LANG="C.UTF-8" LC_ALL="C.UTF-8" PATH="/opt/venv/bin:$PATH" PIP_NO_CACHE_DIR="false"
+
+# Update, install and clean the apt lists in one layer so the package index is
+# neither stale nor left behind in the image.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+      ffmpeg \
+      python3 \
+      python3-pip \
+      python3-venv && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy only the lock file first so the dependency layers below stay cached
+# until requirements.txt changes.
+COPY requirements.txt .
+
+# Create the virtualenv, then bootstrap pip and pip-tools with the virtualenv's
+# own interpreter. It is addressed by absolute path because /opt/venv/bin does
+# not exist yet when this shell first looks up "python3"; a bare "python3"
+# after "&&" may still resolve to the system interpreter (the shell can
+# remember where it found a command) and install the tools outside /opt/venv.
+RUN python3 -m venv /opt/venv && \
+    /opt/venv/bin/python3 -m pip install pip==19.2.3 pip-tools==4.0.0
+
+# Install the hash-pinned packages from requirements.txt into the virtualenv.
+RUN /opt/venv/bin/python3 -m piptools sync
+
+COPY . .
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c71d9fb
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+# Overridable settings, e.g. `make run datadir=/tmp`.
+dockerimage ?= showmax/kinetics-downloader
+dockerfile ?= Dockerfile
+srcdir ?= $(shell pwd)
+datadir ?= $(shell pwd)
+
+# Build the image (first target, so a bare `make` runs it).
+install:
+	@docker build -t $(dockerimage) -f $(dockerfile) .
+
+i: install
+
+
+# Rebuild from scratch: re-pull the base image and ignore the layer cache.
+update:
+	@docker build -t $(dockerimage) -f $(dockerfile) . --pull --no-cache
+
+u: update
+
+
+# Start an interactive bash shell in a throw-away container with srcdir mounted
+# at /usr/src/app and datadir mounted at /data.
+run:
+	@docker run -it --rm --ipc="host" -v $(srcdir):/usr/src/app -v $(datadir):/data --entrypoint=/bin/bash $(dockerimage)
+
+r: run
+
+
+.PHONY: install i run r update u
diff --git a/README.md b/README.md
index 75bc3a1..18b6054 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,24 @@
 # Download DeepMind's Kinetics
 
-Download all videos from DeepMind's [Kinetics dataset](https://deepmind.com/research/open-source/open-source-datasets/kinetics/).
+Download all videos from DeepMind's [Kinetics-700 dataset](https://deepmind.com/research/open-source/open-source-datasets/kinetics/).
 Moreover, you can use this library to extract **frames** and **sound track** from videos, generate metadata for training and pack all sound tracks into a single **tfrecords** file for faster reading.
 
-## Requirements
+Another Kinetics downloader can be found in the [ActivityNet repository](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics).
 
-* Python >= 3.4 -* youtube-dl -* ffmpeg -* gzip - -Required Python packages are listed in **requirements.txt**. ## Usage +We package up the app and all its requirements in a self-contained Docker image. Use + + make + +to build the docker image and + + make run datadir=/tmp + +to run the container and mount the host's `/tmp` directory at `/data` inside the container. + **WARNING:** Before you start any download from YouTube, please be sure, that you have checked [YouTube Terms Of Service](https://www.youtube.com/static?template=terms) and you are compliant. Especially check section 5.H. **Download all videos**: @@ -161,6 +165,7 @@ underscores (e.g. blowing glass => blowing_glass). ## Contributors * [Ondrej Biza](https://github.com/ondrejba) +* [Daniel J H](https://github.com/daniel-j-h) ## Acknowledgements @@ -169,3 +174,4 @@ The sound to tfrecords script is based on [this tutorial](http://warmspringwinds ## References * [[1] The Kinetics Human Action Video Dataset - W.Kay et al. (2017)](https://arxiv.org/abs/1705.06950) +* [[2] A Short Note on the Kinetics-700 Human Action Dataset - Joao Carreira et al. 
(2019)](https://arxiv.org/abs/1907.06987) diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..354044d --- /dev/null +++ b/requirements.in @@ -0,0 +1,3 @@ +numpy +opencv-contrib-python-headless +youtube-dl diff --git a/requirements.txt b/requirements.txt index e69de29..7275007 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,54 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --generate-hashes +# +numpy==1.17.2 \ + --hash=sha256:05dbfe72684cc14b92568de1bc1f41e5f62b00f714afc9adee42f6311738091f \ + --hash=sha256:0d82cb7271a577529d07bbb05cb58675f2deb09772175fab96dc8de025d8ac05 \ + --hash=sha256:10132aa1fef99adc85a905d82e8497a580f83739837d7cbd234649f2e9b9dc58 \ + --hash=sha256:12322df2e21f033a60c80319c25011194cd2a21294cc66fee0908aeae2c27832 \ + --hash=sha256:16f19b3aa775dddc9814e02a46b8e6ae6a54ed8cf143962b4e53f0471dbd7b16 \ + --hash=sha256:3d0b0989dd2d066db006158de7220802899a1e5c8cf622abe2d0bd158fd01c2c \ + --hash=sha256:438a3f0e7b681642898fd7993d38e2bf140a2d1eafaf3e89bb626db7f50db355 \ + --hash=sha256:5fd214f482ab53f2cea57414c5fb3e58895b17df6e6f5bca5be6a0bb6aea23bb \ + --hash=sha256:73615d3edc84dd7c4aeb212fa3748fb83217e00d201875a47327f55363cef2df \ + --hash=sha256:7bd355ad7496f4ce1d235e9814ec81ee3d28308d591c067ce92e49f745ba2c2f \ + --hash=sha256:7d077f2976b8f3de08a0dcf5d72083f4af5411e8fddacd662aae27baa2601196 \ + --hash=sha256:a4092682778dc48093e8bda8d26ee8360153e2047826f95a3f5eae09f0ae3abf \ + --hash=sha256:b458de8624c9f6034af492372eb2fee41a8e605f03f4732f43fc099e227858b2 \ + --hash=sha256:e70fc8ff03a961f13363c2c95ef8285e0cf6a720f8271836f852cc0fa64e97c8 \ + --hash=sha256:ee8e9d7cad5fe6dde50ede0d2e978d81eafeaa6233fb0b8719f60214cf226578 \ + --hash=sha256:f4a4f6aba148858a5a5d546a99280f71f5ee6ec8182a7d195af1a914195b21a2 +opencv-contrib-python-headless==4.1.1.26 \ + --hash=sha256:083c1d0dce23b86c627ad8c7eddc93b19431431ea7413be78673950e8a67966d \ + 
--hash=sha256:08db29152b2a124445e233ec90786a93150e565cdc83f37208e6ccdee87493a4 \ + --hash=sha256:1545a6d521d2de1294949a9b25ac2117dae617d87cddb8415b6518c5d3f21240 \ + --hash=sha256:33e75d168439c77fcea6fd983d132f4ea6fe6f873e4dde4586278965c36a8680 \ + --hash=sha256:3fd6edd50bff5e50c95799e717c096d2262b3967013a24badfdd809660a0fa19 \ + --hash=sha256:4a1f9c199e0c98b19bae8e03a490f0b613cfb119cd1611364cb3b6bc03c5c05d \ + --hash=sha256:4eb771b366307d8259be8cbd2ad744d477ed7bd3667f767951e8fe392c526e92 \ + --hash=sha256:5072d82175c41f9cc5df504ad78c9807e12e0a358a63a1b791c8cc9c0501b173 \ + --hash=sha256:5576f2884bfe33c73280c4f3d76cee4c71337f5504aacf6631ae5694a9718ee5 \ + --hash=sha256:58627c73e703a306391c102d79c94827ce5e5f401aef4f98ca864b4b8cb57841 \ + --hash=sha256:6e21b9dd145a04b73c2e1b5ab39b65b58fb42710bb18e07223bc70685748d5e6 \ + --hash=sha256:763b62ab72761c0ce3b78a0f985f3bfdcd067d573719da297d5826824acd29f5 \ + --hash=sha256:82981868a3ce8fb6b1f8c332deb86779567fcacc98026d18d3fac11503305760 \ + --hash=sha256:972787b61efcaf0c2d833e83601bc06511d8afe17c444e643f6e4b237d564157 \ + --hash=sha256:a135288b970165ffe9c4ad571c11d9f140b0e9ef53d6cb49d275ff309715df55 \ + --hash=sha256:ab8f0d900f1a0a88a7135050876da15fd8d0e023224e8100839cb3946980afbc \ + --hash=sha256:b03886e5eb1b84126b8ba38fed118b272941044c1b0d15f04550130509c1d6cd \ + --hash=sha256:b3a4cb11fe8f389278143d732b1bc1237e2ed4b373896a39018ac3d2ebb31069 \ + --hash=sha256:b65e99712fbf927237a7a9207ce4166c93c6bec21a7203bd08b08ef9b937501e \ + --hash=sha256:cbd9e52c1de91a40e294b3b96fb4c3758c133e4cc92b985beaeea9bde3bea3c5 \ + --hash=sha256:d713c76569f44768fd4843bc9dcb227cba30407a2365b042b88e32d23a1ecc55 \ + --hash=sha256:db33f3cdd5d59b8a6ab8dea544ef6ecbef2a448e579ac1ff3523074eedc86f05 \ + --hash=sha256:defdcc4ca6b86f9032e61258e6846ce786c64f586b10a70feda7a1218a1e3378 \ + --hash=sha256:e3f8a7c03ab35c98d402f2758ab88fcb9f9003404eec8085096136976536adc7 \ + --hash=sha256:eca35aca76e7e1debd051083399bbf8319dfdb47ca13df56b0d8acb5c2215a22 \ + 
--hash=sha256:f9d57c94410e91af940f331aa9351065ba9d470d05646b8fd289da1170051bd2 \ + --hash=sha256:ffec278ef8c6a0341b656dd967c2109c861e39106b0067583756575c54c4caf2 +youtube-dl==2019.9.12.1 \ + --hash=sha256:6a42947dc0d9f26881260caf7d03dab0e53b2448e141a20566b57b6b388a5153 \ + --hash=sha256:d61dd64e81a4cc026726b25981faf8ef8453363598483d51f7dc6f6d5580a78f