-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_data.sh
24 lines (19 loc) · 911 Bytes
/
get_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/bin/bash
#
# Fetches the data this project needs into the current directory:
#   1. the modified ArXiv dataset (a zip hosted on Google Drive), extracted
#      in place;
#   2. the pre-trained GloVe word embeddings, extracted into ./embeddings.
#
# Usage: ./get_data.sh
# Requires: curl, awk, unzip (installed through apt-get on Linux if missing).

# Abort on the first failing command, unset variable or failed pipeline stage,
# so a broken download is never unzipped and then deleted.
set -euo pipefail

## Step 1: Download the modified ArXiv dataset from Google Drive
fileid="1rJeEYJmpqhNOgOIfB3B2yxraL2WEsV4g"
filename="arxiv.zip"

# Keep the Google Drive cookies in a private, unpredictable temp file and
# remove it on every exit path.
cookie_jar="$(mktemp)"
trap 'rm -f -- "${cookie_jar}"' EXIT

# The first request only exists to populate the cookie jar; the second request
# passes the last field of the cookie line matching "download" back as the
# confirm parameter.
curl -fsS -L -c "${cookie_jar}" \
  "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
confirm_token="$(awk '/download/ {print $NF}' "${cookie_jar}")"
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page under the name of the zip file.
curl -fL -b "${cookie_jar}" \
  "https://drive.google.com/uc?export=download&confirm=${confirm_token}&id=${fileid}" \
  -o "${filename}"

## Step 2: Install unzip if applicable and unzip the file
if ! command -v unzip > /dev/null; then
  if [[ "${OSTYPE}" == linux-gnu* ]]; then
    sudo apt-get -y install unzip
  else
    printf '%s\n' "error: unzip is required but was not found" >&2
    exit 1
  fi
fi
unzip "${filename}"

# Step 3: Delete the zip file and the MACOS specific files that were included
rm -- "${filename}"
# Archives created on macOS conventionally keep their metadata in a top-level
# __MACOSX directory; -f makes this a no-op when the directory is absent.
rm -rf -- __MACOSX

# Step 4: Download the pre-trained GloVe word embeddings
curl -fL https://nlp.stanford.edu/data/glove.6B.zip -o glove.6B.zip

# Step 5: make the dir, unzip it to this dir, and delete the zip file
# -p keeps the script re-runnable when the directory already exists.
mkdir -p embeddings
unzip "glove.6B.zip" -d embeddings
rm -- glove.6B.zip