$ sudo apt-get remove docker docker-engine docker.io
$ sudo apt-get update
$ sudo apt-get install apt-transport-https ca-certificates curl software-properties-common
$ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
$ sudo apt-key fingerprint 0EBFCD88
$ sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
$ sudo apt-get update
$ sudo apt-get install docker-ce
$ sudo docker run hello-world
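Optionally, add your user to the docker group so Docker commands can be run without sudo (you must log out and back in for the group change to take effect):
$ sudo usermod -aG docker $USER  # log out and back in afterwards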
$ sudo curl -L "https://github.com/docker/compose/releases/download/1.23.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
$ sudo chmod +x /usr/local/bin/docker-compose
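A quick check that the binary is on the PATH and executable:
$ docker-compose --version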
$ sudo apt-get install -y nvidia-docker2 nvidia-container-toolkit
Set the NVIDIA runtime as Docker's default runtime by editing /etc/docker/daemon.json.
From:
{
    "runtimes": {
        "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
To:
{
    "default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
$ sudo service docker restart
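After the restart, you can confirm that the nvidia runtime is registered and set as the default (docker info lists the configured runtimes):
$ sudo docker info | grep -i runtime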
$ docker-compose build --parallel
$ docker-compose up -d
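Check that all services came up; each container should be listed with state Up:
$ docker-compose ps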
| Application | URL |
|---|---|
| Hadoop NameNode | http://localhost:9870 |
| Hadoop Cluster (YARN) | http://localhost:8088 |
| Hadoop HDFS | hdfs://localhost:9000 |
| Hadoop WebHDFS | http://localhost:14000/webhdfs/v1 |
| HiveServer2 | http://localhost:10000 |
| Hue | http://localhost:8888 (username: hue, password: secret) |
| Spark Master UI | http://localhost:4080 |
| Spark Jobs | http://localhost:4040 |
| Livy | http://localhost:8998 |
| Jupyter Notebook | http://localhost:8899 |
| Airflow | http://localhost:8080 (username: airflow, password: airflow) |
| Flower | http://localhost:8555 |
docker exec -it hadoop-master bash
hadoop fs -ls /
hadoop fs -mkdir /dados
hadoop fs -ls /
hadoop fs -ls /dados
hadoop fs -mkdir /dados/bigdata
hadoop fs -ls /dados
hadoop fs -rm -r /dados/bigdata
hadoop fs -ls /dados
cd /root
ls
hadoop fs -mkdir /dados/bigdata
hadoop fs -put /var/log/alternatives.log /dados/bigdata
hadoop fs -ls /dados/bigdata
hadoop fs -cp /dados/bigdata/alternatives.log /dados/bigdata/alternatives2.log
hadoop fs -ls /dados/bigdata
hadoop fs -cat /dados/bigdata/alternatives.log
hadoop fs -mkdir /user/hue
hadoop fs -ls /user/hue
hadoop fs -chmod 777 /user/hue
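The reverse of -put is -get, which copies a file from HDFS back to the local filesystem (here /tmp is just an example destination):
hadoop fs -get /dados/bigdata/alternatives.log /tmp/alternatives.log  # /tmp is an example destination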
docker exec -it hadoop-master bash
hive
> show databases;
> use default;
> show tables;
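As a quick smoke test, you can create a throwaway table, list it, and drop it again (the table name teste is only an example):
> create table teste (id int, nome string);  -- "teste" is just an example name
> show tables;
> drop table teste;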
Documentation: https://spark.apache.org/docs/latest/api/python/pyspark.sql.html?highlight=read%20csv
# Access the Hadoop NameNode container
docker exec -it hadoop-master bash
# Download ENEM datasets: http://inep.gov.br/microdados
# create spark folder in HDFS
hadoop fs -mkdir /user/spark/
# Data ingestion in HDFS
hadoop fs -put MICRODADOS_ENEM_2018.csv /user/spark/
hadoop fs -put MICRODADOS_ENEM_2017.csv /user/spark/
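# Confirm the ingestion by listing the target folder; both CSV files should appear
hadoop fs -ls /user/spark/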
docker exec -it spark-master bash
spark-shell
val df = spark.read.format("csv").option("sep", ";").option("inferSchema", "true").option("header", "true").load("hdfs://hadoop-master:9000/user/spark/MICRODADOS_ENEM_2018.csv")
df.printSchema()
df.groupBy("IN_CEGUEIRA").count().show()
df.groupBy("NU_IDADE").count().sort(asc("NU_IDADE")).show(100, false)
docker exec -it kafka-broker1 bash
kafka-topics.sh --create --zookeeper zookeeper:2181 --replication-factor 1 --partitions 1 --topic test
kafka-topics.sh --zookeeper zookeeper:2181 --list
kafka-console-producer.sh --bootstrap-server kafka-broker1:9091 --topic test
>Hello
docker exec -it kafka-broker2 bash
kafka-console-consumer.sh --bootstrap-server kafka-broker1:9091 --from-beginning --topic test
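Before deleting the topic, you can inspect its partition count and replication factor:
kafka-topics.sh --zookeeper zookeeper:2181 --describe --topic test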
kafka-topics.sh --zookeeper zookeeper:2181 --delete --topic test
This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.