diff --git a/.github/scripts/install-hdfs.sh b/.github/scripts/install-hdfs.sh index 77d8803e..4e7a7879 100755 --- a/.github/scripts/install-hdfs.sh +++ b/.github/scripts/install-hdfs.sh @@ -15,6 +15,12 @@ else ENCRYPT_DATA_TRANSFER="false" fi +CONF_KMS_PROVIDER="" +TRANSPARENT_ENCRYPTION=${TRANSPARENT_ENCRYPTION:-"false"} +if [ "$TRANSPARENT_ENCRYPTION" = "true" ]; then + CONF_KMS_PROVIDER="kms://http@localhost:9600/kms" +fi + CONF_AUTHENTICATION="simple" KERBEROS_REALM="EXAMPLE.COM" KERBEROS_PRINCIPLE="administrator" @@ -50,7 +56,7 @@ EOF sudo apt-get install -y krb5-user krb5-kdc krb5-admin-server printf "$KERBEROS_PASSWORD\n$KERBEROS_PASSWORD" | sudo kdb5_util -r "$KERBEROS_REALM" create -s - for p in nn dn $USER gohdfs1 gohdfs2; do + for p in nn dn kms $USER gohdfs1 gohdfs2; do sudo kadmin.local -q "addprinc -randkey $p/$HOSTNAME@$KERBEROS_REALM" sudo kadmin.local -q "addprinc -randkey $p/localhost@$KERBEROS_REALM" sudo kadmin.local -q "xst -k /tmp/$p.keytab $p/$HOSTNAME@$KERBEROS_REALM" @@ -116,6 +122,10 @@ sudo tee $HADOOP_ROOT/etc/hadoop/core-site.xml <hadoop.rpc.protection $RPC_PROTECTION + + hadoop.security.key.provider.path + $CONF_KMS_PROVIDER + EOF @@ -172,6 +182,40 @@ $HADOOP_ROOT/bin/hdfs namenode -format sudo groupadd hadoop sudo usermod -a -G hadoop $USER +sudo tee $HADOOP_ROOT/etc/hadoop/kms-site.xml < + + hadoop.kms.key.provider.uri + jceks://file@/tmp/hdfs/kms.keystore + + + hadoop.security.keystore.java-keystore-provider.password-file + kms.keystore.password + + + hadoop.kms.authentication.type + $CONF_AUTHENTICATION + + + hadoop.kms.authentication.kerberos.keytab + /tmp/kms.keytab + + + hadoop.kms.authentication.kerberos.principal + kms/localhost@$KERBEROS_REALM + + +EOF + +sudo tee $HADOOP_ROOT/etc/hadoop/kms.keystore.password < /tmp/hdfs/kms.log 2>&1 & +fi + echo "Starting namenode..." $HADOOP_ROOT/bin/hdfs namenode > /tmp/hdfs/namenode.log 2>&1 & @@ -183,5 +227,12 @@ sleep 5 echo "Waiting for cluster to exit safe mode..." 
$HADOOP_ROOT/bin/hdfs dfsadmin -safemode wait +$HADOOP_ROOT/bin/hadoop fs -mkdir -p /_test/kms +if [ "$TRANSPARENT_ENCRYPTION" = "true" ]; then + echo "Prepare encrypted zone" + $HADOOP_ROOT/bin/hadoop key create key1 + $HADOOP_ROOT/bin/hdfs crypto -createZone -keyName key1 -path /_test/kms +fi + echo "HADOOP_CONF_DIR=$(pwd)/$HADOOP_ROOT/etc/hadoop" >> $GITHUB_ENV -echo "$(pwd)/$HADOOP_ROOT/bin" >> $GITHUB_PATH \ No newline at end of file +echo "$(pwd)/$HADOOP_ROOT/bin" >> $GITHUB_PATH diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7ff0ed63..e9655810 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,6 +14,8 @@ jobs: include: - hadoop_version: 2.10.1 - hadoop_version: 3.3.1 + - hadoop_version: 3.3.1 + transparent_encryption: true - hadoop_version: 3.3.1 kerberos: true rpc_protection: authentication @@ -48,6 +50,7 @@ jobs: RPC_PROTECTION: ${{ matrix.rpc_protection }} TRANSFER_PROTECTION: ${{ matrix.transfer_protection }} AES: ${{ matrix.aes }} + TRANSPARENT_ENCRYPTION: ${{ matrix.transparent_encryption }} # Similarly, this step adds the bats binary to GITHUB_PATH. 
- name: install-bats.sh @@ -65,6 +68,10 @@ jobs: run: | make test + - name: cat kms.log + if: always() + run: if [ -f /tmp/hdfs/kms.log ]; then cat /tmp/hdfs/kms.log; fi + - name: cat namenode.log if: always() run: cat /tmp/hdfs/namenode.log diff --git a/cmd/hdfs/test/kms.bats b/cmd/hdfs/test/kms.bats new file mode 100644 index 00000000..d31ce31f --- /dev/null +++ b/cmd/hdfs/test/kms.bats @@ -0,0 +1,30 @@ +#!/usr/bin/env bats + +load helper + +@test "put java to go" { + run $HADOOP_FS -put $ROOT_TEST_DIR/testdata/foo.txt /_test/kms/foo1 + assert_success + + run $HDFS cat /_test/kms/foo1 + assert_output "bar" +} + +@test "put go to java" { + run $HDFS put $ROOT_TEST_DIR/testdata/foo.txt /_test/kms/foo2 + assert_success + + run $HADOOP_FS -cat /_test/kms/foo2 + assert_output "bar" +} + +@test "tail" { + run $HDFS put $ROOT_TEST_DIR/testdata/mobydick.txt /_test/kms/ + assert_success + + run bash -c "$HDFS tail /_test/kms/mobydick.txt > $BATS_TMPDIR/mobydick_test.txt" + assert_success + + SHA=`tail $ROOT_TEST_DIR/testdata/mobydick.txt | shasum | awk '{ print $1 }'` + assert_equal $SHA `shasum < $BATS_TMPDIR/mobydick_test.txt | awk '{ print $1 }'` +}