diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 00000000..2a34a3c5
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,145 @@
+# Builds Hive (khwj/hive fork), the Glue Data Catalog client, and Spark, then
+# publishes the results as workflow artifacts and Spark container images.
+name: core
+on:
+  push:
+
+env:
+  # Disable keepAlive and pool
+  # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
+  MAVEN_OPTS: >-
+    -Xms1024M
+    -Xmx2048M
+    -XX:MaxMetaspaceSize=1024m
+    -XX:-UseGCOverheadLimit
+    -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
+    -Dhttp.keepAlive=false
+    -Dmaven.wagon.http.pool=false
+    -Dmaven.wagon.http.retryHandler.count=3
+  CONTAINER_REGISTRY: ghcr.io/khwj
+
+# Run every step in a bash login shell. The original note gives miniconda as
+# the reason; no conda setup step is visible in this workflow.
+defaults:
+  run:
+    shell: bash -l {0}
+
+permissions:
+  contents: read # to fetch code (actions/checkout)
+
+jobs:
+  build:
+    runs-on: ubuntu-20.04
+    # A job-level permissions block replaces the workflow-level one and every
+    # scope left out is set to "none", so contents: read is repeated here.
+    permissions:
+      contents: read # to fetch code (actions/checkout)
+      packages: write # to push images to the container registry
+    strategy:
+      fail-fast: false
+      matrix:
+        hadoop: [spark3.3]
+    env:
+      SPARK_VERSION: "3.3.1"
+      HADOOP_VERSION: "3.3.2"
+      HIVE_VERSION: "2.3.9"
+      HIVE_REF: rel/release-2.3.9-imetastore
+      SCALA_VERSION: "2.12"
+      AWS_SDK_VERSION: "1.12.206"
+    steps:
+      - name: Checkout Hive
+        uses: actions/checkout@v3
+        with:
+          repository: khwj/hive
+          ref: ${{ env.HIVE_REF }}
+          path: hive
+      # - name: Set up JDK 11
+      #   uses: actions/setup-java@v3
+      #   with:
+      #     java-version: '11'
+      #     distribution: 'adopt'
+      - name: Set up JDK 8
+        uses: actions/setup-java@v3
+        with:
+          java-version: "8"
+          distribution: "zulu"
+      - name: Cache local Maven repository
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.m2/repository
+            !~/.m2/repository/org/apache/hive/
+            ~/.spark-dist
+            ~/.cache
+          key: ${{ runner.os }}-hive-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-hive-
+      - name: Build Hive
+        run: |
+          cd hive
+          mvn --batch-mode -DskipTests clean install
+      - name: Checkout Glue Data Catalog Client
+        uses: actions/checkout@v3
+      - name: Build Glue Data Catalog Client
+        # Literal block (|): under a folded scalar the line breaks become spaces,
+        # and each trailing backslash would then escape that space and glue it
+        # onto the following argument.
+        run: |
+          mvn clean install package \
+            -DskipTests \
+            -Dhive2.version=$HIVE_VERSION \
+            -Dspark-hive.version=$HIVE_VERSION \
+            -Dhadoop.version=$HADOOP_VERSION \
+            -Daws.sdk.version=$AWS_SDK_VERSION
+      - name: Archive Glue Data Catalog Client jars
+        uses: actions/upload-artifact@v3
+        with:
+          name: aws-glue-datacatalog-hive2-client
+          # Recursive glob, mirroring the recursive find in the image build step;
+          # the exclusions below only take effect with a recursive pattern.
+          path: |
+            **/*.jar
+            !spark/**
+            !hive/**
+      - name: Checkout Spark
+        uses: actions/checkout@v3
+        with:
+          repository: apache/spark
+          ref: refs/tags/v${{ env.SPARK_VERSION }}
+          path: spark
+      - name: Set up JDK 11
+        uses: actions/setup-java@v3
+        with:
+          java-version: "11"
+          distribution: "adopt"
+      - name: Build Spark
+        env:
+          MAVEN_OPTS: -Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g
+        run: |
+          cd spark
+          ./dev/make-distribution.sh --name hadoop3.2-glue-thriftserver -Dhadoop-3.2 -Phive -Phive-thriftserver -Pkubernetes
+      - name: Archive Spark binary
+        uses: actions/upload-artifact@v3
+        with:
+          name: spark-${{ env.SPARK_VERSION }}-bin-hadoop3.2-glue-thriftserver
+          path: |
+            spark/dist/*
+      - name: Log in to the Container registry
+        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+        with:
+          registry: ${{ env.CONTAINER_REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          platforms: linux/amd64
+      - name: Build Spark container images
+        run: |
+          find . -not -path "./spark/**" -not -path "./hive/**" -name "*.jar" -exec cp {} spark/dist/jars/ \;
+          cd spark/dist
+          ./bin/docker-image-tool.sh -n -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver build
+          ./bin/docker-image-tool.sh -nX -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+          ./bin/docker-image-tool.sh -nX -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver push
diff --git a/docker/delta-hadoop-cloud-dependencies.xml b/docker/delta-hadoop-cloud-dependencies.xml
new file mode 100644
index 00000000..e69de29b