Add support for Spark 3 #62

Open · wants to merge 3 commits into base: branch-1.10.0
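For reviewers trying this out: a minimal sketch of pointing the resulting Spark 3 build at the Glue Data Catalog. It assumes the AWSGlueDataCatalogHiveClientFactory class shipped by this repo and the client jars already on Spark's classpath (the workflow below copies them into dist/jars); your-app.py is a placeholder, and names and versions should be adjusted to your deployment.

# Hypothetical usage of the built distribution against Glue
spark-submit \
  --conf spark.sql.catalogImplementation=hive \
  --conf spark.hadoop.hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory \
  your-app.py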
125 changes: 125 additions & 0 deletions .github/workflows/build.yaml
@@ -0,0 +1,125 @@
name: core
on:
push:

env:
# Disable keepAlive and pool
# https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
MAVEN_OPTS: >-
-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
-Dhttp.keepAlive=false
-Dmaven.wagon.http.pool=false
-Dmaven.wagon.http.retryHandler.count=3
CONTAINER_REGISTRY: ghcr.io/khwj

# Use a bash login shell, because we are using miniconda
defaults:
run:
shell: bash -l {0}

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
build:
runs-on: ubuntu-20.04
permissions:
packages: write
strategy:
fail-fast: false
matrix:
hadoop: [spark3.3]
env:
SPARK_VERSION: 3.3.1
HADOOP_VERSION: 3.3.2
HIVE_VERSION: 2.3.9
HIVE_REF: rel/release-2.3.9-imetastore
SCALA_VERSION: 2.12
AWS_SDK_VERSION: 1.12.206
steps:
- name: Checkout Hive
uses: actions/checkout@v3
with:
repository: khwj/hive
ref: rel/release-2.3.9-imetastore
path: hive
# - name: Set up JDK 11
# uses: actions/setup-java@v3
# with:
# java-version: '11'
# distribution: 'adopt'
- name: Set up JDK 8
uses: actions/setup-java@v3
with:
java-version: "8"
distribution: "zulu"
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/hive/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-hive-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-hive-
- name: Build Hive
run: |
cd hive
mvn --batch-mode -DskipTests clean install
- name: Checkout Glue Data Catalog client
uses: actions/checkout@v3
- name: Build Glue Data Catalog client
run: |
mvn clean install package -DskipTests -Dhive2.version=$HIVE_VERSION -Dspark-hive.version=$HIVE_VERSION -Dhadoop.version=$HADOOP_VERSION -Daws.sdk.version=$AWS_SDK_VERSION
mkdir artifacts
find . -not -path "./spark/**" -not -path "./hive/**" -name "*.jar" -exec cp {} artifacts/ \;
- name: Archive Glue Data Catalog client binary
uses: actions/upload-artifact@v3
with:
name: aws-glue-datacatalog-hive2-client
path: |
artifacts/*.jar
- name: Checkout Spark
uses: actions/checkout@v3
with:
repository: apache/spark
ref: refs/tags/v3.3.1
path: spark
- name: Set up JDK 11
uses: actions/setup-java@v3
with:
java-version: "11"
distribution: "adopt"
- name: Build Spark
env:
MAVEN_OPTS: -Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g
run: |
cd spark
./dev/make-distribution.sh --name hadoop3.2-glue-thriftserver -Dhadoop-3.2 -Phive -Phive-thriftserver -Pkubernetes
- name: Archive Spark binary
uses: actions/upload-artifact@v3
with:
name: spark-${{ env.SPARK_VERSION }}-bin-hadoop3.2-glue-thriftserver
path: |
spark/dist/*
- name: Log in to the Container registry
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
with:
registry: ${{ env.CONTAINER_REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
platforms: linux/amd64
- name: Build Spark container images
run: |
cp artifacts/*.jar spark/dist/jars/;
cd spark/dist
./bin/docker-image-tool.sh -nX -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver build
./bin/docker-image-tool.sh -nX -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
./bin/docker-image-tool.sh -nX -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver push
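As a smoke test, the pushed image can be run locally. A sketch, assuming the default image name produced by docker-image-tool.sh (<registry>/spark:<tag>) and Spark's standard /opt/spark layout inside the image:

# Hypothetical local smoke test of the published image
docker pull ghcr.io/khwj/spark:3.3.1-hadoop3.3.2-glue-thriftserver
docker run --rm -e SPARK_NO_DAEMONIZE=true \
  ghcr.io/khwj/spark:3.3.1-hadoop3.3.2-glue-thriftserver \
  /opt/spark/sbin/start-thriftserver.sh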
5 changes: 0 additions & 5 deletions aws-glue-datacatalog-client-common/pom.xml
@@ -69,11 +69,6 @@
<artifactId>shims-loader</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
</dependencies>

<build>
@@ -40,54 +40,7 @@
import org.apache.hadoop.hive.metastore.PartitionDropOptions;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId;
import org.apache.hadoop.hive.metastore.api.DataOperationType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.FireEventRequest;
import org.apache.hadoop.hive.metastore.api.FireEventResponse;
import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest;
import org.apache.hadoop.hive.metastore.api.GetAllFunctionsResponse;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest;
import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse;
import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.HiveObjectType;
import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
import org.apache.hadoop.hive.metastore.api.LockRequest;
import org.apache.hadoop.hive.metastore.api.LockResponse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.MetadataPpdResult;
import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
import org.apache.hadoop.hive.metastore.api.NotificationEventResponse;
import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
import org.apache.hadoop.hive.metastore.api.TableMeta;
import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
import org.apache.hadoop.hive.metastore.api.TxnOpenException;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
import org.apache.hadoop.hive.metastore.api.UnknownTableException;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.api.CompactionResponse;
import org.apache.hadoop.hive.metastore.api.*;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.log4j.Logger;
import org.apache.thrift.TException;
@@ -1250,6 +1203,12 @@ public List<String> listPartitionNames(String databaseName, String tableName,
return glueMetastoreClientDelegate.listPartitionNames(databaseName, tableName, values, max);
}

@Override
public PartitionValuesResponse listPartitionValues(PartitionValuesRequest partitionValuesRequest)
throws MetaException, TException, NoSuchObjectException {
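// Required by Hive 2.3's IMetaStoreClient (hence the @Override); the Glue
// delegate has no equivalent call yet, so fail fast with a clear error.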
throw new UnsupportedOperationException("listPartitionValues is not supported");
}

@Override
public int getNumPartitionsByFilter(String dbName, String tableName, String filter)
throws MetaException, NoSuchObjectException, TException {
4 changes: 2 additions & 2 deletions aws-glue-datacatalog-spark-client/pom.xml
@@ -13,13 +13,13 @@
<artifactId>aws-glue-datacatalog-spark-client</artifactId>
<dependencies>
<dependency>
<groupId>org.spark-project.hive</groupId>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>${spark-hive.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.spark-project.hive</groupId>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${spark-hive.version}</version>
<scope>provided</scope>
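Context for this swap: Spark 1.x/2.x depended on a forked Hive 1.2 published under org.spark-project.hive, while Spark 3 builds against stock Apache Hive 2.3, so the client now resolves the standard org.apache.hive artifacts. A hypothetical local build against matching versions, mirroring the CI step above:

mvn clean install -DskipTests \
  -Dhive2.version=2.3.9 \
  -Dspark-hive.version=2.3.9 \
  -Dhadoop.version=3.3.2 \
  -Daws.sdk.version=1.12.206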