diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 00000000..decee1d7
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,125 @@
+name: core
+on:
+  push:
+
+env:
+  # Disable keepAlive and pool
+  # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
+  MAVEN_OPTS: >-
+    -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
+    -Dhttp.keepAlive=false
+    -Dmaven.wagon.http.pool=false
+    -Dmaven.wagon.http.retryHandler.count=3
+  CONTAINER_REGISTRY: ghcr.io/khwj
+
+# Use a bash login shell, because we are using Miniconda
+defaults:
+  run:
+    shell: bash -l {0}
+
+permissions:
+  contents: read # to fetch code (actions/checkout)
+
+jobs:
+  build:
+    runs-on: ubuntu-20.04
+    permissions:
+      packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        hadoop: [spark3.3]
+    env:
+      SPARK_VERSION: 3.3.1
+      HADOOP_VERSION: 3.3.2
+      HIVE_VERSION: 2.3.9
+      HIVE_REF: rel/release-2.3.9-imetastore
+      SCALA_VERSION: 2.12
+      AWS_SDK_VERSION: 1.12.206
+    steps:
+      - name: Checkout Hive
+        uses: actions/checkout@v3
+        with:
+          repository: khwj/hive
+          ref: rel/release-2.3.9-imetastore
+          path: hive
+      # - name: Set up JDK 11
+      #   uses: actions/setup-java@v3
+      #   with:
+      #     java-version: '11'
+      #     distribution: 'adopt'
+      - name: Set up JDK 8
+        uses: actions/setup-java@v3
+        with:
+          java-version: "8"
+          distribution: "zulu"
+      - name: Cache local Maven repository
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.m2/repository
+            !~/.m2/repository/org/apache/hive/
+            ~/.spark-dist
+            ~/.cache
+          key: ${{ runner.os }}-hive-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-hive-
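+      # Install the patched Hive fork into the local Maven repo first; the Glue
+      # client build below resolves Hive 2.3.9 artifacts from ~/.m2 (the cache
+      # above deliberately excludes org/apache/hive so the fork is rebuilt fresh)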
+      - name: Build Hive
+        run: |
+          cd hive
+          mvn --batch-mode -DskipTests clean install
+      - name: Checkout Glue Data Catalog client
+        uses: actions/checkout@v3
+      - name: Build Glue Data Catalog client
+        run: |
+          mvn clean install package -DskipTests -Dhive2.version=$HIVE_VERSION -Dspark-hive.version=$HIVE_VERSION -Dhadoop.version=$HADOOP_VERSION -Daws.sdk.version=$AWS_SDK_VERSION
+          mkdir artifacts
+          find . -not -path "./spark/**" -not -path "./hive/**" -name "*.jar" -exec cp {} artifacts/ \;
+      - name: Archive Glue Data Catalog client binary
+        uses: actions/upload-artifact@v3
+        with:
+          name: aws-glue-datacatalog-hive2-client
+          path: |
+            artifacts/*.jar
+      - name: Checkout Spark
+        uses: actions/checkout@v3
+        with:
+          repository: apache/spark
+          ref: refs/tags/v3.3.1
+          path: spark
+      - name: Set up JDK 11
+        uses: actions/setup-java@v3
+        with:
+          java-version: "11"
+          distribution: "adopt"
+      - name: Build Spark
+        env:
+          MAVEN_OPTS: -Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g
+        run: |
+          cd spark
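+          # -Phive and -Phive-thriftserver bundle Hive support plus the JDBC/ODBC
+          # Thrift server; -Pkubernetes adds the K8s bindings used for the images below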
+          ./dev/make-distribution.sh --name hadoop3.2-glue-thriftserver -Dhadoop-3.2 -Phive -Phive-thriftserver -Pkubernetes
+      - name: Archive Spark binary
+        uses: actions/upload-artifact@v3
+        with:
+          name: spark-${{ env.SPARK_VERSION }}-bin-hadoop3.2-glue-thriftserver
+          path: |
+            spark/dist/*
+      - name: Log in to the Container registry
+        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+        with:
+          registry: ${{ env.CONTAINER_REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          platforms: linux/amd64
+      - name: Build Spark container images
+        run: |
+          cp artifacts/*.jar spark/dist/jars/
+          cd spark/dist
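+          # Assuming Spark 3.3's docker-image-tool.sh usage text: -n builds with
+          # --no-cache and -X uses Docker Buildx; build the JVM and PySpark images,
+          # then push both tags to $CONTAINER_REGISTRY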
+          ./bin/docker-image-tool.sh -nX -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver build
+          ./bin/docker-image-tool.sh -nX -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+          ./bin/docker-image-tool.sh -nX -r $CONTAINER_REGISTRY -t ${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver push
diff --git a/aws-glue-datacatalog-client-common/pom.xml b/aws-glue-datacatalog-client-common/pom.xml
index 06fcc6e0..763a6e91 100644
--- a/aws-glue-datacatalog-client-common/pom.xml
+++ b/aws-glue-datacatalog-client-common/pom.xml
@@ -69,11 +69,6 @@
             <artifactId>shims-loader</artifactId>
             <version>${project.version}</version>
         </dependency>
-        <dependency>
-            <groupId>com.google.guava</groupId>
-            <artifactId>guava</artifactId>
-            <version>${guava.version}</version>
-        </dependency>
diff --git a/aws-glue-datacatalog-hive2-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java b/aws-glue-datacatalog-hive2-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java
index a1871c3c..4a5849c7 100644
--- a/aws-glue-datacatalog-hive2-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java
+++ b/aws-glue-datacatalog-hive2-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java
@@ -40,54 +40,7 @@
import org.apache.hadoop.hive.metastore.PartitionDropOptions;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.metastore.api.AggrStats;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
-import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
-import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId;
-import org.apache.hadoop.hive.metastore.api.DataOperationType;
-import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.FireEventRequest;
-import org.apache.hadoop.hive.metastore.api.FireEventResponse;
-import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest;
-import org.apache.hadoop.hive.metastore.api.GetAllFunctionsResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest;
-import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
-import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
-import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
-import org.apache.hadoop.hive.metastore.api.HiveObjectType;
-import org.apache.hadoop.hive.metastore.api.Index;
-import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
-import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
-import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
-import org.apache.hadoop.hive.metastore.api.LockRequest;
-import org.apache.hadoop.hive.metastore.api.LockResponse;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.MetadataPpdResult;
-import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
-import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
-import org.apache.hadoop.hive.metastore.api.NotificationEventResponse;
-import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.PartitionEventType;
-import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
-import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
-import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
-import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
-import org.apache.hadoop.hive.metastore.api.TableMeta;
-import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
-import org.apache.hadoop.hive.metastore.api.TxnOpenException;
-import org.apache.hadoop.hive.metastore.api.UnknownDBException;
-import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
-import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
-import org.apache.hadoop.hive.metastore.api.CompactionResponse;
+import org.apache.hadoop.hive.metastore.api.*;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.log4j.Logger;
import org.apache.thrift.TException;
@@ -1250,6 +1203,12 @@ public List<String> listPartitionNames(String databaseName, String tableName,
return glueMetastoreClientDelegate.listPartitionNames(databaseName, tableName, values, max);
}
+ @Override
+ public PartitionValuesResponse listPartitionValues(PartitionValuesRequest partitionValuesRequest)
+ throws MetaException, TException, NoSuchObjectException {
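+ // New in Hive 2.3's IMetaStoreClient; the Glue delegate has no partition-values
+ // API, so fail fast rather than return incomplete results.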
+ throw new UnsupportedOperationException("listPartitionValues is not supported");
+ }
+
@Override
public int getNumPartitionsByFilter(String dbName, String tableName, String filter)
throws MetaException, NoSuchObjectException, TException {
diff --git a/aws-glue-datacatalog-spark-client/pom.xml b/aws-glue-datacatalog-spark-client/pom.xml
index ed8c8103..043c009e 100644
--- a/aws-glue-datacatalog-spark-client/pom.xml
+++ b/aws-glue-datacatalog-spark-client/pom.xml
@@ -13,13 +13,13 @@
    <artifactId>aws-glue-datacatalog-spark-client</artifactId>

        <dependency>
-            <groupId>org.spark-project.hive</groupId>
+            <groupId>org.apache.hive</groupId>
             <artifactId>hive-metastore</artifactId>
             <version>${spark-hive.version}</version>
             <scope>provided</scope>
        </dependency>
        <dependency>
-            <groupId>org.spark-project.hive</groupId>
+            <groupId>org.apache.hive</groupId>
             <artifactId>hive-exec</artifactId>
             <version>${spark-hive.version}</version>
             <scope>provided</scope>
diff --git a/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java b/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java
index 34590b34..9cfb961e 100644
--- a/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java
+++ b/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java
@@ -20,7 +20,6 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
-
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -30,60 +29,16 @@
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
-import org.apache.hadoop.hive.metastore.IMetaStoreClient;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.PartitionDropOptions;
-import org.apache.hadoop.hive.metastore.TableType;
-import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.metastore.api.AggrStats;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
-import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
-import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId;
-import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.FireEventRequest;
-import org.apache.hadoop.hive.metastore.api.FireEventResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest;
-import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
-import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
-import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
-import org.apache.hadoop.hive.metastore.api.HiveObjectType;
-import org.apache.hadoop.hive.metastore.api.Index;
-import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
-import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
-import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
-import org.apache.hadoop.hive.metastore.api.LockRequest;
-import org.apache.hadoop.hive.metastore.api.LockResponse;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
-import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
-import org.apache.hadoop.hive.metastore.api.NotificationEventResponse;
-import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.PartitionEventType;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
-import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
-import org.apache.hadoop.hive.metastore.api.TxnOpenException;
-import org.apache.hadoop.hive.metastore.api.UnknownDBException;
-import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
-import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.*;
+import org.apache.hadoop.hive.metastore.api.*;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.log4j.Logger;
import org.apache.thrift.TException;
import java.io.IOException;
import java.net.URI;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.nio.ByteBuffer;
+import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -366,6 +321,27 @@ public void alter_partition(String dbName, String tblName, org.apache.hadoop.hiv
glueMetastoreClientDelegate.alterPartitions(dbName, tblName, Lists.newArrayList(partition));
}
+ @Override
+ public void alter_partition(
+ String dbName,
+ String tblName,
+ org.apache.hadoop.hive.metastore.api.Partition partition,
+ EnvironmentContext environmentContext
+ ) throws InvalidOperationException, MetaException, TException {
+ List<org.apache.hadoop.hive.metastore.api.Partition> p = new ArrayList<>();
+ p.add(partition);
+ glueMetastoreClientDelegate.alterPartitions(dbName, tblName, p);
+ }
+
+ @Override
+ public void alter_partitions(
+ String dbName, String tblName,
+ List<org.apache.hadoop.hive.metastore.api.Partition> partitions,
+ EnvironmentContext environmentContext
+ ) throws InvalidOperationException, MetaException, TException {
+ glueMetastoreClientDelegate.alterPartitions(dbName, tblName, partitions);
+ }
+
@Override
   public void alter_partitions(String dbName, String tblName, List<org.apache.hadoop.hive.metastore.api.Partition> partitions)
throws InvalidOperationException, MetaException, TException {
@@ -396,6 +372,16 @@ public void alter_table(
glueMetastoreClientDelegate.alterTable(dbName, tblName, table, environmentContext);
}
+ @Override
+ public void alter_table_with_environmentContext(
+ String dbName,
+ String tblName,
+ org.apache.hadoop.hive.metastore.api.Table table,
+ EnvironmentContext environmentContext
+ ) throws InvalidOperationException, MetaException, TException {
+ glueMetastoreClientDelegate.alterTable(dbName, tblName, table, environmentContext);
+ }
+
@Override
public org.apache.hadoop.hive.metastore.api.Partition appendPartition(
String dbName,
@@ -477,6 +463,46 @@ public String getTokenStrForm() throws IOException {
return glueMetastoreClientDelegate.getTokenStrForm();
}
+ @Override
+ public boolean addToken(String tokenIdentifier, String delegationToken) throws TException {
+ return glueMetastoreClientDelegate.addToken(tokenIdentifier, delegationToken);
+ }
+
+ @Override
+ public boolean removeToken(String tokenIdentifier) throws TException {
+ return glueMetastoreClientDelegate.removeToken(tokenIdentifier);
+ }
+
+ @Override
+ public String getToken(String tokenIdentifier) throws TException {
+ return glueMetastoreClientDelegate.getToken(tokenIdentifier);
+ }
+
+ @Override
+ public List<String> getAllTokenIdentifiers() throws TException {
+ return glueMetastoreClientDelegate.getAllTokenIdentifiers();
+ }
+
+ @Override
+ public int addMasterKey(String key) throws MetaException, TException {
+ return glueMetastoreClientDelegate.addMasterKey(key);
+ }
+
+ @Override
+ public void updateMasterKey(Integer seqNo, String key) throws NoSuchObjectException, MetaException, TException {
+ glueMetastoreClientDelegate.updateMasterKey(seqNo, key);
+ }
+
+ @Override
+ public boolean removeMasterKey(Integer keySeq) throws TException {
+ return glueMetastoreClientDelegate.removeMasterKey(keySeq);
+ }
+
+ @Override
+ public String[] getMasterKeys() throws TException {
+ return glueMetastoreClientDelegate.getMasterKeys();
+ }
+
@Override
public LockResponse checkLock(long lockId)
throws NoSuchTxnException, TxnAbortedException, NoSuchLockException, TException {
@@ -493,6 +519,11 @@ public void commitTxn(long txnId) throws NoSuchTxnException, TxnAbortedException
glueMetastoreClientDelegate.commitTxn(txnId);
}
+ @Override
+ public void abortTxns(List<Long> txnIds) throws TException {
+ glueMetastoreClientDelegate.abortTxns(txnIds);
+ }
+
@Override
public void compact(
String dbName,
@@ -503,6 +534,30 @@ public void compact(
glueMetastoreClientDelegate.compact(dbName, tblName, partitionName, compactionType);
}
+ @Override
+ public void compact(
+ String dbName,
+ String tblName,
+ String partitionName,
+ CompactionType compactionType,
+ Map<String, String> tblProperties
+ ) throws TException {
+ glueMetastoreClientDelegate.compact(dbName, tblName, partitionName, compactionType, tblProperties);
+ }
+
+ @Override
+ public CompactionResponse compact2(
+ String dbName,
+ String tblName,
+ String partitionName,
+ CompactionType compactionType,
+ Map<String, String> tblProperties
+ ) throws TException {
+
+ return glueMetastoreClientDelegate.compact2(
+ dbName, tblName, partitionName, compactionType, tblProperties);
+ }
+
@Override
public void createFunction(org.apache.hadoop.hive.metastore.api.Function function) throws InvalidObjectException, MetaException, TException {
glueMetastoreClientDelegate.createFunction(function);
@@ -632,20 +687,18 @@ public boolean dropPartition(String dbName, String tblName, String partitionName
@Override
   public List<org.apache.hadoop.hive.metastore.api.Partition> dropPartitions(
- String dbName, String tblName, List<ObjectPair<Integer, byte[]>> partExprs,
- boolean deleteData, boolean ignoreProtection, boolean ifExists)
- throws NoSuchObjectException, MetaException, TException {
- return dropPartitions_core(dbName, tblName, partExprs, deleteData, false);
+ String dbName,
+ String tblName,
+ List<ObjectPair<Integer, byte[]>> partExprs,
+ boolean deleteData,
+ boolean ifExists) throws NoSuchObjectException, MetaException, TException {
+ return dropPartitions_core(dbName, tblName, partExprs, deleteData, false);
}
@Override
   public List<org.apache.hadoop.hive.metastore.api.Partition> dropPartitions(
- String dbName,
- String tblName, List<ObjectPair<Integer, byte[]>> partExprs,
- boolean deleteData,
- boolean ignoreProtection,
- boolean ifExists,
- boolean needResult)
+ String dbName, String tblName, List<ObjectPair<Integer, byte[]>> partExprs,
+ boolean deleteData, boolean ignoreProtection, boolean ifExists)
throws NoSuchObjectException, MetaException, TException {
return dropPartitions_core(dbName, tblName, partExprs, deleteData, false);
}
@@ -807,6 +860,18 @@ public org.apache.hadoop.hive.metastore.api.Partition exchange_partition(
return glueMetastoreClientDelegate.exchangePartition(partitionSpecs, srcDb, srcTbl, dstDb, dstTbl);
}
+ // https://github.com/ismailsimsek/aws-glue-data-catalog-client-for-apache-hive-metastore/blob/1f819a8502c388c0717c1b353c9e8216a03f1ce8/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClient.java#L834
+ @Override
+ public List<org.apache.hadoop.hive.metastore.api.Partition> exchange_partitions(
+ Map<String, String> partitionSpecs,
+ String sourceDb, String sourceTable, String destdb,
+ String destTableName
+ ) throws MetaException, NoSuchObjectException, InvalidObjectException, TException {
+ List<org.apache.hadoop.hive.metastore.api.Partition> p = new ArrayList<>();
+ p.add(glueMetastoreClientDelegate.exchangePartition(partitionSpecs, sourceDb, sourceTable, destdb, destTableName));
+ return p;
+ }
+
@Override
   public AggrStats getAggrColStatsFor(String dbName, String tblName, List<String> colNames, List<String> partName)
throws NoSuchObjectException, MetaException, TException {
@@ -868,6 +933,11 @@ public List<String> getFunctions(String dbName, String pattern) throws MetaExcep
return glueMetastoreClientDelegate.getFunctions(dbName, pattern);
}
+ @Override
+ public GetAllFunctionsResponse getAllFunctions() throws MetaException, TException {
+ throw new UnsupportedOperationException("getAllFunctions is not supported");
+ }
+
@Override
public Index getIndex(String dbName, String tblName, String indexName) throws MetaException, UnknownTableException,
NoSuchObjectException, TException {
@@ -983,6 +1053,16 @@ public List<String> getTables(String dbname, String tablePattern) throws MetaExc
return glueMetastoreClientDelegate.getTables(dbname, tablePattern);
}
+ @Override
+ public List<String> getTables(String dbname, String tablePattern, TableType tableType) throws MetaException, TException, UnknownDBException {
+ return glueMetastoreClientDelegate.getTables(dbname, tablePattern, tableType);
+ }
+
+ @Override
+ public List<TableMeta> getTableMeta(String dbPatterns, String tablePatterns, List<String> tableTypes) throws MetaException, TException, UnknownDBException {
+ return glueMetastoreClientDelegate.getTableMeta(dbPatterns, tablePatterns, tableTypes);
+ }
+
@Override
public ValidTxnList getValidTxns() throws TException {
return glueMetastoreClientDelegate.getValidTxns();
@@ -1104,6 +1184,18 @@ public List<String> listPartitionNames(String databaseName, String tableName,
return glueMetastoreClientDelegate.listPartitionNames(databaseName, tableName, values, max);
}
+ @Override
+ public PartitionValuesResponse listPartitionValues(PartitionValuesRequest partitionValuesRequest)
+ throws MetaException, TException, NoSuchObjectException {
+ throw new UnsupportedOperationException("listPartitionValues is not supported");
+ }
+
+ @Override
+ public int getNumPartitionsByFilter(String dbName, String tableName, String filter)
+ throws MetaException, NoSuchObjectException, TException {
+ return glueMetastoreClientDelegate.getNumPartitionsByFilter(dbName, tableName, filter);
+ }
+
@Override
public PartitionSpecProxy listPartitionSpecs(String dbName, String tblName, int max) throws TException {
return glueMetastoreClientDelegate.listPartitionSpecs(dbName, tblName, max);
@@ -1420,6 +1512,83 @@ public boolean setPartitionColumnStatistics(org.apache.hadoop.hive.metastore.api
return glueMetastoreClientDelegate.setPartitionColumnStatistics(request);
}
+ @Override
+ public boolean isLocalMetaStore() {
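+ // The Glue Data Catalog is always a remote service; there is no embedded/local mode.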
+ return false;
+ }
+
+ @Override
+ public void flushCache() {
+ throw new UnsupportedOperationException("flushCache is not supported");
+ }
+
+ @Override
+ public Iterable<Map.Entry<Long, ByteBuffer>> getFileMetadata(List<Long> fileIds) throws TException {
+ return glueMetastoreClientDelegate.getFileMetadata(fileIds);
+ }
+
+ @Override
+ public Iterable<Map.Entry<Long, MetadataPpdResult>> getFileMetadataBySarg(
+ List<Long> fileIds,
+ ByteBuffer sarg,
+ boolean doGetFooters
+ ) throws TException {
+ return glueMetastoreClientDelegate.getFileMetadataBySarg(fileIds, sarg, doGetFooters);
+ }
+
+ @Override
+ public void clearFileMetadata(List<Long> fileIds) throws TException {
+ glueMetastoreClientDelegate.clearFileMetadata(fileIds);
+ }
+
+ @Override
+ public void putFileMetadata(List<Long> fileIds, List<ByteBuffer> metadata) throws TException {
+ glueMetastoreClientDelegate.putFileMetadata(fileIds, metadata);
+ }
+
+ @Override
+ public boolean isSameConfObj(HiveConf hiveConf) {
+ throw new UnsupportedOperationException("isSameConfObj is not supported");
+ }
+
+ @Override
+ public boolean cacheFileMetadata(String dbName, String tblName, String partName, boolean allParts)
+ throws TException {
+ return glueMetastoreClientDelegate.cacheFileMetadata(dbName, tblName, partName, allParts);
+ }
+
+ @Override
+ public List<SQLPrimaryKey> getPrimaryKeys(PrimaryKeysRequest primaryKeysRequest) throws MetaException, NoSuchObjectException, TException {
+ throw new UnsupportedOperationException("getPrimaryKeys is unsupported");
+ }
+
+ @Override
+ public List<SQLForeignKey> getForeignKeys(ForeignKeysRequest foreignKeysRequest) throws MetaException, NoSuchObjectException, TException {
+ throw new UnsupportedOperationException("getForeignKeys is unsupported");
+ }
+
+ @Override
+ public void createTableWithConstraints(org.apache.hadoop.hive.metastore.api.Table table, List<SQLPrimaryKey> primaryKeys, List<SQLForeignKey> foreignKeys) throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, TException {
+ this.createTable(table);
+ glueMetastoreClientDelegate.createTableWithConstraints(table, primaryKeys, foreignKeys);
+ }
+
+ @Override
+ public void dropConstraint(String dbName, String tblName, String constraintName) throws MetaException, NoSuchObjectException, TException {
+ glueMetastoreClientDelegate.dropConstraint(dbName, tblName, constraintName);
+ }
+
+ @Override
+ public void addPrimaryKey(List<SQLPrimaryKey> primaryKeyCols) throws MetaException, NoSuchObjectException, TException {
+ glueMetastoreClientDelegate.addPrimaryKey(primaryKeyCols);
+ }
+
+ @Override
+ public void addForeignKey(List<SQLForeignKey> foreignKeyCols) throws MetaException, NoSuchObjectException, TException {
+ glueMetastoreClientDelegate.addForeignKey(foreignKeyCols);
+ }
+
@Override
public ShowCompactResponse showCompactions() throws TException {
return glueMetastoreClientDelegate.showCompactions();
@@ -1435,6 +1604,22 @@ public void addDynamicPartitions(
glueMetastoreClientDelegate.addDynamicPartitions(txnId, dbName, tblName, partNames);
}
+ @Override
+ public void addDynamicPartitions(
+ long txnId,
+ String dbName,
+ String tblName,
+ List<String> partNames,
+ DataOperationType operationType
+ ) throws TException {
+ glueMetastoreClientDelegate.addDynamicPartitions(txnId, dbName, tblName, partNames, operationType);
+ }
+
+ @Override
+ public void insertTable(org.apache.hadoop.hive.metastore.api.Table table, boolean overwrite) throws MetaException {
+ glueMetastoreClientDelegate.insertTable(table, overwrite);
+ }
+
@Override
public NotificationEventResponse getNextNotification(
long lastEventId, int maxEvents, NotificationFilter notificationFilter) throws TException {
@@ -1456,6 +1641,11 @@ public ShowLocksResponse showLocks() throws TException {
return glueMetastoreClientDelegate.showLocks();
}
+ @Override
+ public ShowLocksResponse showLocks(ShowLocksRequest showLocksRequest) throws TException {
+ return glueMetastoreClientDelegate.showLocks(showLocksRequest);
+ }
+
@Override
public GetOpenTxnsInfoResponse showTxns() throws TException {
return glueMetastoreClientDelegate.showTxns();
diff --git a/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueDataCatalogHiveClientFactory.java b/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueDataCatalogHiveClientFactory.java
index 713a78ad..29f225a7 100644
--- a/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueDataCatalogHiveClientFactory.java
+++ b/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueDataCatalogHiveClientFactory.java
@@ -6,15 +6,18 @@
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientFactory;
+import java.util.concurrent.ConcurrentHashMap;
+
public class AWSGlueDataCatalogHiveClientFactory implements HiveMetaStoreClientFactory {
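+ // Hive 2.3's HiveMetaStoreClientFactory widened createMetaStoreClient with allowEmbedded
+ // and a metaCallTimeMap for call-time tracking; both are accepted but unused here.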
@Override
public IMetaStoreClient createMetaStoreClient(
HiveConf conf,
- HiveMetaHookLoader hookLoader
+ HiveMetaHookLoader hookLoader,
+ boolean allowEmbedded,
+ ConcurrentHashMap<String, Long> metaCallTimeMap
) throws MetaException {
- AWSCatalogMetastoreClient client = new AWSCatalogMetastoreClient(conf, hookLoader);
- return client;
+ return new AWSCatalogMetastoreClient(conf, hookLoader);
}
}
\ No newline at end of file
diff --git a/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClientTest.java b/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClientTest.java
index 62653f07..26cd82de 100644
--- a/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClientTest.java
+++ b/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/AWSCatalogMetastoreClientTest.java
@@ -155,7 +155,8 @@ public void setUp() throws Exception {
clientFactory = mock(GlueClientFactory.class);
metastoreFactory = mock(AWSGlueMetastoreFactory.class);
when(clientFactory.newClient()).thenReturn(glueClient);
- when(metastoreFactory.newMetastore(conf)).thenReturn(new DefaultAWSGlueMetastore(conf, glueClient));
+ DefaultAWSGlueMetastore defaultMetastore = new DefaultAWSGlueMetastore(conf, glueClient);
+ when(metastoreFactory.newMetastore(conf)).thenReturn(defaultMetastore);
metastoreClient = new AWSCatalogMetastoreClient.Builder().withClientFactory(clientFactory)
.withMetastoreFactory(metastoreFactory).withWarehouse(wh).createDefaults(false).withHiveConf(conf).build();
}
diff --git a/docker/delta-hadoop-cloud-dependencies.xml b/docker/delta-hadoop-cloud-dependencies.xml
new file mode 100644
index 00000000..e69de29b
diff --git a/pom.xml b/pom.xml
index d32a20f5..450e4401 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,14 +19,14 @@
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
         <guava.version>14.0.1</guava.version>
-        <hive2.version>2.3.3</hive2.version>
-        <spark-hive.version>1.2.1</spark-hive.version>
-        <aws.sdk.version>1.11.267</aws.sdk.version>
+        <hive2.version>2.3.9</hive2.version>
+        <spark-hive.version>2.3.9</spark-hive.version>
+        <aws.sdk.version>1.12.206</aws.sdk.version>
         <junit.version>4.11</junit.version>
         <mockito.version>1.10.19</mockito.version>
         <surefire.version>2.15</surefire.version>
         <powermock.version>1.6.4</powermock.version>
-        <hadoop.version>2.8.3</hadoop.version>
+        <hadoop.version>2.10.1</hadoop.version>
         <jackson.version>2.9</jackson.version>
         <hamcrest.version>1.3</hamcrest.version>
         <httpclient.version>4.5.3</httpclient.version>
diff --git a/shims/spark-hive-shims/pom.xml b/shims/spark-hive-shims/pom.xml
index d88ee48f..91cf027b 100644
--- a/shims/spark-hive-shims/pom.xml
+++ b/shims/spark-hive-shims/pom.xml
@@ -16,13 +16,13 @@
         <dependency>
-            <groupId>org.spark-project.hive</groupId>
+            <groupId>org.apache.hive</groupId>
             <artifactId>hive-exec</artifactId>
             <version>${spark-hive.version}</version>
             <scope>provided</scope>
         </dependency>
         <dependency>
-            <groupId>org.spark-project.hive</groupId>
+            <groupId>org.apache.hive</groupId>
             <artifactId>hive-metastore</artifactId>
             <version>${spark-hive.version}</version>
             <scope>provided</scope>
diff --git a/shims/spark-hive-shims/src/main/java/com/amazonaws/glue/shims/AwsGlueSparkHiveShims.java b/shims/spark-hive-shims/src/main/java/com/amazonaws/glue/shims/AwsGlueSparkHiveShims.java
index 775b947f..44def06b 100644
--- a/shims/spark-hive-shims/src/main/java/com/amazonaws/glue/shims/AwsGlueSparkHiveShims.java
+++ b/shims/spark-hive-shims/src/main/java/com/amazonaws/glue/shims/AwsGlueSparkHiveShims.java
@@ -1,19 +1,28 @@
package com.amazonaws.glue.shims;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.log4j.Logger;
+
+import java.util.HashMap;
+import java.util.Map;
class AwsGlueSparkHiveShims implements AwsGlueHiveShims {
+ private static final Logger logger = Logger.getLogger(AwsGlueSparkHiveShims.class);
+
private static final String SPARK_HIVE_VERSION = "1.2.";
static boolean supportsVersion(String version) {
@@ -22,22 +31,22 @@ static boolean supportsVersion(String version) {
@Override
public ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes) {
- return Utilities.deserializeExpressionFromKryo(exprBytes);
+ return SerializationUtilities.deserializeExpressionFromKryo(exprBytes);
}
@Override
public byte[] getSerializeExpression(ExprNodeGenericFuncDesc expr) {
- return Utilities.serializeExpressionToKryo(expr);
+ return SerializationUtilities.serializeExpressionToKryo(expr);
}
@Override
public Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse) throws MetaException {
- return warehouse.getTablePath(db, tableName);
+ return warehouse.getDefaultTablePath(db, tableName);
}
@Override
public boolean validateTableName(String name, Configuration conf) {
- return MetaStoreUtils.validateName(name);
+ return MetaStoreUtils.validateName(name, conf);
}
@Override
@@ -47,7 +56,7 @@ public boolean requireCalStats(
Partition newPart,
Table tbl,
EnvironmentContext environmentContext) {
- return MetaStoreUtils.requireCalStats(conf, oldPart, newPart, tbl);
+ return MetaStoreUtils.requireCalStats(conf, oldPart, newPart, tbl, environmentContext);
}
@Override
@@ -59,7 +68,66 @@ public boolean updateTableStatsFast(
boolean forceRecompute,
EnvironmentContext environmentContext
) throws MetaException {
- return MetaStoreUtils.updateUnpartitionedTableStatsFast(db, tbl, wh, madeDir, forceRecompute);
+ return updateUnpartitionedTableStatsFast(db, tbl, wh, madeDir, forceRecompute);
}
+ /**
+ * Ref: https://github.com/facebookarchive/swift-hive-metastore/blob/9c89d3ce58cfdb3b13e4f5fde8ec28a701ef6885/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java#L1295
+ * Updates the numFiles and totalSize parameters for the passed unpartitioned Table by querying
+ * the warehouse if the passed Table does not already have values for these parameters.
+ * @param db
+ * @param tbl
+ * @param wh
+ * @param madeDir if true, the directory was just created and can be assumed to be empty
+ * @param forceRecompute Recompute stats even if the passed Table already has
+ * these parameters set
+ * @return true if the stats were updated, false otherwise
+ */
+ public static boolean updateUnpartitionedTableStatsFast(
+ Database db,
+ Table tbl,
+ Warehouse wh,
+ boolean madeDir,
+ boolean forceRecompute
+ ) throws MetaException {
+
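+ // The file listing below drives the quick stats (numFiles, totalSize) that
+ // MetaStoreUtils.populateQuickStats() fills in without a full data scan.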
+ FileStatus[] fileStatus = wh.getFileStatusesForUnpartitionedTable(db, tbl);
+ Map<String, String> params = tbl.getParameters();
+
+ if ((params != null) && params.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) {
+ boolean doNotUpdateStats = Boolean.parseBoolean(params.get(StatsSetupConst.DO_NOT_UPDATE_STATS));
+ params.remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
+ tbl.setParameters(params); // to make sure we remove this marker property
+ if (doNotUpdateStats) {
+ return false;
+ }
+ }
+
+ boolean updated = false;
+ if (forceRecompute || params == null || !MetaStoreUtils.containsAllFastStats(params)) {
+ if (params == null) {
+ params = new HashMap<>();
+ }
+ if (!madeDir) {
+ // The table location already exists and may contain data.
+ // Let's try to populate those stats that don't require full scan.
+ logger.info("Updating table stats fast for " + tbl.getTableName());
+ MetaStoreUtils.populateQuickStats(fileStatus, params);
+ logger.info("Updated size of table " + tbl.getTableName() + " to " + params.get(StatsSetupConst.TOTAL_SIZE));
+ if (!params.containsKey(StatsSetupConst.STATS_GENERATED)) {
+ // invalidate stats requiring scan since this is a regular ddl alter case
+ for (String stat : StatsSetupConst.statsRequireCompute) {
+ params.put(stat, "-1");
+ }
+ params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.FALSE);
+ } else {
+ params.remove(StatsSetupConst.STATS_GENERATED);
+ params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.TRUE);
+ }
+ }
+ tbl.setParameters(params);
+ updated = true;
+ }
+ return updated;
+ }
}