@@ -32,12 +32,16 @@
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.security.authentication.AuthenticationConfig;
+import org.apache.doris.common.security.authentication.HadoopAuthenticator;
 import org.apache.doris.common.util.CacheBulkLoader;
 import org.apache.doris.common.util.LocationPath;
 import org.apache.doris.common.util.Util;
 import org.apache.doris.datasource.CacheException;
 import org.apache.doris.datasource.ExternalMetaCacheMgr;
 import org.apache.doris.datasource.hive.AcidInfo.DeleteDeltaInfo;
+import org.apache.doris.datasource.hive.HiveUtil.ACIDFileFilter;
+import org.apache.doris.datasource.hive.HiveUtil.FullAcidFileFilter;
+import org.apache.doris.datasource.hive.HiveUtil.InsertOnlyACIDFileFilter;
 import org.apache.doris.datasource.property.PropertyConverter;
 import org.apache.doris.fs.FileSystemCache;
 import org.apache.doris.fs.remote.RemoteFile;
@@ -55,7 +59,6 @@
 import com.github.benmanes.caffeine.cache.LoadingCache;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
 import com.google.common.collect.BiMap;
 import com.google.common.collect.HashBiMap;
 import com.google.common.collect.Iterables;
@@ -77,12 +80,10 @@
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;

 import java.net.URI;
-import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -107,8 +108,6 @@ public class HiveMetaStoreCache {
     // After hive 3, transactional table's will have file '_orc_acid_version' with value >= '2'.
     public static final String HIVE_ORC_ACID_VERSION_FILE = "_orc_acid_version";

-    private static final String HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX = "bucket_";
-
     private final HMSExternalCatalog catalog;
     private JobConf jobConf;
     private final ExecutorService refreshExecutor;
@@ -742,19 +741,16 @@ public LoadingCache<PartitionCacheKey, HivePartition> getPartitionCache() {
     public List<FileCacheValue> getFilesByTransaction(List<HivePartition> partitions, ValidWriteIdList validWriteIds,
             boolean isFullAcid, boolean skipCheckingAcidVersionFile, long tableId, String bindBrokerName) {
         List<FileCacheValue> fileCacheValues = Lists.newArrayList();
-        String remoteUser = jobConf.get(AuthenticationConfig.HADOOP_USER_NAME);
         try {
             for (HivePartition partition : partitions) {
+
+                AuthenticationConfig authenticationConfig = AuthenticationConfig.getKerberosConfig(jobConf);
+                HadoopAuthenticator hadoopAuthenticator =
+                        HadoopAuthenticator.getHadoopAuthenticator(authenticationConfig);
+
                 FileCacheValue fileCacheValue = new FileCacheValue();
-                AcidUtils.Directory directory;
-                if (!Strings.isNullOrEmpty(remoteUser)) {
-                    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(remoteUser);
-                    directory = ugi.doAs((PrivilegedExceptionAction<AcidUtils.Directory>) () -> AcidUtils.getAcidState(
-                            new Path(partition.getPath()), jobConf, validWriteIds, false, true));
-                } else {
-                    directory = AcidUtils.getAcidState(new Path(partition.getPath()), jobConf, validWriteIds, false,
-                            true);
-                }
+                AcidUtils.Directory directory = hadoopAuthenticator.doAs(() -> AcidUtils.getAcidState(
+                        new Path(partition.getPath()), jobConf, validWriteIds, false, true));
                 if (directory == null) {
                     return Collections.emptyList();
                 }
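
The hunk above replaces the hand-rolled UserGroupInformation branch (only taken when HADOOP_USER_NAME was set) with a HadoopAuthenticator obtained from the Kerberos config, and runs getAcidState under it. A minimal sketch of the contract this relies on, assuming doAs simply delegates to the authenticated UGI; the interface below is illustrative only, not the actual org.apache.doris.common.security.authentication API:

    import java.io.IOException;
    import java.security.PrivilegedExceptionAction;

    import org.apache.hadoop.security.UserGroupInformation;

    // Illustrative sketch only; the real HadoopAuthenticator may differ.
    interface AuthenticatorSketch {
        UserGroupInformation getUGI() throws IOException;

        // Run the action as the authenticated user, covering both the simple-auth
        // (HADOOP_USER_NAME) and Kerberos cases that were previously branched by hand.
        default <T> T doAs(PrivilegedExceptionAction<T> action) throws IOException {
            try {
                return getUGI().doAs(action);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException(e);
            }
        }
    }
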
@@ -775,7 +771,8 @@ public List<FileCacheValue> getFilesByTransaction(List<HivePartition> partitions
                         return Collections.emptyList();
                     }
                     if (!skipCheckingAcidVersionFile) {
-                        String acidVersionPath = new Path(baseOrDeltaPath, "_orc_acid_version").toUri().toString();
+                        String acidVersionPath = new Path(
+                                baseOrDeltaPath, HIVE_ORC_ACID_VERSION_FILE).toUri().toString();
                         RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem(
                                 new FileSystemCache.FileSystemCacheKey(
                                         LocationPath.getFSIdentity(baseOrDeltaPath.toUri().toString(),
@@ -798,6 +795,8 @@ public List<FileCacheValue> getFilesByTransaction(List<HivePartition> partitions
                     }
                 }

+                ACIDFileFilter fileFilter = isFullAcid ? new FullAcidFileFilter() : new InsertOnlyACIDFileFilter();
+
                 // delta directories
                 List<DeleteDeltaInfo> deleteDeltas = new ArrayList<>();
                 for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
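
From here on, the hard-coded bucket_ prefix check is replaced by the ACIDFileFilter chosen above per table type. A rough sketch of the two filter shapes the diff refers to, assuming the full-ACID filter still matches bucket_* files (as the removed HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX constant did) and the insert-only filter skips hidden and marker files; the real implementations live in HiveUtil and may differ:

    // Illustrative sketches only; see org.apache.doris.datasource.hive.HiveUtil for the real filters.
    interface AcidFileFilterSketch {
        boolean accept(String fileName);
    }

    // Full-ACID tables: data files are written as bucket_* by Hive's ACID writer.
    class FullAcidFilterSketch implements AcidFileFilterSketch {
        @Override
        public boolean accept(String fileName) {
            return fileName.startsWith("bucket_");
        }
    }

    // Insert-only transactional tables: keep ordinary data files, skip hidden and
    // marker files such as _orc_acid_version.
    class InsertOnlyFilterSketch implements AcidFileFilterSketch {
        @Override
        public boolean accept(String fileName) {
            return !fileName.startsWith(".") && !fileName.startsWith("_");
        }
    }
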
@@ -810,14 +809,14 @@ public List<FileCacheValue> getFilesByTransaction(List<HivePartition> partitions
                     Status status = fs.listFiles(location, false, remoteFiles);
                     if (status.ok()) {
                         if (delta.isDeleteDelta()) {
-                            List<String> deleteDeltaFileNames = remoteFiles.stream().map(f -> f.getName()).filter(
-                                            name -> name.startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX))
+                            List<String> deleteDeltaFileNames = remoteFiles.stream()
+                                    .map(f -> f.getName()).filter(fileFilter::accept)
                                     .collect(Collectors.toList());
                             deleteDeltas.add(new DeleteDeltaInfo(location, deleteDeltaFileNames));
                             continue;
                         }
-                        remoteFiles.stream().filter(
-                                f -> f.getName().startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX)).forEach(file -> {
+                        remoteFiles.stream().filter(f -> fileFilter.accept(f.getName()))
+                                .forEach(file -> {
                                     LocationPath path = new LocationPath(file.getPath().toString(),
                                             catalog.getProperties());
                                     fileCacheValue.addFile(file, path);
@@ -837,8 +836,7 @@ public List<FileCacheValue> getFilesByTransaction(List<HivePartition> partitions
                     List<RemoteFile> remoteFiles = new ArrayList<>();
                     Status status = fs.listFiles(location, false, remoteFiles);
                     if (status.ok()) {
-                        remoteFiles.stream().filter(
-                                f -> f.getName().startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX))
+                        remoteFiles.stream().filter(f -> fileFilter.accept(f.getName()))
                                 .forEach(file -> {
                                     LocationPath path = new LocationPath(file.getPath().toString(),
                                             catalog.getProperties());
@@ -848,7 +846,12 @@ public List<FileCacheValue> getFilesByTransaction(List<HivePartition> partitions
                         throw new RuntimeException(status.getErrMsg());
                     }
                 }
-                fileCacheValue.setAcidInfo(new AcidInfo(partition.getPath(), deleteDeltas));
+
+                if (isFullAcid) {
+                    fileCacheValue.setAcidInfo(new AcidInfo(partition.getPath(), deleteDeltas));
+                } else if (!deleteDeltas.isEmpty()) {
+                    throw new RuntimeException("No Hive Full Acid Table have delete_delta_* Dir.");
+                }
                 fileCacheValues.add(fileCacheValue);
             }
         } catch (Exception e) {