36
36
@ Named
37
37
public class DimensionalTimeSliceCrawler implements Crawler <DimensionalTimeSliceWorkerProgressState > {
38
38
private static final Logger log = LoggerFactory .getLogger (DimensionalTimeSliceCrawler .class );
39
+ // Delay partition creation for the most recent time window by five minutes to ensure that newly generated events are queryable.
40
+ // In general, newly generated events become queryable after 30 to 120 seconds.
41
+ protected static final long WAIT_SECONDS_BEFORE_PARTITION_CREATION = 300 ;
39
42
private static final String DIMENSIONAL_TIME_SLICE_WORKER_PARTITIONS_CREATED = "DimensionalTimeSliceWorkerPartitionsCreated" ;
40
43
private static final String WORKER_PARTITION_WAIT_TIME = "WorkerPartitionWaitTime" ;
41
44
private static final String WORKER_PARTITION_PROCESS_LATENCY = "WorkerPartitionProcessLatency" ;
@@ -73,10 +76,9 @@ public void initialize(List<String> dimensionTypes) {
73
76
*/
74
77
@ Override
75
78
public Instant crawl (LeaderPartition leaderPartition , EnhancedSourceCoordinator coordinator ) {
76
- Instant latestModifiedTime = Instant .now ();
77
79
double startCount = partitionsCreatedCounter .count ();
78
80
79
- createPartitionsForDimensionTypes (leaderPartition , coordinator , latestModifiedTime , dimensionTypes );
81
+ Instant latestModifiedTime = createPartitions (leaderPartition , coordinator );
80
82
81
83
double partitionsInThisCrawl = partitionsCreatedCounter .count () - startCount ;
82
84
log .info ("Total partitions created in this crawl: {}" , partitionsInThisCrawl );
@@ -89,82 +91,85 @@ public void executePartition(DimensionalTimeSliceWorkerProgressState state, Buff
89
91
partitionProcessLatencyTimer .record (() -> client .executePartition (state , buffer , acknowledgementSet ));
90
92
}
91
93
92
- private void createPartitionsForDimensionTypes (LeaderPartition leaderPartition ,
93
- EnhancedSourceCoordinator coordinator ,
94
- Instant latestModifiedTime ,
95
- List <String > dimensionTypes ) {
94
+ private Instant createPartitions (LeaderPartition leaderPartition ,
95
+ EnhancedSourceCoordinator coordinator ) {
96
96
DimensionalTimeSliceLeaderProgressState leaderProgressState =
97
97
(DimensionalTimeSliceLeaderProgressState ) leaderPartition .getProgressState ().get ();
98
98
99
99
if (leaderProgressState .getRemainingHours () == 0 ) {
100
- createPartitionForIncrementalSync (leaderPartition , coordinator ,
101
- latestModifiedTime , dimensionTypes );
100
+ return createPartitionsForIncrementalSync (leaderPartition , coordinator );
102
101
} else {
103
- createPartitionForHistoricalPull (leaderPartition , coordinator ,
104
- latestModifiedTime , dimensionTypes );
102
+ return createPartitionsForHistoricalPull (leaderPartition , coordinator );
105
103
}
106
104
}
107
105
108
106
/**
109
107
* Creates partitions for historical data pull. Creates hourly partitions
110
108
* for each dimension type, working backwards from the current time.
111
109
*/
112
- private void createPartitionForHistoricalPull (LeaderPartition leaderPartition ,
113
- EnhancedSourceCoordinator coordinator ,
114
- Instant latestModifiedTime ,
115
- List <String > dimensionTypes ) {
110
+ private Instant createPartitionsForHistoricalPull (LeaderPartition leaderPartition ,
111
+ EnhancedSourceCoordinator coordinator ) {
116
112
DimensionalTimeSliceLeaderProgressState leaderProgressState =
117
113
(DimensionalTimeSliceLeaderProgressState ) leaderPartition .getProgressState ().get ();
118
114
int remainingHours = leaderProgressState .getRemainingHours ();
119
115
Instant initialTime = leaderProgressState .getLastPollTime ();
120
- Instant nowUtc = initialTime .truncatedTo (ChronoUnit .HOURS );
121
- for (int i = remainingHours ; i > 0 ; i -- ) {
122
- Instant startTime = nowUtc .minus (Duration .ofHours (i )); ;
116
+ Instant latestHour = initialTime .truncatedTo (ChronoUnit .HOURS );
117
+ for (int i = remainingHours ; i > 1 ; i --) {
118
+ Instant startTime = latestHour .minus (Duration .ofHours (i ));
123
119
Instant endTime = startTime .plus (HOUR_DURATION );
124
120
125
- for (String dimensionType : dimensionTypes ) {
126
- createWorkerPartition (startTime , endTime , dimensionType , coordinator );
127
- }
121
+ createWorkerPartitionsForDimensionTypes (startTime , endTime , coordinator );
128
122
}
129
123
130
- // Create final partitions from last hour to now
131
- for (String dimensionType : dimensionTypes ) {
132
- createWorkerPartition (nowUtc , latestModifiedTime , dimensionType , coordinator );
124
+ Instant latestModifiedTime = initialTime .minusSeconds (WAIT_SECONDS_BEFORE_PARTITION_CREATION );
125
+ if (latestModifiedTime .isAfter (latestHour )) {
126
+ // if the checkpointing time is after the latest hour, create one partition for the last hour
127
+ // and one from latest hour to checkpointing time
128
+ createWorkerPartitionsForDimensionTypes (latestHour .minus (Duration .ofHours (1 )), latestHour , coordinator );
129
+ createWorkerPartitionsForDimensionTypes (latestHour , latestModifiedTime , coordinator );
130
+ } else {
131
+ // if the checkpointing time is not later than the latest hour, create one partition from one hour before the latest hour to the checkpointing time
132
+ createWorkerPartitionsForDimensionTypes (latestHour .minus (Duration .ofHours (1 )), latestModifiedTime , coordinator );
133
133
}
134
134
135
135
updateLeaderProgressState (leaderPartition , 0 , latestModifiedTime , coordinator );
136
+
137
+ return latestModifiedTime ;
136
138
}
137
139
138
140
/**
139
141
* Creates partitions for incremental sync. Creates one partition per dimension type
140
142
* from the last poll time to current time.
141
143
*/
142
- private void createPartitionForIncrementalSync (LeaderPartition leaderPartition ,
143
- EnhancedSourceCoordinator coordinator ,
144
- Instant latestModifiedTime ,
145
- List <String > dimensionTypes ) {
144
+ private Instant createPartitionsForIncrementalSync (LeaderPartition leaderPartition ,
145
+ EnhancedSourceCoordinator coordinator ) {
146
+ Instant latestModifiedTime = Instant .now ().minusSeconds (WAIT_SECONDS_BEFORE_PARTITION_CREATION );
146
147
LeaderProgressState leaderProgressState = leaderPartition .getProgressState ().get ();
147
148
Instant lastPollTime = leaderProgressState .getLastPollTime ();
148
149
149
- // Create one partition from lastPollTime to latestModifiedTime for each type
150
- for (String dimensionType : dimensionTypes ) {
151
- createWorkerPartition (lastPollTime , latestModifiedTime , dimensionType , coordinator );
150
+ if (lastPollTime .isBefore (latestModifiedTime )) {
151
+ // Create one partition from lastPollTime to latestModifiedTime for each type
152
+ createWorkerPartitionsForDimensionTypes (lastPollTime , latestModifiedTime , coordinator );
153
+
154
+ updateLeaderProgressState (leaderPartition , 0 , latestModifiedTime , coordinator );
155
+ return latestModifiedTime ;
152
156
}
153
157
154
- updateLeaderProgressState ( leaderPartition , 0 , latestModifiedTime , coordinator ) ;
158
+ return lastPollTime ;
155
159
}
156
160
157
- void createWorkerPartition (Instant startTime , Instant endTime ,
158
- String dimensionType , EnhancedSourceCoordinator coordinator ) {
159
- DimensionalTimeSliceWorkerProgressState workerState = new DimensionalTimeSliceWorkerProgressState ();
160
- workerState .setPartitionCreationTime (Instant .now ());
161
- workerState .setStartTime (startTime );
162
- workerState .setEndTime (endTime );
163
- workerState .setDimensionType (dimensionType );
164
-
165
- SaasSourcePartition partition = new SaasSourcePartition (workerState , LAST_UPDATED_KEY + UUID .randomUUID ());
166
- coordinator .createPartition (partition );
167
- partitionsCreatedCounter .increment ();
161
+ void createWorkerPartitionsForDimensionTypes (Instant startTime , Instant endTime , EnhancedSourceCoordinator coordinator ) {
162
+ for (String dimensionType : dimensionTypes ) {
163
+ DimensionalTimeSliceWorkerProgressState workerState = new DimensionalTimeSliceWorkerProgressState ();
164
+ workerState .setPartitionCreationTime (Instant .now ());
165
+ workerState .setStartTime (startTime );
166
+ workerState .setEndTime (endTime );
167
+ workerState .setDimensionType (dimensionType );
168
+
169
+ SaasSourcePartition partition = new SaasSourcePartition (workerState , LAST_UPDATED_KEY + UUID .randomUUID ());
170
+ coordinator .createPartition (partition );
171
+ partitionsCreatedCounter .increment ();
172
+ }
168
173
}
169
174
170
175
/**
0 commit comments