Skip to content

Commit 6f77ae2

Browse files
authored
Cassandra implementation for cross cluster queue (cadence-workflow#4237)
1 parent de3a6ac commit 6f77ae2

File tree

12 files changed

+991
-539
lines changed

12 files changed

+991
-539
lines changed

common/persistence/cassandra/cassandraPersistence.go

+97-9
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ var _ p.ExecutionStore = (*cassandraPersistence)(nil)
5656
// Where x is any hexadecimal value, E represents the entity type valid values are:
5757
// E = {DomainID = 1, WorkflowID = 2, RunID = 3}
5858
// R represents row type in executions table, valid values are:
59-
// R = {Shard = 1, Execution = 2, Transfer = 3, Timer = 4, Replication = 5}
59+
// R = {Shard = 1, Execution = 2, Transfer = 3, Timer = 4, Replication = 5, Replication_DLQ = 6, CrossCluster = 7}
6060
const (
6161
// Special Domains related constants
6262
emptyDomainID = "10000000-0000-f000-f000-000000000000"
@@ -82,7 +82,9 @@ const (
8282
// Row Constants for Replication Task DLQ Row. Source cluster name will be used as WorkflowID.
8383
rowTypeDLQDomainID = "10000000-6000-f000-f000-000000000000"
8484
rowTypeDLQRunID = "30000000-6000-f000-f000-000000000000"
85-
// TODO: add rowType for cross-region tasks
85+
// Row Constants for Cross Cluster Task Row
86+
rowTypeCrossClusterDomainID = "10000000-7000-f000-f000-000000000000"
87+
rowTypeCrossClusterRunID = "30000000-7000-f000-f000-000000000000"
8688
// Special TaskId constants
8789
rowTypeExecutionTaskID = int64(-10)
8890
rowTypeShardTaskID = int64(-11)
@@ -99,7 +101,7 @@ const (
99101
rowTypeTimerTask
100102
rowTypeReplicationTask
101103
rowTypeDLQ
102-
// TODO: add row type
104+
rowTypeCrossClusterTask
103105
)
104106

105107
const (
@@ -181,6 +183,8 @@ const (
181183
`version: ?` +
182184
`}`
183185

186+
templateCrossClusterTaskType = templateTransferTaskType
187+
184188
templateReplicationTaskType = `{` +
185189
`domain_id: ?, ` +
186190
`workflow_id: ?, ` +
@@ -324,6 +328,10 @@ workflow_state = ? ` +
324328
`shard_id, type, domain_id, workflow_id, run_id, transfer, visibility_ts, task_id) ` +
325329
`VALUES(?, ?, ?, ?, ?, ` + templateTransferTaskType + `, ?, ?)`
326330

331+
templateCreateCrossClusterTaskQuery = `INSERT INTO executions (` +
332+
`shard_id, type, domain_id, workflow_id, run_id, cross_cluster, visibility_ts, task_id) ` +
333+
`VALUES(?, ?, ?, ?, ?, ` + templateCrossClusterTaskType + `, ?, ?)`
334+
327335
templateCreateReplicationTaskQuery = `INSERT INTO executions (` +
328336
`shard_id, type, domain_id, workflow_id, run_id, replication, visibility_ts, task_id) ` +
329337
`VALUES(?, ?, ?, ?, ?, ` + templateReplicationTaskType + `, ?, ?)`
@@ -636,6 +644,17 @@ workflow_state = ? ` +
636644
`and task_id > ? ` +
637645
`and task_id <= ?`
638646

647+
templateGetCrossClusterTasksQuery = `SELECT cross_cluster ` +
648+
`FROM executions ` +
649+
`WHERE shard_id = ? ` +
650+
`and type = ? ` +
651+
`and domain_id = ? ` +
652+
`and workflow_id = ? ` +
653+
`and run_id = ? ` +
654+
`and visibility_ts = ? ` +
655+
`and task_id > ? ` +
656+
`and task_id <= ?`
657+
639658
templateGetReplicationTasksQuery = `SELECT replication ` +
640659
`FROM executions ` +
641660
`WHERE shard_id = ? ` +
@@ -674,6 +693,10 @@ workflow_state = ? ` +
674693
`and task_id > ? ` +
675694
`and task_id <= ?`
676695

696+
templateCompleteCrossClusterTaskQuery = templateCompleteTransferTaskQuery
697+
698+
templateRangeCompleteCrossClusterTaskQuery = templateRangeCompleteTransferTaskQuery
699+
677700
templateCompleteReplicationTaskBeforeQuery = `DELETE FROM executions ` +
678701
`WHERE shard_id = ? ` +
679702
`and type = ? ` +
@@ -1663,8 +1686,44 @@ func (d *cassandraPersistence) GetCrossClusterTasks(
16631686
ctx context.Context,
16641687
request *p.GetCrossClusterTasksRequest,
16651688
) (*p.GetCrossClusterTasksResponse, error) {
1666-
// TODO: Implement GetCrossClusterTasks
1667-
panic("not implemented")
1689+
1690+
// Reading cross-cluster tasks needs to be quorum-level consistent, otherwise we could lose tasks
1691+
query := d.session.Query(templateGetCrossClusterTasksQuery,
1692+
d.shardID,
1693+
rowTypeCrossClusterTask,
1694+
rowTypeCrossClusterDomainID,
1695+
request.TargetCluster, // workflowID field is used to store target cluster
1696+
rowTypeCrossClusterRunID,
1697+
defaultVisibilityTimestamp,
1698+
request.ReadLevel,
1699+
request.MaxReadLevel,
1700+
).PageSize(request.BatchSize).PageState(request.NextPageToken).WithContext(ctx)
1701+
1702+
iter := query.Iter()
1703+
if iter == nil {
1704+
return nil, &types.InternalServiceError{
1705+
Message: "GetCrossClusterTasks operation failed. Not able to create query iterator.",
1706+
}
1707+
}
1708+
1709+
response := &p.GetCrossClusterTasksResponse{}
1710+
task := make(map[string]interface{})
1711+
for iter.MapScan(task) {
1712+
t := createCrossClusterTaskInfo(task["cross_cluster"].(map[string]interface{}))
1713+
// Reset task map to get it ready for next scan
1714+
task = make(map[string]interface{})
1715+
1716+
response.Tasks = append(response.Tasks, t)
1717+
}
1718+
nextPageToken := iter.PageState()
1719+
response.NextPageToken = make([]byte, len(nextPageToken))
1720+
copy(response.NextPageToken, nextPageToken)
1721+
1722+
if err := iter.Close(); err != nil {
1723+
return nil, convertCommonErrors(d.client, "GetCrossClusterTasks", err)
1724+
}
1725+
1726+
return response, nil
16681727
}
16691728

16701729
func (d *cassandraPersistence) GetReplicationTasks(
@@ -1766,16 +1825,45 @@ func (d *cassandraPersistence) CompleteCrossClusterTask(
17661825
ctx context.Context,
17671826
request *p.CompleteCrossClusterTaskRequest,
17681827
) error {
1769-
// TODO: Implement CompleteCrossClusterTask
1770-
panic("not implemented")
1828+
query := d.session.Query(templateCompleteCrossClusterTaskQuery,
1829+
d.shardID,
1830+
rowTypeCrossClusterTask,
1831+
rowTypeCrossClusterDomainID,
1832+
request.TargetCluster,
1833+
rowTypeCrossClusterRunID,
1834+
defaultVisibilityTimestamp,
1835+
request.TaskID,
1836+
).WithContext(ctx)
1837+
1838+
err := query.Exec()
1839+
if err != nil {
1840+
return convertCommonErrors(d.client, "CompleteCrossClusterTask", err)
1841+
}
1842+
1843+
return nil
17711844
}
17721845

17731846
func (d *cassandraPersistence) RangeCompleteCrossClusterTask(
17741847
ctx context.Context,
17751848
request *p.RangeCompleteCrossClusterTaskRequest,
17761849
) error {
1777-
// TODO: Implement RangeCompleteCrossClusterTask
1778-
panic("not implemented")
1850+
query := d.session.Query(templateRangeCompleteCrossClusterTaskQuery,
1851+
d.shardID,
1852+
rowTypeCrossClusterTask,
1853+
rowTypeCrossClusterDomainID,
1854+
request.TargetCluster,
1855+
rowTypeCrossClusterRunID,
1856+
defaultVisibilityTimestamp,
1857+
request.ExclusiveBeginTaskID,
1858+
request.InclusiveEndTaskID,
1859+
).WithContext(ctx)
1860+
1861+
err := query.Exec()
1862+
if err != nil {
1863+
return convertCommonErrors(d.client, "RangeCompleteCrossClusterTask", err)
1864+
}
1865+
1866+
return nil
17791867
}
17801868

17811869
func (d *cassandraPersistence) CompleteReplicationTask(

common/persistence/cassandra/cassandraPersistenceUtil.go

+105-2
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,16 @@ func applyTasks(
621621
return err
622622
}
623623

624-
// TODO: create cross-cluster tasks
624+
if err := createCrossClusterTasks(
625+
batch,
626+
crossClusterTasks,
627+
shardID,
628+
domainID,
629+
workflowID,
630+
runID,
631+
); err != nil {
632+
return err
633+
}
625634

626635
if err := createReplicationTasks(
627636
batch,
@@ -707,7 +716,7 @@ func createTransferTasks(
707716

708717
default:
709718
return &types.InternalServiceError{
710-
Message: fmt.Sprintf("Unknow transfer type: %v", task.GetType()),
719+
Message: fmt.Sprintf("Unknown transfer type: %v", task.GetType()),
711720
}
712721
}
713722

@@ -738,6 +747,82 @@ func createTransferTasks(
738747
return nil
739748
}
740749

750+
func createCrossClusterTasks(
751+
batch gocql.Batch,
752+
crossClusterTasks []p.Task,
753+
shardID int,
754+
domainID string,
755+
workflowID string,
756+
runID string,
757+
) error {
758+
759+
for _, task := range crossClusterTasks {
760+
var taskList string
761+
var scheduleID int64
762+
var targetCluster string
763+
var targetDomainID string
764+
var targetWorkflowID string
765+
targetRunID := p.CrossClusterTaskDefaultTargetRunID
766+
targetChildWorkflowOnly := false
767+
recordVisibility := false
768+
769+
switch task.GetType() {
770+
case p.CrossClusterTaskTypeStartChildExecution:
771+
targetCluster = task.(*p.CrossClusterStartChildExecutionTask).TargetCluster
772+
targetDomainID = task.(*p.CrossClusterStartChildExecutionTask).TargetDomainID
773+
targetWorkflowID = task.(*p.CrossClusterStartChildExecutionTask).TargetWorkflowID
774+
scheduleID = task.(*p.CrossClusterStartChildExecutionTask).InitiatedID
775+
776+
case p.CrossClusterTaskTypeCancelExecution:
777+
targetCluster = task.(*p.CrossClusterCancelExecutionTask).TargetCluster
778+
targetDomainID = task.(*p.CrossClusterCancelExecutionTask).TargetDomainID
779+
targetWorkflowID = task.(*p.CrossClusterCancelExecutionTask).TargetWorkflowID
780+
targetRunID = task.(*p.CrossClusterCancelExecutionTask).TargetRunID
781+
targetChildWorkflowOnly = task.(*p.CrossClusterCancelExecutionTask).TargetChildWorkflowOnly
782+
scheduleID = task.(*p.CrossClusterCancelExecutionTask).InitiatedID
783+
784+
case p.CrossClusterTaskTypeSignalExecution:
785+
targetCluster = task.(*p.CrossClusterSignalExecutionTask).TargetCluster
786+
targetDomainID = task.(*p.CrossClusterSignalExecutionTask).TargetDomainID
787+
targetWorkflowID = task.(*p.CrossClusterSignalExecutionTask).TargetWorkflowID
788+
targetRunID = task.(*p.CrossClusterSignalExecutionTask).TargetRunID
789+
targetChildWorkflowOnly = task.(*p.CrossClusterSignalExecutionTask).TargetChildWorkflowOnly
790+
scheduleID = task.(*p.CrossClusterSignalExecutionTask).InitiatedID
791+
792+
default:
793+
return &types.InternalServiceError{
794+
Message: fmt.Sprintf("Unknown cross-cluster task type: %v", task.GetType()),
795+
}
796+
}
797+
798+
batch.Query(templateCreateCrossClusterTaskQuery,
799+
shardID,
800+
rowTypeCrossClusterTask,
801+
rowTypeCrossClusterDomainID,
802+
targetCluster,
803+
rowTypeCrossClusterRunID,
804+
domainID,
805+
workflowID,
806+
runID,
807+
task.GetVisibilityTimestamp(),
808+
task.GetTaskID(),
809+
targetDomainID,
810+
targetWorkflowID,
811+
targetRunID,
812+
targetChildWorkflowOnly,
813+
taskList,
814+
task.GetType(),
815+
scheduleID,
816+
recordVisibility,
817+
task.GetVersion(),
818+
defaultVisibilityTimestamp,
819+
task.GetTaskID(),
820+
)
821+
}
822+
823+
return nil
824+
}
825+
741826
func createReplicationTasks(
742827
batch gocql.Batch,
743828
replicationTasks []p.Task,
@@ -1624,6 +1709,24 @@ func createTransferTaskInfo(
16241709
return info
16251710
}
16261711

1712+
func createCrossClusterTaskInfo(
1713+
result map[string]interface{},
1714+
) *p.CrossClusterTaskInfo {
1715+
info := (*p.CrossClusterTaskInfo)(createTransferTaskInfo(result))
1716+
if p.CrossClusterTaskDefaultTargetRunID == p.TransferTaskTransferTargetRunID {
1717+
return info
1718+
}
1719+
1720+
// in case CrossClusterTaskDefaultTargetRunID is updated and is no longer equal to TransferTaskTransferTargetRunID
1721+
if v, ok := result["target_run_id"]; ok {
1722+
info.TargetRunID = v.(gocql.UUID).String()
1723+
if info.TargetRunID == p.CrossClusterTaskDefaultTargetRunID {
1724+
info.TargetRunID = ""
1725+
}
1726+
}
1727+
return info
1728+
}
1729+
16271730
func createReplicationTaskInfo(
16281731
result map[string]interface{},
16291732
) *p.InternalReplicationTaskInfo {

common/persistence/dataInterfaces.go

+55-1
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,9 @@ const (
196196
// TransferTaskTransferTargetRunID is the dummy run ID for transfer tasks of types
197197
// that do not have a target workflow
198198
TransferTaskTransferTargetRunID = "30000000-0000-f000-f000-000000000002"
199+
// CrossClusterTaskDefaultTargetRunID is the dummy run ID for cross-cluster tasks of types
200+
// that do not have a target workflow
201+
CrossClusterTaskDefaultTargetRunID = TransferTaskTransferTargetRunID
199202

200203
// indicate invalid workflow state transition
201204
invalidStateTransitionMsg = "unable to change workflow state from %v to %v, close status %v"
@@ -1057,11 +1060,13 @@ type (
10571060

10581061
// CompleteCrossClusterTaskRequest is used to complete a task in the cross-cluster task queue
10591062
CompleteCrossClusterTaskRequest struct {
1060-
TaskID int64
1063+
TargetCluster string
1064+
TaskID int64
10611065
}
10621066

10631067
// RangeCompleteCrossClusterTaskRequest is used to complete a range of tasks in the cross-cluster task queue
10641068
RangeCompleteCrossClusterTaskRequest struct {
1069+
TargetCluster string
10651070
ExclusiveBeginTaskID int64
10661071
InclusiveEndTaskID int64
10671072
}
@@ -2612,6 +2617,55 @@ func (t *TimerTaskInfo) String() string {
26122617
)
26132618
}
26142619

2620+
// Copy returns a copy of the ShardInfo.
2621+
func (s *ShardInfo) Copy() *ShardInfo {
2622+
transferFailoverLevels := map[string]TransferFailoverLevel{}
2623+
for k, v := range s.TransferFailoverLevels {
2624+
transferFailoverLevels[k] = v
2625+
}
2626+
timerFailoverLevels := map[string]TimerFailoverLevel{}
2627+
for k, v := range s.TimerFailoverLevels {
2628+
timerFailoverLevels[k] = v
2629+
}
2630+
clusterTransferAckLevel := make(map[string]int64)
2631+
for k, v := range s.ClusterTransferAckLevel {
2632+
clusterTransferAckLevel[k] = v
2633+
}
2634+
clusterTimerAckLevel := make(map[string]time.Time)
2635+
for k, v := range s.ClusterTimerAckLevel {
2636+
clusterTimerAckLevel[k] = v
2637+
}
2638+
clusterReplicationLevel := make(map[string]int64)
2639+
for k, v := range s.ClusterReplicationLevel {
2640+
clusterReplicationLevel[k] = v
2641+
}
2642+
replicationDLQAckLevel := make(map[string]int64)
2643+
for k, v := range s.ReplicationDLQAckLevel {
2644+
replicationDLQAckLevel[k] = v
2645+
}
2646+
return &ShardInfo{
2647+
ShardID: s.ShardID,
2648+
Owner: s.Owner,
2649+
RangeID: s.RangeID,
2650+
StolenSinceRenew: s.StolenSinceRenew,
2651+
ReplicationAckLevel: s.ReplicationAckLevel,
2652+
TransferAckLevel: s.TransferAckLevel,
2653+
TimerAckLevel: s.TimerAckLevel,
2654+
TransferFailoverLevels: transferFailoverLevels,
2655+
TimerFailoverLevels: timerFailoverLevels,
2656+
ClusterTransferAckLevel: clusterTransferAckLevel,
2657+
ClusterTimerAckLevel: clusterTimerAckLevel,
2658+
TransferProcessingQueueStates: s.TransferProcessingQueueStates,
2659+
CrossClusterProcessQueueStates: s.CrossClusterProcessQueueStates,
2660+
TimerProcessingQueueStates: s.TimerProcessingQueueStates,
2661+
DomainNotificationVersion: s.DomainNotificationVersion,
2662+
ClusterReplicationLevel: clusterReplicationLevel,
2663+
ReplicationDLQAckLevel: replicationDLQAckLevel,
2664+
PendingFailoverMarkers: s.PendingFailoverMarkers,
2665+
UpdatedAt: s.UpdatedAt,
2666+
}
2667+
}
2668+
26152669
// SerializeClusterConfigs makes an array of *ClusterReplicationConfig serializable
26162670
// by flattening them into map[string]interface{}
26172671
func SerializeClusterConfigs(replicationConfigs []*ClusterReplicationConfig) []map[string]interface{} {

0 commit comments

Comments
 (0)