-
Notifications
You must be signed in to change notification settings - Fork 193
/
github-for-jira.sd.yml
825 lines (783 loc) · 30.6 KB
/
github-for-jira.sd.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
# Service descriptor metadata: build number, display name, description and owning organization.
buildNumber: 0.0.0
name: Github For Jira
description: Connect app for integrating GitHub into Jira
organization: Infrastructure Services - Open Toolchain
notifications:
# NOTE(review): "[email protected]" looks like an obfuscation placeholder rather than a real address;
# as written the value starts with "[" — restore the original notification address.
email: [email protected]
tags:
brahmos: enabled
# Container definitions: the service image itself plus the pgbouncer and cryptor images it links to.
compose:
  pgbouncer:
    image: docker.atl-paas.net/sox/micros/pgbouncer
    tag: 1.15.0
    # NOTE(review): presumably the name of the postgres-db resource ("database") — confirm.
    command:
      - "database"
  microservice:
    image: ${DOCKER_IMAGE_NAME}
    tag: ${DOCKER_IMAGE_TAG}
    ports:
      # Quoted so the host:container pair is always read as a string, never as a number.
      - "8080:8080"
    links:
      - pgbouncer
      - cryptor
    depends_on:
      - cryptor
  cryptor:
    image: docker.atl-paas.net/sox/cryptor-sidecar-application
    tag: 1.1-stable-release
# Data classifications declared for the compute side of the service.
computeClassification:
  dataType:
    # name of GitHub org / Jira site
    - UGC/Label
    # name of GitHub org
    - PII/IndirectConfidential
    # data about the installation of the GitHub app into Jira sites
    - UGC/Configuration
    # shared Connect secret
    - Security/Secret
    # references to GitHub entities (commits, pull requests, etc.) and Jira issues
    - UGC/PrimaryIdentifier
    # GitHub entities (non-persistent) like commits, pull requests, etc.
    - UGC/Primary
# Health-check URIs and the source repository of the service, followed by the
# httpRedirect and cleanup switches.
links:
healthcheck:
uri: healthcheck
deepcheck:
uri: deepcheck
source:
# NOTE(review): the user@host part of this clone URL has been replaced by an "[email protected]"
# placeholder; as written the value starts with "[" and will not be read as a plain string.
# Restore the real SSH address (host not recoverable from this file).
url: [email protected]:atlassian/github-jira-integration.git
httpRedirect: true
cleanup: false
# Service proxy settings: ingress authentication, egress dependencies and the auth plugin chain.
serviceProxy:
enabled: true
ingress:
authentication:
enabled: true
egress:
dependencies:
# NOTE(review): the same brahmos-wiremock dependency is repeated in the prod override —
# confirm a wiremock dependency is intended for production.
- name: brahmos-wiremock
plugins:
auth:
logLevel: info
# Authentication plugin types accepted: asap and slauthtoken.
authentication:
plugins:
- type: asap
- type: slauthtoken
# Authorization is handled by the poco plugin type.
authorization:
plugins:
- type: poco
# Lifecycle event settings for instances of this service.
lifecycleEvents:
  # Seconds that pass between the termination message and the actual termination of the instance.
  timeout: 30
  source: queue
# Resources declared for the service: gateway, Redis cache, SQS queues, Postgres (RDS + database),
# an S3 bucket for heap dumps and two DynamoDB tables.
resources:
  # Needed for "atlas slauth curl" calls, see
  # https://hello.atlassian.net/wiki/spaces/MICROS/pages/1463433257/Network+Segmentation+for+Micros+EC2+compute#Add-a-slauth-gateway-resource
  - name: ingress
    type: slauth-gateway

  - name: cache
    type: redisx
    attributes:
      version: 6.x
      size: 500
      clusterModeEnabled: false
      transitEncryptionEnabled: true
      dataType:
        # name of GitHub org / Jira site
        - UGC/Label
        # name of GitHub org
        - PII/IndirectConfidential
        # references to GitHub entities (commits, pull requests, etc.) and Jira issues
        - UGC/PrimaryIdentifier

  - name: backfill
    type: sqs
    attributes:
      MaxReceiveCount: 3
      MessageRetentionPeriod: 1209600
      # The SQS listener overrides the visibility timeout when it reads a message from the queue;
      # it is set here anyway in case that override fails.
      VisibilityTimeout: 602
      dataType:
        # name of GitHub org
        - UGC/Label
        # name of GitHub org
        - PII/IndirectConfidential

  - name: push
    type: sqs
    attributes:
      MaxReceiveCount: 5
      MessageRetentionPeriod: 1209600
      # The SQS listener overrides the visibility timeout when it reads a message from the queue;
      # it is set here anyway in case that override fails.
      VisibilityTimeout: 62
      dataType:
        # name of GitHub org
        - UGC/Label
        # name of GitHub org
        - PII/IndirectConfidential
        # references to GitHub entities (commits, pull requests, etc.) and Jira issues
        - UGC/PrimaryIdentifier

  - name: deployment
    type: sqs
    attributes:
      MaxReceiveCount: 5
      MessageRetentionPeriod: 1209600
      # The SQS listener overrides the visibility timeout when it reads a message from the queue;
      # it is set here anyway in case that override fails.
      VisibilityTimeout: 62
      dataType:
        # name of GitHub org, URL/name of Jira site
        - UGC/Label
        # name of GitHub org
        - PII/IndirectConfidential
        # references to GitHub entities (commits, pull requests, etc.) and Jira issues
        - UGC/PrimaryIdentifier

  - name: branch
    type: sqs
    attributes:
      MaxReceiveCount: 5
      MessageRetentionPeriod: 1209600
      # The SQS listener overrides the visibility timeout when it reads a message from the queue;
      # it is set here anyway in case that override fails.
      VisibilityTimeout: 62
      dataType:
        # name of GitHub org, URL/name of Jira site
        - UGC/Label
        # name of GitHub org
        - PII/IndirectConfidential
        # references to GitHub entities (commits, pull requests, etc.) and Jira issues
        - UGC/PrimaryIdentifier

  - name: rds
    type: dedicated-rds
    attributes:
      dataType:
        # name of GitHub org / Jira site
        - UGC/Label
        # name of GitHub org
        - PII/IndirectConfidential
      parameters:
        DBType: postgres1211
        CustomParameters:
          log_min_duration_statement: 5000

  - name: database
    type: postgres-db
    attributes:
      # Keep in sync with PGBOUNCER_DEFAULT_POOL_SIZE.
      # ddev and staging use a scaling policy different from prod: up to 5 Worker and up to
      # 5 WebServer nodes. With all nodes running there can therefore be up to
      # "PGBOUNCER_DEFAULT_POOL_SIZE * 10" connections.
      connectionLimit: 150
      dataType:
        # name of GitHub org / Jira site
        - UGC/Label
        # name of GitHub org
        - PII/IndirectConfidential
        # data about the installation of the GitHub app into Jira sites
        - UGC/Configuration
        # shared Connect secret
        - Security/Secret
      dedicatedRds:
        # Name of the service that owns the RDS
        service: github-for-jira
        # Name of the RDS resource declared above
        resource: rds

  # For heapdumps; temporary resource, to be deleted once the investigation is over.
  - name: 'dumps'
    type: s3
    attributes:
      dataType:
        # name of GitHub org / Jira site
        - UGC/Label
        # name of GitHub org
        - PII/IndirectConfidential
        # data about the installation of the GitHub app into Jira sites
        - UGC/Configuration
        # shared Connect secret
        - Security/Secret
        # references to GitHub entities (commits, pull requests, etc.) and Jira issues
        - UGC/PrimaryIdentifier
        # GitHub entities (non-persistent) like commits, pull requests, etc.
        - UGC/Primary

  # The attributes of both tables are anchored so environment overrides can merge them back in.
  - name: deployment-history-cache
    type: dynamo-db
    attributes: &table-attributes
      HashKeyName: Id
      HashKeyType: 'S'
      RangeKeyName: CreatedAt
      RangeKeyType: 'N'
      ReadWriteCapacityMode: ON_DEMAND
      TTLAttributeName: ExpiredAfter
      dataType:
        # Sha of a commit that points to the user's code
        - UGC/PrimaryIdentifier

  - name: audit-log
    type: dynamo-db
    attributes: &audit-log-table-attributes
      HashKeyName: Id
      HashKeyType: 'S'
      RangeKeyName: CreatedAt
      RangeKeyType: 'N'
      ReadWriteCapacityMode: ON_DEMAND
      TTLAttributeName: ExpiredAfter
      dataType:
        # Sha of a commit that points to the user's code
        - UGC/PrimaryIdentifier
# Default scaling settings. The metrics rule is anchored as CpuMemScalingRules so that
# environment overrides can alias it.
scaling:
  instance: t2.small
  min: 1
  max: 5
  metrics: &CpuMemScalingRules
    complexScalingRule:
      EvaluationPeriods: 1
      Threshold:
        Lower: 30
        Upper: 80
      Metrics:
        # Combined series: the larger of the CPU and memory readings defined below.
        - Id: 'combinedCpuRam'
          Label: 'Scaling based on CPU and Memory consumption'
          Expression: 'MAX([cpuUsage, memoryUsage])'
        # Input series for the expression above; marked ReturnData: false.
        - Id: cpuUsage
          ReturnData: false
          MetricStat:
            Metric:
              # Dimensions can be ignored here due to defaulting
              Namespace: AWS/EC2
              MetricName: CPUUtilization
            Period: 300
            Stat: Maximum
        - Id: memoryUsage
          ReturnData: false
          MetricStat:
            Metric:
              # Dimensions can be ignored here due to defaulting
              Namespace: System/Linux
              MetricName: MemoryUtilization
            Period: 300
            Stat: Maximum
# Alarm overrides applied by default: one memory alert, four 5xx alarms set to null and
# adjusted unhealthy-host thresholds.
alarms:
  overrides:
    MemoryConsumptionAlert:
      Description: 'Memory utilization is more than 90%'
      Namespace: System/Linux
      MetricName: MemoryUtilization
      Statistic: Average
      ComparisonOperator: GreaterThanThreshold
      Threshold: 90
      Period: 120
      EvaluationPeriods: 5
      Priority: Low

    # NOTE(review): a null override presumably disables the corresponding default alarm — confirm.
    HighSeverityAlarmWhenTooManyBackend5xxErrors: null
    HighSeverityAlarmWhenTooManyELB5xxErrors: null
    LowSeverityAlarmWhenTooManyBackend5xxErrors: null
    LowSeverityAlarmWhenTooManyELB5xxErrors: null

    UnHealthyHostCount:
      Threshold: 2
      Period: 60
      EvaluationPeriods: 4
    ManyUnHealthyHostCount:
      Threshold: 3
      Period: 60
      EvaluationPeriods: 8
# Environment variables shared by all environments; the environment overrides add to or
# replace individual entries.
config:
  environmentVariables:
    NODE_ENV: production
    NODE_OPTIONS: "--no-deprecation"
    LOG_LEVEL: info
    PORT: "8080"
    CONCURRENT_WORKERS: "40"
    PG_DATABASE_BOUNCER: pgbouncer
    GIT_COMMIT_SHA: ${GIT_COMMIT_SHA}
    GIT_COMMIT_DATE: ${GIT_COMMIT_DATE}
    GIT_BRANCH_NAME: ${GIT_BRANCH_NAME}
    DEPLOYMENT_DATE: ${DEPLOYMENT_DATE}
    # The defaults point at the "-stg" vault secrets; the ddev and prod overrides replace them.
    PRIVATE_KEY: vault://secret/data/builds/micros-sv--github-for-jira-dl-admins/github-app-private-key-stg
    GITHUB_CLIENT_SECRET: vault://secret/data/builds/micros-sv--github-for-jira-dl-admins/github-app-client-secret-stg
    WEBHOOK_SECRETS: vault://secret/data/builds/micros-sv--github-for-jira-dl-admins/github-app-webhook-secrets-stg
    COOKIE_SESSION_KEY: vault://secret/data/builds/micros-sv--github-for-jira-dl-admins/github-app-cookie-session-key-stg
    CRYPTOR_URL: http://cryptor:26272
    # https://developer.atlassian.com/platform/cryptor/integration/integrating-sidecar/#enabling-ssrf-protection
    CRYPTOR_SIDECAR_CLIENT_IDENTIFICATION_CHALLENGE: "6CF9E6A52167B58CBB0DED180CC8B848"
    # These secret environment variables need to be stashed with "atlas micros stash" for each environment:
    #   STORAGE_SECRET: secret generated by running openssl rand -hex 32
    #   SENTRY_DSN: client key required to connect to Sentry
    #   WEBHOOK_SECRET: the webhook secret configured in the GitHub app
    PGBOUNCER_POOL_MODE: "session"
    # Max scale up:
    #   (30 web servers + 15 workers) * 15 pool size = 675 connections (see the postgres-db connection limit)
    # Normal business:
    #   (5 web servers + 3 workers) * 15 pool size = 120 connections
    PGBOUNCER_DEFAULT_POOL_SIZE: "15"
    PGBOUNCER_SERVER_IDLE_TIMEOUT: "60"
    PGBOUNCER_MAX_CLIENT_CONN: "1000"
# Load balancer settings: a single ALB with a slow-start value of 180.
loadBalancer:
  slowStart: 180
  single: true
  type: ALB
# Worker node group. Its scaling rule is anchored as CpuMemAndQueuesScalingRules so that
# environment overrides can alias it.
workers:
  - name: Worker
    scaling:
      instance: t2.small
      min: 1
      max: 5
      metrics: &CpuMemAndQueuesScalingRules
        complexScalingRule:
          EvaluationPeriods: 3
          Threshold:
            # Scale down if the max value from queues, cpu and memory is below 30
            # for 3 consecutive periods of 60 seconds
            Lower: 30
            # Scale up if the max value from queues, cpu and memory is over 80
            # for 3 consecutive periods of 60 seconds
            Upper: 80
          Metrics:
            # Combined series: CPU, memory and each queue depth scaled by its divisor
            # (250 for push/branch/deployment, 10 for backfill) times 100.
            - Id: 'combinedCpuRamAndQueues'
              Label: 'Scaling based on Queue Sizes and CPU and Memory consumption'
              Expression: 'MAX([cpuUsage, memoryUsage, (pushQueueMessages/250)*100, (backfillQueueMessages/10)*100, (branchQueueMessages/250)*100, (deploymentQueueMessages/250)*100])'

            # Input series for the expression above; all marked ReturnData: false.
            - Id: cpuUsage
              ReturnData: false
              MetricStat:
                Metric:
                  # Dimensions can be ignored here due to defaulting
                  Namespace: AWS/EC2
                  MetricName: CPUUtilization
                Period: 60
                Stat: Maximum
            - Id: memoryUsage
              ReturnData: false
              MetricStat:
                Metric:
                  # Dimensions can be ignored here due to defaulting
                  Namespace: System/Linux
                  MetricName: MemoryUtilization
                Period: 60
                Stat: Maximum
            - Id: pushQueueMessages
              ReturnData: false
              MetricStat:
                Metric:
                  Namespace: AWS/SQS
                  MetricName: ApproximateNumberOfMessagesVisible
                  Dimensions:
                    - LogicalQueueName: "push"
                Period: 60
                Stat: Maximum
            - Id: backfillQueueMessages
              ReturnData: false
              MetricStat:
                Metric:
                  Namespace: AWS/SQS
                  MetricName: ApproximateNumberOfMessagesVisible
                  Dimensions:
                    - LogicalQueueName: "backfill"
                Period: 60
                Stat: Maximum
            - Id: branchQueueMessages
              ReturnData: false
              MetricStat:
                Metric:
                  Namespace: AWS/SQS
                  MetricName: ApproximateNumberOfMessagesVisible
                  Dimensions:
                    - LogicalQueueName: "branch"
                Period: 60
                Stat: Maximum
            - Id: deploymentQueueMessages
              ReturnData: false
              MetricStat:
                Metric:
                  Namespace: AWS/SQS
                  MetricName: ApproximateNumberOfMessagesVisible
                  Dimensions:
                    - LogicalQueueName: "deployment"
                Period: 60
                Stat: Maximum
environmentOverrides:
ddev:
# Uncomment the commented-out block directly below (loadBalancer / compose / links) if you want remote debugging in ddev
# loadBalancer:
# type: ELB # needed for remote debugging, default is "ALB"
# compose:
# microservice:
# ports:
# - 8080:8080
# - 5005:5005 # remote debugging port (has to be 5005 because that is hard coded into Micros)
# links:
# remoteDebug: true
config:
environmentVariables:
APP_URL: https://github-for-jira.dev.services.atlassian.com
WEBHOOK_PROXY_URL: https://github-for-jira.dev.services.atlassian.com
APP_KEY: com.github.integration.development
NODE_OPTIONS: "--no-deprecation"
LOG_LEVEL: debug
SENTRY_ENVIRONMENT: ddev
APP_ID: '124403'
GITHUB_CLIENT_ID: Iv1.600bf90a20f1ab18
PRIVATE_KEY: vault://secret/data/builds/micros-sv--github-for-jira-dl-admins/github-app-private-key-ddev
GITHUB_CLIENT_SECRET: vault://secret/data/builds/micros-sv--github-for-jira-dl-admins/github-app-client-secret-ddev
WEBHOOK_SECRETS: vault://secret/data/builds/micros-sv--github-for-jira-dl-admins/github-app-webhook-secrets-ddev
COOKIE_SESSION_KEY: vault://secret/data/builds/micros-sv--github-for-jira-dl-admins/github-app-cookie-session-key-ddev
scaling:
instance: t2.small
min: 1
max: 1
metrics: *CpuMemScalingRules
workers:
- name: Worker
scaling:
instance: t2.small
min: 1
max: 5
metrics: *CpuMemAndQueuesScalingRules
resources:
- type: globaledge
name: proxy
attributes:
default_vanity_dns: false
domain:
- github.dev.atlassian.com
ip_whitelist:
- public
routes: &blackholeSpammingIPs
- match: # Blackhole IP that keeps spamming us
prefix: /
external_address_header_match:
- 94.156.174.137
route:
cluster: blackhole # Deny by sending traffic to blackhole
- &cryptorGithubServerAppSecrets
name: github-server-app-secrets
type: cryptor
attributes:
parameters:
encryptingServices:
- github-for-jira
decryptingServices:
- github-for-jira
- &cryptorJiraInstanceSecrets
name: jira-instance-secrets
type: cryptor
attributes:
parameters:
encryptingServices:
- github-for-jira
decryptingServices:
- github-for-jira
- name: deployment-history-cache
type: dynamo-db
attributes:
<<: *table-attributes
- name: audit-log
type: dynamo-db
attributes:
<<: *audit-log-table-attributes
alarms:
overrides:
LatencyHigh: null
HealthyHostCount: null
UnHealthyHostCount: null
WebServerAlarmWhenLowCPUCredits: null
WebServerDiskSpaceUtilizationAlarmHigh: null
WebServerInstanceVolumeSpaceUtilizationAlarmHigh: null
WebServerMemoryAlarmHigh: null
WebServerServiceRespawnAlarm: null
staging:
config:
environmentVariables:
APP_URL: https://github.stg.atlassian.com
WEBHOOK_PROXY_URL: https://github.stg.atlassian.com
APP_KEY: com.github.integration.staging
LOG_LEVEL: debug
SENTRY_ENVIRONMENT: stg-west
APP_ID: '12645'
GITHUB_CLIENT_ID: Iv1.2d8e2a184a746aec
scaling:
instance: c5.large
min: 1
max: 5
metrics: &CpuMemAlbScalingRules
complexScalingRule:
EvaluationPeriods: 1
Threshold:
Lower: 30
Upper: 80
Metrics:
- Expression: "MAX([(responseTime/5)*100, cpuUsage, memoryUsage])"
Id: "combinedCpuRamAndLatency"
Label: "Scaling based on CPU and Memory consumption or response time"
- MetricStat:
Metric:
MetricName: CPUUtilization
Namespace: AWS/EC2
Period: 300
Stat: Maximum
Id: cpuUsage
ReturnData: false
- MetricStat:
Metric:
MetricName: MemoryUtilization
Namespace: System/Linux
Period: 300
Stat: Maximum
Id: memoryUsage
ReturnData: false
- MetricStat:
Metric:
Dimensions:
- Name: LoadBalancer
Value: { "Fn::GetAtt": ["EnvironmentStack", "LoadBalancerName"] }
- Name: TargetGroup
Value: { "Fn::GetAtt": [ "ALBTargetGroup", "TargetGroupFullName" ] }
MetricName: TargetResponseTime
Namespace: AWS/ApplicationELB
Period: 300
Stat: Average
Id: responseTime
ReturnData: false
alarms:
overrides:
LatencyHigh: null
#TODO Uncomment when we stop doing hourly deployments
# ElbResponseTimeAlert:
# Namespace: AWS/ApplicationELB
# MetricName: TargetResponseTime
# Description: "Response latency is too high. Runbook: https://hello.atlassian.net/wiki/spaces/PF/pages/1283532004/HOWTO+Investigate+High+CPU+Memory+or+Latency+Alarms"
# Dimensions:
# - Name: LoadBalancer
# Value: { "Fn::GetAtt": ["EnvironmentStack", "LoadBalancerName"] }
# - Name: TargetGroup
# Value: { "Fn::GetAtt": [ "ALBTargetGroup", "TargetGroupFullName" ] }
# Threshold: 5
# Priority: Low
# EvaluationPeriods: 5
# Period: 180
# ComparisonOperator: GreaterThanThreshold
# Statistic: Average
workers:
- name: Worker
scaling:
instance: t3.medium
min: 1
max: 5
metrics: *CpuMemAndQueuesScalingRules
resources:
- name: rds
type: dedicated-rds
attributes:
parameters:
DBInstanceClass: db.t2.medium
# Adding read replica in staging as it's not enabled by default so we can read state
ReadReplica: true
- name: deployment-history-cache
type: dynamo-db
attributes:
<<: *table-attributes
- name: audit-log
type: dynamo-db
attributes:
<<: *audit-log-table-attributes
- type: globaledge
name: proxy
attributes:
default_vanity_dns: false
domain:
- github.stg.atlassian.com
ip_whitelist:
- public
routes: *blackholeSpammingIPs
- *cryptorGithubServerAppSecrets
- *cryptorJiraInstanceSecrets
prod:
serviceProxy:
egress:
dependencies:
- name: brahmos-wiremock
config:
environmentVariables:
APP_URL: https://github.atlassian.com
WEBHOOK_PROXY_URL: https://github.atlassian.com
APP_KEY: com.github.integration.production
SENTRY_ENVIRONMENT: prod-west
APP_ID: '14320'
GITHUB_CLIENT_ID: Iv1.45aafbb099e1c1d7
PRIVATE_KEY: vault://secret/data/builds/micros-sv--github-for-jira-dl-vault-compliant/github-app-private-key
GITHUB_CLIENT_SECRET: vault://secret/data/builds/micros-sv--github-for-jira-dl-vault-compliant/github-app-client-secret
WEBHOOK_SECRETS: vault://secret/data/builds/micros-sv--github-for-jira-dl-vault-compliant/github-app-webhook-secrets
COOKIE_SESSION_KEY: vault://secret/data/builds/micros-sv--github-for-jira-dl-vault-compliant/github-app-cookie-session-key
CRYPTOR_SIDECAR_CLIENT_IDENTIFICATION_CHALLENGE: "D92A2D7364AC3057D2A90BA9512D8CA0"
scaling:
instance: c5.2xlarge
min: 15
max: 30 # keep in sync with PGBOUNCER_DEFAULT_POOL_SIZE
metrics: *CpuMemAlbScalingRules
workers:
- name: Worker
scaling:
instance: c5.2xlarge
min: 5
max: 15 # keep in sync with PGBOUNCER_DEFAULT_POOL_SIZE
metrics: *CpuMemAndQueuesScalingRules
alarms:
overrides:
LatencyHigh: null
#TODO Uncomment when we stop doing hourly deployments
# ElbResponseTimeAlert:
# Namespace: AWS/ApplicationELB
# MetricName: TargetResponseTime
# Description: "Response latency is too high. Runbook: https://hello.atlassian.net/wiki/spaces/PF/pages/1283532004/HOWTO+Investigate+High+CPU+Memory+or+Latency+Alarms"
# Dimensions:
# - Name: LoadBalancer
# Value: { "Fn::GetAtt": ["EnvironmentStack", "LoadBalancerName"] }
# - Name: TargetGroup
# Value: { "Fn::GetAtt": [ "ALBTargetGroup", "TargetGroupFullName" ] }
# Threshold: 5
# Priority: Low
# EvaluationPeriods: 5
# Period: 180
# ComparisonOperator: GreaterThanThreshold
# Statistic: Average
resources:
- type: redisx
name: cache
attributes:
size: 5000
alarms:
AlarmOnNumConnections:
MetricName: CurrConnections
Description: "The number of client connections is too high. Please follow the runbook: https://hello.atlassian.net/wiki/spaces/PF/pages/1453195358/HOWTO+Investigate+Redis+Issues"
Threshold: 10000
EvaluationPeriods: 3
Period: 120
Priority: Low
ComparisonOperator: GreaterThanThreshold
Statistic: Maximum
Unit: Count
- name: database
type: postgres-db
attributes:
connectionLimit: 675 # keep in sync with PGBOUNCER_DEFAULT_POOL_SIZE.
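# Should be set to the maximum number of connections the nodes can use, plus some headroom in case we have to scale further manually: PGBOUNCER_DEFAULT_POOL_SIZE*N*1.5
# NOTE(review): with PGBOUNCER_DEFAULT_POOL_SIZE "15" and N = 45 (30 web servers + 15 workers) this formula gives ~1012, not 675 — confirm which value is intended.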
- name: rds
type: dedicated-rds
attributes:
parameters:
DBInstanceClass: db.r5.4xlarge
AllocatedStorage: 40 # GB
MaxAllocatedStorage: 100
ConnectionAlarm: 900 # keep in sync with PGBOUNCER_DEFAULT_POOL_SIZE * 2 * N (because two stacks can run
# side by side during a deploy)
TransactionLogsDiskUsageAlarm: 4000000000 # approximately 4GB
- name: deployment-history-cache
type: dynamo-db
attributes:
<<: *table-attributes
- name: audit-log
type: dynamo-db
attributes:
<<: *audit-log-table-attributes
- type: globaledge
name: proxy
attributes:
default_vanity_dns: false
domain:
- github.atlassian.com
ip_whitelist:
- public
routes: *blackholeSpammingIPs
- type: sqs
name: backfill
alarms:
AlarmOnTooManyMessages:
MetricName: ApproximateNumberOfMessagesVisible
Namespace: AWS/SQS
Description: "Message count for backfill queue too high! Follow the runbook: https://hello.atlassian.net/wiki/spaces/PF/pages/1550488861/HOWTO+Respond+to+Too+Many+Messages+on+the+Queue+Alert"
Threshold: 3000 #~ Autoscaling Threshold x 5. So we'll be alerted if autoscaling failed to fix the piling messages
Priority: High
Dimensions:
- Name: QueueName
Value: { "Ref": "QueueName" }
EvaluationPeriods: 6
Period: 600
ComparisonOperator: GreaterThanThreshold
Statistic: Maximum
DLQAlarmOnTooManyMessages:
MetricName: ApproximateNumberOfMessagesVisible
Namespace: AWS/SQS
Description: "Message count for backfill dead letter queue too high! Please follow the runbook: https://hello.atlassian.net/wiki/spaces/OTFS/pages/2314414972/HOWTO+Analyse+and+Replay+the+DLQ"
Threshold: 50
Priority: Low
Dimensions:
- Name: QueueName
Value: { "Ref": "DLQueueName" }
EvaluationPeriods: 5
Period: 300
ComparisonOperator: GreaterThanOrEqualToThreshold
Statistic: Maximum
- type: sqs
name: push
alarms:
AlarmOnTooManyMessages:
MetricName: ApproximateNumberOfMessagesVisible
Namespace: AWS/SQS
Description: "Message count for push queue too high! Follow the runbook: https://hello.atlassian.net/wiki/spaces/PF/pages/1550488861/HOWTO+Respond+to+Too+Many+Messages+on+the+Queue+Alert"
Threshold: 500 #~ Autoscaling Threshold x 2. So we'll be alerted if autoscaling failed to fix the piling messages
Priority: High
Dimensions:
- Name: QueueName
Value: { "Ref": "QueueName" }
EvaluationPeriods: 5
Period: 300
ComparisonOperator: GreaterThanThreshold
Statistic: Maximum
DLQAlarmOnTooManyMessages:
MetricName: ApproximateNumberOfMessagesVisible
Namespace: AWS/SQS
Description: "Message count for push dead letter queue too high! Please follow the runbook: https://hello.atlassian.net/wiki/spaces/OTFS/pages/2314414972/HOWTO+Analyse+and+Replay+the+DLQ"
Threshold: 100
Priority: Low
Dimensions:
- Name: QueueName
Value: { "Ref": "DLQueueName" }
EvaluationPeriods: 5
Period: 300
ComparisonOperator: GreaterThanOrEqualToThreshold
Statistic: Maximum
- type: sqs
name: deployment
alarms:
AlarmOnTooManyMessages:
MetricName: ApproximateNumberOfMessagesVisible
Namespace: AWS/SQS
Description: "Message count for deployment queue too high! Follow the runbook: https://hello.atlassian.net/wiki/spaces/PF/pages/1550488861/HOWTO+Respond+to+Too+Many+Messages+on+the+Queue+Alert"
Threshold: 500 #~ Autoscaling Threshold x 2. So we'll be alerted if autoscaling failed to fix the piling messages
Priority: Low
Dimensions:
- Name: QueueName
Value: { "Ref": "QueueName" }
EvaluationPeriods: 5
Period: 300
ComparisonOperator: GreaterThanThreshold
Statistic: Maximum
DLQAlarmOnTooManyMessages:
MetricName: ApproximateNumberOfMessagesVisible
Namespace: AWS/SQS
Description: "Message count for deployment dead letter queue too high! Please follow the runbook: https://hello.atlassian.net/wiki/spaces/OTFS/pages/2314414972/HOWTO+Analyse+and+Replay+the+DLQ"
Threshold: 5000 # number is initially high until we start bringing numbers down
Priority: Low
Dimensions:
- Name: QueueName
Value: { "Ref": "DLQueueName" }
EvaluationPeriods: 5
Period: 300
ComparisonOperator: GreaterThanOrEqualToThreshold
Statistic: Maximum
- type: sqs
name: branch
alarms:
AlarmOnTooManyMessages:
MetricName: ApproximateNumberOfMessagesVisible
Namespace: AWS/SQS
Description: "Message count for branch queue too high! Follow the runbook: https://hello.atlassian.net/wiki/spaces/PF/pages/1550488861/HOWTO+Respond+to+Too+Many+Messages+on+the+Queue+Alert"
Threshold: 500 #~ Autoscaling Threshold x 2. So we'll be alerted if autoscaling failed to fix the piling messages
Priority: High
Dimensions:
- Name: QueueName
Value: { "Ref": "QueueName" }
EvaluationPeriods: 5
Period: 300
ComparisonOperator: GreaterThanThreshold
Statistic: Maximum
DLQAlarmOnTooManyMessages:
MetricName: ApproximateNumberOfMessagesVisible
Namespace: AWS/SQS
Description: "Message count for branch dead letter queue too high! Please follow the runbook: https://hello.atlassian.net/wiki/spaces/OTFS/pages/2314414972/HOWTO+Analyse+and+Replay+the+DLQ"
Threshold: 1000 # number is initially high until we start bringing numbers down
Priority: Low
Dimensions:
- Name: QueueName
Value: { "Ref": "DLQueueName" }
EvaluationPeriods: 5
Period: 300
ComparisonOperator: GreaterThanOrEqualToThreshold
Statistic: Maximum
- name: github-server-app-secrets
type: cryptor
attributes:
parameters:
encryptingServices:
- github-for-jira
decryptingServices:
- github-for-jira
- name: jira-instance-secrets
type: cryptor
attributes:
parameters:
encryptingServices:
- github-for-jira
decryptingServices:
- github-for-jira
- type: lambda
name: auto-deployment-github-app
attributes:
runtime: nodejs18.x
prefetchMicrosEnvVars: true
artifact: "_sox/github-for-jira/auto-deployment-github-app-${BITBUCKET_BUILD_NUMBER}.zip"
handler: auto-deployment.handler
private: true
concurrentExecutions: 2
timeout: 60
scheduledRules:
- name: 'PipelinesExecutionForGH4J'
# Cron expression: run every 10 minutes
expression: 'cron(0/10 * * * ? *)'
dataType:
- Atlassian/Configuration