Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/aws_cloudtrail_otel/changelog.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# newer versions go on top
- version: "0.2.0"
changes:
- description: Add alerting rule templates
type: enhancement
link: https://github.com/elastic/integrations/pull/16750
- version: "0.1.0"
changes:
- description: Initial draft of the AWS CloudTrail Logs OpenTelemetry Assets package
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
  "id": "aws-cloudtrail-otel-high-error-rate",
  "type": "alerting_rule_template",
  "attributes": {
    "name": "[AWS CloudTrail OTEL] High error rate",
    "tags": [
      "AWS CloudTrail Logs OpenTelemetry Assets"
    ],
    "ruleTypeId": ".es-query",
    "schedule": { "interval": "5m" },
    "params": {
      "searchType": "esqlQuery",
      "timeWindowSize": 10,
      "timeWindowUnit": "m",
      "esqlQuery": {
        "esql": "// Alert triggers when any source IP address whose critical error count exceed a threshold (e.g. > 5 in 10 minutes)\n// You can adjust the threshold value in WHERE clause as needed.\nFROM logs-aws.cloudtrail.otel-default| WHERE aws.error.code IN (\"InvalidClientTokenId\",\"SignatureDoesNotMatch\",\"InvalidAccessKeyId\",\"ExpiredToken\",\"InvalidToken\",\"InvalidPassword\",\"Failed authentication\",\"UnrecognizedClientException\",\"AccessDenied\",\"AccessDeniedException\",\"UnauthorizedOperation\")| STATS error_count = COUNT(*) BY source.address| WHERE error_count > 5"
      },
      "groupBy": "row",
      "timeField": "@timestamp"
    },
    "alertDelay": { "active": 1 }
  },
  "managed": true,
  "coreMigrationVersion": "8.8.0",
  "typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"id": "aws-cloudtrail-otel-high-resource-deletion",
"type": "alerting_rule_template",
"attributes": {
"name": "[AWS CloudTrail OTEL] High resource deletion",
"tags": ["AWS CloudTrail Logs OpenTelemetry Assets"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "5m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 10,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be 15m to match description

"timeWindowUnit": "m",
"esqlQuery": {
"esql": "// Alert triggers when any source IP address whose delete requests exceed a threshold (e.g. >= 5 in 10 minutes)\n// You can adjust the threshold value in WHERE clause as needed.\nFROM logs-aws.cloudtrail.otel-default | WHERE aws.error.code IS NULL | WHERE rpc.method IN (\"TerminateInstances\",\"DeleteBucket\",\"DeleteDBInstance\",\"DeleteFunction\",\"DeleteVolume\",\"DeleteSnapshot\") | STATS deletion_count = COUNT(*) BY source.address | WHERE deletion_count >= 5"
},
"groupBy": "row",
"timeField": "@timestamp"
},
"alertDelay": {
"active": 1
}
},
"managed": true,
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"id": "aws-cloudtrail-otel-high-risk-actions-succeeded",
"type": "alerting_rule_template",
"attributes": {
"name": "[AWS CloudTrail OTEL] High-risk actions succeeded",
"tags": ["AWS CloudTrail Logs OpenTelemetry Assets"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "5m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 10,
"timeWindowUnit": "m",
"esqlQuery": {
"esql": "// Alert triggers when any high risk actions succeeded within a given threshold time from a single user or IP\nFROM logs-aws.cloudtrail.otel-default | WHERE rpc.method IN (\"StopLogging\", \"DeleteTrail\", \"UpdateTrail\", \"AttachUserPolicy\", \"AttachRolePolicy\", \"PutUserPolicy\", \"PutRolePolicy\", \"CreateAccessKey\", \"CreateUser\", \"CreateLoginProfile\", \"DisableKey\", \"ScheduleKeyDeletion\", \"DeleteBucket\", \"PutBucketPolicy\", \"PutBucketLogging\", \"DeleteDetector\", \"DeleteMembers\", \"DisassociateFromMasterAccount\", \"DeleteFlowLogs\", \"DeleteAlarms\", \"DeleteConfigRule\", \"DeleteEventBusRule\") AND aws.error.code IS NULL | STATS action_count = COUNT(*), actions = VALUES(rpc.method), ips = VALUES(source.address) BY aws.principal.arn, user.name | WHERE action_count>1"
},
"groupBy": "row",
"timeField": "@timestamp"
},
"alertDelay": {
"active": 1
}
},
"managed": true,
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"id": "aws-cloudtrail-otel-multiple-failed-login-attempts",
"type": "alerting_rule_template",
"attributes": {
"name": "[AWS CloudTrail OTEL] Multiple failed login attempts",
"tags": ["AWS CloudTrail Logs OpenTelemetry Assets"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "5m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 10,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does 10m seem like a long time period for detecting failed login attempts?

"timeWindowUnit": "m",
"esqlQuery": {
"esql": "// Alert triggers when any source IP address whose failed console login attempts exceed a threshold (e.g. >= 100 in 10 minutes)\n// The time window is controlled by the rule's timeWindowSize/timeWindowUnit parameters.\n// You can adjust the threshold value in WHERE clause as needed.\nFROM logs-aws.cloudtrail.otel-default | WHERE rpc.method == \"ConsoleLogin\" | WHERE aws.error.code IS NOT NULL | STATS failed_count = COUNT(*), users_tried = VALUES(user.name) BY source.address | WHERE failed_count >= 100 | SORT failed_count DESC"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need the VALUES agg here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i had a few concerns about this rule and did a sanity check by asking chatgpt for some feedback. it has a lot of concerns about this rule.

did we get an LLM to thoroughly review all the queries here?

i don't know if the concerns are valid, but i just want to check we have considered feedback like this.

please DM me for the detail i got from GPT, but the summary was:

Primary concerns:

  • Threshold is orders of magnitude too high

  • Failure signal is weak

  • Missing service scoping

  • Detection intent is unclear

As written, this alert will almost certainly never fire for real attacks, while giving a false sense of coverage.

},
"groupBy": "row",
"timeField": "@timestamp"
},
"alertDelay": {
"active": 1
}
},
"managed": true,
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
2 changes: 1 addition & 1 deletion packages/aws_cloudtrail_otel/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
format_version: 3.5.0
name: aws_cloudtrail_otel
title: "AWS CloudTrail Logs OpenTelemetry Assets"
version: 0.1.0
version: 0.2.0
source:
license: "Elastic-2.0"
description: "AWS CloudTrail Logs OpenTelemetry Assets"
Expand Down
5 changes: 5 additions & 0 deletions packages/aws_elb_otel/changelog.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# newer versions go on top
- version: "0.2.0"
changes:
- description: Add alerting rule templates
type: enhancement
link: https://github.com/elastic/integrations/pull/16750
- version: "0.1.1"
changes:
- description: Add "Alternative setup using awss3receiver" section to README
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
  "id": "aws-elb-otel-application-errors",
  "type": "alerting_rule_template",
  "attributes": {
    "name": "[AWS ELB OTEL] Application errors",
    "tags": ["AWS Elb Logs OpenTelemetry Assets"],
    "ruleTypeId": ".es-query",
    "schedule": { "interval": "5m" },
    "params": {
      "searchType": "esqlQuery",
      "timeWindowSize": 10,
      "timeWindowUnit": "m",
      "esqlQuery": {
        "esql": "// Alert triggers when any client resource.id whose error count exceed a threshold (e.g. 50 in 10 minutes)\n// You can adjust the threshold value in WHERE clause as needed.\nFROM logs-aws.elbaccess.otel-default | WHERE aws.elb.status.code >= 400| STATS error_count = COUNT(*) BY cloud.resource_id | WHERE error_count >= 50"
      },
      "groupBy": "row",
      "timeField": "@timestamp"
    },
    "alertDelay": { "active": 1 }
  },
  "managed": true,
  "coreMigrationVersion": "8.8.0",
  "typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
  "id": "aws-elb-otel-backend-errors",
  "type": "alerting_rule_template",
  "attributes": {
    "name": "[AWS ELB OTEL] Backend errors",
    "tags": ["AWS Elb Logs OpenTelemetry Assets"],
    "ruleTypeId": ".es-query",
    "schedule": {
      "interval": "5m"
    },
    "params": {
      "searchType": "esqlQuery",
      "timeWindowSize": 10,
      "timeWindowUnit": "m",
      "esqlQuery": {
        "esql": "// Alert triggers when any resource.id whose backend service error count exceed a threshold (e.g. >= 50 in 10 minutes)\n// You can adjust the threshold value in WHERE clause as needed.\nFROM logs-aws.elbaccess.otel-default | WHERE aws.elb.backend.status.code >= 500| STATS backend_error_count = COUNT(*) BY cloud.resource_id | WHERE backend_error_count >= 50"
      },
      "groupBy": "row",
      "timeField": "@timestamp"
    },
    "alertDelay": {
      "active": 1
    }
  },
  "managed": true,
  "coreMigrationVersion": "8.8.0",
  "typeMigrationVersion": "10.1.0"
}
2 changes: 1 addition & 1 deletion packages/aws_elb_otel/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
format_version: 3.5.0
name: aws_elb_otel
title: "AWS ELB OpenTelemetry Assets"
version: 0.1.1
version: 0.2.0
source:
license: "Elastic-2.0"
description: "AWS ELB logs for OpenTelemetry Collector"
Expand Down
5 changes: 5 additions & 0 deletions packages/aws_vpcflow_otel/changelog.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# newer versions go on top
- version: "0.2.0"
changes:
- description: Add alerting rule templates
type: enhancement
link: https://github.com/elastic/integrations/pull/16750
- version: "0.1.1"
changes:
- description: Add "Alternative setup using awss3receiver" section to README
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"id": "aws-vpcflow-otel-high-data-transfer-rate",
"type": "alerting_rule_template",
"attributes": {
"name": "[AWS VPC OTEL] High data transfer rate",
"tags": ["AWS VPC Logs OpenTelemetry Assets"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this isn't a good tag name

we should have tags for 'aws', 'vpc' (and possibly 'otel'?)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(same for all other rules in this PR)

"ruleTypeId": ".es-query",
"schedule": {
"interval": "5m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 10,
"timeWindowUnit": "m",
"esqlQuery": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't think we need to include WHERE @timestamp > NOW()- 10m - it's handled by the timeWindowSize param in the rule.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(same for all other rules in this PR)

"esql": "// Alert triggers when any source whose bytes exceed a threshold (e.g. > 50GB in 10 minutes)\n// You can adjust the threshold value in WHERE clause as needed.\nFROM logs-aws.vpcflow.otel-default| WHERE aws.vpc.flow.action == \"ACCEPT\"| STATS total_bytes = SUM(aws.vpc.flow.bytes) BY network.interface.name| WHERE total_bytes > 53687091200"
},
"groupBy": "row",
"timeField": "@timestamp"
},
"alertDelay": {
"active": 1
}
},
"managed": true,
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"id": "aws-vpcflow-otel-high-reject-actions",
"type": "alerting_rule_template",
"attributes": {
"name": "[AWS VPC OTEL] High reject actions",
"tags": ["AWS VPC Logs OpenTelemetry Assets"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "5m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 10,
"timeWindowUnit": "m",
"esqlQuery": {
"esql": "// Alert triggers when any source whose reject requests exceed a threshold (e.g. > 100 in 10 minutes)\n// You can adjust the threshold value in WHERE clause as needed.\nFROM logs-aws.vpcflow.otel-default| WHERE aws.vpc.flow.action == \"REJECT\"| STATS reject_count = COUNT(*) BY network.interface.name| WHERE reject_count > 100"
},
"groupBy": "row",
"timeField": "@timestamp"
},
"alertDelay": {
"active": 1
}
},
"managed": true,
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
2 changes: 1 addition & 1 deletion packages/aws_vpcflow_otel/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
format_version: 3.5.0
name: aws_vpcflow_otel
title: "AWS VPC Flow Logs OpenTelemetry Assets"
version: 0.1.1
version: 0.2.0
source:
license: "Elastic-2.0"
description: "AWS VPC Flow Logs OpenTelemetry Assets"
Expand Down