Skip to content

Commit 3eb6faf

Browse files
authored
Merge pull request #10 from oguarni/cloud-rules-7d-ml
feat(security): add CloudWatch logging and VPC flow log rules, expand…
2 parents 21d830b + b65cad4 commit 3eb6faf

9 files changed

Lines changed: 397 additions & 52 deletions

File tree

terrasafe/application/scanner.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,10 @@ def _validate_features(self, features: np.ndarray) -> np.ndarray:
219219
Validated feature array with values clipped to acceptable bounds
220220
"""
221221
# Define acceptable bounds for each feature
222-
# [open_ports, hardcoded_secrets, public_access, unencrypted_storage, total_resources]
223-
min_bounds = np.array([0, 0, 0, 0, 0], dtype=np.int32)
224-
max_bounds = np.array([100, 100, 100, 100, 10000], dtype=np.int32)
222+
# [open_ports, hardcoded_secrets, public_access, unencrypted_storage,
223+
# missing_logging, missing_flow_logs, total_resources]
224+
min_bounds = np.array([0, 0, 0, 0, 0, 0, 0], dtype=np.int32)
225+
max_bounds = np.array([100, 100, 100, 100, 100, 100, 10000], dtype=np.int32)
225226

226227
# Clip features to acceptable ranges
227228
validated = np.clip(features, min_bounds, max_bounds)
@@ -244,11 +245,11 @@ def _extract_features(self, vulnerabilities: List[Vulnerability]) -> np.ndarray:
244245
vulnerabilities: List of detected vulnerabilities
245246
246247
Returns:
247-
Numpy array of features (shape: 1x5)
248+
Numpy array of features (shape: 1x7)
248249
"""
249250
if not vulnerabilities:
250251
# Return default feature vector for empty vulnerability list
251-
return np.array([[0, 0, 0, 0, 1]], dtype=np.int32)
252+
return np.array([[0, 0, 0, 0, 0, 0, 1]], dtype=np.int32)
252253

253254
# Count unique resources
254255
unique_resources = len(set(v.resource for v in vulnerabilities))
@@ -270,12 +271,18 @@ def _extract_features(self, vulnerabilities: List[Vulnerability]) -> np.ndarray:
270271

271272
unencrypted_mask = np.char.find(messages, 'unencrypted') >= 0
272273

274+
missing_logging_mask = np.char.find(messages, 'missing logging') >= 0
275+
276+
missing_flow_logs_mask = np.char.find(messages, 'missing vpc flow logs') >= 0
277+
273278
# Count matches using numpy sum (faster than Python loops)
274279
features = np.array([
275280
np.sum(open_ports_mask),
276281
np.sum(hardcoded_mask),
277282
np.sum(public_access_mask),
278283
np.sum(unencrypted_mask),
284+
np.sum(missing_logging_mask),
285+
np.sum(missing_flow_logs_mask),
279286
unique_resources
280287
], dtype=np.int32).reshape(1, -1)
281288

@@ -288,7 +295,10 @@ def _summarize_vulns(self, vulns: List[Vulnerability]) -> Dict[str, int]:
288295
return summary
289296

290297
def _format_features(self, features: np.ndarray) -> Dict[str, int]:
291-
feature_names = ['open_ports', 'hardcoded_secrets', 'public_access', 'unencrypted_storage', 'total_resources']
298+
feature_names = [
299+
'open_ports', 'hardcoded_secrets', 'public_access', 'unencrypted_storage',
300+
'missing_logging', 'missing_flow_logs', 'total_resources'
301+
]
292302
return {name: int(val) for name, val in zip(feature_names, features[0])}
293303

294304
def _vulnerability_to_dict(self, vuln: Vulnerability) -> Dict[str, Any]:

terrasafe/config/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ class Settings(BaseSettings):
6767
default="models/isolation_forest.pkl",
6868
description="Path to ML model file"
6969
)
70+
severity_overrides: Dict[str, str] = Field(
71+
default={},
72+
description="Override severity for specific rules, e.g. {'missing_logging': 'MEDIUM'}"
73+
)
7074

7175
# Security Configuration
7276
max_file_size_mb: int = Field(

terrasafe/domain/security_rules.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import re
33
from typing import List, Dict
44
from .models import Vulnerability, Severity
5+
from ..config.settings import get_settings
56

67

78
# Constants for severity points (Clean Code: No magic numbers)
@@ -258,6 +259,67 @@ def check_iam_policies(self, tf_content: Dict) -> List[Vulnerability]:
258259

259260
return vulns
260261

262+
def check_missing_logging(self, tf_content: Dict) -> List[Vulnerability]:
    """Flag configurations that declare infrastructure but no audit logging.

    A single HIGH finding is produced when at least one resource type other
    than ``aws_cloudtrail`` / ``aws_cloudwatch_log_group`` is declared and
    neither of those two logging resource types is present.

    Args:
        tf_content: Parsed Terraform document. ``resource`` is normally a
            list of ``{resource_type: {...}}`` blocks; a single mapping keyed
            by resource type (the Terraform JSON shape) is accepted as well.

    Returns:
        A list containing zero or one ``Vulnerability``.
    """
    logging_types = {'aws_cloudtrail', 'aws_cloudwatch_log_group'}

    # A missing or null ``resource`` entry means there is nothing to audit.
    resources = tf_content.get('resource') or []
    if isinstance(resources, dict):
        # One mapping instead of a list of blocks: treat it as a single block
        # rather than iterating its keys and failing on ``str.keys()``.
        resources = [resources]

    all_resource_types = set()
    for resource_block in resources:
        # Ignore malformed entries instead of raising AttributeError.
        if isinstance(resource_block, dict):
            all_resource_types.update(resource_block)

    # Only flag if there are infrastructure resources to log
    has_infra = bool(all_resource_types - logging_types)
    has_logging = not all_resource_types.isdisjoint(logging_types)
    if not has_infra or has_logging:
        return []

    return [Vulnerability(
        severity=Severity.HIGH,
        points=POINTS_HIGH,
        message="[HIGH] Missing logging - no CloudTrail or CloudWatch log group detected",
        resource="Logging",
        remediation="Add aws_cloudtrail or aws_cloudwatch_log_group to enable audit logging"
    )]
293+
294+
def check_missing_vpc_flow_logs(self, tf_content: Dict) -> List[Vulnerability]:
    """Flag configurations that declare a VPC but no VPC flow log.

    A single MEDIUM finding is produced when an ``aws_vpc`` resource type is
    declared and no ``aws_flow_log`` resource type is present. The check is
    per document, not per VPC: one ``aws_flow_log`` anywhere satisfies it.

    Args:
        tf_content: Parsed Terraform document. ``resource`` is normally a
            list of ``{resource_type: {...}}`` blocks; a single mapping keyed
            by resource type (the Terraform JSON shape) is accepted as well.

    Returns:
        A list containing zero or one ``Vulnerability``.
    """
    # A missing or null ``resource`` entry means there is no VPC to check.
    resources = tf_content.get('resource') or []
    if isinstance(resources, dict):
        # One mapping instead of a list of blocks: treat it as a single block
        # rather than iterating its keys and failing on ``str.keys()``.
        resources = [resources]

    all_resource_types = set()
    for resource_block in resources:
        # Ignore malformed entries instead of raising AttributeError.
        if isinstance(resource_block, dict):
            all_resource_types.update(resource_block)

    if 'aws_vpc' not in all_resource_types or 'aws_flow_log' in all_resource_types:
        return []

    return [Vulnerability(
        severity=Severity.MEDIUM,
        points=POINTS_MEDIUM,
        message="[MEDIUM] Missing VPC flow logs - aws_vpc present but no aws_flow_log detected",
        resource="VPC",
        remediation="Add an aws_flow_log resource to enable VPC traffic logging"
    )]
322+
261323
def analyze(self, tf_content: Dict, raw_content: str) -> List[Vulnerability]:
262324
"""Run all security checks"""
263325
all_vulns = []
@@ -268,5 +330,23 @@ def analyze(self, tf_content: Dict, raw_content: str) -> List[Vulnerability]:
268330
all_vulns.extend(self.check_encryption(tf_content))
269331
all_vulns.extend(self.check_public_s3(tf_content))
270332
all_vulns.extend(self.check_iam_policies(tf_content))
333+
all_vulns.extend(self.check_missing_logging(tf_content))
334+
all_vulns.extend(self.check_missing_vpc_flow_logs(tf_content))
335+
336+
# Apply severity overrides from config
337+
overrides = get_settings().severity_overrides
338+
if overrides:
339+
severity_map = {s.value: s for s in Severity}
340+
rule_key_map = {
341+
'missing_logging': '[HIGH] Missing logging',
342+
'missing_flow_logs': '[MEDIUM] Missing VPC flow logs',
343+
}
344+
for vuln in all_vulns:
345+
for rule_name, override_level in overrides.items():
346+
fragment = rule_key_map.get(rule_name)
347+
if fragment and fragment in vuln.message:
348+
new_severity = severity_map.get(override_level.upper())
349+
if new_severity:
350+
vuln.severity = new_severity
271351

272352
return all_vulns

terrasafe/infrastructure/ml_model.py

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -364,33 +364,33 @@ def _train_baseline_model(self):
364364
rng = np.random.default_rng(42)
365365

366366
# Enhanced baseline patterns representing secure configurations
367-
# Features: [open_ports, secrets, public_access, unencrypted, resource_count]
367+
# Features: [open_ports, secrets, public_access, unencrypted, missing_logging, missing_flow_logs, resource_count]
368368
baseline_patterns = [
369369
# Fully secure configurations
370-
[0, 0, 0, 0, 5], # Small secure microservice
371-
[0, 0, 0, 0, 10], # Medium secure application
372-
[0, 0, 0, 0, 15], # Large secure infrastructure
373-
[0, 0, 0, 0, 25], # Enterprise secure setup
374-
[0, 0, 0, 0, 3], # Minimal secure Lambda function
370+
[0, 0, 0, 0, 0, 0, 5], # Small secure microservice
371+
[0, 0, 0, 0, 0, 0, 10], # Medium secure application
372+
[0, 0, 0, 0, 0, 0, 15], # Large secure infrastructure
373+
[0, 0, 0, 0, 0, 0, 25], # Enterprise secure setup
374+
[0, 0, 0, 0, 0, 0, 3], # Minimal secure Lambda function
375375

376376
# Web applications (acceptable public exposure)
377-
[1, 0, 0, 0, 8], # Simple web app with HTTP
378-
[2, 0, 0, 0, 12], # Web app with HTTP/HTTPS
379-
[2, 0, 1, 0, 20], # E-commerce with CDN (public S3)
380-
[1, 0, 1, 0, 15], # Static site with S3 hosting
381-
[2, 0, 2, 0, 30], # Multi-region web platform
377+
[1, 0, 0, 0, 0, 0, 8], # Simple web app with HTTP
378+
[2, 0, 0, 0, 0, 0, 12], # Web app with HTTP/HTTPS
379+
[2, 0, 1, 0, 0, 0, 20], # E-commerce with CDN (public S3)
380+
[1, 0, 1, 0, 0, 0, 15], # Static site with S3 hosting
381+
[2, 0, 2, 0, 0, 0, 30], # Multi-region web platform
382382

383383
# Development environments (slightly relaxed)
384-
[1, 0, 0, 1, 6], # Dev env with one unencrypted volume
385-
[2, 0, 0, 1, 10], # Staging with test data
386-
[1, 0, 1, 1, 8], # QA environment
387-
[0, 0, 0, 2, 12], # Test cluster with temp storage
384+
[1, 0, 0, 1, 0, 0, 6], # Dev env with one unencrypted volume
385+
[2, 0, 0, 1, 0, 0, 10], # Staging with test data
386+
[1, 0, 1, 1, 0, 0, 8], # QA environment
387+
[0, 0, 0, 2, 0, 0, 12], # Test cluster with temp storage
388388

389389
# Microservices architectures
390-
[3, 0, 0, 0, 40], # Service mesh with multiple endpoints
391-
[4, 0, 1, 0, 50], # Kubernetes cluster with ingress
392-
[2, 0, 0, 0, 35], # Docker swarm setup
393-
[3, 0, 2, 0, 45], # Multi-service with CDN
390+
[3, 0, 0, 0, 0, 0, 40], # Service mesh with multiple endpoints
391+
[4, 0, 1, 0, 0, 0, 50], # Kubernetes cluster with ingress
392+
[2, 0, 0, 0, 0, 0, 35], # Docker swarm setup
393+
[3, 0, 2, 0, 0, 0, 45], # Multi-service with CDN
394394
]
395395

396396
baseline_features = np.array(baseline_patterns)
@@ -401,7 +401,7 @@ def _train_baseline_model(self):
401401
# Add noise variations for each pattern
402402
for pattern in baseline_features:
403403
for _ in range(3): # Create 3 variations per pattern
404-
noise = rng.normal(0, 0.15, 5)
404+
noise = rng.normal(0, 0.15, 7)
405405
augmented = pattern + noise
406406
augmented = np.maximum(augmented, 0) # Ensure non-negative
407407
# Round discrete features
@@ -410,11 +410,11 @@ def _train_baseline_model(self):
410410

411411
# Add edge cases representing acceptable boundaries
412412
edge_cases = np.array([
413-
[5, 0, 0, 0, 60], # Large microservices
414-
[0, 0, 5, 0, 40], # Content delivery network
415-
[3, 0, 3, 2, 50], # Legacy migration
416-
[0, 0, 0, 3, 25], # Development cluster
417-
[6, 0, 2, 0, 70], # API gateway with multiple services
413+
[5, 0, 0, 0, 0, 0, 60], # Large microservices
414+
[0, 0, 5, 0, 0, 0, 40], # Content delivery network
415+
[3, 0, 3, 2, 0, 0, 50], # Legacy migration
416+
[0, 0, 0, 3, 0, 0, 25], # Development cluster
417+
[6, 0, 2, 0, 0, 0, 70], # API gateway with multiple services
418418
])
419419

420420
augmented_data = np.vstack([augmented_data, edge_cases])
@@ -446,7 +446,9 @@ def _train_baseline_model(self):
446446
'hardcoded_secrets': {'min': int(augmented_data[:, 1].min()), 'max': int(augmented_data[:, 1].max())},
447447
'public_access': {'min': int(augmented_data[:, 2].min()), 'max': int(augmented_data[:, 2].max())},
448448
'unencrypted_storage': {'min': int(augmented_data[:, 3].min()), 'max': int(augmented_data[:, 3].max())},
449-
'total_resources': {'min': int(augmented_data[:, 4].min()), 'max': int(augmented_data[:, 4].max())},
449+
'missing_logging': {'min': int(augmented_data[:, 4].min()), 'max': int(augmented_data[:, 4].max())},
450+
'missing_flow_logs': {'min': int(augmented_data[:, 5].min()), 'max': int(augmented_data[:, 5].max())},
451+
'total_resources': {'min': int(augmented_data[:, 6].min()), 'max': int(augmented_data[:, 6].max())},
450452
},
451453
'model_parameters': {
452454
'contamination': 0.1,

test_files/mixed.tf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,18 @@ variable "db_password" {
6363
type = string
6464
sensitive = true
6565
}
66+
67+
resource "aws_vpc" "app_vpc" {
68+
cidr_block = "10.0.0.0/16" # MEDIUM: No aws_flow_log present
69+
tags = {
70+
Name = "mixed-vpc"
71+
}
72+
}
73+
74+
resource "aws_cloudtrail" "app_trail" {
75+
name = "app-trail"
76+
s3_bucket_name = aws_s3_bucket.app_bucket.bucket
77+
# CloudTrail present — satisfies missing_logging rule
78+
}
79+
# NOTE: aws_cloudtrail present → no missing_logging vuln
80+
# NOTE: no aws_flow_log → triggers missing_vpc_flow_logs rule only

test_files/secure.tf

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,36 @@ variable "db_password" {
7272
description = "Database password"
7373
type = string
7474
sensitive = true
75+
}
76+
77+
resource "aws_vpc" "main" {
78+
cidr_block = "10.0.0.0/16"
79+
tags = {
80+
Name = "secure-vpc"
81+
}
82+
}
83+
84+
resource "aws_flow_log" "main" {
85+
vpc_id = aws_vpc.main.id
86+
traffic_type = "ALL"
87+
iam_role_arn = var.flow_log_role_arn
88+
log_destination = aws_cloudwatch_log_group.flow_logs.arn
89+
}
90+
91+
resource "aws_cloudwatch_log_group" "flow_logs" {
92+
name = "/aws/vpc/flow-logs"
93+
retention_in_days = 90
94+
}
95+
96+
resource "aws_cloudtrail" "main" {
97+
name = "secure-trail"
98+
s3_bucket_name = aws_s3_bucket.main_bucket.bucket
99+
include_global_service_events = true
100+
is_multi_region_trail = true
101+
enable_log_file_validation = true
102+
}
103+
104+
variable "flow_log_role_arn" {
105+
description = "IAM role ARN for VPC flow logs"
106+
type = string
75107
}

test_files/vulnerable.tf

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,13 @@ resource "aws_s3_bucket" "main_bucket" {
5959
tags = {
6060
Environment = "test"
6161
}
62-
}
62+
}
63+
64+
resource "aws_vpc" "main" {
65+
cidr_block = "10.0.0.0/16" # MEDIUM: No aws_flow_log present — VPC traffic is unmonitored
66+
tags = {
67+
Name = "vulnerable-vpc"
68+
}
69+
}
70+
# NOTE: no aws_cloudtrail, no aws_cloudwatch_log_group → triggers missing_logging rule
71+
# NOTE: no aws_flow_log → triggers missing_vpc_flow_logs rule

0 commit comments

Comments
 (0)