Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions terrasafe/application/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,10 @@ def _validate_features(self, features: np.ndarray) -> np.ndarray:
Validated feature array with values clipped to acceptable bounds
"""
# Define acceptable bounds for each feature
# [open_ports, hardcoded_secrets, public_access, unencrypted_storage, total_resources]
min_bounds = np.array([0, 0, 0, 0, 0], dtype=np.int32)
max_bounds = np.array([100, 100, 100, 100, 10000], dtype=np.int32)
# [open_ports, hardcoded_secrets, public_access, unencrypted_storage,
# missing_logging, missing_flow_logs, total_resources]
min_bounds = np.array([0, 0, 0, 0, 0, 0, 0], dtype=np.int32)
max_bounds = np.array([100, 100, 100, 100, 100, 100, 10000], dtype=np.int32)

# Clip features to acceptable ranges
validated = np.clip(features, min_bounds, max_bounds)
Expand All @@ -244,11 +245,11 @@ def _extract_features(self, vulnerabilities: List[Vulnerability]) -> np.ndarray:
vulnerabilities: List of detected vulnerabilities

Returns:
Numpy array of features (shape: 1x5)
Numpy array of features (shape: 1x7)
"""
if not vulnerabilities:
# Return default feature vector for empty vulnerability list
return np.array([[0, 0, 0, 0, 1]], dtype=np.int32)
return np.array([[0, 0, 0, 0, 0, 0, 1]], dtype=np.int32)

# Count unique resources
unique_resources = len(set(v.resource for v in vulnerabilities))
Expand All @@ -270,12 +271,18 @@ def _extract_features(self, vulnerabilities: List[Vulnerability]) -> np.ndarray:

unencrypted_mask = np.char.find(messages, 'unencrypted') >= 0

missing_logging_mask = np.char.find(messages, 'missing logging') >= 0

missing_flow_logs_mask = np.char.find(messages, 'missing vpc flow logs') >= 0

# Count matches using numpy sum (faster than Python loops)
features = np.array([
np.sum(open_ports_mask),
np.sum(hardcoded_mask),
np.sum(public_access_mask),
np.sum(unencrypted_mask),
np.sum(missing_logging_mask),
np.sum(missing_flow_logs_mask),
unique_resources
], dtype=np.int32).reshape(1, -1)

Expand All @@ -288,7 +295,10 @@ def _summarize_vulns(self, vulns: List[Vulnerability]) -> Dict[str, int]:
return summary

def _format_features(self, features: np.ndarray) -> Dict[str, int]:
feature_names = ['open_ports', 'hardcoded_secrets', 'public_access', 'unencrypted_storage', 'total_resources']
feature_names = [
'open_ports', 'hardcoded_secrets', 'public_access', 'unencrypted_storage',
'missing_logging', 'missing_flow_logs', 'total_resources'
]
return {name: int(val) for name, val in zip(feature_names, features[0])}

def _vulnerability_to_dict(self, vuln: Vulnerability) -> Dict[str, Any]:
Expand Down
4 changes: 4 additions & 0 deletions terrasafe/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ class Settings(BaseSettings):
default="models/isolation_forest.pkl",
description="Path to ML model file"
)
severity_overrides: Dict[str, str] = Field(
default={},
description="Override severity for specific rules, e.g. {'missing_logging': 'MEDIUM'}"
)

# Security Configuration
max_file_size_mb: int = Field(
Expand Down
80 changes: 80 additions & 0 deletions terrasafe/domain/security_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
from typing import List, Dict
from .models import Vulnerability, Severity
from ..config.settings import get_settings


# Constants for severity points (Clean Code: No magic numbers)
Expand Down Expand Up @@ -258,6 +259,67 @@ def check_iam_policies(self, tf_content: Dict) -> List[Vulnerability]:

return vulns

def check_missing_logging(self, tf_content: Dict) -> List[Vulnerability]:
    """Flag configurations that declare infrastructure but no audit logging.

    A finding (HIGH) is produced when at least one non-logging resource type
    is declared and neither an aws_cloudtrail nor an aws_cloudwatch_log_group
    resource is present anywhere in the parsed configuration.

    Args:
        tf_content: Parsed Terraform document (as produced by the HCL parser).

    Returns:
        A list containing a single HIGH-severity Vulnerability, or an empty
        list when logging exists or there is nothing to log.
    """
    findings: List[Vulnerability] = []

    if 'resource' not in tf_content:
        return findings

    # Gather every resource type declared across all resource blocks.
    declared_types = set()
    for block in tf_content.get('resource', []):
        declared_types.update(block.keys())

    logging_types = {'aws_cloudtrail', 'aws_cloudwatch_log_group'}
    # Infrastructure worth logging = anything that is not itself a logging resource.
    needs_logging = bool(declared_types - logging_types)
    logging_present = bool(declared_types & logging_types)

    if needs_logging and not logging_present:
        findings.append(Vulnerability(
            severity=Severity.HIGH,
            points=POINTS_HIGH,
            message="[HIGH] Missing logging - no CloudTrail or CloudWatch log group detected",
            resource="Logging",
            remediation="Add aws_cloudtrail or aws_cloudwatch_log_group to enable audit logging"
        ))

    return findings

def check_missing_vpc_flow_logs(self, tf_content: Dict) -> List[Vulnerability]:
    """Flag VPCs that have no traffic logging configured.

    A finding (MEDIUM) is produced when an aws_vpc resource is declared but
    no aws_flow_log resource exists anywhere in the parsed configuration.

    Args:
        tf_content: Parsed Terraform document (as produced by the HCL parser).

    Returns:
        A list containing a single MEDIUM-severity Vulnerability, or an empty
        list when no VPC is declared or flow logs are present.
    """
    findings: List[Vulnerability] = []

    if 'resource' not in tf_content:
        return findings

    # Collect the set of resource types declared in the configuration.
    declared_types = set()
    for block in tf_content.get('resource', []):
        declared_types.update(block.keys())

    # Only a VPC without any flow-log resource is a finding.
    if 'aws_vpc' in declared_types and 'aws_flow_log' not in declared_types:
        findings.append(Vulnerability(
            severity=Severity.MEDIUM,
            points=POINTS_MEDIUM,
            message="[MEDIUM] Missing VPC flow logs - aws_vpc present but no aws_flow_log detected",
            resource="VPC",
            remediation="Add an aws_flow_log resource to enable VPC traffic logging"
        ))

    return findings

def analyze(self, tf_content: Dict, raw_content: str) -> List[Vulnerability]:
"""Run all security checks"""
all_vulns = []
Expand All @@ -268,5 +330,23 @@ def analyze(self, tf_content: Dict, raw_content: str) -> List[Vulnerability]:
all_vulns.extend(self.check_encryption(tf_content))
all_vulns.extend(self.check_public_s3(tf_content))
all_vulns.extend(self.check_iam_policies(tf_content))
all_vulns.extend(self.check_missing_logging(tf_content))
all_vulns.extend(self.check_missing_vpc_flow_logs(tf_content))

# Apply severity overrides from config
overrides = get_settings().severity_overrides
if overrides:
severity_map = {s.value: s for s in Severity}
rule_key_map = {
'missing_logging': '[HIGH] Missing logging',
'missing_flow_logs': '[MEDIUM] Missing VPC flow logs',
}
for vuln in all_vulns:
for rule_name, override_level in overrides.items():
fragment = rule_key_map.get(rule_name)
if fragment and fragment in vuln.message:
new_severity = severity_map.get(override_level.upper())
if new_severity:
vuln.severity = new_severity

return all_vulns
54 changes: 28 additions & 26 deletions terrasafe/infrastructure/ml_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,33 +364,33 @@ def _train_baseline_model(self):
rng = np.random.default_rng(42)

# Enhanced baseline patterns representing secure configurations
# Features: [open_ports, secrets, public_access, unencrypted, resource_count]
# Features: [open_ports, secrets, public_access, unencrypted, missing_logging, missing_flow_logs, resource_count]
baseline_patterns = [
# Fully secure configurations
[0, 0, 0, 0, 5], # Small secure microservice
[0, 0, 0, 0, 10], # Medium secure application
[0, 0, 0, 0, 15], # Large secure infrastructure
[0, 0, 0, 0, 25], # Enterprise secure setup
[0, 0, 0, 0, 3], # Minimal secure Lambda function
[0, 0, 0, 0, 0, 0, 5], # Small secure microservice
[0, 0, 0, 0, 0, 0, 10], # Medium secure application
[0, 0, 0, 0, 0, 0, 15], # Large secure infrastructure
[0, 0, 0, 0, 0, 0, 25], # Enterprise secure setup
[0, 0, 0, 0, 0, 0, 3], # Minimal secure Lambda function

# Web applications (acceptable public exposure)
[1, 0, 0, 0, 8], # Simple web app with HTTP
[2, 0, 0, 0, 12], # Web app with HTTP/HTTPS
[2, 0, 1, 0, 20], # E-commerce with CDN (public S3)
[1, 0, 1, 0, 15], # Static site with S3 hosting
[2, 0, 2, 0, 30], # Multi-region web platform
[1, 0, 0, 0, 0, 0, 8], # Simple web app with HTTP
[2, 0, 0, 0, 0, 0, 12], # Web app with HTTP/HTTPS
[2, 0, 1, 0, 0, 0, 20], # E-commerce with CDN (public S3)
[1, 0, 1, 0, 0, 0, 15], # Static site with S3 hosting
[2, 0, 2, 0, 0, 0, 30], # Multi-region web platform

# Development environments (slightly relaxed)
[1, 0, 0, 1, 6], # Dev env with one unencrypted volume
[2, 0, 0, 1, 10], # Staging with test data
[1, 0, 1, 1, 8], # QA environment
[0, 0, 0, 2, 12], # Test cluster with temp storage
[1, 0, 0, 1, 0, 0, 6], # Dev env with one unencrypted volume
[2, 0, 0, 1, 0, 0, 10], # Staging with test data
[1, 0, 1, 1, 0, 0, 8], # QA environment
[0, 0, 0, 2, 0, 0, 12], # Test cluster with temp storage

# Microservices architectures
[3, 0, 0, 0, 40], # Service mesh with multiple endpoints
[4, 0, 1, 0, 50], # Kubernetes cluster with ingress
[2, 0, 0, 0, 35], # Docker swarm setup
[3, 0, 2, 0, 45], # Multi-service with CDN
[3, 0, 0, 0, 0, 0, 40], # Service mesh with multiple endpoints
[4, 0, 1, 0, 0, 0, 50], # Kubernetes cluster with ingress
[2, 0, 0, 0, 0, 0, 35], # Docker swarm setup
[3, 0, 2, 0, 0, 0, 45], # Multi-service with CDN
]

baseline_features = np.array(baseline_patterns)
Expand All @@ -401,7 +401,7 @@ def _train_baseline_model(self):
# Add noise variations for each pattern
for pattern in baseline_features:
for _ in range(3): # Create 3 variations per pattern
noise = rng.normal(0, 0.15, 5)
noise = rng.normal(0, 0.15, 7)
augmented = pattern + noise
augmented = np.maximum(augmented, 0) # Ensure non-negative
# Round discrete features
Expand All @@ -410,11 +410,11 @@ def _train_baseline_model(self):

# Add edge cases representing acceptable boundaries
edge_cases = np.array([
[5, 0, 0, 0, 60], # Large microservices
[0, 0, 5, 0, 40], # Content delivery network
[3, 0, 3, 2, 50], # Legacy migration
[0, 0, 0, 3, 25], # Development cluster
[6, 0, 2, 0, 70], # API gateway with multiple services
[5, 0, 0, 0, 0, 0, 60], # Large microservices
[0, 0, 5, 0, 0, 0, 40], # Content delivery network
[3, 0, 3, 2, 0, 0, 50], # Legacy migration
[0, 0, 0, 3, 0, 0, 25], # Development cluster
[6, 0, 2, 0, 0, 0, 70], # API gateway with multiple services
])

augmented_data = np.vstack([augmented_data, edge_cases])
Expand Down Expand Up @@ -446,7 +446,9 @@ def _train_baseline_model(self):
'hardcoded_secrets': {'min': int(augmented_data[:, 1].min()), 'max': int(augmented_data[:, 1].max())},
'public_access': {'min': int(augmented_data[:, 2].min()), 'max': int(augmented_data[:, 2].max())},
'unencrypted_storage': {'min': int(augmented_data[:, 3].min()), 'max': int(augmented_data[:, 3].max())},
'total_resources': {'min': int(augmented_data[:, 4].min()), 'max': int(augmented_data[:, 4].max())},
'missing_logging': {'min': int(augmented_data[:, 4].min()), 'max': int(augmented_data[:, 4].max())},
'missing_flow_logs': {'min': int(augmented_data[:, 5].min()), 'max': int(augmented_data[:, 5].max())},
'total_resources': {'min': int(augmented_data[:, 6].min()), 'max': int(augmented_data[:, 6].max())},
},
'model_parameters': {
'contamination': 0.1,
Expand Down
15 changes: 15 additions & 0 deletions test_files/mixed.tf
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,18 @@ variable "db_password" {
type = string
sensitive = true
}

resource "aws_vpc" "app_vpc" {
cidr_block = "10.0.0.0/16" # MEDIUM: No aws_flow_log present
tags = {
Name = "mixed-vpc"
}
}

resource "aws_cloudtrail" "app_trail" {
name = "app-trail"
s3_bucket_name = aws_s3_bucket.app_bucket.bucket
# CloudTrail present — satisfies missing_logging rule
}
# NOTE: aws_cloudtrail present → no missing_logging vuln
# NOTE: no aws_flow_log → triggers missing_vpc_flow_logs rule only
32 changes: 32 additions & 0 deletions test_files/secure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,36 @@ variable "db_password" {
description = "Database password"
type = string
sensitive = true
}

resource "aws_vpc" "main" {
cidr_block = "10.0.0.0/16"
tags = {
Name = "secure-vpc"
}
}

resource "aws_flow_log" "main" {
vpc_id = aws_vpc.main.id
traffic_type = "ALL"
iam_role_arn = var.flow_log_role_arn
log_destination = aws_cloudwatch_log_group.flow_logs.arn
}

resource "aws_cloudwatch_log_group" "flow_logs" {
name = "/aws/vpc/flow-logs"
retention_in_days = 90
}

resource "aws_cloudtrail" "main" {
name = "secure-trail"
s3_bucket_name = aws_s3_bucket.main_bucket.bucket
include_global_service_events = true
is_multi_region_trail = true
enable_log_file_validation = true
}

variable "flow_log_role_arn" {
description = "IAM role ARN for VPC flow logs"
type = string
}
11 changes: 10 additions & 1 deletion test_files/vulnerable.tf
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,13 @@ resource "aws_s3_bucket" "main_bucket" {
tags = {
Environment = "test"
}
}
}

resource "aws_vpc" "main" {
cidr_block = "10.0.0.0/16" # MEDIUM: No aws_flow_log present — VPC traffic is unmonitored
tags = {
Name = "vulnerable-vpc"
}
}
# NOTE: no aws_cloudtrail, no aws_cloudwatch_log_group → triggers missing_logging rule
# NOTE: no aws_flow_log → triggers missing_vpc_flow_logs rule
Loading