@@ -364,33 +364,33 @@ def _train_baseline_model(self):
364364 rng = np .random .default_rng (42 )
365365
366366 # Enhanced baseline patterns representing secure configurations
367- # Features: [open_ports, secrets, public_access, unencrypted, resource_count]
367+ # Features: [open_ports, secrets, public_access, unencrypted, missing_logging, missing_flow_logs, resource_count]
368368 baseline_patterns = [
369369 # Fully secure configurations
370- [0 , 0 , 0 , 0 , 5 ], # Small secure microservice
371- [0 , 0 , 0 , 0 , 10 ], # Medium secure application
372- [0 , 0 , 0 , 0 , 15 ], # Large secure infrastructure
373- [0 , 0 , 0 , 0 , 25 ], # Enterprise secure setup
374- [0 , 0 , 0 , 0 , 3 ], # Minimal secure Lambda function
370+ [0 , 0 , 0 , 0 , 0 , 0 , 5 ], # Small secure microservice
371+ [0 , 0 , 0 , 0 , 0 , 0 , 10 ], # Medium secure application
372+ [0 , 0 , 0 , 0 , 0 , 0 , 15 ], # Large secure infrastructure
373+ [0 , 0 , 0 , 0 , 0 , 0 , 25 ], # Enterprise secure setup
374+ [0 , 0 , 0 , 0 , 0 , 0 , 3 ], # Minimal secure Lambda function
375375
376376 # Web applications (acceptable public exposure)
377- [1 , 0 , 0 , 0 , 8 ], # Simple web app with HTTP
378- [2 , 0 , 0 , 0 , 12 ], # Web app with HTTP/HTTPS
379- [2 , 0 , 1 , 0 , 20 ], # E-commerce with CDN (public S3)
380- [1 , 0 , 1 , 0 , 15 ], # Static site with S3 hosting
381- [2 , 0 , 2 , 0 , 30 ], # Multi-region web platform
377+ [1 , 0 , 0 , 0 , 0 , 0 , 8 ], # Simple web app with HTTP
378+ [2 , 0 , 0 , 0 , 0 , 0 , 12 ], # Web app with HTTP/HTTPS
379+ [2 , 0 , 1 , 0 , 0 , 0 , 20 ], # E-commerce with CDN (public S3)
380+ [1 , 0 , 1 , 0 , 0 , 0 , 15 ], # Static site with S3 hosting
381+ [2 , 0 , 2 , 0 , 0 , 0 , 30 ], # Multi-region web platform
382382
383383 # Development environments (slightly relaxed)
384- [1 , 0 , 0 , 1 , 6 ], # Dev env with one unencrypted volume
385- [2 , 0 , 0 , 1 , 10 ], # Staging with test data
386- [1 , 0 , 1 , 1 , 8 ], # QA environment
387- [0 , 0 , 0 , 2 , 12 ], # Test cluster with temp storage
384+ [1 , 0 , 0 , 1 , 0 , 0 , 6 ], # Dev env with one unencrypted volume
385+ [2 , 0 , 0 , 1 , 0 , 0 , 10 ], # Staging with test data
386+ [1 , 0 , 1 , 1 , 0 , 0 , 8 ], # QA environment
387+ [0 , 0 , 0 , 2 , 0 , 0 , 12 ], # Test cluster with temp storage
388388
389389 # Microservices architectures
390- [3 , 0 , 0 , 0 , 40 ], # Service mesh with multiple endpoints
391- [4 , 0 , 1 , 0 , 50 ], # Kubernetes cluster with ingress
392- [2 , 0 , 0 , 0 , 35 ], # Docker swarm setup
393- [3 , 0 , 2 , 0 , 45 ], # Multi-service with CDN
390+ [3 , 0 , 0 , 0 , 0 , 0 , 40 ], # Service mesh with multiple endpoints
391+ [4 , 0 , 1 , 0 , 0 , 0 , 50 ], # Kubernetes cluster with ingress
392+ [2 , 0 , 0 , 0 , 0 , 0 , 35 ], # Docker swarm setup
393+ [3 , 0 , 2 , 0 , 0 , 0 , 45 ], # Multi-service with CDN
394394 ]
395395
396396 baseline_features = np .array (baseline_patterns )
@@ -401,7 +401,7 @@ def _train_baseline_model(self):
401401 # Add noise variations for each pattern
402402 for pattern in baseline_features :
403403 for _ in range (3 ): # Create 3 variations per pattern
404- noise = rng .normal (0 , 0.15 , 5 )
404+ noise = rng .normal (0 , 0.15 , 7 )
405405 augmented = pattern + noise
406406 augmented = np .maximum (augmented , 0 ) # Ensure non-negative
407407 # Round discrete features
@@ -410,11 +410,11 @@ def _train_baseline_model(self):
410410
411411 # Add edge cases representing acceptable boundaries
412412 edge_cases = np .array ([
413- [5 , 0 , 0 , 0 , 60 ], # Large microservices
414- [0 , 0 , 5 , 0 , 40 ], # Content delivery network
415- [3 , 0 , 3 , 2 , 50 ], # Legacy migration
416- [0 , 0 , 0 , 3 , 25 ], # Development cluster
417- [6 , 0 , 2 , 0 , 70 ], # API gateway with multiple services
413+ [5 , 0 , 0 , 0 , 0 , 0 , 60 ], # Large microservices
414+ [0 , 0 , 5 , 0 , 0 , 0 , 40 ], # Content delivery network
415+ [3 , 0 , 3 , 2 , 0 , 0 , 50 ], # Legacy migration
416+ [0 , 0 , 0 , 3 , 0 , 0 , 25 ], # Development cluster
417+ [6 , 0 , 2 , 0 , 0 , 0 , 70 ], # API gateway with multiple services
418418 ])
419419
420420 augmented_data = np .vstack ([augmented_data , edge_cases ])
@@ -446,7 +446,9 @@ def _train_baseline_model(self):
446446 'hardcoded_secrets' : {'min' : int (augmented_data [:, 1 ].min ()), 'max' : int (augmented_data [:, 1 ].max ())},
447447 'public_access' : {'min' : int (augmented_data [:, 2 ].min ()), 'max' : int (augmented_data [:, 2 ].max ())},
448448 'unencrypted_storage' : {'min' : int (augmented_data [:, 3 ].min ()), 'max' : int (augmented_data [:, 3 ].max ())},
449- 'total_resources' : {'min' : int (augmented_data [:, 4 ].min ()), 'max' : int (augmented_data [:, 4 ].max ())},
449+ 'missing_logging' : {'min' : int (augmented_data [:, 4 ].min ()), 'max' : int (augmented_data [:, 4 ].max ())},
450+ 'missing_flow_logs' : {'min' : int (augmented_data [:, 5 ].min ()), 'max' : int (augmented_data [:, 5 ].max ())},
451+ 'total_resources' : {'min' : int (augmented_data [:, 6 ].min ()), 'max' : int (augmented_data [:, 6 ].max ())},
450452 },
451453 'model_parameters' : {
452454 'contamination' : 0.1 ,
0 commit comments