diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index 48a7753832..7373464dfd 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -7,6 +7,8 @@ All notable changes to the **Prowler API** are documented in this file. ### Added - `/api/v1/overviews/compliance-watchlist` to retrieve the compliance watchlist [(#9596)](https://github.com/prowler-cloud/prowler/pull/9596) - Support AlibabaCloud provider [(#9485)](https://github.com/prowler-cloud/prowler/pull/9485) +- `/api/v1/overviews/resource-groups` to retrieve an overview of the resource groups based on finding severities [(#9694)](https://github.com/prowler-cloud/prowler/pull/9694) +- Endpoints `GET /findings` and `GET /findings/metadata/latest` now support the `group` filter [(#9694)](https://github.com/prowler-cloud/prowler/pull/9694) - `provider_id` and `provider_id__in` filter aliases for findings endpoints to enable consistent frontend parameter naming [(#9701)](https://github.com/prowler-cloud/prowler/pull/9701) --- diff --git a/api/src/backend/api/filters.py b/api/src/backend/api/filters.py index fdb2282e12..008a3cc77d 100644 --- a/api/src/backend/api/filters.py +++ b/api/src/backend/api/filters.py @@ -45,6 +45,7 @@ Role, Scan, ScanCategorySummary, + ScanGroupSummary, ScanSummary, SeverityChoices, StateChoices, @@ -214,6 +215,9 @@ class CommonFindingFilters(FilterSet): category = CharFilter(method="filter_category") category__in = CharInFilter(field_name="categories", lookup_expr="overlap") + resource_groups = CharFilter(field_name="resource_groups", lookup_expr="exact") + resource_groups__in = CharInFilter(field_name="resource_groups", lookup_expr="in") + # Temporarily disabled until we implement tag filtering in the UI # resource_tag_key = CharFilter(field_name="resources__tags__key") # resource_tag_key__in = CharInFilter( @@ -439,6 +443,8 @@ class ResourceFilter(ProviderRelationshipFilterSet): updated_at = DateFilter(field_name="updated_at", lookup_expr="date") scan = UUIDFilter(field_name="provider__scan", lookup_expr="exact") scan__in = UUIDInFilter(field_name="provider__scan", lookup_expr="in") + groups = CharFilter(method="filter_groups") + groups__in = CharInFilter(field_name="groups", lookup_expr="overlap") class Meta: model = Resource @@ -453,6 +459,9 @@ class Meta: "updated_at": ["gte", "lte"], } + def filter_groups(self, queryset, name, value): + return queryset.filter(groups__contains=[value]) + def filter_queryset(self, queryset): if not (self.data.get("scan") or self.data.get("scan__in")) and not ( self.data.get("updated_at") @@ -517,6 +526,8 @@ class LatestResourceFilter(ProviderRelationshipFilterSet): tag_value = CharFilter(method="filter_tag_value") tag = CharFilter(method="filter_tag") tags = CharFilter(method="filter_tag") + groups = CharFilter(method="filter_groups") + groups__in = CharInFilter(field_name="groups", lookup_expr="overlap") class Meta: model = Resource @@ -529,6 +540,9 @@ class Meta: "type": ["exact", "icontains", "in"], } + def filter_groups(self, queryset, name, value): + return queryset.filter(groups__contains=[value]) + def filter_tag_key(self, queryset, name, value): return queryset.filter(Q(tags__key=value) | Q(tags__key__icontains=value)) @@ -1154,6 +1168,26 @@ class CategoryOverviewFilter(BaseScanProviderFilter): class Meta(BaseScanProviderFilter.Meta): model = ScanCategorySummary + fields = {} + + +class ResourceGroupOverviewFilter(FilterSet): + provider_id = UUIDFilter(field_name="scan__provider__id", lookup_expr="exact") + provider_id__in = UUIDInFilter(field_name="scan__provider__id", 
lookup_expr="in") + provider_type = ChoiceFilter( + field_name="scan__provider__provider", choices=Provider.ProviderChoices.choices + ) + provider_type__in = ChoiceInFilter( + field_name="scan__provider__provider", + choices=Provider.ProviderChoices.choices, + lookup_expr="in", + ) + resource_group = CharFilter(field_name="resource_group", lookup_expr="exact") + resource_group__in = CharInFilter(field_name="resource_group", lookup_expr="in") + + class Meta: + model = ScanGroupSummary + fields = {} class ComplianceWatchlistFilter(BaseProviderFilter): diff --git a/api/src/backend/api/migrations/0068_finding_resource_group_scangroupsummary.py b/api/src/backend/api/migrations/0068_finding_resource_group_scangroupsummary.py new file mode 100644 index 0000000000..932a2a6c85 --- /dev/null +++ b/api/src/backend/api/migrations/0068_finding_resource_group_scangroupsummary.py @@ -0,0 +1,126 @@ +import uuid + +import django.db.models.deletion +from django.db import migrations, models + +import api.db_utils +import api.rls + + +class Migration(migrations.Migration): + dependencies = [ + ("api", "0067_tenant_compliance_summary"), + ] + + operations = [ + migrations.AddField( + model_name="finding", + name="resource_groups", + field=models.TextField( + blank=True, + help_text="Resource group from check metadata for efficient filtering", + null=True, + ), + ), + migrations.CreateModel( + name="ScanGroupSummary", + fields=[ + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ( + "tenant", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="api.tenant", + ), + ), + ( + "inserted_at", + models.DateTimeField(auto_now_add=True), + ), + ( + "scan", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="resource_group_summaries", + related_query_name="resource_group_summary", + to="api.scan", + ), + ), + ( + "resource_group", + models.CharField(max_length=50), + ), + ( + "severity", + api.db_utils.SeverityEnumField( + choices=[ + ("critical", "Critical"), + ("high", "High"), + ("medium", "Medium"), + ("low", "Low"), + ("informational", "Informational"), + ], + ), + ), + ( + "total_findings", + models.IntegerField( + default=0, help_text="Non-muted findings (PASS + FAIL)" + ), + ), + ( + "failed_findings", + models.IntegerField( + default=0, + help_text="Non-muted FAIL findings (subset of total_findings)", + ), + ), + ( + "new_failed_findings", + models.IntegerField( + default=0, + help_text="Non-muted FAIL with delta='new' (subset of failed_findings)", + ), + ), + ( + "resources_count", + models.IntegerField( + default=0, help_text="Count of distinct resource_uid values" + ), + ), + ], + options={ + "db_table": "scan_resource_group_summaries", + "abstract": False, + }, + ), + migrations.AddIndex( + model_name="scangroupsummary", + index=models.Index( + fields=["tenant_id", "scan"], name="srgs_tenant_scan_idx" + ), + ), + migrations.AddConstraint( + model_name="scangroupsummary", + constraint=models.UniqueConstraint( + fields=("tenant_id", "scan_id", "resource_group", "severity"), + name="unique_resource_group_severity_per_scan", + ), + ), + migrations.AddConstraint( + model_name="scangroupsummary", + constraint=api.rls.RowLevelSecurityConstraint( + field="tenant_id", + name="rls_on_scangroupsummary", + statements=["SELECT", "INSERT", "UPDATE", "DELETE"], + ), + ), + ] diff --git a/api/src/backend/api/migrations/0069_resource_resource_group.py 
b/api/src/backend/api/migrations/0069_resource_resource_group.py new file mode 100644 index 0000000000..14a26995c2 --- /dev/null +++ b/api/src/backend/api/migrations/0069_resource_resource_group.py @@ -0,0 +1,21 @@ +from django.contrib.postgres.fields import ArrayField +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("api", "0068_finding_resource_group_scangroupsummary"), + ] + + operations = [ + migrations.AddField( + model_name="resource", + name="groups", + field=ArrayField( + models.CharField(max_length=100), + blank=True, + help_text="Groups for categorization (e.g., compute, storage, IAM)", + null=True, + ), + ), + ] diff --git a/api/src/backend/api/models.py b/api/src/backend/api/models.py index aa568bd17c..68bfa78561 100644 --- a/api/src/backend/api/models.py +++ b/api/src/backend/api/models.py @@ -704,6 +704,12 @@ class Resource(RowLevelSecurityProtectedModel): metadata = models.TextField(blank=True, null=True) details = models.TextField(blank=True, null=True) partition = models.TextField(blank=True, null=True) + groups = ArrayField( + models.CharField(max_length=100), + blank=True, + null=True, + help_text="Groups for categorization (e.g., compute, storage, IAM)", + ) failed_findings_count = models.IntegerField(default=0) @@ -890,6 +896,11 @@ class DeltaChoices(models.TextChoices): null=True, help_text="Categories from check metadata for efficient filtering", ) + resource_groups = models.TextField( + blank=True, + null=True, + help_text="Resource group from check metadata for efficient filtering", + ) # Relationships scan = models.ForeignKey(to=Scan, related_name="findings", on_delete=models.CASCADE) @@ -2032,6 +2043,67 @@ class JSONAPIMeta: resource_name = "scan-category-summaries" +class ScanGroupSummary(RowLevelSecurityProtectedModel): + """ + Pre-aggregated resource group metrics per scan by severity. + + Stores one row per (resource_group, severity) combination per scan for efficient + overview queries. Resource groups come from check_metadata.Group. 
+ + Count relationships (each is a subset of the previous): + - total_findings >= failed_findings >= new_failed_findings + """ + + id = models.UUIDField(primary_key=True, default=uuid4, editable=False) + inserted_at = models.DateTimeField(auto_now_add=True, editable=False) + + scan = models.ForeignKey( + Scan, + on_delete=models.CASCADE, + related_name="resource_group_summaries", + related_query_name="resource_group_summary", + ) + + resource_group = models.CharField(max_length=50) + severity = SeverityEnumField(choices=SeverityChoices) + + total_findings = models.IntegerField( + default=0, help_text="Non-muted findings (PASS + FAIL)" + ) + failed_findings = models.IntegerField( + default=0, help_text="Non-muted FAIL findings (subset of total_findings)" + ) + new_failed_findings = models.IntegerField( + default=0, + help_text="Non-muted FAIL with delta='new' (subset of failed_findings)", + ) + resources_count = models.IntegerField( + default=0, help_text="Count of distinct resource_uid values" + ) + + class Meta(RowLevelSecurityProtectedModel.Meta): + db_table = "scan_resource_group_summaries" + + indexes = [ + models.Index(fields=["tenant_id", "scan"], name="srgs_tenant_scan_idx"), + ] + + constraints = [ + models.UniqueConstraint( + fields=("tenant_id", "scan_id", "resource_group", "severity"), + name="unique_resource_group_severity_per_scan", + ), + RowLevelSecurityConstraint( + field="tenant_id", + name="rls_on_%(class)s", + statements=["SELECT", "INSERT", "UPDATE", "DELETE"], + ), + ] + + class JSONAPIMeta: + resource_name = "scan-resource-group-summaries" + + class LighthouseConfiguration(RowLevelSecurityProtectedModel): """ Stores configuration and API keys for LLM services. diff --git a/api/src/backend/api/specs/v1.yaml b/api/src/backend/api/specs/v1.yaml index 2e238a5ad4..5a55602d49 100644 --- a/api/src/backend/api/specs/v1.yaml +++ b/api/src/backend/api/specs/v1.yaml @@ -712,6 +712,7 @@ paths: - check_id - check_metadata - categories + - resource_groups - raw_result - inserted_at - updated_at @@ -988,6 +989,19 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[resource_groups] + schema: + type: string + - in: query + name: filter[resource_groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[resource_name] schema: @@ -1239,6 +1253,7 @@ paths: - check_id - check_metadata - categories + - resource_groups - raw_result - inserted_at - updated_at @@ -1560,6 +1575,19 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[resource_groups] + schema: + type: string + - in: query + name: filter[resource_groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[resource_name] schema: @@ -1789,6 +1817,7 @@ paths: - check_id - check_metadata - categories + - resource_groups - raw_result - inserted_at - updated_at @@ -2040,6 +2069,19 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[resource_groups] + schema: + type: string + - in: query + name: filter[resource_groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. 
+ explode: false + style: form - in: query name: filter[resource_name] schema: @@ -2251,6 +2293,7 @@ paths: - regions - resource_types - categories + - groups description: endpoint return only specific fields in the response on a per-type basis by including a fields[TYPE] query parameter. explode: false @@ -2518,6 +2561,19 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[resource_groups] + schema: + type: string + - in: query + name: filter[resource_groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[resource_name] schema: @@ -2742,6 +2798,7 @@ paths: - regions - resource_types - categories + - groups description: endpoint return only specific fields in the response on a per-type basis by including a fields[TYPE] query parameter. explode: false @@ -2984,6 +3041,19 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[resource_groups] + schema: + type: string + - in: query + name: filter[resource_groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[resource_name] schema: @@ -4968,12 +5038,12 @@ paths: description: '' /api/v1/overviews/compliance-watchlist: get: - operationId: overviews_compliance_watchlist_retrieve - description: |- - Get compliance watchlist overview with FAIL-dominant aggregation. - - Without filters: uses pre-aggregated TenantComplianceSummary (~70 rows). - With provider filters: queries ProviderComplianceScore with FAIL-dominant logic. + operationId: overviews_compliance_watchlist_list + description: 'Retrieve compliance metrics with FAIL-dominant aggregation. Without + filters: uses pre-aggregated TenantComplianceSummary. With provider filters: + queries ProviderComplianceScore with FAIL-dominant logic where any FAIL in + a requirement marks it as failed.' + summary: Get compliance watchlist overview parameters: - in: query name: fields[compliance-watchlist-overviews] @@ -4991,6 +5061,119 @@ paths: description: endpoint return only specific fields in the response on a per-type basis by including a fields[TYPE] query parameter. explode: false + - in: query + name: filter[provider_id] + schema: + type: string + format: uuid + - in: query + name: filter[provider_id__in] + schema: + type: array + items: + type: string + format: uuid + description: Multiple values may be separated by commas. + explode: false + style: form + - in: query + name: filter[provider_type] + schema: + type: string + enum: + - alibabacloud + - aws + - azure + - gcp + - github + - iac + - kubernetes + - m365 + - mongodbatlas + - oraclecloud + description: |- + * `aws` - AWS + * `azure` - Azure + * `gcp` - GCP + * `kubernetes` - Kubernetes + * `m365` - M365 + * `github` - GitHub + * `mongodbatlas` - MongoDB Atlas + * `iac` - IaC + * `oraclecloud` - Oracle Cloud Infrastructure + * `alibabacloud` - Alibaba Cloud + - in: query + name: filter[provider_type__in] + schema: + type: array + items: + type: string + enum: + - alibabacloud + - aws + - azure + - gcp + - github + - iac + - kubernetes + - m365 + - mongodbatlas + - oraclecloud + description: |- + Multiple values may be separated by commas. 
+ + * `aws` - AWS + * `azure` - Azure + * `gcp` - GCP + * `kubernetes` - Kubernetes + * `m365` - M365 + * `github` - GitHub + * `mongodbatlas` - MongoDB Atlas + * `iac` - IaC + * `oraclecloud` - Oracle Cloud Infrastructure + * `alibabacloud` - Alibaba Cloud + explode: false + style: form + - name: filter[search] + required: false + in: query + description: A search term. + schema: + type: string + - name: page[number] + required: false + in: query + description: A page number within the paginated result set. + schema: + type: integer + - name: page[size] + required: false + in: query + description: Number of results to return per page. + schema: + type: integer + - name: sort + required: false + in: query + description: '[list of fields to sort by](https://jsonapi.org/format/#fetching-sorting)' + schema: + type: array + items: + type: string + enum: + - id + - -id + - compliance_id + - -compliance_id + - requirements_passed + - -requirements_passed + - requirements_failed + - -requirements_failed + - requirements_manual + - -requirements_manual + - total_requirements + - -total_requirements + explode: false tags: - Overview security: @@ -5000,7 +5183,7 @@ paths: content: application/vnd.api+json: schema: - $ref: '#/components/schemas/ComplianceWatchlistOverviewResponse' + $ref: '#/components/schemas/PaginatedComplianceWatchlistOverviewList' description: '' /api/v1/overviews/findings: get: @@ -5797,6 +5980,170 @@ paths: schema: $ref: '#/components/schemas/OverviewRegionResponse' description: '' + /api/v1/overviews/resource-groups: + get: + operationId: overviews_resource_groups_list + description: Retrieve aggregated resource group metrics from latest completed + scans per provider. Returns one row per resource group with total, failed, + and new failed findings counts, plus a severity breakdown showing failed findings + per severity level, and a count of distinct resources evaluated per group. + summary: Get resource group overview + parameters: + - in: query + name: fields[resource-group-overviews] + schema: + type: array + items: + type: string + enum: + - id + - total_findings + - failed_findings + - new_failed_findings + - resources_count + - severity + description: endpoint return only specific fields in the response on a per-type + basis by including a fields[TYPE] query parameter. + explode: false + - in: query + name: filter[provider_id] + schema: + type: string + format: uuid + - in: query + name: filter[provider_id__in] + schema: + type: array + items: + type: string + format: uuid + description: Multiple values may be separated by commas. + explode: false + style: form + - in: query + name: filter[provider_type] + schema: + type: string + x-spec-enum-id: 684bf4173d2b754f + enum: + - alibabacloud + - aws + - azure + - gcp + - github + - iac + - kubernetes + - m365 + - mongodbatlas + - oraclecloud + description: |- + * `aws` - AWS + * `azure` - Azure + * `gcp` - GCP + * `kubernetes` - Kubernetes + * `m365` - M365 + * `github` - GitHub + * `mongodbatlas` - MongoDB Atlas + * `iac` - IaC + * `oraclecloud` - Oracle Cloud Infrastructure + * `alibabacloud` - Alibaba Cloud + - in: query + name: filter[provider_type__in] + schema: + type: array + items: + type: string + x-spec-enum-id: 684bf4173d2b754f + enum: + - alibabacloud + - aws + - azure + - gcp + - github + - iac + - kubernetes + - m365 + - mongodbatlas + - oraclecloud + description: |- + Multiple values may be separated by commas. 
+ + * `aws` - AWS + * `azure` - Azure + * `gcp` - GCP + * `kubernetes` - Kubernetes + * `m365` - M365 + * `github` - GitHub + * `mongodbatlas` - MongoDB Atlas + * `iac` - IaC + * `oraclecloud` - Oracle Cloud Infrastructure + * `alibabacloud` - Alibaba Cloud + explode: false + style: form + - in: query + name: filter[resource_group] + schema: + type: string + - in: query + name: filter[resource_group__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form + - name: filter[search] + required: false + in: query + description: A search term. + schema: + type: string + - name: page[number] + required: false + in: query + description: A page number within the paginated result set. + schema: + type: integer + - name: page[size] + required: false + in: query + description: Number of results to return per page. + schema: + type: integer + - name: sort + required: false + in: query + description: '[list of fields to sort by](https://jsonapi.org/format/#fetching-sorting)' + schema: + type: array + items: + type: string + enum: + - id + - -id + - total_findings + - -total_findings + - failed_findings + - -failed_findings + - new_failed_findings + - -new_failed_findings + - resources_count + - -resources_count + - severity + - -severity + explode: false + tags: + - Overview + security: + - JWT or API Key: [] + responses: + '200': + content: + application/vnd.api+json: + schema: + $ref: '#/components/schemas/PaginatedResourceGroupOverviewList' + description: '' /api/v1/overviews/services: get: operationId: overviews_services_retrieve @@ -7333,10 +7680,24 @@ paths: - metadata - details - partition + - groups - type description: endpoint return only specific fields in the response on a per-type basis by including a fields[TYPE] query parameter. explode: false + - in: query + name: filter[groups] + schema: + type: string + - in: query + name: filter[groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[inserted_at] schema: @@ -7673,6 +8034,7 @@ paths: - metadata - details - partition + - groups - type description: endpoint return only specific fields in the response on a per-type basis by including a fields[TYPE] query parameter. @@ -7735,10 +8097,24 @@ paths: - metadata - details - partition + - groups - type description: endpoint return only specific fields in the response on a per-type basis by including a fields[TYPE] query parameter. explode: false + - in: query + name: filter[groups] + schema: + type: string + - in: query + name: filter[groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[name] schema: @@ -8003,9 +8379,23 @@ paths: - services - regions - types + - groups description: endpoint return only specific fields in the response on a per-type basis by including a fields[TYPE] query parameter. explode: false + - in: query + name: filter[groups] + schema: + type: string + - in: query + name: filter[groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. 
+ explode: false + style: form - in: query name: filter[inserted_at] schema: @@ -8306,9 +8696,23 @@ paths: - services - regions - types + - groups description: endpoint return only specific fields in the response on a per-type basis by including a fields[TYPE] query parameter. explode: false + - in: query + name: filter[groups] + schema: + type: string + - in: query + name: filter[groups__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[name] schema: @@ -11573,13 +11977,6 @@ components: - requirements_failed - requirements_manual - total_requirements - ComplianceWatchlistOverviewResponse: - type: object - properties: - data: - $ref: '#/components/schemas/ComplianceWatchlistOverview' - required: - - data Finding: type: object required: @@ -11654,6 +12051,10 @@ components: maxLength: 100 nullable: true description: Categories from check metadata for efficient filtering + resource_groups: + type: string + nullable: true + description: Resource group from check metadata for efficient filtering raw_result: {} inserted_at: type: string @@ -11808,6 +12209,10 @@ components: type: array items: type: string + groups: + type: array + items: + type: string required: - services - regions @@ -14560,6 +14965,15 @@ components: $ref: '#/components/schemas/ComplianceOverview' required: - data + PaginatedComplianceWatchlistOverviewList: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ComplianceWatchlistOverview' + required: + - data PaginatedFindingList: type: object properties: @@ -14677,6 +15091,15 @@ components: $ref: '#/components/schemas/ProviderSecret' required: - data + PaginatedResourceGroupOverviewList: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ResourceGroupOverview' + required: + - data PaginatedResourceList: type: object properties: @@ -18681,6 +19104,14 @@ components: type: string readOnly: true nullable: true + groups: + type: array + items: + type: string + maxLength: 100 + readOnly: true + nullable: true + description: Groups for categorization (e.g., compute, storage, IAM) type: type: string readOnly: true @@ -18742,6 +19173,44 @@ components: readOnly: true required: - provider + ResourceGroupOverview: + type: object + required: + - type + - id + additionalProperties: false + properties: + type: + type: string + description: The [type](https://jsonapi.org/format/#document-resource-object-identification) + member is used to describe resource objects that share common attributes + and relationships. 
+ enum: + - resource-group-overviews + id: {} + attributes: + type: object + properties: + id: + type: string + total_findings: + type: integer + failed_findings: + type: integer + new_failed_findings: + type: integer + resources_count: + type: integer + severity: + description: 'Severity breakdown: {informational, low, medium, high, + critical}' + required: + - id + - total_findings + - failed_findings + - new_failed_findings + - resources_count + - severity ResourceMetadata: type: object required: @@ -18772,10 +19241,15 @@ components: type: array items: type: string + groups: + type: array + items: + type: string required: - services - regions - types + - groups ResourceMetadataResponse: type: object properties: diff --git a/api/src/backend/api/tests/test_views.py b/api/src/backend/api/tests/test_views.py index f2a0c299f2..f58fe138a9 100644 --- a/api/src/backend/api/tests/test_views.py +++ b/api/src/backend/api/tests/test_views.py @@ -3625,6 +3625,7 @@ def test_resources_list(self, authenticated_client, resources_fixture): assert "metadata" in response.json()["data"][0]["attributes"] assert "details" in response.json()["data"][0]["attributes"] assert "partition" in response.json()["data"][0]["attributes"] + assert "groups" in response.json()["data"][0]["attributes"] @pytest.mark.parametrize( "include_values, expected_resources", @@ -3699,6 +3700,10 @@ def test_resources_list_include( # full text search on resource tags ("search", "multi word", 1), ("search", "key2", 2), + # groups filter (ArrayField) + ("groups", "compute", 2), + ("groups", "storage", 1), + ("groups.in", "compute,storage", 3), ] ), ) @@ -3845,12 +3850,14 @@ def test_resources_metadata_retrieve( expected_services = {"ec2", "s3"} expected_regions = {"us-east-1", "eu-west-1"} expected_resource_types = {"prowler-test"} + expected_groups = {"compute", "storage"} assert data["data"]["type"] == "resources-metadata" assert data["data"]["id"] is None assert set(data["data"]["attributes"]["services"]) == expected_services assert set(data["data"]["attributes"]["regions"]) == expected_regions assert set(data["data"]["attributes"]["types"]) == expected_resource_types + assert set(data["data"]["attributes"]["groups"]) == expected_groups def test_resources_metadata_resource_filter_retrieve( self, authenticated_client, resources_fixture, backfill_scan_metadata_fixture @@ -3886,6 +3893,7 @@ def test_resources_metadata_future_date(self, authenticated_client): assert data["data"]["attributes"]["services"] == [] assert data["data"]["attributes"]["regions"] == [] assert data["data"]["attributes"]["types"] == [] + assert data["data"]["attributes"]["groups"] == [] def test_resources_metadata_invalid_date(self, authenticated_client): response = authenticated_client.get( @@ -3925,6 +3933,7 @@ def test_resources_metadata_latest( assert attributes["services"] == [latest_scan_resource.service] assert attributes["regions"] == [latest_scan_resource.region] assert attributes["types"] == [latest_scan_resource.type] + assert "groups" in attributes @pytest.mark.django_db @@ -4417,6 +4426,17 @@ def test_findings_metadata_latest_categories( attributes = response.json()["data"]["attributes"] assert set(attributes["categories"]) == {"gen-ai", "iam"} + def test_findings_metadata_latest_groups( + self, authenticated_client, latest_scan_finding_with_categories + ): + response = authenticated_client.get( + reverse("finding-metadata_latest"), + ) + assert response.status_code == status.HTTP_200_OK + attributes = response.json()["data"]["attributes"] + assert 
"groups" in attributes + assert "ai_ml" in attributes["groups"] + def test_findings_filter_by_category( self, authenticated_client, findings_with_categories ): @@ -4463,6 +4483,49 @@ def test_findings_filter_by_category_no_match( assert response.status_code == status.HTTP_200_OK assert len(response.json()["data"]) == 0 + def test_findings_filter_by_resource_groups( + self, authenticated_client, findings_with_group + ): + finding = findings_with_group + response = authenticated_client.get( + reverse("finding-list"), + { + "filter[resource_groups]": "storage", + "filter[inserted_at]": finding.inserted_at.strftime("%Y-%m-%d"), + }, + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 1 + assert response.json()["data"][0]["attributes"]["resource_groups"] == "storage" + + def test_findings_filter_by_resource_groups_in( + self, authenticated_client, findings_with_multiple_groups + ): + finding1, _ = findings_with_multiple_groups + response = authenticated_client.get( + reverse("finding-list"), + { + "filter[resource_groups__in]": "storage,security", + "filter[inserted_at]": finding1.inserted_at.strftime("%Y-%m-%d"), + }, + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 2 + + def test_findings_filter_by_resource_groups_no_match( + self, authenticated_client, findings_with_group + ): + finding = findings_with_group + response = authenticated_client.get( + reverse("finding-list"), + { + "filter[resource_groups]": "nonexistent", + "filter[inserted_at]": finding.inserted_at.strftime("%Y-%m-%d"), + }, + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 0 + @pytest.mark.django_db class TestJWTFields: @@ -8009,6 +8072,228 @@ def test_overview_categories_aggregates_multiple_providers( assert data[0]["attributes"]["failed_findings"] == 13 assert data[0]["attributes"]["new_failed_findings"] == 5 + def test_overview_groups_no_data(self, authenticated_client): + response = authenticated_client.get(reverse("overview-resource-groups")) + assert response.status_code == status.HTTP_200_OK + assert response.json()["data"] == [] + + def test_overview_groups_aggregates_by_group_with_severity( + self, + authenticated_client, + tenants_fixture, + providers_fixture, + create_scan_resource_group_summary, + ): + tenant = tenants_fixture[0] + provider = providers_fixture[0] + + scan = Scan.objects.create( + name="resource-groups-scan", + provider=provider, + trigger=Scan.TriggerChoices.MANUAL, + state=StateChoices.COMPLETED, + tenant=tenant, + ) + + # resources_count is group-level (same for all severities within a group) + create_scan_resource_group_summary( + tenant, + scan, + "storage", + "high", + total_findings=20, + failed_findings=10, + new_failed_findings=5, + resources_count=8, + ) + create_scan_resource_group_summary( + tenant, + scan, + "storage", + "medium", + total_findings=15, + failed_findings=7, + new_failed_findings=3, + resources_count=8, # Same as high - group-level count + ) + create_scan_resource_group_summary( + tenant, + scan, + "security", + "critical", + total_findings=10, + failed_findings=8, + new_failed_findings=2, + resources_count=4, + ) + + response = authenticated_client.get(reverse("overview-resource-groups")) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 2 + + storage_data = next(d for d in data if d["id"] == "storage") + security_data = next(d for d in data if d["id"] == "security") + + 
assert storage_data["attributes"]["total_findings"] == 35 + assert storage_data["attributes"]["failed_findings"] == 17 + assert storage_data["attributes"]["new_failed_findings"] == 8 + assert ( + storage_data["attributes"]["resources_count"] == 8 + ) # Group-level, not sum + assert security_data["attributes"]["total_findings"] == 10 + assert security_data["attributes"]["failed_findings"] == 8 + assert security_data["attributes"]["resources_count"] == 4 + + @pytest.mark.parametrize( + "filter_key,filter_value_fn,expected_total,expected_failed", + [ + ("filter[provider_id]", lambda p1, p2: str(p1.id), 10, 5), + ("filter[provider_id__in]", lambda p1, p2: f"{p1.id},{p2.id}", 25, 12), + ("filter[provider_type]", lambda p1, p2: "aws", 10, 5), + ("filter[provider_type__in]", lambda p1, p2: "aws,gcp", 25, 12), + ], + ) + def test_overview_groups_provider_filters( + self, + authenticated_client, + tenants_fixture, + providers_fixture, + create_scan_resource_group_summary, + filter_key, + filter_value_fn, + expected_total, + expected_failed, + ): + tenant = tenants_fixture[0] + provider1 = providers_fixture[0] # AWS + gcp_provider = providers_fixture[2] # GCP + + scan1 = Scan.objects.create( + name="aws-rg-scan", + provider=provider1, + trigger=Scan.TriggerChoices.MANUAL, + state=StateChoices.COMPLETED, + tenant=tenant, + ) + scan2 = Scan.objects.create( + name="gcp-rg-scan", + provider=gcp_provider, + trigger=Scan.TriggerChoices.MANUAL, + state=StateChoices.COMPLETED, + tenant=tenant, + ) + + create_scan_resource_group_summary( + tenant, scan1, "storage", "high", total_findings=10, failed_findings=5 + ) + create_scan_resource_group_summary( + tenant, scan2, "storage", "high", total_findings=15, failed_findings=7 + ) + + response = authenticated_client.get( + reverse("overview-resource-groups"), + {filter_key: filter_value_fn(provider1, gcp_provider)}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + assert data[0]["attributes"]["total_findings"] == expected_total + assert data[0]["attributes"]["failed_findings"] == expected_failed + + def test_overview_groups_group_filter( + self, + authenticated_client, + tenants_fixture, + providers_fixture, + create_scan_resource_group_summary, + ): + tenant = tenants_fixture[0] + provider = providers_fixture[0] + + scan = Scan.objects.create( + name="rg-filter-scan", + provider=provider, + trigger=Scan.TriggerChoices.MANUAL, + state=StateChoices.COMPLETED, + tenant=tenant, + ) + + create_scan_resource_group_summary( + tenant, scan, "storage", "high", total_findings=10, failed_findings=5 + ) + create_scan_resource_group_summary( + tenant, scan, "compute", "medium", total_findings=20, failed_findings=8 + ) + create_scan_resource_group_summary( + tenant, scan, "security", "low", total_findings=15, failed_findings=3 + ) + + response = authenticated_client.get( + reverse("overview-resource-groups"), + {"filter[resource_group__in]": "storage,compute"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + group_ids = {item["id"] for item in data} + assert group_ids == {"storage", "compute"} + + def test_overview_groups_aggregates_multiple_providers( + self, + authenticated_client, + tenants_fixture, + providers_fixture, + create_scan_resource_group_summary, + ): + tenant = tenants_fixture[0] + provider1, provider2, *_ = providers_fixture + + scan1 = Scan.objects.create( + name="multi-provider-rg-scan-1", + provider=provider1, + trigger=Scan.TriggerChoices.MANUAL, + 
state=StateChoices.COMPLETED, + tenant=tenant, + ) + scan2 = Scan.objects.create( + name="multi-provider-rg-scan-2", + provider=provider2, + trigger=Scan.TriggerChoices.MANUAL, + state=StateChoices.COMPLETED, + tenant=tenant, + ) + + create_scan_resource_group_summary( + tenant, + scan1, + "storage", + "high", + total_findings=10, + failed_findings=5, + new_failed_findings=2, + resources_count=4, + ) + create_scan_resource_group_summary( + tenant, + scan2, + "storage", + "high", + total_findings=15, + failed_findings=8, + new_failed_findings=3, + resources_count=6, + ) + + response = authenticated_client.get(reverse("overview-resource-groups")) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + assert data[0]["id"] == "storage" + assert data[0]["attributes"]["total_findings"] == 25 + assert data[0]["attributes"]["failed_findings"] == 13 + assert data[0]["attributes"]["new_failed_findings"] == 5 + assert data[0]["attributes"]["resources_count"] == 10 + def test_compliance_watchlist_no_filters_uses_tenant_summary( self, authenticated_client, tenant_compliance_summary_fixture ): diff --git a/api/src/backend/api/utils.py b/api/src/backend/api/utils.py index bc203c1584..e624a36aab 100644 --- a/api/src/backend/api/utils.py +++ b/api/src/backend/api/utils.py @@ -393,11 +393,21 @@ def get_findings_metadata_no_aggregations(tenant_id: str, filtered_queryset): categories_set.update(categories_list) categories = sorted(categories_set) + # Aggregate groups from findings + groups = list( + filtered_queryset.exclude(resource_groups__isnull=True) + .exclude(resource_groups__exact="") + .values_list("resource_groups", flat=True) + .distinct() + .order_by("resource_groups") + ) + result = { "services": services, "regions": regions, "resource_types": resource_types, "categories": categories, + "groups": groups, } serializer = FindingMetadataSerializer(data=result) diff --git a/api/src/backend/api/v1/serializers.py b/api/src/backend/api/v1/serializers.py index 00c8c37dfb..ae9d86e89e 100644 --- a/api/src/backend/api/v1/serializers.py +++ b/api/src/backend/api/v1/serializers.py @@ -1175,6 +1175,7 @@ class Meta: "metadata", "details", "partition", + "groups", ] extra_kwargs = { "id": {"read_only": True}, @@ -1183,6 +1184,7 @@ class Meta: "metadata": {"read_only": True}, "details": {"read_only": True}, "partition": {"read_only": True}, + "groups": {"read_only": True}, } included_serializers = { @@ -1276,6 +1278,7 @@ class ResourceMetadataSerializer(BaseSerializerV1): services = serializers.ListField(child=serializers.CharField(), allow_empty=True) regions = serializers.ListField(child=serializers.CharField(), allow_empty=True) types = serializers.ListField(child=serializers.CharField(), allow_empty=True) + groups = serializers.ListField(child=serializers.CharField(), allow_empty=True) # Temporarily disabled until we implement tag filtering in the UI # tags = serializers.JSONField(help_text="Tags are described as key-value pairs.") @@ -1302,6 +1305,7 @@ class Meta: "check_id", "check_metadata", "categories", + "resource_groups", "raw_result", "inserted_at", "updated_at", @@ -1358,6 +1362,9 @@ class FindingMetadataSerializer(BaseSerializerV1): child=serializers.CharField(), allow_empty=True ) categories = serializers.ListField(child=serializers.CharField(), allow_empty=True) + groups = serializers.ListField( + child=serializers.CharField(), allow_empty=True, required=False, default=list + ) # Temporarily disabled until we implement tag filtering in the UI 
# tags = serializers.JSONField(help_text="Tags are described as key-value pairs.") @@ -2303,6 +2310,22 @@ class JSONAPIMeta: resource_name = "category-overviews" +class ResourceGroupOverviewSerializer(BaseSerializerV1): + """Serializer for resource group overview aggregations.""" + + id = serializers.CharField(source="resource_group") + total_findings = serializers.IntegerField() + failed_findings = serializers.IntegerField() + new_failed_findings = serializers.IntegerField() + resources_count = serializers.IntegerField() + severity = serializers.JSONField( + help_text="Severity breakdown: {informational, low, medium, high, critical}" + ) + + class JSONAPIMeta: + resource_name = "resource-group-overviews" + + class ComplianceWatchlistOverviewSerializer(BaseSerializerV1): """Serializer for compliance watchlist overview with FAIL-dominant aggregation.""" diff --git a/api/src/backend/api/v1/views.py b/api/src/backend/api/v1/views.py index ffdfc1005e..7c6fd1b9b3 100644 --- a/api/src/backend/api/v1/views.py +++ b/api/src/backend/api/v1/views.py @@ -119,6 +119,7 @@ ProviderGroupFilter, ProviderSecretFilter, ResourceFilter, + ResourceGroupOverviewFilter, RoleFilter, ScanFilter, ScanSummaryFilter, @@ -160,6 +161,7 @@ SAMLToken, Scan, ScanCategorySummary, + ScanGroupSummary, ScanSummary, SeverityChoices, StateChoices, @@ -233,6 +235,7 @@ ProviderSecretUpdateSerializer, ProviderSerializer, ProviderUpdateSerializer, + ResourceGroupOverviewSerializer, ResourceMetadataSerializer, ResourceSerializer, RoleCreateSerializer, @@ -2527,10 +2530,20 @@ def metadata(self, request): .order_by("resource_type") ) + # Get groups from Resource model (flatten ArrayField) + all_groups = Resource.objects.filter( + tenant_id=tenant_id, + groups__isnull=False, + ).values_list("groups", flat=True) + groups = sorted( + set(g for groups_list in all_groups if groups_list for g in groups_list) + ) + result = { "services": services, "regions": regions, "types": resource_types, + "groups": groups, } serializer = self.get_serializer(data=result) @@ -2587,10 +2600,20 @@ def metadata_latest(self, request): .order_by("resource_type") ) + # Get groups from Resource model for resources in latest scans (flatten ArrayField) + all_groups = Resource.objects.filter( + tenant_id=tenant_id, + groups__isnull=False, + ).values_list("groups", flat=True) + groups = sorted( + set(g for groups_list in all_groups if groups_list for g in groups_list) + ) + result = { "services": services, "regions": regions, "types": resource_types, + "groups": groups, } serializer = self.get_serializer(data=result) @@ -3019,11 +3042,23 @@ def metadata_latest(self, request): categories_set.update(categories_list) categories = sorted(categories_set) + # Get groups from ScanGroupSummary for latest scans + groups = list( + ScanGroupSummary.objects.filter( + tenant_id=tenant_id, + scan_id__in=latest_scans_queryset.values_list("id", flat=True), + ) + .values_list("resource_group", flat=True) + .distinct() + .order_by("resource_group") + ) + result = { "services": services, "regions": regions, "resource_types": resource_types, "categories": categories, + "groups": groups, } serializer = self.get_serializer(data=result) @@ -4097,6 +4132,30 @@ def attributes(self, request): filters=True, responses={200: CategoryOverviewSerializer(many=True)}, ), + resource_groups=extend_schema( + summary="Get resource group overview", + description=( + "Retrieve aggregated resource group metrics from latest completed scans per provider. 
" + "Returns one row per resource group with total, failed, and new failed findings counts, " + "plus a severity breakdown showing failed findings per severity level, " + "and a count of distinct resources evaluated per group." + ), + tags=["Overview"], + filters=True, + responses={200: ResourceGroupOverviewSerializer(many=True)}, + ), + compliance_watchlist=extend_schema( + summary="Get compliance watchlist overview", + description=( + "Retrieve compliance metrics with FAIL-dominant aggregation. " + "Without filters: uses pre-aggregated TenantComplianceSummary. " + "With provider filters: queries ProviderComplianceScore with FAIL-dominant logic " + "where any FAIL in a requirement marks it as failed." + ), + tags=["Overview"], + filters=True, + responses={200: ComplianceWatchlistOverviewSerializer(many=True)}, + ), ) @method_decorator(CACHE_DECORATOR, name="list") class OverviewViewSet(BaseRLSViewSet): @@ -4146,6 +4205,8 @@ def get_serializer_class(self): return AttackSurfaceOverviewSerializer elif self.action == "categories": return CategoryOverviewSerializer + elif self.action == "resource_groups": + return ResourceGroupOverviewSerializer elif self.action == "compliance_watchlist": return ComplianceWatchlistOverviewSerializer return super().get_serializer_class() @@ -4161,6 +4222,8 @@ def get_filterset_class(self): return DailySeveritySummaryFilter elif self.action == "categories": return CategoryOverviewFilter + elif self.action == "resource_groups": + return ResourceGroupOverviewFilter elif self.action == "attack_surface": return AttackSurfaceOverviewFilter elif self.action == "compliance_watchlist": @@ -5005,6 +5068,95 @@ def categories(self, request): status=status.HTTP_200_OK, ) + @action( + detail=False, + methods=["get"], + url_name="resource-groups", + url_path="resource-groups", + ) + def resource_groups(self, request): + tenant_id = request.tenant_id + provider_filters = self._extract_provider_filters_from_params() + latest_scan_ids = self._latest_scan_ids_for_allowed_providers( + tenant_id, provider_filters + ) + + base_queryset = ScanGroupSummary.objects.filter( + tenant_id=tenant_id, scan_id__in=latest_scan_ids + ) + provider_filter_keys = { + "provider_id", + "provider_id__in", + "provider_type", + "provider_type__in", + } + filtered_queryset = self._apply_filterset( + base_queryset, + ResourceGroupOverviewFilter, + exclude_keys=provider_filter_keys, + ) + + aggregation = ( + filtered_queryset.values("resource_group", "severity") + .annotate( + total=Coalesce(Sum("total_findings"), 0), + failed=Coalesce(Sum("failed_findings"), 0), + new_failed=Coalesce(Sum("new_failed_findings"), 0), + ) + .order_by("resource_group", "severity") + ) + + # Get resource_group-level resources_count: + # 1. Max per (scan, resource_group) to deduplicate within-scan severity rows + # 2. 
Sum across scans for cross-provider aggregation + scan_resource_group_resources = filtered_queryset.values( + "scan_id", "resource_group" + ).annotate(resources=Coalesce(Max("resources_count"), 0)) + resources_by_resource_group = defaultdict(int) + for row in scan_resource_group_resources: + resources_by_resource_group[row["resource_group"]] += row["resources"] + + resource_group_data = defaultdict( + lambda: { + "total_findings": 0, + "failed_findings": 0, + "new_failed_findings": 0, + "resources_count": 0, + "severity": { + "informational": 0, + "low": 0, + "medium": 0, + "high": 0, + "critical": 0, + }, + } + ) + + for row in aggregation: + grp = row["resource_group"] + sev = row["severity"] + resource_group_data[grp]["total_findings"] += row["total"] + resource_group_data[grp]["failed_findings"] += row["failed"] + resource_group_data[grp]["new_failed_findings"] += row["new_failed"] + if sev in resource_group_data[grp]["severity"]: + resource_group_data[grp]["severity"][sev] = row["failed"] + + # Set resources_count from resource_group-level aggregation + for grp in resource_group_data: + resource_group_data[grp]["resources_count"] = ( + resources_by_resource_group.get(grp, 0) + ) + + response_data = [ + {"resource_group": grp, **data} + for grp, data in sorted(resource_group_data.items()) + ] + + return Response( + self.get_serializer(response_data, many=True).data, + status=status.HTTP_200_OK, + ) + @action( detail=False, methods=["get"], diff --git a/api/src/backend/conftest.py b/api/src/backend/conftest.py index 8eefab551f..be47c6ab42 100644 --- a/api/src/backend/conftest.py +++ b/api/src/backend/conftest.py @@ -14,6 +14,7 @@ from tasks.jobs.backfill import ( backfill_resource_scan_summaries, backfill_scan_category_summaries, + backfill_scan_resource_group_summaries, ) from api.db_utils import rls_transaction @@ -41,6 +42,7 @@ SAMLDomainIndex, Scan, ScanCategorySummary, + ScanGroupSummary, ScanSummary, StateChoices, StatusChoices, @@ -739,6 +741,7 @@ def resources_fixture(providers_fixture): region="us-east-1", service="ec2", type="prowler-test", + groups=["compute"], ) resource1.upsert_or_delete_tags(tags) @@ -751,6 +754,7 @@ def resources_fixture(providers_fixture): region="eu-west-1", service="s3", type="prowler-test", + groups=["storage"], ) resource2.upsert_or_delete_tags(tags) @@ -762,6 +766,7 @@ def resources_fixture(providers_fixture): region="us-east-1", service="ec2", type="test", + groups=["compute"], ) tags = [ @@ -1383,11 +1388,13 @@ def latest_scan_finding_with_categories( check_id="genai_iam_check", check_metadata={"CheckId": "genai_iam_check"}, categories=["gen-ai", "iam"], + resource_groups="ai_ml", first_seen_at="2024-01-02T00:00:00Z", ) finding.add_resources([resource]) backfill_resource_scan_summaries(tenant_id, str(scan.id)) backfill_scan_category_summaries(tenant_id, str(scan.id)) + backfill_scan_resource_group_summaries(tenant_id, str(scan.id)) return finding @@ -1629,6 +1636,103 @@ def _create( return _create +@pytest.fixture(scope="function") +def findings_with_group(scans_fixture, resources_fixture): + scan = scans_fixture[0] + resource = resources_fixture[0] + + finding = Finding.objects.create( + tenant_id=scan.tenant_id, + uid="finding_with_group_1", + scan=scan, + delta=None, + status=Status.FAIL, + status_extended="test status", + impact=Severity.critical, + impact_extended="test impact", + severity=Severity.critical, + raw_result={"status": Status.FAIL}, + check_id="storage_check", + check_metadata={"CheckId": "storage_check"}, + 
resource_groups="storage", + first_seen_at="2024-01-02T00:00:00Z", + ) + finding.add_resources([resource]) + backfill_resource_scan_summaries(str(scan.tenant_id), str(scan.id)) + return finding + + +@pytest.fixture(scope="function") +def findings_with_multiple_groups(scans_fixture, resources_fixture): + scan = scans_fixture[0] + resource1, resource2 = resources_fixture[:2] + + finding1 = Finding.objects.create( + tenant_id=scan.tenant_id, + uid="finding_multi_grp_1", + scan=scan, + delta=None, + status=Status.FAIL, + status_extended="test status", + impact=Severity.critical, + impact_extended="test impact", + severity=Severity.critical, + raw_result={"status": Status.FAIL}, + check_id="storage_check", + check_metadata={"CheckId": "storage_check"}, + resource_groups="storage", + first_seen_at="2024-01-02T00:00:00Z", + ) + finding1.add_resources([resource1]) + + finding2 = Finding.objects.create( + tenant_id=scan.tenant_id, + uid="finding_multi_grp_2", + scan=scan, + delta=None, + status=Status.FAIL, + status_extended="test status 2", + impact=Severity.high, + impact_extended="test impact 2", + severity=Severity.high, + raw_result={"status": Status.FAIL}, + check_id="security_check", + check_metadata={"CheckId": "security_check"}, + resource_groups="security", + first_seen_at="2024-01-02T00:00:00Z", + ) + finding2.add_resources([resource2]) + + backfill_resource_scan_summaries(str(scan.tenant_id), str(scan.id)) + return finding1, finding2 + + +@pytest.fixture +def create_scan_resource_group_summary(): + def _create( + tenant, + scan, + resource_group, + severity, + total_findings=10, + failed_findings=5, + new_failed_findings=2, + resources_count=3, + ): + return ScanGroupSummary.objects.create( + tenant=tenant, + scan=scan, + resource_group=resource_group, + severity=severity, + total_findings=total_findings, + failed_findings=failed_findings, + new_failed_findings=new_failed_findings, + resources_count=resources_count, + ) + + return _create + + def get_authorization_header(access_token: str) -> dict: return {"Authorization": f"Bearer {access_token}"} diff --git a/api/src/backend/tasks/jobs/backfill.py b/api/src/backend/tasks/jobs/backfill.py index 851efe4e4a..d9985afafb 100644 --- a/api/src/backend/tasks/jobs/backfill.py +++ b/api/src/backend/tasks/jobs/backfill.py @@ -2,13 +2,13 @@ from datetime import timedelta from celery.utils.log import get_task_logger -from django.db.models import Sum +from django.db.models import OuterRef, Subquery, Sum from django.utils import timezone from tasks.jobs.queries import ( COMPLIANCE_UPSERT_PROVIDER_SCORE_SQL, COMPLIANCE_UPSERT_TENANT_SUMMARY_ALL_SQL, ) -from tasks.jobs.scan import aggregate_category_counts +from tasks.jobs.scan import aggregate_category_counts, aggregate_resource_group_counts from api.db_router import READ_REPLICA_ALIAS, MainRouter from api.db_utils import ( @@ -28,6 +28,7 @@ ResourceScanSummary, Scan, ScanCategorySummary, + ScanGroupSummary, ScanSummary, StateChoices, ) @@ -356,6 +357,92 @@ def backfill_scan_category_summaries(tenant_id: str, scan_id: str): return {"status": "backfilled", "categories_count": len(category_counts)} +def backfill_scan_resource_group_summaries(tenant_id: str, scan_id: str): + """ + Backfill ScanGroupSummary for a completed scan. + + Aggregates resource group counts from all findings in the scan and creates + one ScanGroupSummary row per (resource_group, severity) combination. 
+ + Args: + tenant_id: Target tenant UUID + scan_id: Scan UUID to backfill + + Returns: + dict: Status indicating whether backfill was performed + """ + with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS): + if ScanGroupSummary.objects.filter( + tenant_id=tenant_id, scan_id=scan_id + ).exists(): + return {"status": "already backfilled"} + + if not Scan.objects.filter( + tenant_id=tenant_id, + id=scan_id, + state__in=(StateChoices.COMPLETED, StateChoices.FAILED), + ).exists(): + return {"status": "scan is not completed"} + + resource_group_counts: dict[tuple[str, str], dict[str, int]] = {} + group_resources_cache: dict[str, set] = {} + # Get findings with their first resource UID via annotation + resource_uid_subquery = ResourceFindingMapping.objects.filter( + finding_id=OuterRef("id"), tenant_id=tenant_id + ).values("resource__uid")[:1] + + for finding in ( + Finding.all_objects.filter(tenant_id=tenant_id, scan_id=scan_id) + .annotate(resource_uid=Subquery(resource_uid_subquery)) + .values( + "resource_groups", + "severity", + "status", + "delta", + "muted", + "resource_uid", + ) + ): + aggregate_resource_group_counts( + resource_group=finding.get("resource_groups"), + severity=finding.get("severity"), + status=finding.get("status"), + delta=finding.get("delta"), + muted=finding.get("muted", False), + resource_uid=finding.get("resource_uid") or "", + cache=resource_group_counts, + group_resources_cache=group_resources_cache, + ) + + if not resource_group_counts: + return {"status": "no resource groups to backfill"} + + # Compute group-level resource counts (same value for all severity rows in a group) + group_resource_counts = { + grp: len(uids) for grp, uids in group_resources_cache.items() + } + resource_group_summaries = [ + ScanGroupSummary( + tenant_id=tenant_id, + scan_id=scan_id, + resource_group=grp, + severity=severity, + total_findings=counts["total"], + failed_findings=counts["failed"], + new_failed_findings=counts["new_failed"], + resources_count=group_resource_counts.get(grp, 0), + ) + for (grp, severity), counts in resource_group_counts.items() + ] + + with rls_transaction(tenant_id): + ScanGroupSummary.objects.bulk_create( + resource_group_summaries, batch_size=500, ignore_conflicts=True + ) + + return {"status": "backfilled", "resource_groups_count": len(resource_group_counts)} + + def backfill_provider_compliance_scores(tenant_id: str) -> dict: """ Backfill ProviderComplianceScore from latest completed scan per provider. diff --git a/api/src/backend/tasks/jobs/scan.py b/api/src/backend/tasks/jobs/scan.py index 9e40e2df03..9697359065 100644 --- a/api/src/backend/tasks/jobs/scan.py +++ b/api/src/backend/tasks/jobs/scan.py @@ -45,6 +45,7 @@ ResourceTag, Scan, ScanCategorySummary, + ScanGroupSummary, ScanSummary, StateChoices, ) @@ -127,6 +128,50 @@ def aggregate_category_counts( cache[key]["new_failed"] += 1 +def aggregate_resource_group_counts( + resource_group: str | None, + severity: str, + status: str, + delta: str | None, + muted: bool, + resource_uid: str, + cache: dict[tuple[str, str], dict[str, int]], + group_resources_cache: dict[str, set], +) -> None: + """ + Increment resource group counters in-place for a finding. + + Args: + resource_group: Resource group from check metadata (e.g., "database", "compute"). + severity: Severity level (e.g., "high", "medium"). + status: Finding status as string ("FAIL", "PASS"). + delta: Delta value as string ("new", "changed") or None. + muted: Whether the finding is muted. 
+ resource_uid: Unique identifier for the resource to count distinct resources. + cache: Dict {(resource_group, severity): {"total", "failed", "new_failed"}} to update. + group_resources_cache: Dict {resource_group: set(resource_uids)} for group-level resource tracking. + """ + if not resource_group: + return + + is_failed = status == "FAIL" and not muted + is_new_failed = is_failed and delta == "new" + + key = (resource_group, severity) + if key not in cache: + cache[key] = {"total": 0, "failed": 0, "new_failed": 0} + if not muted: + cache[key]["total"] += 1 + if is_failed: + cache[key]["failed"] += 1 + if is_new_failed: + cache[key]["new_failed"] += 1 + + # Track resources at GROUP level (not per-severity) to avoid over-counting + if resource_uid and not muted: + group_resources_cache.setdefault(resource_group, set()).add(resource_uid) + + def _get_attack_surface_mapping_from_provider(provider_type: str) -> dict: global _ATTACK_SURFACE_MAPPING_CACHE @@ -438,6 +483,8 @@ def _process_finding_micro_batch( scan_resource_cache: set, mute_rules_cache: dict, scan_categories_cache: dict[tuple[str, str], dict[str, int]], + scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]], + group_resources_cache: dict[str, set], ) -> None: """ Process a micro-batch of findings and persist them using bulk operations. @@ -459,6 +506,8 @@ def _process_finding_micro_batch( scan_resource_cache: Set of tuples used to create `ResourceScanSummary` rows. mute_rules_cache: Map of finding UID -> mute reason gathered before the scan. scan_categories_cache: Dict tracking category counts {(category, severity): {"total", "failed", "new_failed"}}. + scan_resource_groups_cache: Dict tracking resource group counts {(resource_group, severity): {"total", "failed", "new_failed"}}. + group_resources_cache: Dict tracking unique resources per group {resource_group: set(resource_uids)}. 
""" # Accumulate objects for bulk operations findings_to_create = [] @@ -499,6 +548,8 @@ def _process_finding_micro_batch( with rls_transaction(tenant_id): resource_uid = finding.resource_uid if resource_uid not in resource_cache: + check_metadata = finding.get_metadata() + group = check_metadata.get("resourcegroup") or None resource_instance, _ = Resource.objects.get_or_create( tenant_id=tenant_id, provider=provider_instance, @@ -508,6 +559,7 @@ def _process_finding_micro_batch( "service": finding.service_name, "type": finding.resource_type, "name": finding.resource_name, + "groups": [group] if group else None, }, ) resource_cache[resource_uid] = resource_instance @@ -528,6 +580,8 @@ def _process_finding_micro_batch( # Track resource field changes (defer save) updated = False + check_metadata = finding.get_metadata() + group = check_metadata.get("resourcegroup") or None if finding.region and resource_instance.region != finding.region: resource_instance.region = finding.region updated = True @@ -548,6 +602,11 @@ def _process_finding_micro_batch( if resource_instance.partition != finding.partition: resource_instance.partition = finding.partition updated = True + if group and ( + not resource_instance.groups or group not in resource_instance.groups + ): + resource_instance.groups = (resource_instance.groups or []) + [group] + updated = True if updated: dirty_resources[resource_uid] = resource_instance @@ -633,6 +692,7 @@ def _process_finding_micro_batch( muted_reason=muted_reason, compliance=finding.compliance, categories=check_metadata.get("categories", []) or [], + resource_groups=check_metadata.get("resourcegroup") or None, ) findings_to_create.append(finding_instance) resource_denormalized_data.append((finding_instance, resource_instance)) @@ -657,6 +717,18 @@ def _process_finding_micro_batch( cache=scan_categories_cache, ) + # Track resource groups with counts for ScanGroupSummary + aggregate_resource_group_counts( + resource_group=check_metadata.get("resourcegroup") or None, + severity=finding.severity.value, + status=status.value, + delta=delta.value if delta else None, + muted=is_muted, + resource_uid=resource_instance.uid if resource_instance else "", + cache=scan_resource_groups_cache, + group_resources_cache=group_resources_cache, + ) + # Bulk operations within single transaction with rls_transaction(tenant_id): # Bulk create findings @@ -714,7 +786,15 @@ def _process_finding_micro_batch( tenant_id=tenant_id, model=Resource, objects=list(dirty_resources.values()), - fields=["metadata", "details", "partition", "region", "service", "type"], + fields=[ + "metadata", + "details", + "partition", + "region", + "service", + "type", + "groups", + ], batch_size=1000, ) @@ -757,6 +837,8 @@ def perform_prowler_scan( unique_resources = set() scan_resource_cache: set[tuple[str, str, str, str]] = set() scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {} + scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {} + group_resources_cache: dict[str, set] = {} start_time = time.time() exc = None @@ -847,6 +929,8 @@ def perform_prowler_scan( scan_resource_cache=scan_resource_cache, mute_rules_cache=mute_rules_cache, scan_categories_cache=scan_categories_cache, + scan_resource_groups_cache=scan_resource_groups_cache, + group_resources_cache=group_resources_cache, ) # Update scan progress @@ -933,6 +1017,38 @@ def perform_prowler_scan( sentry_sdk.capture_exception(cat_exception) logger.error(f"Error storing categories for scan {scan_id}: {cat_exception}") + try: + if 
+    try:
+        if scan_resource_groups_cache:
+            # Compute group-level resource counts (same value for all severity rows in a group)
+            group_resource_counts = {
+                grp: len(uids) for grp, uids in group_resources_cache.items()
+            }
+            resource_group_summaries = [
+                ScanGroupSummary(
+                    tenant_id=tenant_id,
+                    scan_id=scan_id,
+                    resource_group=grp,
+                    severity=severity,
+                    total_findings=counts["total"],
+                    failed_findings=counts["failed"],
+                    new_failed_findings=counts["new_failed"],
+                    resources_count=group_resource_counts.get(grp, 0),
+                )
+                for (grp, severity), counts in scan_resource_groups_cache.items()
+            ]
+            with rls_transaction(tenant_id):
+                ScanGroupSummary.objects.bulk_create(
+                    resource_group_summaries, batch_size=500, ignore_conflicts=True
+                )
+    except Exception as rg_exception:
+        sentry_sdk.capture_exception(rg_exception)
+        logger.error(
+            f"Error storing resource groups for scan {scan_id}: {rg_exception}"
+        )
+
     serializer = ScanTaskSerializer(instance=scan_instance)
     return serializer.data
diff --git a/api/src/backend/tasks/tasks.py b/api/src/backend/tasks/tasks.py
index 542f01ef65..91e1ac85ea 100644
--- a/api/src/backend/tasks/tasks.py
+++ b/api/src/backend/tasks/tasks.py
@@ -14,6 +14,7 @@
     backfill_provider_compliance_scores,
     backfill_resource_scan_summaries,
     backfill_scan_category_summaries,
+    backfill_scan_resource_group_summaries,
 )
 from tasks.jobs.connection import (
     check_integration_connection,
@@ -613,6 +614,21 @@ def backfill_scan_category_summaries_task(tenant_id: str, scan_id: str):
     return backfill_scan_category_summaries(tenant_id=tenant_id, scan_id=scan_id)
 
 
+@shared_task(name="backfill-scan-resource-group-summaries", queue="backfill")
+@handle_provider_deletion
+def backfill_scan_resource_group_summaries_task(tenant_id: str, scan_id: str):
+    """
+    Backfill ScanGroupSummary for a completed scan.
+
+    Aggregates resource group counts from findings into one row per (group, severity).
+
+    Args:
+        tenant_id (str): The tenant identifier.
+        scan_id (str): The scan identifier.
+    """
+    return backfill_scan_resource_group_summaries(tenant_id=tenant_id, scan_id=scan_id)
+
+
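Dispatching this task for existing scans could look like the sketch below. `enqueue_resource_group_backfills` is a hypothetical driver (e.g., the body of a management command), the `.delay()` dispatch is standard Celery, and the accepted scan states mirror the ones the job itself checks:

```python
# Hypothetical driver: enqueue the backfill for every finished scan of a
# tenant. Assumes it runs where RLS context for tenant_id is active
# (e.g., inside rls_transaction); not part of this diff.
from api.models import Scan, StateChoices
from tasks.tasks import backfill_scan_resource_group_summaries_task


def enqueue_resource_group_backfills(tenant_id: str) -> int:
    scan_ids = list(
        Scan.objects.filter(
            tenant_id=tenant_id,
            state__in=(StateChoices.COMPLETED, StateChoices.FAILED),
        ).values_list("id", flat=True)
    )
    for scan_id in scan_ids:
        # The task itself is idempotent, so re-enqueueing is harmless.
        backfill_scan_resource_group_summaries_task.delay(
            tenant_id=tenant_id, scan_id=str(scan_id)
        )
    return len(scan_ids)
```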
 @shared_task(name="backfill-provider-compliance-scores", queue="backfill")
 def backfill_provider_compliance_scores_task(tenant_id: str):
     """
diff --git a/api/src/backend/tasks/tests/test_backfill.py b/api/src/backend/tasks/tests/test_backfill.py
index 4c9780d101..04b3158d22 100644
--- a/api/src/backend/tasks/tests/test_backfill.py
+++ b/api/src/backend/tasks/tests/test_backfill.py
@@ -8,6 +8,7 @@
     backfill_provider_compliance_scores,
     backfill_resource_scan_summaries,
     backfill_scan_category_summaries,
+    backfill_scan_resource_group_summaries,
 )
 
 from api.models import (
@@ -16,6 +17,7 @@
     ResourceScanSummary,
     Scan,
     ScanCategorySummary,
+    ScanGroupSummary,
     StateChoices,
 )
 from prowler.lib.check.models import Severity
@@ -265,6 +267,94 @@ def test_successful_backfill(self, findings_with_categories_fixture):
         assert summary.new_failed_findings == 1
 
 
+@pytest.fixture(scope="function")
+def findings_with_group_fixture(scans_fixture, resources_fixture):
+    scan = scans_fixture[0]
+    resource = resources_fixture[0]
+
+    finding = Finding.objects.create(
+        tenant_id=scan.tenant_id,
+        uid="finding_with_group",
+        scan=scan,
+        delta="new",
+        status=Status.FAIL,
+        status_extended="test status",
+        impact=Severity.high,
+        impact_extended="test impact",
+        severity=Severity.high,
+        raw_result={"status": Status.FAIL},
+        check_id="test_check",
+        check_metadata={"CheckId": "test_check"},
+        resource_groups="ai_ml",
+        first_seen_at="2024-01-02T00:00:00Z",
+    )
+    finding.add_resources([resource])
+    return finding
+
+
+@pytest.fixture(scope="function")
+def scan_resource_group_summary_fixture(scans_fixture):
+    scan = scans_fixture[0]
+    return ScanGroupSummary.objects.create(
+        tenant_id=scan.tenant_id,
+        scan=scan,
+        resource_group="existing-group",
+        severity=Severity.high,
+        total_findings=1,
+        failed_findings=0,
+        new_failed_findings=0,
+        resources_count=1,
+    )
+
+
+@pytest.mark.django_db
+class TestBackfillScanGroupSummaries:
+    def test_already_backfilled(self, scan_resource_group_summary_fixture):
+        tenant_id = scan_resource_group_summary_fixture.tenant_id
+        scan_id = scan_resource_group_summary_fixture.scan_id
+
+        result = backfill_scan_resource_group_summaries(str(tenant_id), str(scan_id))
+
+        assert result == {"status": "already backfilled"}
+
+    def test_not_completed_scan(self, get_not_completed_scans):
+        for scan in get_not_completed_scans:
+            result = backfill_scan_resource_group_summaries(
+                str(scan.tenant_id), str(scan.id)
+            )
+            assert result == {"status": "scan is not completed"}
+
+    def test_no_resource_groups_to_backfill(self, scans_fixture):
+        scan = scans_fixture[1]  # Failed scan with no findings
+        result = backfill_scan_resource_group_summaries(
+            str(scan.tenant_id), str(scan.id)
+        )
+        assert result == {"status": "no resource groups to backfill"}
+
+    def test_successful_backfill(self, findings_with_group_fixture):
+        finding = findings_with_group_fixture
+        tenant_id = str(finding.tenant_id)
+        scan_id = str(finding.scan_id)
+
+        result = backfill_scan_resource_group_summaries(tenant_id, scan_id)
+
+        # 1 resource group × 1 severity = 1 row
+        assert result == {"status": "backfilled", "resource_groups_count": 1}
+
+        summaries = ScanGroupSummary.objects.filter(
+            tenant_id=tenant_id, scan_id=scan_id
+        )
+        assert summaries.count() == 1
+
+        summary = summaries.first()
+        assert summary.resource_group == "ai_ml"
+        assert summary.severity == Severity.high
+        assert summary.total_findings == 1
+        assert summary.failed_findings == 1
+        assert summary.new_failed_findings == 1
+        assert summary.resources_count == 1
+
+
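These per-(group, severity) rows are what an overview query can collapse into one entry per group. A sketch of that aggregation, under the assumption stated in the aggregation comment above that every severity row of a group duplicates the same `resources_count` (the function and its aliases are illustrative, not the endpoint's actual implementation):

```python
# Sketch: collapse per-severity ScanGroupSummary rows into one overview
# entry per resource group. Annotation aliases are renamed because Django
# annotations may not shadow model fields.
from django.db.models import Max, Sum

from api.models import ScanGroupSummary


def resource_group_overview(tenant_id: str, scan_id: str) -> list[dict]:
    return list(
        ScanGroupSummary.objects.filter(tenant_id=tenant_id, scan_id=scan_id)
        .values("resource_group")
        .annotate(
            findings=Sum("total_findings"),
            failed=Sum("failed_findings"),
            new_failed=Sum("new_failed_findings"),
            # Every severity row carries the same group-level value,
            # so Max() recovers it without double counting.
            resources=Max("resources_count"),
        )
        .order_by("resource_group")
    )
```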
 @pytest.mark.django_db
 class TestBackfillProviderComplianceScores:
     def test_no_completed_scans(self, tenants_fixture):
diff --git a/api/src/backend/tasks/tests/test_scan.py b/api/src/backend/tasks/tests/test_scan.py
index 8902b17b54..5f244e0103 100644
--- a/api/src/backend/tasks/tests/test_scan.py
+++ b/api/src/backend/tasks/tests/test_scan.py
@@ -1380,6 +1380,8 @@ def test_process_finding_micro_batch_creates_records_and_updates_caches(
     scan_resource_cache: set[tuple[str, str, str, str]] = set()
     mute_rules_cache = {}
     scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
+    scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
+    group_resources_cache: dict[str, set] = {}
 
     with (
         patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
@@ -1398,6 +1400,8 @@
             scan_resource_cache,
             mute_rules_cache,
             scan_categories_cache,
+            scan_resource_groups_cache,
+            group_resources_cache,
         )
 
     created_finding = Finding.objects.get(uid=finding.uid)
@@ -1491,6 +1495,8 @@ def test_process_finding_micro_batch_manual_mute_and_dirty_resources(
     scan_resource_cache: set[tuple[str, str, str, str]] = set()
     mute_rules_cache = {finding.uid: "Muted via rule"}
     scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
+    scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
+    group_resources_cache: dict[str, set] = {}
 
     with (
         patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
@@ -1509,6 +1515,8 @@
             scan_resource_cache,
             mute_rules_cache,
             scan_categories_cache,
+            scan_resource_groups_cache,
+            group_resources_cache,
         )
 
     existing_resource.refresh_from_db()
@@ -1617,6 +1625,8 @@ def test_process_finding_micro_batch_skips_long_uid(
     scan_resource_cache: set[tuple[str, str, str, str]] = set()
     mute_rules_cache = {}
     scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
+    scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
+    group_resources_cache: dict[str, set] = {}
 
     with (
         patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
@@ -1636,6 +1646,8 @@
             scan_resource_cache,
             mute_rules_cache,
             scan_categories_cache,
+            scan_resource_groups_cache,
+            group_resources_cache,
         )
 
     # Verify the long UID finding was NOT created
@@ -1713,6 +1725,8 @@ def test_process_finding_micro_batch_tracks_categories(
     scan_resource_cache: set[tuple[str, str, str, str]] = set()
     mute_rules_cache = {}
     scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
+    scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
+    group_resources_cache: dict[str, set] = {}
 
     with (
         patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
@@ -1731,6 +1745,8 @@
             scan_resource_cache,
             mute_rules_cache,
             scan_categories_cache,
+            scan_resource_groups_cache,
+            group_resources_cache,
        )
 
     # finding1: PASS, severity=low, categories=["gen-ai", "security"]
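One edge the micro-batch tests above exercise only indirectly is mute handling inside the aggregator itself. A small direct test could pin it down (values are invented; the helper is the one added to `tasks/jobs/scan.py` in this diff):

```python
# Sketch: direct unit test for aggregate_resource_group_counts, isolating
# the muted path from the micro-batch plumbing.
from tasks.jobs.scan import aggregate_resource_group_counts


def test_muted_findings_do_not_count():
    cache: dict[tuple[str, str], dict[str, int]] = {}
    groups: dict[str, set] = {}

    aggregate_resource_group_counts(
        resource_group="database",
        severity="high",
        status="FAIL",
        delta="new",
        muted=True,
        resource_uid="arn:res-1",
        cache=cache,
        group_resources_cache=groups,
    )

    # A muted FAIL still registers its (group, severity) key but contributes
    # nothing: no totals, no failures, and no resource UID tracking.
    assert cache == {("database", "high"): {"total": 0, "failed": 0, "new_failed": 0}}
    assert groups == {}
```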