From 22254ca4a8956a6422b24145b4ba7ab883334768 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Thu, 18 Dec 2025 20:21:53 -0500 Subject: [PATCH 01/17] progress --- src/sentry/api/urls.py | 5 + .../endpoints/organization_dashboards.py | 127 +++-- src/sentry/models/dashboard_widget.py | 5 + .../organization_preprod_app_size_stats.py | 459 +++++++++++++++++ src/sentry/preprod/api/endpoints/urls.py | 7 + ...est_organization_preprod_app_size_stats.py | 479 ++++++++++++++++++ 6 files changed, 1012 insertions(+), 70 deletions(-) create mode 100644 src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py create mode 100644 tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py diff --git a/src/sentry/api/urls.py b/src/sentry/api/urls.py index 621aaa2aa9e41e..8f5b96a7eb58a2 100644 --- a/src/sentry/api/urls.py +++ b/src/sentry/api/urls.py @@ -2656,6 +2656,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]: OrganizationObjectstoreEndpoint.as_view(), name="sentry-api-0-organization-objectstore", ), + re_path( + r"^(?P[^/]+)/preprod/app-size-stats/$", + preprod_urls.OrganizationPreprodAppSizeStatsEndpoint.as_view(), + name="sentry-api-0-organization-preprod-app-size-stats", + ), ] PROJECT_URLS: list[URLPattern | URLResolver] = [ diff --git a/src/sentry/dashboards/endpoints/organization_dashboards.py b/src/sentry/dashboards/endpoints/organization_dashboards.py index 48823248bae680..42ae03b58da328 100644 --- a/src/sentry/dashboards/endpoints/organization_dashboards.py +++ b/src/sentry/dashboards/endpoints/organization_dashboards.py @@ -3,7 +3,6 @@ from enum import IntEnum from typing import Any, TypedDict -import sentry_sdk from django.db import IntegrityError, router, transaction from django.db.models import ( Case, @@ -71,6 +70,7 @@ class PrebuiltDashboardId(IntEnum): MOBILE_VITALS_APP_STARTS = 9 MOBILE_VITALS_SCREEN_LOADS = 10 MOBILE_VITALS_SCREEN_RENDERING = 11 + MOBILE_APP_SIZE = 12 class PrebuiltDashboard(TypedDict): @@ -120,60 +120,13 @@ class PrebuiltDashboard(TypedDict): "prebuilt_id": PrebuiltDashboardId.MOBILE_VITALS_SCREEN_RENDERING, "title": "Screen Rendering", }, + { + "prebuilt_id": PrebuiltDashboardId.MOBILE_APP_SIZE, + "title": "Mobile App Size Monitoring", + }, ] -def sync_prebuilt_dashboards(organization: Organization) -> None: - """ - Queries the database to check if prebuilt dashboards have a Dashboard record and - creates them if they don't, updates titles if they've changed, or deletes them - if they should no longer exist. 
- """ - - with transaction.atomic(router.db_for_write(Dashboard)): - enabled_prebuilt_dashboard_ids = options.get("dashboards.prebuilt-dashboard-ids") - enabled_prebuilt_dashboards = [ - dashboard - for dashboard in PREBUILT_DASHBOARDS - if dashboard["prebuilt_id"] in enabled_prebuilt_dashboard_ids - ] - - saved_prebuilt_dashboards = Dashboard.objects.filter( - organization=organization, - prebuilt_id__isnull=False, - ) - - saved_prebuilt_dashboard_map = {d.prebuilt_id: d for d in saved_prebuilt_dashboards} - - # Create prebuilt dashboards if they don't exist, or update titles if changed - dashboards_to_update: list[Dashboard] = [] - for prebuilt_dashboard in enabled_prebuilt_dashboards: - prebuilt_id: PrebuiltDashboardId = prebuilt_dashboard["prebuilt_id"] - - if prebuilt_id not in saved_prebuilt_dashboard_map: - # Create new dashboard - Dashboard.objects.create( - organization=organization, - title=prebuilt_dashboard["title"], - created_by_id=None, - prebuilt_id=prebuilt_id, - ) - elif saved_prebuilt_dashboard_map[prebuilt_id].title != prebuilt_dashboard["title"]: - # Update title if changed - saved_prebuilt_dashboard_map[prebuilt_id].title = prebuilt_dashboard["title"] - dashboards_to_update.append(saved_prebuilt_dashboard_map[prebuilt_id]) - - if dashboards_to_update: - Dashboard.objects.bulk_update(dashboards_to_update, ["title"]) - - # Delete old prebuilt dashboards if they should no longer exist - prebuilt_ids = [d["prebuilt_id"] for d in enabled_prebuilt_dashboards] - Dashboard.objects.filter( - organization=organization, - prebuilt_id__isnull=False, - ).exclude(prebuilt_id__in=prebuilt_ids).delete() - - class OrganizationDashboardsPermission(OrganizationPermission): scope_map = { "GET": ["org:read", "org:write", "org:admin"], @@ -193,18 +146,14 @@ def has_object_permission( if isinstance(obj, Dashboard): is_superuser = is_active_superuser(request) - # allow strictly for Owners and superusers, this allows them to delete dashboards - # of users that no longer have access to the organization if is_superuser or request.access.has_role_in_organization( role=roles.get_top_dog().id, organization=obj.organization, user_id=request.user.id ): return True - # check if user is restricted from editing dashboard if hasattr(obj, "permissions"): return obj.permissions.has_edit_permissions(request.user.id) - # if no permissions are assigned, it is considered accessible to all users return True return True @@ -249,7 +198,6 @@ def get(self, request: Request, organization: Organization) -> Response: organization, actor=request.user, ): - # Sync prebuilt dashboards to the database try: lock = locks.get( f"dashboards:sync_prebuilt_dashboards:{organization.id}", @@ -257,14 +205,9 @@ def get(self, request: Request, organization: Organization) -> Response: name="sync_prebuilt_dashboards", ) with lock.acquire(): - # Adds prebuilt dashboards to the database if they don't exist. - # Deletes old prebuilt dashboards from the database if they should no longer exist. sync_prebuilt_dashboards(organization) except UnableToAcquireLock: - # Another process is already syncing the prebuilt dashboards. We can skip syncing this time. 
pass - except Exception as err: - sentry_sdk.capture_exception(err) filter_by = request.query_params.get("filter") if filter_by == "onlyFavorites": @@ -532,14 +475,11 @@ def post(self, request: Request, organization: Organization, retry: int = 0) -> actor=request.user, ): if serializer.validated_data.get("is_favorited"): - try: - DashboardFavoriteUser.objects.insert_favorite_dashboard( - organization=organization, - user_id=request.user.id, - dashboard=dashboard, - ) - except Exception as e: - sentry_sdk.capture_exception(e) + DashboardFavoriteUser.objects.insert_favorite_dashboard( + organization=organization, + user_id=request.user.id, + dashboard=dashboard, + ) return Response(serialize(dashboard, request.user), status=201) except IntegrityError: @@ -553,3 +493,50 @@ def post(self, request: Request, organization: Organization, retry: int = 0) -> return self.post(request, organization, retry=retry + 1) except UnableToAcquireLock: return Response("Unable to create dashboard, please try again", status=503) + + +def sync_prebuilt_dashboards(organization: Organization) -> None: + """ + Queries the database to check if prebuilt dashboards have a Dashboard record and + creates them if they don't, updates titles if they've changed, or deletes them + if they should no longer exist. + """ + + with transaction.atomic(router.db_for_write(Dashboard)): + enabled_prebuilt_dashboard_ids = options.get("dashboards.prebuilt-dashboard-ids") + enabled_prebuilt_dashboards = [ + dashboard + for dashboard in PREBUILT_DASHBOARDS + if dashboard["prebuilt_id"] in enabled_prebuilt_dashboard_ids + ] + + saved_prebuilt_dashboards = Dashboard.objects.filter( + organization=organization, + prebuilt_id__isnull=False, + ) + + saved_prebuilt_dashboard_map = {d.prebuilt_id: d for d in saved_prebuilt_dashboards} + + dashboards_to_update: list[Dashboard] = [] + for prebuilt_dashboard in enabled_prebuilt_dashboards: + prebuilt_id: PrebuiltDashboardId = prebuilt_dashboard["prebuilt_id"] + + if prebuilt_id not in saved_prebuilt_dashboard_map: + Dashboard.objects.create( + organization=organization, + title=prebuilt_dashboard["title"], + created_by_id=None, + prebuilt_id=prebuilt_id, + ) + elif saved_prebuilt_dashboard_map[prebuilt_id].title != prebuilt_dashboard["title"]: + saved_prebuilt_dashboard_map[prebuilt_id].title = prebuilt_dashboard["title"] + dashboards_to_update.append(saved_prebuilt_dashboard_map[prebuilt_id]) + + if dashboards_to_update: + Dashboard.objects.bulk_update(dashboards_to_update, ["title"]) + + prebuilt_ids = [d["prebuilt_id"] for d in enabled_prebuilt_dashboards] + Dashboard.objects.filter( + organization=organization, + prebuilt_id__isnull=False, + ).exclude(prebuilt_id__in=prebuilt_ids).delete() diff --git a/src/sentry/models/dashboard_widget.py b/src/sentry/models/dashboard_widget.py index 83f2d7cc522198..ca4bf8845c34d0 100644 --- a/src/sentry/models/dashboard_widget.py +++ b/src/sentry/models/dashboard_widget.py @@ -72,6 +72,10 @@ class DashboardWidgetTypes(TypesClass): These represent the tracemetrics item type on the EAP dataset. """ TRACEMETRICS = 104 + """ + Mobile app size metrics from preprod item type on the EAP dataset. 
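+ Added in the same change as the Mobile App Size Monitoring prebuilt dashboard and the organization preprod app-size-stats endpoint.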
+ """ + MOBILE_APP_SIZE = 105 TYPES = [ (DISCOVER, "discover"), @@ -85,6 +89,7 @@ class DashboardWidgetTypes(TypesClass): (SPANS, "spans"), (LOGS, "logs"), (TRACEMETRICS, "tracemetrics"), + (MOBILE_APP_SIZE, "mobile-app-size"), ] TYPE_NAMES = [t[1] for t in TYPES] diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py new file mode 100644 index 00000000000000..a91ef4b532d859 --- /dev/null +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -0,0 +1,459 @@ +from __future__ import annotations + +import logging +from collections import defaultdict +from datetime import datetime, timedelta +from typing import Any + +from rest_framework.exceptions import ParseError +from rest_framework.request import Request +from rest_framework.response import Response +from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import TraceItemTableResponse +from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey, AttributeValue +from sentry_protos.snuba.v1.trace_item_filter_pb2 import ( + AndFilter, + ComparisonFilter, + TraceItemFilter, +) + +from sentry.api.api_publish_status import ApiPublishStatus +from sentry.api.base import region_silo_endpoint +from sentry.api.bases.organization import OrganizationEndpoint +from sentry.api.exceptions import ResourceDoesNotExist +from sentry.models.organization import Organization +from sentry.preprod.eap.read import query_preprod_size_metrics +from sentry.utils.dates import parse_stats_period + +logger = logging.getLogger(__name__) + + +def extract_unique_values_from_response( + response: TraceItemTableResponse, +) -> dict[str, set[str]]: + """Extract unique string values from each column in the response.""" + unique_values: dict[str, set[str]] = {} + + if not response.column_values: + return unique_values + + # Iterate through each column + for column in response.column_values: + attr_name = column.attribute_name + unique_values[attr_name] = set() + + # Iterate through all results in this column + for result in column.results: + if result.HasField("val_str") and result.val_str: + unique_values[attr_name].add(result.val_str) + + return unique_values + + +@region_silo_endpoint +class OrganizationPreprodAppSizeStatsEndpoint(OrganizationEndpoint): + """ + Returns time-series data for app size metrics stored in EAP. + Compatible with Sentry dashboard widgets. + """ + + publish_status = { + "GET": ApiPublishStatus.PRIVATE, + } + + def get(self, request: Request, organization: Organization) -> Response: + """ + Retrieve app size metrics over time. + + Query Parameters: + - project: Project ID(s) (can be repeated or comma-separated) + - start: Start timestamp (ISO format or Unix timestamp) + - end: End timestamp (ISO format or Unix timestamp) + - statsPeriod: Alternative to start/end (e.g., "14d", "24h") + - interval: Time interval for buckets (e.g., "1h", "1d") + - field: Aggregate field (e.g., "max(max_install_size)") + - query: Filter query string (e.g., "app_id:com.example.app artifact_type:0") + - includeFilters: If "true", includes available filter values in response + + Response Format: + { + "data": [[timestamp, [{"count": value}]], ...], + "start": unix_timestamp, + "end": unix_timestamp, + "meta": {...}, + "filters": { // Only if includeFilters=true + "app_ids": ["com.example.app", ...], + "branches": ["main", "develop", ...] 
+ } + } + """ + try: + project_ids_set = self.get_requested_project_ids_unchecked(request) + if not project_ids_set: + projects = self.get_projects(request, organization) + project_ids = [p.id for p in projects] + else: + project_ids = list(project_ids_set) + + start, end = self._parse_time_range(request) + + interval = request.GET.get("interval", "1d") + interval_seconds = self._parse_interval(interval) + + field = request.GET.get("field", "max(max_install_size)") + aggregate_func, aggregate_field = self._parse_field(field) + + filter_kwargs = self._parse_query(request.GET.get("query", "")) + query_filter = self._build_filter(filter_kwargs) if filter_kwargs else None + + response = query_preprod_size_metrics( + organization_id=organization.id, + project_ids=project_ids, + start=start, + end=end, + referrer="api.preprod.app-size-stats", + filter=query_filter, + limit=10000, # Get all data points + ) + + # Transform to time-series format + timeseries_data = self._transform_to_timeseries( + response, start, end, interval_seconds, aggregate_func, aggregate_field + ) + + result: dict[str, Any] = { + "data": timeseries_data, + "start": int(start.timestamp()), + "end": int(end.timestamp()), + "meta": { + "fields": { + field: "integer", + } + }, + } + + # Include available filter values if requested + include_filters = request.GET.get("includeFilters", "").lower() == "true" + if include_filters: + filter_values = self._fetch_filter_values(organization.id, project_ids, start, end) + result["filters"] = filter_values + + return Response(result) + + except (ValueError, KeyError) as e: + logger.exception("[AppSize] Parse error: %s", e) + raise ParseError(str(e)) + except Exception as e: + logger.exception("[AppSize] Unexpected error: %s", e) + raise ResourceDoesNotExist(f"Failed to fetch app size metrics: {e}") + + def _parse_time_range(self, request: Request) -> tuple[datetime, datetime]: + """Parse start/end or statsPeriod from request.""" + stats_period = request.GET.get("statsPeriod") + start_param = request.GET.get("start") + end_param = request.GET.get("end") + + if stats_period: + # Parse relative period like "14d" or "24h" + delta = parse_stats_period(stats_period) + if delta is None: + raise ParseError(f"Invalid statsPeriod: {stats_period}") + end = datetime.utcnow() + start = end - delta + elif start_param and end_param: + # Parse absolute timestamps + start = self._parse_datetime(start_param) + end = self._parse_datetime(end_param) + else: + # Default to last 14 days + end = datetime.utcnow() + start = end - timedelta(days=14) + + return start, end + + def _parse_datetime(self, value: str) -> datetime: + """Parse datetime from ISO format or Unix timestamp.""" + try: + # Try Unix timestamp first + return datetime.fromtimestamp(float(value)) + except (ValueError, TypeError): + # Try ISO format + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + raise ParseError(f"Invalid datetime format: {value}") + + def _parse_interval(self, interval: str) -> int: + """Parse interval string like '1h', '1d' to seconds.""" + interval_map = { + "1m": 60, + "5m": 300, + "10m": 600, + "15m": 900, + "30m": 1800, + "1h": 3600, + "4h": 14400, + "1d": 86400, + "7d": 604800, + } + if interval not in interval_map: + raise ParseError(f"Invalid interval: {interval}") + return interval_map[interval] + + def _parse_field(self, field: str) -> tuple[str, str]: + """ + Parse field like 'max(max_install_size)' into ('max', 'max_install_size'). 
+ """ + if "(" not in field or ")" not in field: + raise ParseError(f"Invalid field format: {field}") + + func_name = field[: field.index("(")] + field_name = field[field.index("(") + 1 : field.index(")")] + + valid_funcs = ["max", "min", "avg", "count"] + if func_name not in valid_funcs: + raise ParseError(f"Unsupported aggregate function: {func_name}") + + valid_fields = [ + "max_install_size", + "max_download_size", + "min_install_size", + "min_download_size", + ] + if field_name and field_name not in valid_fields: + raise ParseError(f"Invalid field: {field_name}") + + return func_name, field_name + + def _parse_query(self, query: str) -> dict[str, Any]: + """ + Parse query string like 'app_id:com.example.app artifact_type:0' into filter kwargs. + """ + filters: dict[str, str | int] = {} + + if not query: + return filters + + # Simple space-separated key:value parsing + for token in query.split(): + if ":" not in token: + continue + + key, value = token.split(":", 1) + + # Map query keys to function kwargs + if key == "app_id": + filters["app_id"] = value + elif key == "artifact_type": + filters["artifact_type"] = int(value) + elif key == "build_configuration_name": + filters["build_configuration_name"] = value + elif key == "git_head_ref": + filters["git_head_ref"] = value + elif key == "artifact_id": + filters["artifact_id"] = int(value) + + return filters + + def _build_filter(self, filter_kwargs: dict[str, Any]) -> TraceItemFilter: + """Build TraceItemFilter from parsed query parameters.""" + filters = [] + + for key, value in filter_kwargs.items(): + if key == "artifact_id": + # Special case: artifact_id maps to sentry.trace_id + import uuid + + from sentry.preprod.eap.constants import PREPROD_NAMESPACE + + trace_id = uuid.uuid5(PREPROD_NAMESPACE, str(value)).hex + filters.append( + TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey( + name="sentry.trace_id", type=AttributeKey.Type.TYPE_STRING + ), + op=ComparisonFilter.OP_EQUALS, + value=AttributeValue(val_str=trace_id), + ) + ) + ) + elif isinstance(value, str): + filters.append( + TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(name=key, type=AttributeKey.Type.TYPE_STRING), + op=ComparisonFilter.OP_EQUALS, + value=AttributeValue(val_str=value), + ) + ) + ) + elif isinstance(value, int): + filters.append( + TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(name=key, type=AttributeKey.Type.TYPE_INT), + op=ComparisonFilter.OP_EQUALS, + value=AttributeValue(val_int=value), + ) + ) + ) + + if len(filters) == 1: + return filters[0] + return TraceItemFilter(and_filter=AndFilter(filters=filters)) + + def _transform_to_timeseries( + self, + response: TraceItemTableResponse, + start: datetime, + end: datetime, + interval_seconds: int, + aggregate_func: str, + aggregate_field: str, + ) -> list[tuple[int, list[dict[str, Any]]]]: + """ + Transform EAP protobuf response into dashboard time-series format. + + Output format: [[timestamp, [{"count": value}]], ...] 
+ + Note: EAP response is column-oriented (column_values[col_idx].results[row_idx]) + """ + # Create time buckets + buckets: dict[int, list[float]] = defaultdict(list) + + # Extract column names and find indices + column_map: dict[str, int] = {} + column_values = response.column_values + + if not column_values: + # No data, return empty buckets with None + result: list[tuple[int, list[dict[str, Any]]]] = [] + start_ts = int(start.timestamp()) + current = int(start_ts // interval_seconds * interval_seconds) + end_ts = int(end.timestamp()) + while current < end_ts: + result.append((current, [{"count": None}])) + current += interval_seconds + return result + + for idx, column_value in enumerate(column_values): + column_map[column_value.attribute_name] = idx + + # Get the target field index + if aggregate_field not in column_map: + # If field doesn't exist, return None values + result = [] + start_ts = int(start.timestamp()) + current = int(start_ts // interval_seconds * interval_seconds) + end_ts = int(end.timestamp()) + while current < end_ts: + result.append((current, [{"count": None}])) + current += interval_seconds + return result + + field_idx = column_map[aggregate_field] + timestamp_idx = column_map.get("timestamp") + + if timestamp_idx is None: + # No timestamp column, return None values + result = [] + start_ts = int(start.timestamp()) + current = int(start_ts // interval_seconds * interval_seconds) + end_ts = int(end.timestamp()) + while current < end_ts: + result.append((current, [{"count": None}])) + current += interval_seconds + return result + + # Process each row (iterate through results in column-oriented format) + num_rows = len(column_values[0].results) + + for row_idx in range(num_rows): + # Get timestamp for this row + timestamp_result = column_values[timestamp_idx].results[row_idx] + + if timestamp_result.HasField("val_double"): + timestamp = timestamp_result.val_double + elif timestamp_result.HasField("val_float"): + timestamp = timestamp_result.val_float + else: + continue + + bucket_ts = int(timestamp // interval_seconds * interval_seconds) + + # Get the value for this row + value_result = column_values[field_idx].results[row_idx] + value = None + + if value_result.HasField("val_int"): + value = float(value_result.val_int) + elif value_result.HasField("val_double"): + value = value_result.val_double + elif value_result.HasField("val_float"): + value = value_result.val_float + + if value is not None: + buckets[bucket_ts].append(value) + + result = [] + # Align start time to interval boundary + start_ts = int(start.timestamp()) + current = int(start_ts // interval_seconds * interval_seconds) + end_ts = int(end.timestamp()) + + while current < end_ts: + values = buckets.get(current, []) + + if not values: + # Use None for missing data so chart interpolates instead of showing 0 + aggregated = None + elif aggregate_func == "max": + aggregated = max(values) + elif aggregate_func == "min": + aggregated = min(values) + elif aggregate_func == "avg": + aggregated = sum(values) / len(values) + elif aggregate_func == "count": + aggregated = len(values) + else: + aggregated = None + + result.append( + (current, [{"count": int(aggregated) if aggregated is not None else None}]) + ) + current += interval_seconds + + return result + + def _fetch_filter_values( + self, + organization_id: int, + project_ids: list[int], + start: datetime, + end: datetime, + ) -> dict[str, list[str]]: + """ + Fetch available filter values (app_ids, branches, and build configs) from EAP data. 
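+ Backs the optional "filters" block that the GET handler includes in its response when includeFilters=true.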
+ + Returns a dict with sorted lists of unique values. + """ + # Query for app_id, git_head_ref, and build_configuration_name columns over the time range + response = query_preprod_size_metrics( + organization_id=organization_id, + project_ids=project_ids, + start=start, + end=end, + referrer="api.preprod.app-size-filters", + filter=None, + columns=["app_id", "git_head_ref", "build_configuration_name"], + limit=10000, # Get many rows to extract unique values + ) + + # Extract unique values using the helper function + unique_values = extract_unique_values_from_response(response) + + return { + "app_ids": sorted(list(unique_values.get("app_id", set()))), + "branches": sorted(list(unique_values.get("git_head_ref", set()))), + "build_configs": sorted(list(unique_values.get("build_configuration_name", set()))), + } diff --git a/src/sentry/preprod/api/endpoints/urls.py b/src/sentry/preprod/api/endpoints/urls.py index 4a3c6aef5d8edc..7c795355e02dee 100644 --- a/src/sentry/preprod/api/endpoints/urls.py +++ b/src/sentry/preprod/api/endpoints/urls.py @@ -15,6 +15,7 @@ ProjectPreprodArtifactSizeAnalysisDownloadEndpoint, ) +from .organization_preprod_app_size_stats import OrganizationPreprodAppSizeStatsEndpoint from .organization_preprod_artifact_assemble import ProjectPreprodArtifactAssembleEndpoint from .preprod_artifact_admin_batch_delete import PreprodArtifactAdminBatchDeleteEndpoint from .preprod_artifact_admin_info import PreprodArtifactAdminInfoEndpoint @@ -43,6 +44,12 @@ OrganizationPullRequestSizeAnalysisDownloadEndpoint, ) +__all__ = [ + "OrganizationPreprodAppSizeStatsEndpoint", + "preprod_urlpatterns", + "preprod_internal_urlpatterns", +] + preprod_urlpatterns = [ re_path( r"^(?P[^/]+)/(?P[^/]+)/files/preprodartifacts/assemble/$", diff --git a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py new file mode 100644 index 00000000000000..dd50c91f79e9a4 --- /dev/null +++ b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py @@ -0,0 +1,479 @@ +from __future__ import annotations + +import uuid +from datetime import datetime, timedelta + +import requests +from django.conf import settings +from django.urls import reverse +from google.protobuf.timestamp_pb2 import Timestamp +from rest_framework.test import APIClient +from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType +from sentry_protos.snuba.v1.trace_item_pb2 import TraceItem + +from sentry.preprod.eap.constants import PREPROD_NAMESPACE +from sentry.search.eap.rpc_utils import anyvalue +from sentry.testutils.cases import APITestCase +from sentry.testutils.helpers.datetime import before_now +from sentry.utils.eap import EAP_ITEMS_INSERT_ENDPOINT + + +class OrganizationPreprodAppSizeStatsEndpointTest(APITestCase): + endpoint = "sentry-api-0-organization-preprod-app-size-stats" + + def setUp(self) -> None: + super().setUp() + self.login_as(user=self.user) + self.url = reverse( + self.endpoint, kwargs={"organization_id_or_slug": self.organization.slug} + ) + + def store_preprod_size_metric( + self, + project_id: int, + organization_id: int, + timestamp: datetime, + preprod_artifact_id: int = 1, + size_metric_id: int = 1, + app_id: str = "com.example.app", + artifact_type: int = 0, + max_install_size: int = 100000, + max_download_size: int = 80000, + min_install_size: int = 95000, + min_download_size: int = 75000, + git_head_ref: str | None = None, + build_configuration_name: str | None = None, + ) -> 
None: + """Write a preprod size metric to EAP for testing.""" + proto_timestamp = Timestamp() + proto_timestamp.FromDatetime(timestamp) + + trace_id = uuid.uuid5(PREPROD_NAMESPACE, str(preprod_artifact_id)).hex + item_id_str = f"size_metric_{size_metric_id}" + item_id = int(uuid.uuid5(PREPROD_NAMESPACE, item_id_str).hex, 16).to_bytes(16, "little") + + attributes = { + "preprod_artifact_id": anyvalue(preprod_artifact_id), + "size_metric_id": anyvalue(size_metric_id), + "sub_item_type": anyvalue("size_metric"), + "metrics_artifact_type": anyvalue(0), + "identifier": anyvalue(""), + "min_install_size": anyvalue(min_install_size), + "max_install_size": anyvalue(max_install_size), + "min_download_size": anyvalue(min_download_size), + "max_download_size": anyvalue(max_download_size), + "artifact_type": anyvalue(artifact_type), + "app_id": anyvalue(app_id), + } + + if git_head_ref: + attributes["git_head_ref"] = anyvalue(git_head_ref) + if build_configuration_name: + attributes["build_configuration_name"] = anyvalue(build_configuration_name) + + trace_item = TraceItem( + organization_id=organization_id, + project_id=project_id, + item_type=TraceItemType.TRACE_ITEM_TYPE_PREPROD, + timestamp=proto_timestamp, + trace_id=trace_id, + item_id=item_id, + received=proto_timestamp, + retention_days=90, + attributes=attributes, + ) + + response = requests.post( + settings.SENTRY_SNUBA + EAP_ITEMS_INSERT_ENDPOINT, + files={"item_0": trace_item.SerializeToString()}, + ) + assert response.status_code == 200 + + def test_get_with_defaults(self) -> None: + """Test GET with default parameters returns proper structure.""" + response = self.get_success_response(self.organization.slug) + + assert "data" in response.data + assert "start" in response.data + assert "end" in response.data + assert "meta" in response.data + assert isinstance(response.data["data"], list) + + def test_get_with_start_and_end(self) -> None: + """Test explicit time range parameters.""" + start = datetime(2024, 1, 1, 0, 0, 0) + end = datetime(2024, 1, 2, 0, 0, 0) + + response = self.get_success_response( + self.organization.slug, + qs_params={ + "start": str(int(start.timestamp())), + "end": str(int(end.timestamp())), + }, + ) + + assert response.data["start"] == int(start.timestamp()) + assert response.data["end"] == int(end.timestamp()) + + def test_get_with_stats_period(self) -> None: + """Test relative time period (7d, 24h, etc.).""" + response = self.get_success_response( + self.organization.slug, + qs_params={"statsPeriod": "7d"}, + ) + + assert "data" in response.data + # Verify that end - start is approximately 7 days + duration = response.data["end"] - response.data["start"] + assert 6 * 86400 < duration < 8 * 86400 + + def test_get_with_interval(self) -> None: + """Test different time intervals.""" + response = self.get_success_response( + self.organization.slug, + qs_params={"interval": "1h"}, + ) + + assert response.status_code == 200 + + def test_get_with_field(self) -> None: + """Test different aggregate fields.""" + response = self.get_success_response( + self.organization.slug, + qs_params={"field": "max(max_download_size)"}, + ) + + assert response.data["meta"]["fields"]["max(max_download_size)"] == "integer" + + def test_get_with_query_filter(self) -> None: + """Test query string filters (app_id, artifact_type, etc.).""" + response = self.get_success_response( + self.organization.slug, + qs_params={"query": "app_id:com.example.app artifact_type:0"}, + ) + + assert response.status_code == 200 + + def 
test_get_with_include_filters(self) -> None: + """Test that response includes available filter values.""" + response = self.get_success_response( + self.organization.slug, + qs_params={"includeFilters": "true"}, + ) + + assert "filters" in response.data + assert "app_ids" in response.data["filters"] + assert "branches" in response.data["filters"] + assert "build_configs" in response.data["filters"] + assert isinstance(response.data["filters"]["app_ids"], list) + assert isinstance(response.data["filters"]["branches"], list) + assert isinstance(response.data["filters"]["build_configs"], list) + + def test_get_invalid_interval(self) -> None: + """Test validation of interval parameter.""" + response = self.get_error_response( + self.organization.slug, + qs_params={"interval": "99h"}, + ) + assert "Invalid interval" in str(response.data) + + def test_get_invalid_field(self) -> None: + """Test validation of field format.""" + response = self.get_error_response( + self.organization.slug, + qs_params={"field": "invalid_field"}, + ) + assert "Invalid field format" in str(response.data) + + def test_get_invalid_aggregate_function(self) -> None: + """Test validation of aggregate function.""" + response = self.get_error_response( + self.organization.slug, + qs_params={"field": "median(max_install_size)"}, + ) + assert "Unsupported aggregate function" in str(response.data) + + def test_get_invalid_field_name(self) -> None: + """Test validation of field name.""" + response = self.get_error_response( + self.organization.slug, + qs_params={"field": "max(invalid_field)"}, + ) + assert "Invalid field" in str(response.data) + + def test_get_invalid_stats_period(self) -> None: + """Test validation of statsPeriod parameter.""" + response = self.get_error_response( + self.organization.slug, + qs_params={"statsPeriod": "invalid"}, + ) + assert "Invalid statsPeriod" in str(response.data) + + def test_get_invalid_datetime(self) -> None: + """Test validation of datetime format.""" + response = self.get_error_response( + self.organization.slug, + qs_params={"start": "not-a-date", "end": "also-not-a-date"}, + ) + assert "Invalid datetime format" in str(response.data) + + def test_get_requires_authentication(self) -> None: + """Test that endpoint requires authentication.""" + client = APIClient() + response = client.get(self.url) + assert response.status_code == 401 + + def test_get_with_project_filter(self) -> None: + """Test filtering by project ID.""" + project = self.create_project(organization=self.organization) + + response = self.get_success_response( + self.organization.slug, + qs_params={"project": [project.id]}, + ) + + assert response.status_code == 200 + + def test_empty_response_structure(self) -> None: + """Test response structure when no data exists.""" + response = self.get_success_response( + self.organization.slug, + qs_params={"statsPeriod": "1d", "interval": "1h"}, + ) + + # Verify proper response structure with time buckets + assert len(response.data["data"]) > 0 + # Each time bucket should have structure: [timestamp, [{"count": value}]] + for timestamp, values in response.data["data"]: + assert isinstance(timestamp, int) + assert isinstance(values, list) + assert len(values) == 1 + assert "count" in values[0] + + def test_query_filter_parsing(self) -> None: + """Test various query filter combinations.""" + # Test multiple filters + response = self.get_success_response( + self.organization.slug, + qs_params={"query": "app_id:com.example.app artifact_type:0 git_head_ref:main"}, + ) + assert 
response.status_code == 200 + + # Test with build configuration filter + response = self.get_success_response( + self.organization.slug, + qs_params={"query": "build_configuration_name:Release"}, + ) + assert response.status_code == 200 + + def test_time_range_validation(self) -> None: + """Test that start/end or statsPeriod are properly handled.""" + # Default behavior (no time params) should work + response = self.get_success_response(self.organization.slug) + assert response.status_code == 200 + + # Explicit start/end should work + now = before_now() + start = now - timedelta(days=7) + response = self.get_success_response( + self.organization.slug, + qs_params={ + "start": str(int(start.timestamp())), + "end": str(int(now.timestamp())), + }, + ) + assert response.status_code == 200 + + def test_all_aggregate_functions(self) -> None: + """Test that all supported aggregate functions work.""" + for func in ["max", "min", "avg", "count"]: + response = self.get_success_response( + self.organization.slug, + qs_params={"field": f"{func}(max_install_size)"}, + ) + assert response.status_code == 200 + assert f"{func}(max_install_size)" in response.data["meta"]["fields"] + + def test_all_valid_intervals(self) -> None: + """Test that all supported intervals work.""" + for interval in ["1m", "5m", "10m", "15m", "30m", "1h", "4h", "1d", "7d"]: + response = self.get_success_response( + self.organization.slug, + qs_params={"interval": interval}, + ) + assert response.status_code == 200 + + def test_get_with_actual_data(self) -> None: + """Test querying actual data written to EAP.""" + now = before_now(minutes=5) + start = now - timedelta(hours=2) + + # Write some test data + self.store_preprod_size_metric( + project_id=self.project.id, + organization_id=self.organization.id, + timestamp=now - timedelta(hours=1), + preprod_artifact_id=1, + size_metric_id=1, + app_id="com.example.app", + max_install_size=100000, + min_install_size=95000, + ) + + self.store_preprod_size_metric( + project_id=self.project.id, + organization_id=self.organization.id, + timestamp=now - timedelta(minutes=30), + preprod_artifact_id=2, + size_metric_id=2, + app_id="com.example.app", + max_install_size=105000, + min_install_size=98000, + ) + + response = self.get_success_response( + self.organization.slug, + qs_params={ + "start": str(int(start.timestamp())), + "end": str(int(now.timestamp())), + "interval": "1h", + "field": "max(max_install_size)", + }, + ) + + assert response.status_code == 200 + assert "data" in response.data + # Verify we have time buckets with data + data_points = response.data["data"] + assert len(data_points) > 0 + + # Check that at least one bucket has non-None values + non_null_values = [d[1][0]["count"] for d in data_points if d[1][0]["count"] is not None] + assert len(non_null_values) > 0 + # Verify the max value is correct + assert max(non_null_values) == 105000 + + def test_aggregation_with_real_data(self) -> None: + """Test different aggregation functions with real data.""" + now = before_now(minutes=5) + start = now - timedelta(hours=1) + + # Write multiple metrics in the same time bucket + for i, size in enumerate([100000, 150000, 125000]): + self.store_preprod_size_metric( + project_id=self.project.id, + organization_id=self.organization.id, + timestamp=now - timedelta(minutes=30), + preprod_artifact_id=i + 1, + size_metric_id=i + 1, + max_install_size=size, + ) + + # Test max aggregation + response = self.get_success_response( + self.organization.slug, + qs_params={ + "start": 
str(int(start.timestamp())), + "end": str(int(now.timestamp())), + "interval": "1h", + "field": "max(max_install_size)", + }, + ) + data_points = [ + d[1][0]["count"] for d in response.data["data"] if d[1][0]["count"] is not None + ] + assert max(data_points) == 150000 + + # Test min aggregation + response = self.get_success_response( + self.organization.slug, + qs_params={ + "start": str(int(start.timestamp())), + "end": str(int(now.timestamp())), + "interval": "1h", + "field": "min(max_install_size)", + }, + ) + data_points = [ + d[1][0]["count"] for d in response.data["data"] if d[1][0]["count"] is not None + ] + assert min(data_points) == 100000 + + def test_filter_by_app_id(self) -> None: + """Test filtering by app_id.""" + now = before_now(minutes=5) + start = now - timedelta(hours=1) + + # Write metrics for different apps + self.store_preprod_size_metric( + project_id=self.project.id, + organization_id=self.organization.id, + timestamp=now - timedelta(minutes=30), + preprod_artifact_id=1, + size_metric_id=1, + app_id="com.example.app1", + max_install_size=100000, + ) + + self.store_preprod_size_metric( + project_id=self.project.id, + organization_id=self.organization.id, + timestamp=now - timedelta(minutes=30), + preprod_artifact_id=2, + size_metric_id=2, + app_id="com.example.app2", + max_install_size=200000, + ) + + # Query for app1 only + response = self.get_success_response( + self.organization.slug, + qs_params={ + "start": str(int(start.timestamp())), + "end": str(int(now.timestamp())), + "query": "app_id:com.example.app1", + "field": "max(max_install_size)", + }, + ) + + data_points = [ + d[1][0]["count"] for d in response.data["data"] if d[1][0]["count"] is not None + ] + # Should only get app1's data + assert len(data_points) > 0 + assert max(data_points) == 100000 + + def test_include_filters_with_real_data(self) -> None: + """Test that includeFilters returns actual filter values.""" + now = before_now(minutes=5) + + self.store_preprod_size_metric( + project_id=self.project.id, + organization_id=self.organization.id, + timestamp=now, + app_id="com.example.app1", + git_head_ref="main", + build_configuration_name="Release", + ) + + self.store_preprod_size_metric( + project_id=self.project.id, + organization_id=self.organization.id, + timestamp=now, + app_id="com.example.app2", + git_head_ref="develop", + build_configuration_name="Debug", + ) + + response = self.get_success_response( + self.organization.slug, + qs_params={"includeFilters": "true"}, + ) + + assert "filters" in response.data + assert "com.example.app1" in response.data["filters"]["app_ids"] + assert "com.example.app2" in response.data["filters"]["app_ids"] + assert "main" in response.data["filters"]["branches"] + assert "develop" in response.data["filters"]["branches"] + assert "Release" in response.data["filters"]["build_configs"] + assert "Debug" in response.data["filters"]["build_configs"] From fd9b2a03f44e700c99453c8656208d7130962f95 Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Fri, 19 Dec 2025 01:25:00 +0000 Subject: [PATCH 02/17] :hammer_and_wrench: Sync API Urls to TypeScirpt --- static/app/utils/api/knownSentryApiUrls.generated.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/app/utils/api/knownSentryApiUrls.generated.ts b/static/app/utils/api/knownSentryApiUrls.generated.ts index 40a4ea9f823a60..8bd3f2be1f19c4 100644 --- a/static/app/utils/api/knownSentryApiUrls.generated.ts +++ 
b/static/app/utils/api/knownSentryApiUrls.generated.ts @@ -468,6 +468,7 @@ export type KnownSentryApiUrls = | '/organizations/$organizationIdOrSlug/plugins/' | '/organizations/$organizationIdOrSlug/plugins/$pluginSlug/deprecation-info/' | '/organizations/$organizationIdOrSlug/plugins/configs/' + | '/organizations/$organizationIdOrSlug/preprod/app-size-stats/' | '/organizations/$organizationIdOrSlug/prevent/ai/github/config/$gitOrganizationName/' | '/organizations/$organizationIdOrSlug/prevent/ai/github/repos/' | '/organizations/$organizationIdOrSlug/prevent/owner/$owner/repositories/' @@ -539,7 +540,6 @@ export type KnownSentryApiUrls = | '/organizations/$organizationIdOrSlug/seer/explorer-runs/' | '/organizations/$organizationIdOrSlug/seer/explorer-update/$runId/' | '/organizations/$organizationIdOrSlug/seer/onboarding-check/' - | '/organizations/$organizationIdOrSlug/seer/onboarding/' | '/organizations/$organizationIdOrSlug/seer/setup-check/' | '/organizations/$organizationIdOrSlug/sent-first-event/' | '/organizations/$organizationIdOrSlug/sentry-app-components/' From cfdc82d10f23b995dd18dad536e846f1c63bdf4a Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Thu, 18 Dec 2025 20:39:11 -0500 Subject: [PATCH 03/17] fix --- .../api/endpoints/organization_preprod_app_size_stats.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index a91ef4b532d859..57170080ed61a9 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import uuid from collections import defaultdict from datetime import datetime, timedelta from typing import Any @@ -16,11 +17,13 @@ TraceItemFilter, ) +from sentry.api.api_owners import ApiOwner from sentry.api.api_publish_status import ApiPublishStatus from sentry.api.base import region_silo_endpoint from sentry.api.bases.organization import OrganizationEndpoint from sentry.api.exceptions import ResourceDoesNotExist from sentry.models.organization import Organization +from sentry.preprod.eap.constants import PREPROD_NAMESPACE from sentry.preprod.eap.read import query_preprod_size_metrics from sentry.utils.dates import parse_stats_period @@ -56,6 +59,7 @@ class OrganizationPreprodAppSizeStatsEndpoint(OrganizationEndpoint): Compatible with Sentry dashboard widgets. 
""" + owner = ApiOwner.EMERGE_TOOLS publish_status = { "GET": ApiPublishStatus.PRIVATE, } @@ -261,10 +265,6 @@ def _build_filter(self, filter_kwargs: dict[str, Any]) -> TraceItemFilter: for key, value in filter_kwargs.items(): if key == "artifact_id": # Special case: artifact_id maps to sentry.trace_id - import uuid - - from sentry.preprod.eap.constants import PREPROD_NAMESPACE - trace_id = uuid.uuid5(PREPROD_NAMESPACE, str(value)).hex filters.append( TraceItemFilter( From 6bdc133160c72787562ecd5f961238dfc50de666 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 11:16:31 -0500 Subject: [PATCH 04/17] fix --- .../endpoints/organization_dashboards.py | 122 ++++++++++-------- .../organization_preprod_app_size_stats.py | 8 +- ...est_organization_preprod_app_size_stats.py | 60 +++++++-- 3 files changed, 124 insertions(+), 66 deletions(-) diff --git a/src/sentry/dashboards/endpoints/organization_dashboards.py b/src/sentry/dashboards/endpoints/organization_dashboards.py index 42ae03b58da328..9be904298468c2 100644 --- a/src/sentry/dashboards/endpoints/organization_dashboards.py +++ b/src/sentry/dashboards/endpoints/organization_dashboards.py @@ -3,6 +3,7 @@ from enum import IntEnum from typing import Any, TypedDict +import sentry_sdk from django.db import IntegrityError, router, transaction from django.db.models import ( Case, @@ -127,6 +128,57 @@ class PrebuiltDashboard(TypedDict): ] +def sync_prebuilt_dashboards(organization: Organization) -> None: + """ + Queries the database to check if prebuilt dashboards have a Dashboard record and + creates them if they don't, updates titles if they've changed, or deletes them + if they should no longer exist. + """ + + with transaction.atomic(router.db_for_write(Dashboard)): + enabled_prebuilt_dashboard_ids = options.get("dashboards.prebuilt-dashboard-ids") + enabled_prebuilt_dashboards = [ + dashboard + for dashboard in PREBUILT_DASHBOARDS + if dashboard["prebuilt_id"] in enabled_prebuilt_dashboard_ids + ] + + saved_prebuilt_dashboards = Dashboard.objects.filter( + organization=organization, + prebuilt_id__isnull=False, + ) + + saved_prebuilt_dashboard_map = {d.prebuilt_id: d for d in saved_prebuilt_dashboards} + + # Create prebuilt dashboards if they don't exist, or update titles if changed + dashboards_to_update: list[Dashboard] = [] + for prebuilt_dashboard in enabled_prebuilt_dashboards: + prebuilt_id: PrebuiltDashboardId = prebuilt_dashboard["prebuilt_id"] + + if prebuilt_id not in saved_prebuilt_dashboard_map: + # Create new dashboard + Dashboard.objects.create( + organization=organization, + title=prebuilt_dashboard["title"], + created_by_id=None, + prebuilt_id=prebuilt_id, + ) + elif saved_prebuilt_dashboard_map[prebuilt_id].title != prebuilt_dashboard["title"]: + # Update title if changed + saved_prebuilt_dashboard_map[prebuilt_id].title = prebuilt_dashboard["title"] + dashboards_to_update.append(saved_prebuilt_dashboard_map[prebuilt_id]) + + if dashboards_to_update: + Dashboard.objects.bulk_update(dashboards_to_update, ["title"]) + + # Delete old prebuilt dashboards if they should no longer exist + prebuilt_ids = [d["prebuilt_id"] for d in enabled_prebuilt_dashboards] + Dashboard.objects.filter( + organization=organization, + prebuilt_id__isnull=False, + ).exclude(prebuilt_id__in=prebuilt_ids).delete() + + class OrganizationDashboardsPermission(OrganizationPermission): scope_map = { "GET": ["org:read", "org:write", "org:admin"], @@ -146,14 +198,18 @@ def has_object_permission( if isinstance(obj, Dashboard): is_superuser = 
is_active_superuser(request) + # allow strictly for Owners and superusers, this allows them to delete dashboards + # of users that no longer have access to the organization if is_superuser or request.access.has_role_in_organization( role=roles.get_top_dog().id, organization=obj.organization, user_id=request.user.id ): return True + # check if user is restricted from editing dashboard if hasattr(obj, "permissions"): return obj.permissions.has_edit_permissions(request.user.id) + # if no permissions are assigned, it is considered accessible to all users return True return True @@ -198,6 +254,7 @@ def get(self, request: Request, organization: Organization) -> Response: organization, actor=request.user, ): + # Sync prebuilt dashboards to the database try: lock = locks.get( f"dashboards:sync_prebuilt_dashboards:{organization.id}", @@ -205,9 +262,14 @@ def get(self, request: Request, organization: Organization) -> Response: name="sync_prebuilt_dashboards", ) with lock.acquire(): + # Adds prebuilt dashboards to the database if they don't exist. + # Deletes old prebuilt dashboards from the database if they should no longer exist. sync_prebuilt_dashboards(organization) except UnableToAcquireLock: + # Another process is already syncing the prebuilt dashboards. We can skip syncing this time. pass + except Exception as err: + sentry_sdk.capture_exception(err) filter_by = request.query_params.get("filter") if filter_by == "onlyFavorites": @@ -475,11 +537,14 @@ def post(self, request: Request, organization: Organization, retry: int = 0) -> actor=request.user, ): if serializer.validated_data.get("is_favorited"): - DashboardFavoriteUser.objects.insert_favorite_dashboard( - organization=organization, - user_id=request.user.id, - dashboard=dashboard, - ) + try: + DashboardFavoriteUser.objects.insert_favorite_dashboard( + organization=organization, + user_id=request.user.id, + dashboard=dashboard, + ) + except Exception as e: + sentry_sdk.capture_exception(e) return Response(serialize(dashboard, request.user), status=201) except IntegrityError: @@ -493,50 +558,3 @@ def post(self, request: Request, organization: Organization, retry: int = 0) -> return self.post(request, organization, retry=retry + 1) except UnableToAcquireLock: return Response("Unable to create dashboard, please try again", status=503) - - -def sync_prebuilt_dashboards(organization: Organization) -> None: - """ - Queries the database to check if prebuilt dashboards have a Dashboard record and - creates them if they don't, updates titles if they've changed, or deletes them - if they should no longer exist. 
- """ - - with transaction.atomic(router.db_for_write(Dashboard)): - enabled_prebuilt_dashboard_ids = options.get("dashboards.prebuilt-dashboard-ids") - enabled_prebuilt_dashboards = [ - dashboard - for dashboard in PREBUILT_DASHBOARDS - if dashboard["prebuilt_id"] in enabled_prebuilt_dashboard_ids - ] - - saved_prebuilt_dashboards = Dashboard.objects.filter( - organization=organization, - prebuilt_id__isnull=False, - ) - - saved_prebuilt_dashboard_map = {d.prebuilt_id: d for d in saved_prebuilt_dashboards} - - dashboards_to_update: list[Dashboard] = [] - for prebuilt_dashboard in enabled_prebuilt_dashboards: - prebuilt_id: PrebuiltDashboardId = prebuilt_dashboard["prebuilt_id"] - - if prebuilt_id not in saved_prebuilt_dashboard_map: - Dashboard.objects.create( - organization=organization, - title=prebuilt_dashboard["title"], - created_by_id=None, - prebuilt_id=prebuilt_id, - ) - elif saved_prebuilt_dashboard_map[prebuilt_id].title != prebuilt_dashboard["title"]: - saved_prebuilt_dashboard_map[prebuilt_id].title = prebuilt_dashboard["title"] - dashboards_to_update.append(saved_prebuilt_dashboard_map[prebuilt_id]) - - if dashboards_to_update: - Dashboard.objects.bulk_update(dashboards_to_update, ["title"]) - - prebuilt_ids = [d["prebuilt_id"] for d in enabled_prebuilt_dashboards] - Dashboard.objects.filter( - organization=organization, - prebuilt_id__isnull=False, - ).exclude(prebuilt_id__in=prebuilt_ids).delete() diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index 57170080ed61a9..c2bd0f932c8fde 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -3,7 +3,7 @@ import logging import uuid from collections import defaultdict -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta from typing import Any from rest_framework.exceptions import ParseError @@ -161,7 +161,7 @@ def _parse_time_range(self, request: Request) -> tuple[datetime, datetime]: delta = parse_stats_period(stats_period) if delta is None: raise ParseError(f"Invalid statsPeriod: {stats_period}") - end = datetime.utcnow() + end = datetime.now(UTC) start = end - delta elif start_param and end_param: # Parse absolute timestamps @@ -169,7 +169,7 @@ def _parse_time_range(self, request: Request) -> tuple[datetime, datetime]: end = self._parse_datetime(end_param) else: # Default to last 14 days - end = datetime.utcnow() + end = datetime.now(UTC) start = end - timedelta(days=14) return start, end @@ -178,7 +178,7 @@ def _parse_datetime(self, value: str) -> datetime: """Parse datetime from ISO format or Unix timestamp.""" try: # Try Unix timestamp first - return datetime.fromtimestamp(float(value)) + return datetime.fromtimestamp(float(value), UTC) except (ValueError, TypeError): # Try ISO format try: diff --git a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py index dd50c91f79e9a4..0cd2c04be8273a 100644 --- a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py +++ b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py @@ -1,7 +1,7 @@ from __future__ import annotations import uuid -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta import requests from django.conf import settings @@ 
-446,12 +446,13 @@ def test_filter_by_app_id(self) -> None: def test_include_filters_with_real_data(self) -> None: """Test that includeFilters returns actual filter values.""" now = before_now(minutes=5) + start = now - timedelta(minutes=10) self.store_preprod_size_metric( project_id=self.project.id, organization_id=self.organization.id, timestamp=now, - app_id="com.example.app1", + app_id="com.test.filters.app1", git_head_ref="main", build_configuration_name="Release", ) @@ -460,20 +461,59 @@ def test_include_filters_with_real_data(self) -> None: project_id=self.project.id, organization_id=self.organization.id, timestamp=now, - app_id="com.example.app2", + app_id="com.test.filters.app2", git_head_ref="develop", build_configuration_name="Debug", ) response = self.get_success_response( self.organization.slug, - qs_params={"includeFilters": "true"}, + qs_params={ + "includeFilters": "true", + "start": str(int(start.timestamp())), + "end": str(int((now + timedelta(minutes=1)).timestamp())), + }, ) assert "filters" in response.data - assert "com.example.app1" in response.data["filters"]["app_ids"] - assert "com.example.app2" in response.data["filters"]["app_ids"] - assert "main" in response.data["filters"]["branches"] - assert "develop" in response.data["filters"]["branches"] - assert "Release" in response.data["filters"]["build_configs"] - assert "Debug" in response.data["filters"]["build_configs"] + app_ids = response.data["filters"]["app_ids"] + branches = response.data["filters"]["branches"] + configs = response.data["filters"]["build_configs"] + + assert "com.test.filters.app1" in app_ids + assert "com.test.filters.app2" in app_ids + assert "main" in branches + assert "develop" in branches + assert "Release" in configs + assert "Debug" in configs + + def test_timezone_consistency(self) -> None: + """Test that Unix timestamps are parsed consistently with UTC.""" + # Create a specific UTC timestamp + utc_time = datetime(2024, 6, 15, 12, 0, 0, tzinfo=UTC) + unix_timestamp = int(utc_time.timestamp()) + + # Query with Unix timestamp for start/end + response = self.get_success_response( + self.organization.slug, + qs_params={ + "start": str(unix_timestamp), + "end": str(unix_timestamp + 3600), # +1 hour + }, + ) + + # Verify the returned timestamps match what we sent (in UTC) + assert response.data["start"] == unix_timestamp + assert response.data["end"] == unix_timestamp + 3600 + + # Verify statsPeriod also produces consistent results + # (both should use UTC, not local time) + response2 = self.get_success_response( + self.organization.slug, + qs_params={"statsPeriod": "1h"}, + ) + + # Both should be within reasonable range + duration = response2.data["end"] - response2.data["start"] + # Allow some slack for test execution time + assert 3500 < duration < 3700 From 46caebeb295c7904a10d4b0e32d340f8319477dc Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 12:02:16 -0500 Subject: [PATCH 05/17] fix --- .../organization_preprod_app_size_stats.py | 168 ++++++------------ ...est_organization_preprod_app_size_stats.py | 53 ++++-- 2 files changed, 98 insertions(+), 123 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index c2bd0f932c8fde..1e929a78493298 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -3,7 +3,7 @@ import logging import uuid from 
collections import defaultdict -from datetime import UTC, datetime, timedelta +from datetime import datetime, timedelta from typing import Any from rest_framework.exceptions import ParseError @@ -21,44 +21,18 @@ from sentry.api.api_publish_status import ApiPublishStatus from sentry.api.base import region_silo_endpoint from sentry.api.bases.organization import OrganizationEndpoint -from sentry.api.exceptions import ResourceDoesNotExist +from sentry.api.utils import get_date_range_from_params +from sentry.exceptions import InvalidParams from sentry.models.organization import Organization from sentry.preprod.eap.constants import PREPROD_NAMESPACE from sentry.preprod.eap.read import query_preprod_size_metrics -from sentry.utils.dates import parse_stats_period +from sentry.utils.dates import get_rollup_from_request logger = logging.getLogger(__name__) -def extract_unique_values_from_response( - response: TraceItemTableResponse, -) -> dict[str, set[str]]: - """Extract unique string values from each column in the response.""" - unique_values: dict[str, set[str]] = {} - - if not response.column_values: - return unique_values - - # Iterate through each column - for column in response.column_values: - attr_name = column.attribute_name - unique_values[attr_name] = set() - - # Iterate through all results in this column - for result in column.results: - if result.HasField("val_str") and result.val_str: - unique_values[attr_name].add(result.val_str) - - return unique_values - - @region_silo_endpoint class OrganizationPreprodAppSizeStatsEndpoint(OrganizationEndpoint): - """ - Returns time-series data for app size metrics stored in EAP. - Compatible with Sentry dashboard widgets. - """ - owner = ApiOwner.EMERGE_TOOLS publish_status = { "GET": ApiPublishStatus.PRIVATE, @@ -66,7 +40,7 @@ class OrganizationPreprodAppSizeStatsEndpoint(OrganizationEndpoint): def get(self, request: Request, organization: Organization) -> Response: """ - Retrieve app size metrics over time. + Retrieve app size metrics over time based on the provided filters. 
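+ Defaults: statsPeriod=14d, interval=1d, field=max(max_install_size).
+ Example: GET /organizations/{organization_id_or_slug}/preprod/app-size-stats/?statsPeriod=14d&interval=1h&field=max(max_install_size)&query=app_id:com.example.app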
Query Parameters: - project: Project ID(s) (can be repeated or comma-separated) @@ -98,10 +72,18 @@ def get(self, request: Request, organization: Organization) -> Response: else: project_ids = list(project_ids_set) - start, end = self._parse_time_range(request) + # Use shared utility for time range parsing + start, end = get_date_range_from_params( + request.GET, default_stats_period=timedelta(days=14) + ) - interval = request.GET.get("interval", "1d") - interval_seconds = self._parse_interval(interval) + # Use shared utility for interval parsing + interval_seconds = get_rollup_from_request( + request, + date_range=end - start, + default_interval="1d", + error=ParseError("Invalid interval"), + ) field = request.GET.get("field", "max(max_install_size)") aggregate_func, aggregate_field = self._parse_field(field) @@ -116,10 +98,9 @@ def get(self, request: Request, organization: Organization) -> Response: end=end, referrer="api.preprod.app-size-stats", filter=query_filter, - limit=10000, # Get all data points + limit=10000, ) - # Transform to time-series format timeseries_data = self._transform_to_timeseries( response, start, end, interval_seconds, aggregate_func, aggregate_field ) @@ -135,7 +116,6 @@ def get(self, request: Request, organization: Organization) -> Response: }, } - # Include available filter values if requested include_filters = request.GET.get("includeFilters", "").lower() == "true" if include_filters: filter_values = self._fetch_filter_values(organization.id, project_ids, start, end) @@ -143,65 +123,12 @@ def get(self, request: Request, organization: Organization) -> Response: return Response(result) + except InvalidParams as e: + logger.exception("Invalid parameters for app size stats request") + raise ParseError(str(e)) except (ValueError, KeyError) as e: - logger.exception("[AppSize] Parse error: %s", e) + logger.exception("Error while parsing app size stats request") raise ParseError(str(e)) - except Exception as e: - logger.exception("[AppSize] Unexpected error: %s", e) - raise ResourceDoesNotExist(f"Failed to fetch app size metrics: {e}") - - def _parse_time_range(self, request: Request) -> tuple[datetime, datetime]: - """Parse start/end or statsPeriod from request.""" - stats_period = request.GET.get("statsPeriod") - start_param = request.GET.get("start") - end_param = request.GET.get("end") - - if stats_period: - # Parse relative period like "14d" or "24h" - delta = parse_stats_period(stats_period) - if delta is None: - raise ParseError(f"Invalid statsPeriod: {stats_period}") - end = datetime.now(UTC) - start = end - delta - elif start_param and end_param: - # Parse absolute timestamps - start = self._parse_datetime(start_param) - end = self._parse_datetime(end_param) - else: - # Default to last 14 days - end = datetime.now(UTC) - start = end - timedelta(days=14) - - return start, end - - def _parse_datetime(self, value: str) -> datetime: - """Parse datetime from ISO format or Unix timestamp.""" - try: - # Try Unix timestamp first - return datetime.fromtimestamp(float(value), UTC) - except (ValueError, TypeError): - # Try ISO format - try: - return datetime.fromisoformat(value.replace("Z", "+00:00")) - except ValueError: - raise ParseError(f"Invalid datetime format: {value}") - - def _parse_interval(self, interval: str) -> int: - """Parse interval string like '1h', '1d' to seconds.""" - interval_map = { - "1m": 60, - "5m": 300, - "10m": 600, - "15m": 900, - "30m": 1800, - "1h": 3600, - "4h": 14400, - "1d": 86400, - "7d": 604800, - } - if interval not in interval_map: - 
raise ParseError(f"Invalid interval: {interval}") - return interval_map[interval] def _parse_field(self, field: str) -> tuple[str, str]: """ @@ -318,15 +245,12 @@ def _transform_to_timeseries( Note: EAP response is column-oriented (column_values[col_idx].results[row_idx]) """ - # Create time buckets buckets: dict[int, list[float]] = defaultdict(list) - # Extract column names and find indices column_map: dict[str, int] = {} column_values = response.column_values if not column_values: - # No data, return empty buckets with None result: list[tuple[int, list[dict[str, Any]]]] = [] start_ts = int(start.timestamp()) current = int(start_ts // interval_seconds * interval_seconds) @@ -339,9 +263,7 @@ def _transform_to_timeseries( for idx, column_value in enumerate(column_values): column_map[column_value.attribute_name] = idx - # Get the target field index if aggregate_field not in column_map: - # If field doesn't exist, return None values result = [] start_ts = int(start.timestamp()) current = int(start_ts // interval_seconds * interval_seconds) @@ -365,11 +287,9 @@ def _transform_to_timeseries( current += interval_seconds return result - # Process each row (iterate through results in column-oriented format) num_rows = len(column_values[0].results) for row_idx in range(num_rows): - # Get timestamp for this row timestamp_result = column_values[timestamp_idx].results[row_idx] if timestamp_result.HasField("val_double"): @@ -381,7 +301,6 @@ def _transform_to_timeseries( bucket_ts = int(timestamp // interval_seconds * interval_seconds) - # Get the value for this row value_result = column_values[field_idx].results[row_idx] value = None @@ -432,12 +351,6 @@ def _fetch_filter_values( start: datetime, end: datetime, ) -> dict[str, list[str]]: - """ - Fetch available filter values (app_ids, branches, and build configs) from EAP data. - - Returns a dict with sorted lists of unique values. 
- """ - # Query for app_id, git_head_ref, and build_configuration_name columns over the time range response = query_preprod_size_metrics( organization_id=organization_id, project_ids=project_ids, @@ -446,14 +359,45 @@ def _fetch_filter_values( referrer="api.preprod.app-size-filters", filter=None, columns=["app_id", "git_head_ref", "build_configuration_name"], - limit=10000, # Get many rows to extract unique values + limit=10000, ) - # Extract unique values using the helper function - unique_values = extract_unique_values_from_response(response) + unique_values = _extract_unique_values_from_response(response) + + branches = list(unique_values.get("git_head_ref", set())) + branches.sort(key=_branch_sort_key) return { "app_ids": sorted(list(unique_values.get("app_id", set()))), - "branches": sorted(list(unique_values.get("git_head_ref", set()))), + "branches": branches, "build_configs": sorted(list(unique_values.get("build_configuration_name", set()))), } + + +def _extract_unique_values_from_response( + response: TraceItemTableResponse, +) -> dict[str, set[str]]: + unique_values: dict[str, set[str]] = {} + + if not response.column_values: + return unique_values + + for column in response.column_values: + attr_name = column.attribute_name + unique_values[attr_name] = set() + + for result in column.results: + if result.HasField("val_str") and result.val_str: + unique_values[attr_name].add(result.val_str) + + return unique_values + + +def _branch_sort_key(branch: str) -> tuple[int, str]: + """Sort key that prioritizes main and master branches.""" + if branch == "main": + return (0, branch) + elif branch == "master": + return (1, branch) + else: + return (2, branch.lower()) diff --git a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py index 0cd2c04be8273a..5c86978d3104e2 100644 --- a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py +++ b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py @@ -169,14 +169,6 @@ def test_get_with_include_filters(self) -> None: assert isinstance(response.data["filters"]["branches"], list) assert isinstance(response.data["filters"]["build_configs"], list) - def test_get_invalid_interval(self) -> None: - """Test validation of interval parameter.""" - response = self.get_error_response( - self.organization.slug, - qs_params={"interval": "99h"}, - ) - assert "Invalid interval" in str(response.data) - def test_get_invalid_field(self) -> None: """Test validation of field format.""" response = self.get_error_response( @@ -215,7 +207,7 @@ def test_get_invalid_datetime(self) -> None: self.organization.slug, qs_params={"start": "not-a-date", "end": "also-not-a-date"}, ) - assert "Invalid datetime format" in str(response.data) + assert "not a valid ISO8601 date query" in str(response.data) def test_get_requires_authentication(self) -> None: """Test that endpoint requires authentication.""" @@ -295,8 +287,9 @@ def test_all_aggregate_functions(self) -> None: assert f"{func}(max_install_size)" in response.data["meta"]["fields"] def test_all_valid_intervals(self) -> None: - """Test that all supported intervals work.""" - for interval in ["1m", "5m", "10m", "15m", "30m", "1h", "4h", "1d", "7d"]: + """Test that common intervals work with default 14-day range.""" + # For 14-day range, these intervals don't exceed MAX_ROLLUP_POINTS + for interval in ["5m", "15m", "30m", "1h", "4h", "1d"]: response = 
self.get_success_response( self.organization.slug, qs_params={"interval": interval}, @@ -484,9 +477,47 @@ def test_include_filters_with_real_data(self) -> None: assert "com.test.filters.app2" in app_ids assert "main" in branches assert "develop" in branches + # Verify main comes before develop + assert branches.index("main") < branches.index("develop") assert "Release" in configs assert "Debug" in configs + def test_branch_sorting_priority(self) -> None: + """Test that main and master branches are prioritized in the list.""" + now = before_now(minutes=5) + start = now - timedelta(minutes=10) + + # Create metrics with various branch names using different artifact IDs + # so they're stored as separate records + for idx, branch in enumerate(["feature/test", "main", "develop", "master", "release/1.0"]): + self.store_preprod_size_metric( + project_id=self.project.id, + organization_id=self.organization.id, + timestamp=now, + preprod_artifact_id=100 + idx, + size_metric_id=100 + idx, + app_id="com.test.branches", + git_head_ref=branch, + ) + + response = self.get_success_response( + self.organization.slug, + qs_params={ + "includeFilters": "true", + "start": str(int(start.timestamp())), + "end": str(int((now + timedelta(minutes=1)).timestamp())), + }, + ) + + branches = response.data["filters"]["branches"] + + # Verify main is first, master is second + assert branches[0] == "main" + assert branches[1] == "master" + # Others should be alphabetically sorted + remaining = branches[2:] + assert remaining == sorted(remaining, key=str.lower) + def test_timezone_consistency(self) -> None: """Test that Unix timestamps are parsed consistently with UTC.""" # Create a specific UTC timestamp From 42f0e437fc2e8c6c370c53f66b0ee962b9051d51 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 12:09:53 -0500 Subject: [PATCH 06/17] fix --- .../endpoints/organization_preprod_app_size_stats.py | 10 +++++++--- .../test_organization_preprod_app_size_stats.py | 12 ++++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index 1e929a78493298..c0b3e49a0d9321 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -125,10 +125,10 @@ def get(self, request: Request, organization: Organization) -> Response: except InvalidParams as e: logger.exception("Invalid parameters for app size stats request") - raise ParseError(str(e)) + raise ParseError("Invalid query parameters") except (ValueError, KeyError) as e: logger.exception("Error while parsing app size stats request") - raise ParseError(str(e)) + raise ParseError("Invalid request parameters") def _parse_field(self, field: str) -> tuple[str, str]: """ @@ -150,7 +150,11 @@ def _parse_field(self, field: str) -> tuple[str, str]: "min_install_size", "min_download_size", ] - if field_name and field_name not in valid_fields: + + if not field_name: + raise ParseError(f"Field name is required for {func_name}() function") + + if field_name not in valid_fields: raise ParseError(f"Invalid field: {field_name}") return func_name, field_name diff --git a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py index 5c86978d3104e2..d344474a07f575 100644 --- 
a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py +++ b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py @@ -193,13 +193,21 @@ def test_get_invalid_field_name(self) -> None: ) assert "Invalid field" in str(response.data) + def test_get_empty_field_name(self) -> None: + """Test that empty field names are rejected.""" + response = self.get_error_response( + self.organization.slug, + qs_params={"field": "count()"}, + ) + assert "Field name is required" in str(response.data) + def test_get_invalid_stats_period(self) -> None: """Test validation of statsPeriod parameter.""" response = self.get_error_response( self.organization.slug, qs_params={"statsPeriod": "invalid"}, ) - assert "Invalid statsPeriod" in str(response.data) + assert response.status_code == 400 def test_get_invalid_datetime(self) -> None: """Test validation of datetime format.""" @@ -207,7 +215,7 @@ def test_get_invalid_datetime(self) -> None: self.organization.slug, qs_params={"start": "not-a-date", "end": "also-not-a-date"}, ) - assert "not a valid ISO8601 date query" in str(response.data) + assert response.status_code == 400 def test_get_requires_authentication(self) -> None: """Test that endpoint requires authentication.""" From c4a9ce4ff9d5a571148d98e73ca96b9f708904c8 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 12:31:13 -0500 Subject: [PATCH 07/17] fix --- .../organization_preprod_app_size_stats.py | 4 +- ...est_organization_preprod_app_size_stats.py | 77 +------------------ 2 files changed, 3 insertions(+), 78 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index c0b3e49a0d9321..7d1d4ad16bdecc 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -123,10 +123,10 @@ def get(self, request: Request, organization: Organization) -> Response: return Response(result) - except InvalidParams as e: + except InvalidParams: logger.exception("Invalid parameters for app size stats request") raise ParseError("Invalid query parameters") - except (ValueError, KeyError) as e: + except (ValueError, KeyError): logger.exception("Error while parsing app size stats request") raise ParseError("Invalid request parameters") diff --git a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py index d344474a07f575..f73e6b557420b6 100644 --- a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py +++ b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py @@ -1,7 +1,7 @@ from __future__ import annotations import uuid -from datetime import UTC, datetime, timedelta +from datetime import datetime, timedelta import requests from django.conf import settings @@ -201,22 +201,6 @@ def test_get_empty_field_name(self) -> None: ) assert "Field name is required" in str(response.data) - def test_get_invalid_stats_period(self) -> None: - """Test validation of statsPeriod parameter.""" - response = self.get_error_response( - self.organization.slug, - qs_params={"statsPeriod": "invalid"}, - ) - assert response.status_code == 400 - - def test_get_invalid_datetime(self) -> None: - """Test validation of datetime format.""" - response = self.get_error_response( - self.organization.slug, - 
qs_params={"start": "not-a-date", "end": "also-not-a-date"}, - ) - assert response.status_code == 400 - def test_get_requires_authentication(self) -> None: """Test that endpoint requires authentication.""" client = APIClient() @@ -266,24 +250,6 @@ def test_query_filter_parsing(self) -> None: ) assert response.status_code == 200 - def test_time_range_validation(self) -> None: - """Test that start/end or statsPeriod are properly handled.""" - # Default behavior (no time params) should work - response = self.get_success_response(self.organization.slug) - assert response.status_code == 200 - - # Explicit start/end should work - now = before_now() - start = now - timedelta(days=7) - response = self.get_success_response( - self.organization.slug, - qs_params={ - "start": str(int(start.timestamp())), - "end": str(int(now.timestamp())), - }, - ) - assert response.status_code == 200 - def test_all_aggregate_functions(self) -> None: """Test that all supported aggregate functions work.""" for func in ["max", "min", "avg", "count"]: @@ -294,16 +260,6 @@ def test_all_aggregate_functions(self) -> None: assert response.status_code == 200 assert f"{func}(max_install_size)" in response.data["meta"]["fields"] - def test_all_valid_intervals(self) -> None: - """Test that common intervals work with default 14-day range.""" - # For 14-day range, these intervals don't exceed MAX_ROLLUP_POINTS - for interval in ["5m", "15m", "30m", "1h", "4h", "1d"]: - response = self.get_success_response( - self.organization.slug, - qs_params={"interval": interval}, - ) - assert response.status_code == 200 - def test_get_with_actual_data(self) -> None: """Test querying actual data written to EAP.""" now = before_now(minutes=5) @@ -525,34 +481,3 @@ def test_branch_sorting_priority(self) -> None: # Others should be alphabetically sorted remaining = branches[2:] assert remaining == sorted(remaining, key=str.lower) - - def test_timezone_consistency(self) -> None: - """Test that Unix timestamps are parsed consistently with UTC.""" - # Create a specific UTC timestamp - utc_time = datetime(2024, 6, 15, 12, 0, 0, tzinfo=UTC) - unix_timestamp = int(utc_time.timestamp()) - - # Query with Unix timestamp for start/end - response = self.get_success_response( - self.organization.slug, - qs_params={ - "start": str(unix_timestamp), - "end": str(unix_timestamp + 3600), # +1 hour - }, - ) - - # Verify the returned timestamps match what we sent (in UTC) - assert response.data["start"] == unix_timestamp - assert response.data["end"] == unix_timestamp + 3600 - - # Verify statsPeriod also produces consistent results - # (both should use UTC, not local time) - response2 = self.get_success_response( - self.organization.slug, - qs_params={"statsPeriod": "1h"}, - ) - - # Both should be within reasonable range - duration = response2.data["end"] - response2.data["start"] - # Allow some slack for test execution time - assert 3500 < duration < 3700 From ebdcb810765415a95a1b4d32ea919bfa56c12d88 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 12:38:49 -0500 Subject: [PATCH 08/17] fix --- .../organization_preprod_app_size_stats.py | 14 ++++++++------ .../test_organization_preprod_app_size_stats.py | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index 7d1d4ad16bdecc..8a879851ae71d3 100644 --- 
a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -65,12 +65,14 @@ def get(self, request: Request, organization: Organization) -> Response: } """ try: - project_ids_set = self.get_requested_project_ids_unchecked(request) - if not project_ids_set: - projects = self.get_projects(request, organization) - project_ids = [p.id for p in projects] - else: - project_ids = list(project_ids_set) + # Parse project IDs from request, then validate permissions + req_proj_ids = self.get_requested_project_ids_unchecked(request) + projects = self.get_projects( + request=request, + organization=organization, + project_ids=req_proj_ids, + ) + project_ids = [p.id for p in projects] # Use shared utility for time range parsing start, end = get_date_range_from_params( diff --git a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py index f73e6b557420b6..4c0656245be5ba 100644 --- a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py +++ b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py @@ -207,6 +207,23 @@ def test_get_requires_authentication(self) -> None: response = client.get(self.url) assert response.status_code == 401 + def test_cannot_access_other_organization_projects(self) -> None: + """Test that users cannot access projects from other organizations (IDOR protection).""" + # Create a second organization with a project that user doesn't have access to + other_org = self.create_organization(name="Other Org") + other_project = self.create_project(organization=other_org, name="Other Project") + + # Try to query our organization's endpoint with the other org's project ID + response = self.get_error_response( + self.organization.slug, + qs_params={"project": [other_project.id]}, + ) + + # Should return 403 Forbidden since user doesn't have permission to access other_project + # This validates that get_projects() properly validates project ownership + assert response.status_code == 403 + assert "permission" in str(response.data).lower() + def test_get_with_project_filter(self) -> None: """Test filtering by project ID.""" project = self.create_project(organization=self.organization) From ad368a5c559edfbe299be673894ecebbb26f3736 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 13:31:52 -0500 Subject: [PATCH 09/17] fix --- .../organization_preprod_app_size_stats.py | 119 ++++++------------ 1 file changed, 41 insertions(+), 78 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index 8a879851ae71d3..b4e1e19f1674ce 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import uuid from collections import defaultdict from datetime import datetime, timedelta from typing import Any @@ -24,7 +23,6 @@ from sentry.api.utils import get_date_range_from_params from sentry.exceptions import InvalidParams from sentry.models.organization import Organization -from sentry.preprod.eap.constants import PREPROD_NAMESPACE from sentry.preprod.eap.read import query_preprod_size_metrics from sentry.utils.dates import get_rollup_from_request @@ -49,7 +47,7 @@ def 
get(self, request: Request, organization: Organization) -> Response: - statsPeriod: Alternative to start/end (e.g., "14d", "24h") - interval: Time interval for buckets (e.g., "1h", "1d") - field: Aggregate field (e.g., "max(max_install_size)") - - query: Filter query string (e.g., "app_id:com.example.app artifact_type:0") + - query: Filter query string (e.g., "app_id:com.example.app artifact_type:0 git_head_ref:main") - includeFilters: If "true", includes available filter values in response Response Format: @@ -64,22 +62,14 @@ def get(self, request: Request, organization: Organization) -> Response: } } """ - try: - # Parse project IDs from request, then validate permissions - req_proj_ids = self.get_requested_project_ids_unchecked(request) - projects = self.get_projects( - request=request, - organization=organization, - project_ids=req_proj_ids, - ) - project_ids = [p.id for p in projects] + projects = self.get_projects(request=request, organization=organization) + project_ids = [p.id for p in projects] - # Use shared utility for time range parsing + try: start, end = get_date_range_from_params( request.GET, default_stats_period=timedelta(days=14) ) - # Use shared utility for interval parsing interval_seconds = get_rollup_from_request( request, date_range=end - start, @@ -92,50 +82,47 @@ def get(self, request: Request, organization: Organization) -> Response: filter_kwargs = self._parse_query(request.GET.get("query", "")) query_filter = self._build_filter(filter_kwargs) if filter_kwargs else None - - response = query_preprod_size_metrics( - organization_id=organization.id, - project_ids=project_ids, - start=start, - end=end, - referrer="api.preprod.app-size-stats", - filter=query_filter, - limit=10000, - ) - - timeseries_data = self._transform_to_timeseries( - response, start, end, interval_seconds, aggregate_func, aggregate_field - ) - - result: dict[str, Any] = { - "data": timeseries_data, - "start": int(start.timestamp()), - "end": int(end.timestamp()), - "meta": { - "fields": { - field: "integer", - } - }, - } - - include_filters = request.GET.get("includeFilters", "").lower() == "true" - if include_filters: - filter_values = self._fetch_filter_values(organization.id, project_ids, start, end) - result["filters"] = filter_values - - return Response(result) - except InvalidParams: logger.exception("Invalid parameters for app size stats request") raise ParseError("Invalid query parameters") - except (ValueError, KeyError): + except ValueError: logger.exception("Error while parsing app size stats request") raise ParseError("Invalid request parameters") + response = query_preprod_size_metrics( + organization_id=organization.id, + project_ids=project_ids, + start=start, + end=end, + referrer="api.preprod.app-size-stats", + filter=query_filter, + limit=10000, + ) + + timeseries_data = self._transform_to_timeseries( + response, start, end, interval_seconds, aggregate_func, aggregate_field + ) + + result: dict[str, Any] = { + "data": timeseries_data, + "start": int(start.timestamp()), + "end": int(end.timestamp()), + "meta": { + "fields": { + field: "integer", + } + }, + } + + include_filters = request.GET.get("includeFilters", "").lower() == "true" + if include_filters: + filter_values = self._fetch_filter_values(organization.id, project_ids, start, end) + result["filters"] = filter_values + + return Response(result) + def _parse_field(self, field: str) -> tuple[str, str]: - """ - Parse field like 'max(max_install_size)' into ('max', 'max_install_size'). 
- """ + """Parse field like 'max(max_install_size)' into ('max', 'max_install_size').""" if "(" not in field or ")" not in field: raise ParseError(f"Invalid field format: {field}") @@ -162,22 +149,18 @@ def _parse_field(self, field: str) -> tuple[str, str]: return func_name, field_name def _parse_query(self, query: str) -> dict[str, Any]: - """ - Parse query string like 'app_id:com.example.app artifact_type:0' into filter kwargs. - """ + """Parse query string like 'app_id:com.example.app artifact_type:0' into filter kwargs.""" filters: dict[str, str | int] = {} if not query: return filters - # Simple space-separated key:value parsing for token in query.split(): if ":" not in token: continue key, value = token.split(":", 1) - # Map query keys to function kwargs if key == "app_id": filters["app_id"] = value elif key == "artifact_type": @@ -186,8 +169,6 @@ def _parse_query(self, query: str) -> dict[str, Any]: filters["build_configuration_name"] = value elif key == "git_head_ref": filters["git_head_ref"] = value - elif key == "artifact_id": - filters["artifact_id"] = int(value) return filters @@ -196,21 +177,7 @@ def _build_filter(self, filter_kwargs: dict[str, Any]) -> TraceItemFilter: filters = [] for key, value in filter_kwargs.items(): - if key == "artifact_id": - # Special case: artifact_id maps to sentry.trace_id - trace_id = uuid.uuid5(PREPROD_NAMESPACE, str(value)).hex - filters.append( - TraceItemFilter( - comparison_filter=ComparisonFilter( - key=AttributeKey( - name="sentry.trace_id", type=AttributeKey.Type.TYPE_STRING - ), - op=ComparisonFilter.OP_EQUALS, - value=AttributeValue(val_str=trace_id), - ) - ) - ) - elif isinstance(value, str): + if isinstance(value, str): filters.append( TraceItemFilter( comparison_filter=ComparisonFilter( @@ -245,11 +212,7 @@ def _transform_to_timeseries( aggregate_field: str, ) -> list[tuple[int, list[dict[str, Any]]]]: """ - Transform EAP protobuf response into dashboard time-series format. - - Output format: [[timestamp, [{"count": value}]], ...] - - Note: EAP response is column-oriented (column_values[col_idx].results[row_idx]) + Transform EAP protobuf response into dashboard time-series format: [[timestamp, [{"count": value}]], ...] 
""" buckets: dict[int, list[float]] = defaultdict(list) From f65b9ed6a81f44e295277d317a5432cd1651c65d Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 13:37:35 -0500 Subject: [PATCH 10/17] fix --- .../organization_preprod_app_size_stats.py | 48 +++++++++---------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index b4e1e19f1674ce..4faebe5812cde6 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -331,10 +331,10 @@ def _fetch_filter_values( limit=10000, ) - unique_values = _extract_unique_values_from_response(response) + unique_values = self._extract_unique_values_from_response(response) branches = list(unique_values.get("git_head_ref", set())) - branches.sort(key=_branch_sort_key) + branches.sort(key=self._branch_sort_key) return { "app_ids": sorted(list(unique_values.get("app_id", set()))), @@ -342,31 +342,29 @@ def _fetch_filter_values( "build_configs": sorted(list(unique_values.get("build_configuration_name", set()))), } + def _extract_unique_values_from_response( + self, response: TraceItemTableResponse + ) -> dict[str, set[str]]: + unique_values: dict[str, set[str]] = {} -def _extract_unique_values_from_response( - response: TraceItemTableResponse, -) -> dict[str, set[str]]: - unique_values: dict[str, set[str]] = {} + if not response.column_values: + return unique_values - if not response.column_values: - return unique_values - - for column in response.column_values: - attr_name = column.attribute_name - unique_values[attr_name] = set() + for column in response.column_values: + attr_name = column.attribute_name + unique_values[attr_name] = set() - for result in column.results: - if result.HasField("val_str") and result.val_str: - unique_values[attr_name].add(result.val_str) - - return unique_values + for result in column.results: + if result.HasField("val_str") and result.val_str: + unique_values[attr_name].add(result.val_str) + return unique_values -def _branch_sort_key(branch: str) -> tuple[int, str]: - """Sort key that prioritizes main and master branches.""" - if branch == "main": - return (0, branch) - elif branch == "master": - return (1, branch) - else: - return (2, branch.lower()) + def _branch_sort_key(self, branch: str) -> tuple[int, str]: + """Sort key that prioritizes main and master branches.""" + if branch == "main": + return (0, branch) + elif branch == "master": + return (1, branch) + else: + return (2, branch.lower()) From f19387f03a0f8012e4846e0114750bb69e481072 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 13:57:34 -0500 Subject: [PATCH 11/17] fix --- .../organization_preprod_app_size_stats.py | 106 +++++++++--------- 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index 4faebe5812cde6..5c4b3ed09b0ef6 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -214,77 +214,84 @@ def _transform_to_timeseries( """ Transform EAP protobuf response into dashboard time-series format: [[timestamp, [{"count": value}]], ...] 
""" - buckets: dict[int, list[float]] = defaultdict(list) - - column_map: dict[str, int] = {} column_values = response.column_values - if not column_values: - result: list[tuple[int, list[dict[str, Any]]]] = [] - start_ts = int(start.timestamp()) - current = int(start_ts // interval_seconds * interval_seconds) - end_ts = int(end.timestamp()) - while current < end_ts: - result.append((current, [{"count": None}])) - current += interval_seconds - return result + return self._generate_empty_buckets(start, end, interval_seconds) + column_map: dict[str, int] = {} for idx, column_value in enumerate(column_values): column_map[column_value.attribute_name] = idx if aggregate_field not in column_map: - result = [] - start_ts = int(start.timestamp()) - current = int(start_ts // interval_seconds * interval_seconds) - end_ts = int(end.timestamp()) - while current < end_ts: - result.append((current, [{"count": None}])) - current += interval_seconds - return result + raise ValueError(f"Required field '{aggregate_field}' not found in EAP response") - field_idx = column_map[aggregate_field] timestamp_idx = column_map.get("timestamp") - if timestamp_idx is None: - # No timestamp column, return None values - result = [] - start_ts = int(start.timestamp()) - current = int(start_ts // interval_seconds * interval_seconds) - end_ts = int(end.timestamp()) - while current < end_ts: - result.append((current, [{"count": None}])) - current += interval_seconds - return result + raise ValueError("Required 'timestamp' column not found in EAP response") + field_idx = column_map[aggregate_field] num_rows = len(column_values[0].results) + for column in column_values: + if len(column.results) != num_rows: + raise ValueError( + f"EAP response has inconsistent column lengths: expected {num_rows}, " + f"got {len(column.results)} for column '{column.attribute_name}'" + ) + + buckets: dict[int, list[float]] = defaultdict(list) + for row_idx in range(num_rows): timestamp_result = column_values[timestamp_idx].results[row_idx] - - if timestamp_result.HasField("val_double"): - timestamp = timestamp_result.val_double - elif timestamp_result.HasField("val_float"): - timestamp = timestamp_result.val_float - else: + timestamp = self._extract_numeric_value(timestamp_result) + if timestamp is None: continue bucket_ts = int(timestamp // interval_seconds * interval_seconds) value_result = column_values[field_idx].results[row_idx] - value = None - - if value_result.HasField("val_int"): - value = float(value_result.val_int) - elif value_result.HasField("val_double"): - value = value_result.val_double - elif value_result.HasField("val_float"): - value = value_result.val_float + value = self._extract_numeric_value(value_result) if value is not None: buckets[bucket_ts].append(value) - result = [] - # Align start time to interval boundary + return self._aggregate_buckets(buckets, start, end, interval_seconds, aggregate_func) + + def _extract_numeric_value(self, result: AttributeValue) -> float | None: + """Extract numeric value from protobuf result, supporting int/float/double.""" + if result.HasField("val_int"): + return float(result.val_int) + elif result.HasField("val_double"): + return result.val_double + elif result.HasField("val_float"): + return result.val_float + return None + + def _generate_empty_buckets( + self, start: datetime, end: datetime, interval_seconds: int + ) -> list[tuple[int, list[dict[str, Any]]]]: + """Generate time buckets with None values when no data is available.""" + result: list[tuple[int, list[dict[str, Any]]]] = [] 
+ start_ts = int(start.timestamp()) + current = int(start_ts // interval_seconds * interval_seconds) + end_ts = int(end.timestamp()) + + while current < end_ts: + result.append((current, [{"count": None}])) + current += interval_seconds + + return result + + def _aggregate_buckets( + self, + buckets: dict[int, list[float]], + start: datetime, + end: datetime, + interval_seconds: int, + aggregate_func: str, + ) -> list[tuple[int, list[dict[str, Any]]]]: + """Aggregate bucketed values and generate final time series.""" + result: list[tuple[int, list[dict[str, Any]]]] = [] start_ts = int(start.timestamp()) current = int(start_ts // interval_seconds * interval_seconds) end_ts = int(end.timestamp()) @@ -293,7 +300,6 @@ def _transform_to_timeseries( values = buckets.get(current, []) if not values: - # Use None for missing data so chart interpolates instead of showing 0 aggregated = None elif aggregate_func == "max": aggregated = max(values) @@ -306,9 +312,7 @@ def _transform_to_timeseries( else: aggregated = None - result.append( - (current, [{"count": int(aggregated) if aggregated is not None else None}]) - ) + result.append((current, [{"count": aggregated}])) current += interval_seconds return result From fca469285e92c373541319a15fe43c91e3c8bd95 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 14:34:55 -0500 Subject: [PATCH 12/17] fix --- ...est_organization_preprod_app_size_stats.py | 272 +++++------------- 1 file changed, 76 insertions(+), 196 deletions(-) diff --git a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py index 4c0656245be5ba..42fb8d6aad56c9 100644 --- a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py +++ b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py @@ -89,78 +89,32 @@ def store_preprod_size_metric( ) assert response.status_code == 200 - def test_get_with_defaults(self) -> None: - """Test GET with default parameters returns proper structure.""" - response = self.get_success_response(self.organization.slug) - - assert "data" in response.data - assert "start" in response.data - assert "end" in response.data - assert "meta" in response.data - assert isinstance(response.data["data"], list) + def test_get_with_include_filters(self) -> None: + """Test that response includes available filter values.""" + project = self.create_project(organization=self.organization) + now = before_now(minutes=5) + start = now - timedelta(minutes=10) - def test_get_with_start_and_end(self) -> None: - """Test explicit time range parameters.""" - start = datetime(2024, 1, 1, 0, 0, 0) - end = datetime(2024, 1, 2, 0, 0, 0) + self.store_preprod_size_metric( + project_id=project.id, + organization_id=self.organization.id, + timestamp=now, + app_id="com.test.app", + git_head_ref="main", + build_configuration_name="Release", + ) response = self.get_success_response( self.organization.slug, qs_params={ + "project": [project.id], + "includeFilters": "true", "start": str(int(start.timestamp())), - "end": str(int(end.timestamp())), + "end": str(int((now + timedelta(minutes=1)).timestamp())), }, ) - assert response.data["start"] == int(start.timestamp()) - assert response.data["end"] == int(end.timestamp()) - - def test_get_with_stats_period(self) -> None: - """Test relative time period (7d, 24h, etc.).""" - response = self.get_success_response( - self.organization.slug, - qs_params={"statsPeriod": "7d"}, - ) - - assert 
"data" in response.data - # Verify that end - start is approximately 7 days - duration = response.data["end"] - response.data["start"] - assert 6 * 86400 < duration < 8 * 86400 - - def test_get_with_interval(self) -> None: - """Test different time intervals.""" - response = self.get_success_response( - self.organization.slug, - qs_params={"interval": "1h"}, - ) - - assert response.status_code == 200 - - def test_get_with_field(self) -> None: - """Test different aggregate fields.""" - response = self.get_success_response( - self.organization.slug, - qs_params={"field": "max(max_download_size)"}, - ) - - assert response.data["meta"]["fields"]["max(max_download_size)"] == "integer" - - def test_get_with_query_filter(self) -> None: - """Test query string filters (app_id, artifact_type, etc.).""" - response = self.get_success_response( - self.organization.slug, - qs_params={"query": "app_id:com.example.app artifact_type:0"}, - ) - assert response.status_code == 200 - - def test_get_with_include_filters(self) -> None: - """Test that response includes available filter values.""" - response = self.get_success_response( - self.organization.slug, - qs_params={"includeFilters": "true"}, - ) - assert "filters" in response.data assert "app_ids" in response.data["filters"] assert "branches" in response.data["filters"] @@ -168,6 +122,9 @@ def test_get_with_include_filters(self) -> None: assert isinstance(response.data["filters"]["app_ids"], list) assert isinstance(response.data["filters"]["branches"], list) assert isinstance(response.data["filters"]["build_configs"], list) + assert "com.test.app" in response.data["filters"]["app_ids"] + assert "main" in response.data["filters"]["branches"] + assert "Release" in response.data["filters"]["build_configs"] def test_get_invalid_field(self) -> None: """Test validation of field format.""" @@ -177,30 +134,6 @@ def test_get_invalid_field(self) -> None: ) assert "Invalid field format" in str(response.data) - def test_get_invalid_aggregate_function(self) -> None: - """Test validation of aggregate function.""" - response = self.get_error_response( - self.organization.slug, - qs_params={"field": "median(max_install_size)"}, - ) - assert "Unsupported aggregate function" in str(response.data) - - def test_get_invalid_field_name(self) -> None: - """Test validation of field name.""" - response = self.get_error_response( - self.organization.slug, - qs_params={"field": "max(invalid_field)"}, - ) - assert "Invalid field" in str(response.data) - - def test_get_empty_field_name(self) -> None: - """Test that empty field names are rejected.""" - response = self.get_error_response( - self.organization.slug, - qs_params={"field": "count()"}, - ) - assert "Field name is required" in str(response.data) - def test_get_requires_authentication(self) -> None: """Test that endpoint requires authentication.""" client = APIClient() @@ -209,82 +142,78 @@ def test_get_requires_authentication(self) -> None: def test_cannot_access_other_organization_projects(self) -> None: """Test that users cannot access projects from other organizations (IDOR protection).""" - # Create a second organization with a project that user doesn't have access to other_org = self.create_organization(name="Other Org") other_project = self.create_project(organization=other_org, name="Other Project") - # Try to query our organization's endpoint with the other org's project ID response = self.get_error_response( self.organization.slug, qs_params={"project": [other_project.id]}, ) - # Should return 403 Forbidden since 
user doesn't have permission to access other_project - # This validates that get_projects() properly validates project ownership assert response.status_code == 403 - assert "permission" in str(response.data).lower() def test_get_with_project_filter(self) -> None: - """Test filtering by project ID.""" - project = self.create_project(organization=self.organization) - - response = self.get_success_response( - self.organization.slug, - qs_params={"project": [project.id]}, - ) + """Test filtering by project ID actually filters the data.""" + now = before_now(minutes=5) + start = now - timedelta(hours=1) - assert response.status_code == 200 + project1 = self.create_project(organization=self.organization, name="Project 1") + project2 = self.create_project(organization=self.organization, name="Project 2") - def test_empty_response_structure(self) -> None: - """Test response structure when no data exists.""" - response = self.get_success_response( - self.organization.slug, - qs_params={"statsPeriod": "1d", "interval": "1h"}, + self.store_preprod_size_metric( + project_id=project1.id, + organization_id=self.organization.id, + timestamp=now - timedelta(minutes=30), + preprod_artifact_id=1, + size_metric_id=1, + app_id="com.project1.app", + max_install_size=100000, ) - # Verify proper response structure with time buckets - assert len(response.data["data"]) > 0 - # Each time bucket should have structure: [timestamp, [{"count": value}]] - for timestamp, values in response.data["data"]: - assert isinstance(timestamp, int) - assert isinstance(values, list) - assert len(values) == 1 - assert "count" in values[0] - - def test_query_filter_parsing(self) -> None: - """Test various query filter combinations.""" - # Test multiple filters - response = self.get_success_response( - self.organization.slug, - qs_params={"query": "app_id:com.example.app artifact_type:0 git_head_ref:main"}, + self.store_preprod_size_metric( + project_id=project2.id, + organization_id=self.organization.id, + timestamp=now - timedelta(minutes=30), + preprod_artifact_id=2, + size_metric_id=2, + app_id="com.project2.app", + max_install_size=200000, ) - assert response.status_code == 200 - # Test with build configuration filter response = self.get_success_response( self.organization.slug, - qs_params={"query": "build_configuration_name:Release"}, + qs_params={ + "project": [project1.id], + "start": str(int(start.timestamp())), + "end": str(int(now.timestamp())), + "includeFilters": "true", + }, ) + assert response.status_code == 200 + app_ids = response.data["filters"]["app_ids"] + assert "com.project1.app" in app_ids + assert "com.project2.app" not in app_ids - def test_all_aggregate_functions(self) -> None: + def test_aggregate_functions(self) -> None: """Test that all supported aggregate functions work.""" for func in ["max", "min", "avg", "count"]: - response = self.get_success_response( - self.organization.slug, - qs_params={"field": f"{func}(max_install_size)"}, - ) - assert response.status_code == 200 - assert f"{func}(max_install_size)" in response.data["meta"]["fields"] - - def test_get_with_actual_data(self) -> None: + with self.subTest(func=func): + response = self.get_success_response( + self.organization.slug, + qs_params={"field": f"{func}(max_install_size)"}, + ) + assert response.status_code == 200 + assert f"{func}(max_install_size)" in response.data["meta"]["fields"] + + def test_get_with_data(self) -> None: """Test querying actual data written to EAP.""" + project = self.create_project(organization=self.organization) now 
= before_now(minutes=5) start = now - timedelta(hours=2) - # Write some test data self.store_preprod_size_metric( - project_id=self.project.id, + project_id=project.id, organization_id=self.organization.id, timestamp=now - timedelta(hours=1), preprod_artifact_id=1, @@ -295,7 +224,7 @@ def test_get_with_actual_data(self) -> None: ) self.store_preprod_size_metric( - project_id=self.project.id, + project_id=project.id, organization_id=self.organization.id, timestamp=now - timedelta(minutes=30), preprod_artifact_id=2, @@ -308,6 +237,7 @@ def test_get_with_actual_data(self) -> None: response = self.get_success_response( self.organization.slug, qs_params={ + "project": [project.id], "start": str(int(start.timestamp())), "end": str(int(now.timestamp())), "interval": "1h", @@ -317,25 +247,22 @@ def test_get_with_actual_data(self) -> None: assert response.status_code == 200 assert "data" in response.data - # Verify we have time buckets with data data_points = response.data["data"] assert len(data_points) > 0 - # Check that at least one bucket has non-None values non_null_values = [d[1][0]["count"] for d in data_points if d[1][0]["count"] is not None] assert len(non_null_values) > 0 - # Verify the max value is correct assert max(non_null_values) == 105000 - def test_aggregation_with_real_data(self) -> None: + def test_aggregation_data(self) -> None: """Test different aggregation functions with real data.""" + project = self.create_project(organization=self.organization) now = before_now(minutes=5) start = now - timedelta(hours=1) - # Write multiple metrics in the same time bucket for i, size in enumerate([100000, 150000, 125000]): self.store_preprod_size_metric( - project_id=self.project.id, + project_id=project.id, organization_id=self.organization.id, timestamp=now - timedelta(minutes=30), preprod_artifact_id=i + 1, @@ -343,10 +270,10 @@ def test_aggregation_with_real_data(self) -> None: max_install_size=size, ) - # Test max aggregation response = self.get_success_response( self.organization.slug, qs_params={ + "project": [project.id], "start": str(int(start.timestamp())), "end": str(int(now.timestamp())), "interval": "1h", @@ -358,10 +285,10 @@ def test_aggregation_with_real_data(self) -> None: ] assert max(data_points) == 150000 - # Test min aggregation response = self.get_success_response( self.organization.slug, qs_params={ + "project": [project.id], "start": str(int(start.timestamp())), "end": str(int(now.timestamp())), "interval": "1h", @@ -375,12 +302,12 @@ def test_aggregation_with_real_data(self) -> None: def test_filter_by_app_id(self) -> None: """Test filtering by app_id.""" + project = self.create_project(organization=self.organization) now = before_now(minutes=5) start = now - timedelta(hours=1) - # Write metrics for different apps self.store_preprod_size_metric( - project_id=self.project.id, + project_id=project.id, organization_id=self.organization.id, timestamp=now - timedelta(minutes=30), preprod_artifact_id=1, @@ -390,7 +317,7 @@ def test_filter_by_app_id(self) -> None: ) self.store_preprod_size_metric( - project_id=self.project.id, + project_id=project.id, organization_id=self.organization.id, timestamp=now - timedelta(minutes=30), preprod_artifact_id=2, @@ -399,10 +326,10 @@ def test_filter_by_app_id(self) -> None: max_install_size=200000, ) - # Query for app1 only response = self.get_success_response( self.organization.slug, qs_params={ + "project": [project.id], "start": str(int(start.timestamp())), "end": str(int(now.timestamp())), "query": "app_id:com.example.app1", @@ 
-417,62 +344,15 @@ def test_filter_by_app_id(self) -> None: assert len(data_points) > 0 assert max(data_points) == 100000 - def test_include_filters_with_real_data(self) -> None: - """Test that includeFilters returns actual filter values.""" - now = before_now(minutes=5) - start = now - timedelta(minutes=10) - - self.store_preprod_size_metric( - project_id=self.project.id, - organization_id=self.organization.id, - timestamp=now, - app_id="com.test.filters.app1", - git_head_ref="main", - build_configuration_name="Release", - ) - - self.store_preprod_size_metric( - project_id=self.project.id, - organization_id=self.organization.id, - timestamp=now, - app_id="com.test.filters.app2", - git_head_ref="develop", - build_configuration_name="Debug", - ) - - response = self.get_success_response( - self.organization.slug, - qs_params={ - "includeFilters": "true", - "start": str(int(start.timestamp())), - "end": str(int((now + timedelta(minutes=1)).timestamp())), - }, - ) - - assert "filters" in response.data - app_ids = response.data["filters"]["app_ids"] - branches = response.data["filters"]["branches"] - configs = response.data["filters"]["build_configs"] - - assert "com.test.filters.app1" in app_ids - assert "com.test.filters.app2" in app_ids - assert "main" in branches - assert "develop" in branches - # Verify main comes before develop - assert branches.index("main") < branches.index("develop") - assert "Release" in configs - assert "Debug" in configs - def test_branch_sorting_priority(self) -> None: """Test that main and master branches are prioritized in the list.""" + project = self.create_project(organization=self.organization) now = before_now(minutes=5) start = now - timedelta(minutes=10) - # Create metrics with various branch names using different artifact IDs - # so they're stored as separate records for idx, branch in enumerate(["feature/test", "main", "develop", "master", "release/1.0"]): self.store_preprod_size_metric( - project_id=self.project.id, + project_id=project.id, organization_id=self.organization.id, timestamp=now, preprod_artifact_id=100 + idx, @@ -484,6 +364,7 @@ def test_branch_sorting_priority(self) -> None: response = self.get_success_response( self.organization.slug, qs_params={ + "project": [project.id], "includeFilters": "true", "start": str(int(start.timestamp())), "end": str(int((now + timedelta(minutes=1)).timestamp())), @@ -495,6 +376,5 @@ def test_branch_sorting_priority(self) -> None: # Verify main is first, master is second assert branches[0] == "main" assert branches[1] == "master" - # Others should be alphabetically sorted remaining = branches[2:] assert remaining == sorted(remaining, key=str.lower) From 6ac79f7488802c066ef09afe05a0611d928f0732 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 15:34:31 -0500 Subject: [PATCH 13/17] params --- .../organization_preprod_app_size_stats.py | 53 +++++++++--------- ...est_organization_preprod_app_size_stats.py | 54 ++++++++++++++++++- 2 files changed, 80 insertions(+), 27 deletions(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index 5c4b3ed09b0ef6..80cf3035087952 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -40,15 +40,22 @@ def get(self, request: Request, organization: Organization) -> Response: """ Retrieve app size metrics over time based on the provided filters. 
- Query Parameters: - - project: Project ID(s) (can be repeated or comma-separated) - - start: Start timestamp (ISO format or Unix timestamp) - - end: End timestamp (ISO format or Unix timestamp) - - statsPeriod: Alternative to start/end (e.g., "14d", "24h") - - interval: Time interval for buckets (e.g., "1h", "1d") - - field: Aggregate field (e.g., "max(max_install_size)") - - query: Filter query string (e.g., "app_id:com.example.app artifact_type:0 git_head_ref:main") - - includeFilters: If "true", includes available filter values in response + Query Parameters (all optional): + - project: Project ID(s) (can be repeated or comma-separated). Default: all accessible projects + - start: Start timestamp (ISO format or Unix timestamp). Default: 14 days ago + - end: End timestamp (ISO format or Unix timestamp). Default: now + - statsPeriod: Alternative to start/end (e.g., "14d", "24h"). Default: "14d" + - interval: Time interval for buckets (e.g., "1h", "1d"). Default: "1d" + - field: Aggregate field (e.g., "max(max_install_size)"). Default: "max(max_install_size)" + + Filters (all optional): + - app_id: Filter by app ID (e.g., "com.example.app") + - artifact_type: Filter by artifact type (e.g., "0" or "1") + - git_head_ref: Filter by git branch (e.g., "main") + - build_configuration_name: Filter by build configuration (e.g., "Release") + + Other: + - includeFilters: If "true", includes available filter values in response. Default: false Response Format: { @@ -80,7 +87,7 @@ def get(self, request: Request, organization: Organization) -> Response: field = request.GET.get("field", "max(max_install_size)") aggregate_func, aggregate_field = self._parse_field(field) - filter_kwargs = self._parse_query(request.GET.get("query", "")) + filter_kwargs = self._parse_filters(request.GET) query_filter = self._build_filter(filter_kwargs) if filter_kwargs else None except InvalidParams: logger.exception("Invalid parameters for app size stats request") @@ -148,27 +155,21 @@ def _parse_field(self, field: str) -> tuple[str, str]: return func_name, field_name - def _parse_query(self, query: str) -> dict[str, Any]: - """Parse query string like 'app_id:com.example.app artifact_type:0' into filter kwargs.""" + def _parse_filters(self, query_params) -> dict[str, Any]: + """Parse filter query parameters.""" filters: dict[str, str | int] = {} - if not query: - return filters + if app_id := query_params.get("app_id"): + filters["app_id"] = app_id - for token in query.split(): - if ":" not in token: - continue + if artifact_type := query_params.get("artifact_type"): + filters["artifact_type"] = int(artifact_type) - key, value = token.split(":", 1) + if git_head_ref := query_params.get("git_head_ref"): + filters["git_head_ref"] = git_head_ref - if key == "app_id": - filters["app_id"] = value - elif key == "artifact_type": - filters["artifact_type"] = int(value) - elif key == "build_configuration_name": - filters["build_configuration_name"] = value - elif key == "git_head_ref": - filters["git_head_ref"] = value + if build_configuration_name := query_params.get("build_configuration_name"): + filters["build_configuration_name"] = build_configuration_name return filters diff --git a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py index 42fb8d6aad56c9..5a9c9a2e5184e9 100644 --- a/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py +++ 
b/tests/sentry/preprod/api/endpoints/test_organization_preprod_app_size_stats.py @@ -332,7 +332,7 @@ def test_filter_by_app_id(self) -> None: "project": [project.id], "start": str(int(start.timestamp())), "end": str(int(now.timestamp())), - "query": "app_id:com.example.app1", + "app_id": "com.example.app1", "field": "max(max_install_size)", }, ) @@ -344,6 +344,58 @@ def test_filter_by_app_id(self) -> None: assert len(data_points) > 0 assert max(data_points) == 100000 + def test_multiple_filters(self) -> None: + """Test combining multiple filter parameters.""" + project = self.create_project(organization=self.organization) + now = before_now(minutes=5) + start = now - timedelta(hours=1) + + self.store_preprod_size_metric( + project_id=project.id, + organization_id=self.organization.id, + timestamp=now - timedelta(minutes=30), + preprod_artifact_id=1, + size_metric_id=1, + app_id="com.example.app", + git_head_ref="main", + build_configuration_name="Release", + artifact_type=0, + max_install_size=100000, + ) + + self.store_preprod_size_metric( + project_id=project.id, + organization_id=self.organization.id, + timestamp=now - timedelta(minutes=30), + preprod_artifact_id=2, + size_metric_id=2, + app_id="com.example.app", + git_head_ref="develop", + build_configuration_name="Debug", + artifact_type=1, + max_install_size=200000, + ) + + response = self.get_success_response( + self.organization.slug, + qs_params={ + "project": [project.id], + "start": str(int(start.timestamp())), + "end": str(int(now.timestamp())), + "app_id": "com.example.app", + "git_head_ref": "main", + "build_configuration_name": "Release", + "artifact_type": "0", + "field": "max(max_install_size)", + }, + ) + + data_points = [ + d[1][0]["count"] for d in response.data["data"] if d[1][0]["count"] is not None + ] + assert len(data_points) > 0 + assert max(data_points) == 100000 + def test_branch_sorting_priority(self) -> None: """Test that main and master branches are prioritized in the list.""" project = self.create_project(organization=self.organization) From 67387dc1aac8d43762a72c555d35e1c168cc4a96 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 16:21:24 -0500 Subject: [PATCH 14/17] type --- .../api/endpoints/organization_preprod_app_size_stats.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index 80cf3035087952..2dc5045bbfb14f 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta from typing import Any +from django.http import QueryDict from rest_framework.exceptions import ParseError from rest_framework.request import Request from rest_framework.response import Response @@ -155,7 +156,7 @@ def _parse_field(self, field: str) -> tuple[str, str]: return func_name, field_name - def _parse_filters(self, query_params) -> dict[str, Any]: + def _parse_filters(self, query_params: QueryDict) -> dict[str, Any]: """Parse filter query parameters.""" filters: dict[str, str | int] = {} From e0e1576b616fa9a066506619aa0a8543dbc2e808 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 16:59:04 -0500 Subject: [PATCH 15/17] check null --- .../api/endpoints/organization_preprod_app_size_stats.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py index 2dc5045bbfb14f..20a47a81394dd9 100644 --- a/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py +++ b/src/sentry/preprod/api/endpoints/organization_preprod_app_size_stats.py @@ -261,6 +261,9 @@ def _transform_to_timeseries( def _extract_numeric_value(self, result: AttributeValue) -> float | None: """Extract numeric value from protobuf result, supporting int/float/double.""" + if result.is_null: + return None + if result.HasField("val_int"): return float(result.val_int) elif result.HasField("val_double"): @@ -361,7 +364,7 @@ def _extract_unique_values_from_response( unique_values[attr_name] = set() for result in column.results: - if result.HasField("val_str") and result.val_str: + if not result.is_null and result.HasField("val_str") and result.val_str: unique_values[attr_name].add(result.val_str) return unique_values From 16177e9269a47dab0ff8042a67fb5863e373917a Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 19 Dec 2025 19:31:29 -0500 Subject: [PATCH 16/17] remove prebuilt dashboard --- .../dashboards/endpoints/organization_dashboards.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/sentry/dashboards/endpoints/organization_dashboards.py b/src/sentry/dashboards/endpoints/organization_dashboards.py index 9be904298468c2..9a806c4969635b 100644 --- a/src/sentry/dashboards/endpoints/organization_dashboards.py +++ b/src/sentry/dashboards/endpoints/organization_dashboards.py @@ -48,7 +48,11 @@ from sentry.auth.superuser import is_active_superuser from sentry.db.models.fields.text import CharField from sentry.locks import locks -from sentry.models.dashboard import Dashboard, DashboardFavoriteUser, DashboardLastVisited +from sentry.models.dashboard import ( + Dashboard, + DashboardFavoriteUser, + DashboardLastVisited, +) from sentry.models.organization import Organization from sentry.organizations.services.organization.model import ( RpcOrganization, @@ -71,7 +75,6 @@ class PrebuiltDashboardId(IntEnum): MOBILE_VITALS_APP_STARTS = 9 MOBILE_VITALS_SCREEN_LOADS = 10 MOBILE_VITALS_SCREEN_RENDERING = 11 - MOBILE_APP_SIZE = 12 class PrebuiltDashboard(TypedDict): @@ -121,10 +124,6 @@ class PrebuiltDashboard(TypedDict): "prebuilt_id": PrebuiltDashboardId.MOBILE_VITALS_SCREEN_RENDERING, "title": "Screen Rendering", }, - { - "prebuilt_id": PrebuiltDashboardId.MOBILE_APP_SIZE, - "title": "Mobile App Size Monitoring", - }, ] From 1dc780c3a8cb1b32825a42ca17bbcf16bb9a5fda Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Sat, 20 Dec 2025 00:32:48 +0000 Subject: [PATCH 17/17] :hammer_and_wrench: apply pre-commit fixes --- src/sentry/dashboards/endpoints/organization_dashboards.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/sentry/dashboards/endpoints/organization_dashboards.py b/src/sentry/dashboards/endpoints/organization_dashboards.py index 9a806c4969635b..48823248bae680 100644 --- a/src/sentry/dashboards/endpoints/organization_dashboards.py +++ b/src/sentry/dashboards/endpoints/organization_dashboards.py @@ -48,11 +48,7 @@ from sentry.auth.superuser import is_active_superuser from sentry.db.models.fields.text import CharField from sentry.locks import locks -from sentry.models.dashboard import ( - Dashboard, - DashboardFavoriteUser, - DashboardLastVisited, -) +from sentry.models.dashboard import Dashboard, 
DashboardFavoriteUser, DashboardLastVisited from sentry.models.organization import Organization from sentry.organizations.services.organization.model import ( RpcOrganization,
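
For reference, below is a minimal client-side sketch of how the flat filter parameters introduced in PATCH 13/17 might be exercised once the series is applied. The host, URL path, organization slug, project id, and auth token are all assumptions for illustration and are not taken from this patch; only the parameter names and the shape of the "data" buckets follow the endpoint docstring and the tests above.

    # Illustrative sketch only: BASE_URL, ORG_SLUG, project id, and token are assumptions.
    import requests

    ORG_SLUG = "my-org"  # hypothetical organization slug
    BASE_URL = (
        f"https://sentry.example.com/api/0/organizations/{ORG_SLUG}"
        "/preprod/app-size-stats/"  # assumed route; adjust to wherever the endpoint is registered
    )

    params = {
        "project": "42",                    # hypothetical project id
        "statsPeriod": "14d",               # default window per the endpoint docstring
        "interval": "1d",
        "field": "max(max_install_size)",
        # Flat filter parameters from PATCH 13/17, replacing the old `query` string:
        "app_id": "com.example.app",
        "artifact_type": "0",
        "git_head_ref": "main",
        "build_configuration_name": "Release",
        "includeFilters": "true",
    }

    resp = requests.get(
        BASE_URL,
        params=params,
        headers={"Authorization": "Bearer <token>"},  # placeholder token
    )
    resp.raise_for_status()
    payload = resp.json()

    # Per the tests above, each entry in payload["data"] is a
    # (timestamp, [{"count": value}]) bucket; "count" may be None for empty buckets.
    for timestamp, counts in payload["data"]:
        print(timestamp, counts[0]["count"])

Compared with the earlier "app_id:com.example.app artifact_type:0 ..." query-string format, the flat parameters avoid tokenizing a free-form string on the server and let each filter be validated and documented independently.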