From 372abcffffead728b2c4c8c2211da45b158ee1ec Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Mon, 10 Jun 2024 15:09:28 -0700 Subject: [PATCH] make article & question pageview crons more robust --- kitsune/sumo/googleanalytics.py | 33 +++++++++++++--------- kitsune/sumo/tests/test_googleanalytics.py | 8 ++++++ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/kitsune/sumo/googleanalytics.py b/kitsune/sumo/googleanalytics.py index 96642b262ec..1411aaada0d 100644 --- a/kitsune/sumo/googleanalytics.py +++ b/kitsune/sumo/googleanalytics.py @@ -288,13 +288,16 @@ def pageviews_by_document(period, verbose=False): for row in run_report(date_range, create_article_report_request, verbose=verbose): path = row.dimension_values[0].value - num_page_views = int(row.metric_values[0].value) - # The path is guaranteed to be a KB article path without any query parameters. - # If the URL path for KB articles changes, we'll need to continue to support - # the previous URL structure for a year -- the longest period of time we look - # backwards -- as well as the new URL structure. + # The path should be a KB article path without any query parameters, but in reality + # we've seen that it can sometimes be "/". If the URL path for KB articles changes, + # we'll need to continue to support the previous URL structure for a year -- the + # longest period of time we look backwards -- as well as the new URL structure. # Current URL structure: /{locale}/kb/{slug} - locale, slug = path.strip("/").split("/kb/") + try: + num_page_views = int(row.metric_values[0].value) + locale, slug = path.strip("/").split("/kb/") + except ValueError: + continue yield ((locale, slug), num_page_views) @@ -307,14 +310,18 @@ def pageviews_by_question(period=LAST_YEAR, verbose=False): for row in run_report(date_range, create_question_report_request, verbose=verbose): path = row.dimension_values[0].value - num_page_views = int(row.metric_values[0].value) - # The path is guaranteed to be a question path without any query parameters. - # If the URL path for questions changes, we'll need to continue to support - # the previous URL structure for a year -- the longest period of time we look - # backwards -- as well as the new URL structure. + # The path should be a question path without any query parameters, but in reality + # we've seen that it can sometimes be "/". If the URL path for questions changes, + # we'll need to continue to support the previous URL structure for a year -- the + # longest period of time we look backwards -- as well as the new URL structure. # Current URL structure: /{locale}/questions/{question_id} - locale, question_id = path.strip("/").split("/questions/") - yield (int(question_id), num_page_views) + try: + num_page_views = int(row.metric_values[0].value) + locale, question_id = path.strip("/").split("/questions/") + question_id = int(question_id) + except ValueError: + continue + yield (question_id, num_page_views) def search_clicks_and_impressions(start_date, end_date, verbose=False): diff --git a/kitsune/sumo/tests/test_googleanalytics.py b/kitsune/sumo/tests/test_googleanalytics.py index a6ea2dce6bb..114022542b5 100644 --- a/kitsune/sumo/tests/test_googleanalytics.py +++ b/kitsune/sumo/tests/test_googleanalytics.py @@ -72,6 +72,10 @@ def test_pageviews_by_document(self, run_report): dimension_values=[DimensionValue(value="/en-US/kb/doc1-slug")], metric_values=[MetricValue(value="1000")], ), + Row( + dimension_values=[DimensionValue(value="/")], + metric_values=[MetricValue(value="7")], + ), Row( dimension_values=[DimensionValue(value="/es/kb/doc2-slug")], metric_values=[MetricValue(value="2000")], @@ -97,6 +101,10 @@ def test_pageviews_by_question(self, run_report): dimension_values=[DimensionValue(value="/en-US/questions/123456")], metric_values=[MetricValue(value="1000")], ), + Row( + dimension_values=[DimensionValue(value="/")], + metric_values=[MetricValue(value="7")], + ), Row( dimension_values=[DimensionValue(value="/es/questions/782348")], metric_values=[MetricValue(value="2000")],