From 2dafb0da41c327d2831e5130086243b4dc5035c9 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Fri, 24 Jan 2025 17:53:31 +0530 Subject: [PATCH] Add documentation --- listenbrainz_spark/stats/incremental/__init__.py | 1 + listenbrainz_spark/stats/incremental/user/artist.py | 1 + listenbrainz_spark/stats/incremental/user/daily_activity.py | 3 ++- listenbrainz_spark/stats/incremental/user/entity.py | 4 ++++ .../stats/incremental/user/listening_activity.py | 1 + listenbrainz_spark/stats/incremental/user/recording.py | 1 + listenbrainz_spark/stats/incremental/user/release.py | 1 + listenbrainz_spark/stats/incremental/user/release_group.py | 1 + 8 files changed, 12 insertions(+), 1 deletion(-) diff --git a/listenbrainz_spark/stats/incremental/__init__.py b/listenbrainz_spark/stats/incremental/__init__.py index 5638b5e97e..e1602cc2e0 100644 --- a/listenbrainz_spark/stats/incremental/__init__.py +++ b/listenbrainz_spark/stats/incremental/__init__.py @@ -191,6 +191,7 @@ def create_partial_aggregate(self) -> DataFrame: return full_df def incremental_dump_exists(self) -> bool: + """ Check if incremental dump exists. """ return hdfs_connection.client.status(INCREMENTAL_DUMPS_SAVE_PATH, strict=False) def create_incremental_aggregate(self) -> DataFrame: diff --git a/listenbrainz_spark/stats/incremental/user/artist.py b/listenbrainz_spark/stats/incremental/user/artist.py index b4782c3bba..bd7f167f0d 100644 --- a/listenbrainz_spark/stats/incremental/user/artist.py +++ b/listenbrainz_spark/stats/incremental/user/artist.py @@ -8,6 +8,7 @@ class ArtistUserEntity(UserEntity): + """ See base class IncrementalStats for documentation. 
""" def __init__(self, stats_range, database, message_type, from_date=None, to_date=None): super().__init__(entity="artists", stats_range=stats_range, database=database, message_type=message_type, diff --git a/listenbrainz_spark/stats/incremental/user/daily_activity.py b/listenbrainz_spark/stats/incremental/user/daily_activity.py index 0cb0301a63..668f7af3a6 100644 --- a/listenbrainz_spark/stats/incremental/user/daily_activity.py +++ b/listenbrainz_spark/stats/incremental/user/daily_activity.py @@ -17,6 +17,7 @@ class DailyActivityUserEntity(UserEntity): + """ See base class IncrementalStats for documentation. """ def __init__(self, stats_range, database, message_type, from_date=None, to_date=None): super().__init__( @@ -26,7 +27,7 @@ def __init__(self, stats_range, database, message_type, from_date=None, to_date= self.setup_time_range() def setup_time_range(self): - # Genarate a dataframe containing hours of all days of the week + """ Generate a dataframe containing hours of all days of the week. """ weekdays = [calendar.day_name[day] for day in range(0, 7)] hours = [hour for hour in range(0, 24)] time_range = itertools.product(weekdays, hours) diff --git a/listenbrainz_spark/stats/incremental/user/entity.py b/listenbrainz_spark/stats/incremental/user/entity.py index 3fee951fe2..a0b393477b 100644 --- a/listenbrainz_spark/stats/incremental/user/entity.py +++ b/listenbrainz_spark/stats/incremental/user/entity.py @@ -30,6 +30,7 @@ class UserEntity(IncrementalStats, abc.ABC): + """ See base class IncrementalStats for documentation. 
""" def __init__(self, entity: str, stats_range: str = None, database: str = None, message_type: str = None, from_date: datetime = None, to_date: datetime = None): @@ -47,6 +48,9 @@ def get_table_prefix(self) -> str: return f"user_{self.entity}_{self.stats_range}" def filter_existing_aggregate(self, existing_aggregate, incremental_aggregate): + """ Filter listens from existing aggregate to only include listens for entities having listens in the + incremental dumps. + """ query = f""" WITH incremental_users AS ( SELECT DISTINCT user_id FROM {incremental_aggregate} diff --git a/listenbrainz_spark/stats/incremental/user/listening_activity.py b/listenbrainz_spark/stats/incremental/user/listening_activity.py index 69264626d8..730ae5dbd3 100644 --- a/listenbrainz_spark/stats/incremental/user/listening_activity.py +++ b/listenbrainz_spark/stats/incremental/user/listening_activity.py @@ -14,6 +14,7 @@ class ListeningActivityUserEntity(UserEntity): + """ See base class IncrementalStats for documentation. """ def __init__(self, stats_range, database, message_type, year=None): super().__init__( diff --git a/listenbrainz_spark/stats/incremental/user/recording.py b/listenbrainz_spark/stats/incremental/user/recording.py index 5fee4f49a1..af87545450 100644 --- a/listenbrainz_spark/stats/incremental/user/recording.py +++ b/listenbrainz_spark/stats/incremental/user/recording.py @@ -10,6 +10,7 @@ class RecordingUserEntity(UserEntity): + """ See base class IncrementalStats for documentation. 
""" def __init__(self, stats_range, database, message_type, from_date=None, to_date=None): super().__init__(entity="recordings", stats_range=stats_range, database=database, message_type=message_type, diff --git a/listenbrainz_spark/stats/incremental/user/release.py b/listenbrainz_spark/stats/incremental/user/release.py index 994e719b1e..4e77d54e18 100644 --- a/listenbrainz_spark/stats/incremental/user/release.py +++ b/listenbrainz_spark/stats/incremental/user/release.py @@ -9,6 +9,7 @@ class ReleaseUserEntity(UserEntity): + """ See base class IncrementalStats for documentation. """ def __init__(self, stats_range, database, message_type, from_date=None, to_date=None): super().__init__(entity="releases", stats_range=stats_range, database=database, message_type=message_type, diff --git a/listenbrainz_spark/stats/incremental/user/release_group.py b/listenbrainz_spark/stats/incremental/user/release_group.py index e0fb723503..117d557421 100644 --- a/listenbrainz_spark/stats/incremental/user/release_group.py +++ b/listenbrainz_spark/stats/incremental/user/release_group.py @@ -10,6 +10,7 @@ class ReleaseGroupUserEntity(UserEntity): + """ See base class IncrementalStats for documentation. """ def __init__(self, stats_range, database, message_type, from_date=None, to_date=None): super().__init__(entity="release_groups", stats_range=stats_range, database=database, message_type=message_type,