Skip to content

Commit

Permalink
Add documentation
Browse files Browse the repository at this point in the history
amCap1712 committed Jan 24, 2025
1 parent 09841b9 commit 2dafb0d
Showing 8 changed files with 12 additions and 1 deletion.
1 change: 1 addition & 0 deletions listenbrainz_spark/stats/incremental/__init__.py
Original file line number Diff line number Diff line change
@@ -191,6 +191,7 @@ def create_partial_aggregate(self) -> DataFrame:
return full_df

def incremental_dump_exists(self) -> bool:
""" Check if incremental dump exists. """
return hdfs_connection.client.status(INCREMENTAL_DUMPS_SAVE_PATH, strict=False)

def create_incremental_aggregate(self) -> DataFrame:
1 change: 1 addition & 0 deletions listenbrainz_spark/stats/incremental/user/artist.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@


class ArtistUserEntity(UserEntity):
""" See base class IncrementalStats for documentation. """

def __init__(self, stats_range, database, message_type, from_date=None, to_date=None):
super().__init__(entity="artists", stats_range=stats_range, database=database, message_type=message_type,
3 changes: 2 additions & 1 deletion listenbrainz_spark/stats/incremental/user/daily_activity.py
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@


class DailyActivityUserEntity(UserEntity):
""" See base class IncrementalStats for documentation. """

def __init__(self, stats_range, database, message_type, from_date=None, to_date=None):
super().__init__(
@@ -26,7 +27,7 @@ def __init__(self, stats_range, database, message_type, from_date=None, to_date=
self.setup_time_range()

def setup_time_range(self):
# Genarate a dataframe containing hours of all days of the week
""" Genarate a dataframe containing hours of all days of the week. """
weekdays = [calendar.day_name[day] for day in range(0, 7)]
hours = [hour for hour in range(0, 24)]
time_range = itertools.product(weekdays, hours)
4 changes: 4 additions & 0 deletions listenbrainz_spark/stats/incremental/user/entity.py
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@


class UserEntity(IncrementalStats, abc.ABC):
""" See base class IncrementalStats for documentation. """

def __init__(self, entity: str, stats_range: str = None, database: str = None, message_type: str = None,
from_date: datetime = None, to_date: datetime = None):
@@ -47,6 +48,9 @@ def get_table_prefix(self) -> str:
return f"user_{self.entity}_{self.stats_range}"

def filter_existing_aggregate(self, existing_aggregate, incremental_aggregate):
""" Filter listens from existing aggregate to only include listens for entities having listens in the
incremental dumps.
"""
query = f"""
WITH incremental_users AS (
SELECT DISTINCT user_id FROM {incremental_aggregate}
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@


class ListeningActivityUserEntity(UserEntity):
""" See base class IncrementalStats for documentation. """

def __init__(self, stats_range, database, message_type, year=None):
super().__init__(
1 change: 1 addition & 0 deletions listenbrainz_spark/stats/incremental/user/recording.py
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@


class RecordingUserEntity(UserEntity):
""" See base class IncrementalStats for documentation. """

def __init__(self, stats_range, database, message_type, from_date=None, to_date=None):
super().__init__(entity="recordings", stats_range=stats_range, database=database, message_type=message_type,
1 change: 1 addition & 0 deletions listenbrainz_spark/stats/incremental/user/release.py
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@


class ReleaseUserEntity(UserEntity):
""" See base class IncrementalStats for documentation. """

def __init__(self, stats_range, database, message_type, from_date=None, to_date=None):
super().__init__(entity="releases", stats_range=stats_range, database=database, message_type=message_type,
1 change: 1 addition & 0 deletions listenbrainz_spark/stats/incremental/user/release_group.py
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@


class ReleaseGroupUserEntity(UserEntity):
""" See base class IncrementalStats for documentation. """

def __init__(self, stats_range, database, message_type, from_date=None, to_date=None):
super().__init__(entity="release_groups", stats_range=stats_range, database=database, message_type=message_type,

0 comments on commit 2dafb0d

Please sign in to comment.