From 9723fb82c85043b34d33efd248f64cb65aaf6281 Mon Sep 17 00:00:00 2001 From: "sv.giampa" Date: Mon, 29 Jan 2024 09:56:54 +0000 Subject: [PATCH] Documentation improvements --- .github/workflows/create-prerelease.yml | 2 +- parsoda/model/function/analyzer.py | 11 +++ parsoda/model/function/crawler.py | 5 + parsoda/model/function/filter.py | 13 ++- parsoda/model/function/mapper.py | 11 ++- parsoda/model/function/reducer.py | 13 ++- parsoda/model/function/visualizer.py | 5 + parsoda/model/social_data_app.py | 119 +++++++++++++++++++++--- 8 files changed, 151 insertions(+), 28 deletions(-) diff --git a/.github/workflows/create-prerelease.yml b/.github/workflows/create-prerelease.yml index c036ff2..68d1f94 100644 --- a/.github/workflows/create-prerelease.yml +++ b/.github/workflows/create-prerelease.yml @@ -31,7 +31,7 @@ jobs: run: | echo "::set-output name=version::$(date +'%Y-%m-%d' --utc)" - - name: Create Stable Release + name: Create Pre-Release id: create_release uses: actions/create-release@v1 env: diff --git a/parsoda/model/function/analyzer.py b/parsoda/model/function/analyzer.py index 40ff6cc..7675060 100644 --- a/parsoda/model/function/analyzer.py +++ b/parsoda/model/function/analyzer.py @@ -15,4 +15,15 @@ class Analyzer(ABC, Generic[K, R, A]): @abstractmethod def analyze(self, driver: ParsodaDriver, data: Dict[K, R]) -> A: + """Applies an analysis algorithm to the output data from reduction step. + The analyzer might be a sequential, parallel or distributed algorithm. + In the latter case, the algorithm would use the same driver used by the current application for running a new, nested, ParSoDA application. 
+ + Args: + driver (ParsodaDriver): the driver used during the execution of the parallel phase + data (Dict[K, R]): output data from reduction step organized as a dictionary of key-value pairs + + Returns: + A: the output data type from the analysis + """ pass \ No newline at end of file diff --git a/parsoda/model/function/crawler.py b/parsoda/model/function/crawler.py index 5b4bcd6..1824bd4 100644 --- a/parsoda/model/function/crawler.py +++ b/parsoda/model/function/crawler.py @@ -64,6 +64,11 @@ def get_partitions(self, num_of_partitions=0, partition_size=1024*1024*1024) -> @abstractmethod def supports_remote_partitioning(self) -> bool: + """Checks if the crawler supports remote partitioning, i.e. the ability to read data directly from the worker nodes + + Returns: + bool: True if the crawler supports remote partitioning of the data source. + """ pass diff --git a/parsoda/model/function/filter.py b/parsoda/model/function/filter.py index 6cf2cad..d528870 100644 --- a/parsoda/model/function/filter.py +++ b/parsoda/model/function/filter.py @@ -10,9 +10,12 @@ class Filter(ABC): @abstractmethod def test(self, item: SocialDataItem) -> bool: - """ - Test if the item satisfies the predicate of the filter - :param item: the item to test - :return: True if the item satisfies the predicate, False otherwise - """ + """Test if the item satisfies the predicate of the filter + + Args: + item (SocialDataItem): the item to test + + Returns: + bool: True if the item satisfies the predicate, False otherwise + """ pass \ No newline at end of file diff --git a/parsoda/model/function/mapper.py b/parsoda/model/function/mapper.py index 0f2fc37..b602831 100644 --- a/parsoda/model/function/mapper.py +++ b/parsoda/model/function/mapper.py @@ -14,10 +14,13 @@ class Mapper(ABC, Generic[K, V]): @abstractmethod def map(self, item: SocialDataItem) -> Iterable[Tuple[K, V]]: - """ - Returns a list of key-value pairs computed from the given item.
+ """Returns a list of key-value pairs computed from the given item. Example result: [ (item.user_id, item.tags[0]), (item.user_id, item.tags[1]), ... ] - :param item: the item to map - :return: a list of key-value pairs + + Args: + item (SocialDataItem): the item to map + + Returns: + Iterable[Tuple[K, V]]: an iterable of key-value pairs """ pass diff --git a/parsoda/model/function/reducer.py b/parsoda/model/function/reducer.py index 62f98af..4f3ef7d 100644 --- a/parsoda/model/function/reducer.py +++ b/parsoda/model/function/reducer.py @@ -12,10 +12,13 @@ class Reducer(ABC, Generic[K, V, R]): """ def reduce(self, key: K, values: List[V]) -> R: - """ - Applies the reduction algorithm to values - :param key: the key all values are associated to - :param values: all the values associated to the key - :return: the reduced value + """Applies the reduction algorithm to values + + Args: + key (K): the key all values are associated to + values (List[V]): all the values associated to the key + + Returns: + R: the reduced value """ pass \ No newline at end of file diff --git a/parsoda/model/function/visualizer.py b/parsoda/model/function/visualizer.py index d173198..30e2fbe 100644 --- a/parsoda/model/function/visualizer.py +++ b/parsoda/model/function/visualizer.py @@ -11,4 +11,9 @@ class Visualizer(ABC, Generic[A]): @abstractmethod def visualize(self, result: A) -> None: + """Transforms data from the analysis step into some output format, then writes it to some output device or system.
+ + Args: + result (A): the data resulting from the analysis step + """ pass diff --git a/parsoda/model/social_data_app.py b/parsoda/model/social_data_app.py index 872b86f..4b748b0 100644 --- a/parsoda/model/social_data_app.py +++ b/parsoda/model/social_data_app.py @@ -44,49 +44,124 @@ def __init__( self.__reduce_result_length = reduce_result_length - def get_app_name(self): + def get_app_name(self)->str: + """Gets the referred application name + + Returns: + str: the app name + """ return self.__app_name - def get_driver(self): + def get_driver(self)->ParsodaDriver: + """Gets the driver used by the application + + Returns: + ParsodaDriver: the driver object + """ return self.__driver - def get_partitions(self): + def get_partitions(self)->int: + """Gets the number of partitions used during execution + + Returns: + int: number of partitions + """ return self.__partitions - def get_chunk_size(self): + def get_chunk_size(self)->int: + """Gets the data chunk size, i.e. the partition size, used during execution + + Returns: + int: data chunk size + """ return self.__chunk_size - def get_crawling_time(self): + def get_crawling_time(self)->float: + """Gets the time spent on crawling + + Returns: + float: the crawling time in seconds + """ return self.__crawling_time - def get_filter_time(self): + def get_filter_time(self)->float: + """Gets the time spent on filtering + + Returns: + float: the filter time in seconds + """ return self.__filter_time - def get_map_time(self): + def get_map_time(self)->float: + """Gets the time spent on mapping + + Returns: + float: the map time in seconds + """ return self.__map_time - def get_split_time(self): + def get_split_time(self)->float: + """Gets the time spent on splitting + + Returns: + float: the split time in seconds + """ return self.__split_time - def get_reduce_time(self): + def get_reduce_time(self)->float: + """Gets the time spent on reduction + + Returns: + float: the reduce time in seconds + """ return self.__reduce_time
- def get_analysis_time(self): + def get_analysis_time(self)->float: + """Gets the time spent on analysis + + Returns: + float: the analysis time in seconds + """ return self.__analysis_time - def get_visualization_time(self): + def get_visualization_time(self)->float: + """Gets the time spent on visualization + + Returns: + float: the visualization time in seconds + """ return self.__visualization_time - def get_total_execution_time(self): + def get_parallel_execution_time(self)->float: + """Gets the time spent on parallel execution, i.e. the time spent from filtering to reduction. + + Returns: + float: the parallel execution time + """ return self.__filter_to_reduce_time - def get_total_execution_time(self): + def get_total_execution_time(self)->float: + """Gets the time spent on execution, from filtering to visualization, excluding the crawling step + + Returns: + float: the total execution time in seconds + """ return self.__total_execution_time def get_total_time(self): + """Gets the total time spent for completing the application, from crawling to visualization, i.e. the response time + + Returns: + float: the total response time + """ return self.__total_time def get_reduce_result_length(self): + """Gets the number of items obtained by executing the reduction. This value can be used for debugging purposes and for testing the correctness of the parallel execution. + + Returns: + int: the length of the reduction result + """ return self.__reduce_result_length def __repr__(self): @@ -117,6 +192,14 @@ def __str__(self): "| Reduce result length: " + str(self.__reduce_result_length) + "\n" def to_csv_line(self, separator: str = ";") -> str: + """Creates a CSV (Comma Separated Value) line for this report, by using the specified separator + + Args: + separator (str, optional): The values separator. Defaults to ";".
+ + Returns: + str: the CSV line + """ return \ str(self.__app_name)+separator+\ str(self.__partitions)+separator+\ @@ -134,6 +217,16 @@ def to_csv_line(self, separator: str = ";") -> str: str(self.__reduce_result_length) def to_csv_titles(self, separator: str = ";") -> str: + """Creates a CSV (Comma Separated Value) header line, by using the specified separator. + The returned line contains just the standard titles of report columns. + It can be used for writing the first header line of a CSV file that would store more than one execution report. + + Args: + separator (str, optional): The values separator. Defaults to ";". + + Returns: + str: the columns titles in a CSV line + """ return \ "App Name"+separator+\ "Partitions"+separator+\