Documentation improvements
sv-giampa committed Jan 29, 2024
1 parent 8ba65a1 commit 9723fb8
Showing 8 changed files with 151 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/create-prerelease.yml
@@ -31,7 +31,7 @@ jobs:
run: |
echo "::set-output name=version::$(date +'%Y-%m-%d' --utc)"
-
name: Create Stable Release
name: Create Pre-Release
id: create_release
uses: actions/create-release@v1
env:
11 changes: 11 additions & 0 deletions parsoda/model/function/analyzer.py
@@ -15,4 +15,15 @@ class Analyzer(ABC, Generic[K, R, A]):

@abstractmethod
def analyze(self, driver: ParsodaDriver, data: Dict[K, R]) -> A:
"""Applies an analysis algorithm to the output data from reduction step.
The analyzer might be a sequential, parallel or distributed algorithm.
In the latter case, the algorithm would use the same driver used by the current application for running a new, nested, ParSoDA application.
Args:
driver (ParsodaDriver): the driver used during the execution of the parallel phase
data (Dict[K, R]): output data from reducton step organized as a dictionary of key-value pairs
Returns:
A: the outputdata type from the analysis
"""
pass
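For illustration, a minimal sequential Analyzer implementing this contract might look like the sketch below; the import path mirrors the file shown above, while the class name, generic parameters and ranking logic are hypothetical.

from typing import Dict, List, Tuple

from parsoda.model.function.analyzer import Analyzer  # assumed import path, matching the file above


class TopKeysByCount(Analyzer[str, List[str], List[Tuple[str, int]]]):
    """Hypothetical sequential analyzer: ranks keys by the size of their reduced value lists."""

    def __init__(self, k: int = 10):
        self.__k = k

    def analyze(self, driver, data: Dict[str, List[str]]) -> List[Tuple[str, int]]:
        # The driver is not used here: this analysis runs sequentially on the reduced data.
        counts = [(key, len(values)) for key, values in data.items()]
        counts.sort(key=lambda pair: pair[1], reverse=True)
        return counts[:self.__k]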
5 changes: 5 additions & 0 deletions parsoda/model/function/crawler.py
@@ -64,6 +64,11 @@ def get_partitions(self, num_of_partitions=0, partition_size=1024*1024*1024) ->

@abstractmethod
def supports_remote_partitioning(self) -> bool:
"""Checks if the crawler supports remote partitioning, i.e. the ability to read data directly from the worker nodes
Returns:
bool: true if the crawler supports remote partitionig of data source.
"""
pass
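As a hedged sketch of how this flag could be consumed (this is not ParSoDA's actual scheduling logic; the helper function is hypothetical), a caller might branch on it as follows.

from parsoda.model.function.crawler import Crawler  # assumed import path, matching the file above


def plan_partitioning(crawler: Crawler, num_of_partitions: int) -> str:
    # Hypothetical helper: decides where the data source should be partitioned.
    if crawler.supports_remote_partitioning():
        # Each worker node can open the data source and read its own partition.
        return "remote: workers read their {} partitions directly".format(num_of_partitions)
    # Otherwise the driver must read the whole source locally and ship partitions to the workers.
    return "local: the driver builds and distributes {} partitions".format(num_of_partitions)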


13 changes: 8 additions & 5 deletions parsoda/model/function/filter.py
@@ -10,9 +10,12 @@ class Filter(ABC):

@abstractmethod
def test(self, item: SocialDataItem) -> bool:
"""
Test if the item satisfies the predicate of the filter
:param item: the item to test
:return: True if the item satisfies the predicate, False otherwise
"""
"""Test if the item satisfies the predicate of the filter
Args:
item (SocialDataItem): the item to test
Returns:
bool: True if the item satisfies the predicate, False otherwise
"""
pass
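A minimal concrete Filter, for illustration; the import path follows the file above, and the item attribute used (tags) is the one mentioned in the Mapper docstring of this same commit. The class name is hypothetical.

from parsoda.model.function.filter import Filter  # assumed import path, matching the file above


class HasTags(Filter):
    """Hypothetical filter: keeps only the items that carry at least one tag."""

    def test(self, item) -> bool:
        # 'item.tags' is the attribute used in the Mapper docstring example of this commit.
        return item.tags is not None and len(item.tags) > 0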
11 changes: 7 additions & 4 deletions parsoda/model/function/mapper.py
@@ -14,10 +14,13 @@ class Mapper(ABC, Generic[K, V]):

@abstractmethod
def map(self, item: SocialDataItem) -> Iterable[Tuple[K, V]]:
"""
Returns a list of key-value pairs computed from the given item.
"""Returns a list of key-value pairs computed from the given item.
Example result: [ (item.user_id, item.tags[0]), (item.user_id, item.tags[1]), ... ]
:param item: the item to map
:return: a list of key-value pairs
Args:
item (SocialDataItem): the item to map
Returns:
Iterable[Tuple[K, V]]: an iterable of key-value pairs
"""
pass
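The example result in the docstring can be produced by a mapper such as the following sketch; the import path is assumed from the file above and the class name is hypothetical.

from typing import Iterable, Tuple

from parsoda.model.function.mapper import Mapper  # assumed import path, matching the file above


class UserToTags(Mapper[str, str]):
    """Hypothetical mapper: emits one (user_id, tag) pair per tag of the item,
    matching the example result shown in the docstring above."""

    def map(self, item) -> Iterable[Tuple[str, str]]:
        return [(item.user_id, tag) for tag in item.tags]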
13 changes: 8 additions & 5 deletions parsoda/model/function/reducer.py
@@ -12,10 +12,13 @@ class Reducer(ABC, Generic[K, V, R]):
"""

def reduce(self, key: K, values: List[V]) -> R:
"""
Applies the reduction algorithm to values
:param key: the key all values are associated to
:param values: all the values associated to the key
:return: the reduced value
"""Applies the reduction algorithm to values
Args:
key (K): the key all values are associated to
values (List[V]): all the values associated to the key
Returns:
R: the reduced value
"""
pass
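For illustration, a reducer that collapses all the values of a key into the sorted list of distinct values; names and type parameters are hypothetical, and the import path follows the file above.

from typing import List

from parsoda.model.function.reducer import Reducer  # assumed import path, matching the file above


class DistinctValues(Reducer[str, str, List[str]]):
    """Hypothetical reducer: reduces the values of a key to the sorted list of their distinct values."""

    def reduce(self, key: str, values: List[str]) -> List[str]:
        return sorted(set(values))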
5 changes: 5 additions & 0 deletions parsoda/model/function/visualizer.py
@@ -11,4 +11,9 @@ class Visualizer(ABC, Generic[A]):

@abstractmethod
def visualize(self, result: A) -> None:
"""Transforms data from the analysis step in some output format, then write them to some output device or system.
Args:
result (A): the data resulting from the analysis step
"""
pass
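A minimal Visualizer sketch that writes the analysis result to a text file; the import path is assumed from the file above, and the result type matches the hypothetical analyzer sketched earlier in this commit.

from typing import List, Tuple

from parsoda.model.function.visualizer import Visualizer  # assumed import path, matching the file above


class WriteTopKeys(Visualizer[List[Tuple[str, int]]]):
    """Hypothetical visualizer: writes (key, count) pairs to a semicolon-separated text file."""

    def __init__(self, output_path: str = "top_keys.txt"):
        self.__output_path = output_path

    def visualize(self, result: List[Tuple[str, int]]) -> None:
        with open(self.__output_path, "w") as output:
            for key, count in result:
                output.write("{};{}\n".format(key, count))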
119 changes: 106 additions & 13 deletions parsoda/model/social_data_app.py
@@ -44,49 +44,124 @@ def __init__(

self.__reduce_result_length = reduce_result_length

def get_app_name(self):
def get_app_name(self)->str:
"""Gets the referred application name
Returns:
str: the app name
"""
return self.__app_name

def get_driver(self):
def get_driver(self)->ParsodaDriver:
"""Gets the driver used by the application
Returns:
ParsodaDriver: the driver object
"""
return self.__driver

def get_partitions(self):
def get_partitions(self)->int:
"""Gets the number of partitions used during execution
Returns:
int: number of partitions
"""
return self.__partitions

def get_chunk_size(self):
def get_chunk_size(self)->int:
"""Gets the data chunk size, i.e. the partitoin size, used during execution
Returns:
int: data chunck size
"""
return self.__chunk_size

def get_crawling_time(self):
def get_crawling_time(self)->float:
"""Gets the time spent on crawling
Returns:
float: the crawling time in seconds
"""
return self.__crawling_time

def get_filter_time(self):
def get_filter_time(self)->float:
"""Gets the time spent on filtering
Returns:
float: the filter time in seconds
"""
return self.__filter_time

def get_map_time(self):
def get_map_time(self)->float:
"""Gets the time spent on mapping
Returns:
float: the map time in seconds
"""
return self.__map_time

def get_split_time(self):
def get_split_time(self)->float:
"""Gets the time spent on splitting
Returns:
float: the split time in seconds
"""
return self.__split_time

def get_reduce_time(self):
def get_reduce_time(self)->float:
"""Gets the time spent on reduction
Returns:
float: the reduce time in seconds
"""
return self.__reduce_time

def get_analysis_time(self):
def get_analysis_time(self)->float:
"""Gets the time spent on analysis
Returns:
float: the analysis time in seconds
"""
return self.__analysis_time

def get_visualization_time(self):
def get_visualization_time(self)->float:
"""Gets the time spent on visualization
Returns:
float: the visualization time in seconds
"""
return self.__visualization_time

def get_total_execution_time(self):
def get_parallel_execution_time(self)->float:
"""Gets the time spent on parallel execution, i.e. the time spent from filtering to reduction.
Returns:
float: the parallel execution time in seconds
"""
return self.__filter_to_reduce_time

def get_total_execution_time(self):
def get_total_execution_time(self)->float:
"""Gets the time spent on execution, from filtering to visualization, excluding the crawling step
Returns:
float: the total execution time in seconds
"""
return self.__total_execution_time

def get_total_time(self):
"""Gets the total time spent for completing the application, from crawling to visualization, i.e. the response time
Returns:
float: the total response time
"""
return self.__total_time

def get_reduce_result_length(self):
"""Gets the number of items obtained by executing the reduction. This value can be used for debbugging purposes and for testing the correctness of the parallel execution.
Returns:
float: the length of the reduction result
"""
return self.__reduce_result_length
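As a usage sketch of the getters documented above (the report class name and the way an instance is obtained are not shown in this diff, so the 'report' parameter is left untyped and the helper is hypothetical):

def print_report_timings(report) -> None:
    # Hypothetical helper: 'report' is any object exposing the getters documented above.
    print("App name:       {}".format(report.get_app_name()))
    print("Partitions:     {}".format(report.get_partitions()))
    print("Crawling time:  {:.3f} s".format(report.get_crawling_time()))
    print("Parallel time:  {:.3f} s".format(report.get_parallel_execution_time()))
    print("Total time:     {:.3f} s".format(report.get_total_time()))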

def __repr__(self):
@@ -117,6 +192,14 @@ def __str__(self):
"| Reduce result length: " + str(self.__reduce_result_length) + "\n"

def to_csv_line(self, separator: str = ";") -> str:
"""Creates a CSV (Comma Separated Value) line for this report, by using the specified separator
Args:
separator (str, optional): The values separator. Defaults to ";".
Returns:
str: the CSV line
"""
return \
str(self.__app_name)+separator+\
str(self.__partitions)+separator+\
@@ -134,6 +217,16 @@ def to_csv_line(self, separator: str = ";") -> str:
str(self.__reduce_result_length)

def to_csv_titles(self, separator: str = ";") -> str:
"""Creates a CSV (Comma Separated Value) header line, by using the specified separator.
The returned line contains just the standard titles of report columns.
It can be used for writing the first header line of a CSV file that would store more than one execution report.
Args:
separator (str, optional): The values separator. Defaults to ";".
Returns:
str: the columns titles in a CSV line
"""
return \
"App Name"+separator+\
"Partitions"+separator+\
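A hedged usage sketch for the two CSV methods above: appending one line per execution to a shared CSV file and writing the header only when the file is created. The 'report' object and the file name are hypothetical.

import os


def append_report_to_csv(report, csv_path: str = "reports.csv") -> None:
    # Hypothetical helper: write the header line only once, then one line per report.
    is_new_file = not os.path.exists(csv_path)
    with open(csv_path, "a") as csv_file:
        if is_new_file:
            csv_file.write(report.to_csv_titles() + "\n")
        csv_file.write(report.to_csv_line() + "\n")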
