Documentation improvements
sv-giampa committed Jan 29, 2024
1 parent 8ba65a1 commit 9723fb8
Showing 8 changed files with 151 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/create-prerelease.yml
@@ -31,7 +31,7 @@ jobs:
run: |
echo "::set-output name=version::$(date +'%Y-%m-%d' --utc)"
-
name: Create Stable Release
name: Create Pre-Release
id: create_release
uses: actions/create-release@v1
env:
11 changes: 11 additions & 0 deletions parsoda/model/function/analyzer.py
@@ -15,4 +15,15 @@ class Analyzer(ABC, Generic[K, R, A]):

@abstractmethod
def analyze(self, driver: ParsodaDriver, data: Dict[K, R]) -> A:
"""Applies an analysis algorithm to the output data from reduction step.
The analyzer might be a sequential, parallel or distributed algorithm.
In the latter case, the algorithm would use the same driver used by the current application for running a new, nested, ParSoDA application.
Args:
driver (ParsodaDriver): the driver used during the execution of the parallel phase
data (Dict[K, R]): output data from reducton step organized as a dictionary of key-value pairs
Returns:
A: the outputdata type from the analysis
"""
pass
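For illustration, a minimal sequential Analyzer implementing this contract might look like the sketch below; the import path mirrors the file shown above, while the class name, generic parameters and ranking logic are hypothetical.

from typing import Dict, List, Tuple

from parsoda.model.function.analyzer import Analyzer  # assumed import path, matching the file above


class TopKeysByCount(Analyzer[str, List[str], List[Tuple[str, int]]]):
    """Hypothetical sequential analyzer: ranks keys by the size of their reduced value lists."""

    def __init__(self, k: int = 10):
        self.__k = k

    def analyze(self, driver, data: Dict[str, List[str]]) -> List[Tuple[str, int]]:
        # The driver is not used here: this analysis runs sequentially on the reduced data.
        counts = [(key, len(values)) for key, values in data.items()]
        counts.sort(key=lambda pair: pair[1], reverse=True)
        return counts[:self.__k]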
5 changes: 5 additions & 0 deletions parsoda/model/function/crawler.py
@@ -64,6 +64,11 @@ def get_partitions(self, num_of_partitions=0, partition_size=1024*1024*1024) ->

@abstractmethod
def supports_remote_partitioning(self) -> bool:
"""Checks if the crawler supports remote partitioning, i.e. the ability to read data directly from the worker nodes
Returns:
bool: true if the crawler supports remote partitionig of data source.
"""
pass
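As a hedged sketch of how this flag could be consumed (this is not ParSoDA's actual scheduling logic; the helper function is hypothetical), a caller might branch on it as follows.

from parsoda.model.function.crawler import Crawler  # assumed import path, matching the file above


def plan_partitioning(crawler: Crawler, num_of_partitions: int) -> str:
    # Hypothetical helper: decides where the data source should be partitioned.
    if crawler.supports_remote_partitioning():
        # Each worker node can open the data source and read its own partition.
        return "remote: workers read their {} partitions directly".format(num_of_partitions)
    # Otherwise the driver must read the whole source locally and ship partitions to the workers.
    return "local: the driver builds and distributes {} partitions".format(num_of_partitions)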


13 changes: 8 additions & 5 deletions parsoda/model/function/filter.py
@@ -10,9 +10,12 @@ class Filter(ABC):

@abstractmethod
def test(self, item: SocialDataItem) -> bool:
"""
Test if the item satisfies the predicate of the filter
:param item: the item to test
:return: True if the item satisfies the predicate, False otherwise
"""
"""Test if the item satisfies the predicate of the filter
Args:
item (SocialDataItem): the item to test
Returns:
bool: True if the item satisfies the predicate, False otherwise
"""
pass
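A minimal concrete Filter, for illustration; the import path follows the file above, and the item attribute used (tags) is the one mentioned in the Mapper docstring of this same commit. The class name is hypothetical.

from parsoda.model.function.filter import Filter  # assumed import path, matching the file above


class HasTags(Filter):
    """Hypothetical filter: keeps only the items that carry at least one tag."""

    def test(self, item) -> bool:
        # 'item.tags' is the attribute used in the Mapper docstring example of this commit.
        return item.tags is not None and len(item.tags) > 0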
11 changes: 7 additions & 4 deletions parsoda/model/function/mapper.py
@@ -14,10 +14,13 @@ class Mapper(ABC, Generic[K, V]):

@abstractmethod
def map(self, item: SocialDataItem) -> Iterable[Tuple[K, V]]:
"""
Returns a list of key-value pairs computed from the given item.
"""Returns a list of key-value pairs computed from the given item.
Example result: [ (item.user_id, item.tags[0]), (item.user_id, item.tags[1]), ... ]
:param item: the item to map
:return: a list of key-value pairs
Args:
item (SocialDataItem): the item to map
Returns:
Iterable[Tuple[K, V]]: an iterable of key-value pairs
"""
pass
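The example result in the docstring can be produced by a mapper such as the following sketch; the import path is assumed from the file above and the class name is hypothetical.

from typing import Iterable, Tuple

from parsoda.model.function.mapper import Mapper  # assumed import path, matching the file above


class UserToTags(Mapper[str, str]):
    """Hypothetical mapper: emits one (user_id, tag) pair per tag of the item,
    matching the example result shown in the docstring above."""

    def map(self, item) -> Iterable[Tuple[str, str]]:
        return [(item.user_id, tag) for tag in item.tags]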
13 changes: 8 additions & 5 deletions parsoda/model/function/reducer.py
@@ -12,10 +12,13 @@ class Reducer(ABC, Generic[K, V, R]):
"""

def reduce(self, key: K, values: List[V]) -> R:
"""
Applies the reduction algorithm to values
:param key: the key all values are associated to
:param values: all the values associated to the key
:return: the reduced value
"""Applies the reduction algorithm to values
Args:
key (K): the key all values are associated to
values (List[V]): all the values associated to the key
Returns:
R: the reduced value
"""
pass
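For illustration, a reducer that collapses all the values of a key into the sorted list of distinct values; names and type parameters are hypothetical, and the import path follows the file above.

from typing import List

from parsoda.model.function.reducer import Reducer  # assumed import path, matching the file above


class DistinctValues(Reducer[str, str, List[str]]):
    """Hypothetical reducer: reduces the values of a key to the sorted list of their distinct values."""

    def reduce(self, key: str, values: List[str]) -> List[str]:
        return sorted(set(values))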
5 changes: 5 additions & 0 deletions parsoda/model/function/visualizer.py
@@ -11,4 +11,9 @@ class Visualizer(ABC, Generic[A]):

@abstractmethod
def visualize(self, result: A) -> None:
"""Transforms data from the analysis step in some output format, then write them to some output device or system.
Args:
result (A): the data resulting from the analysis step
"""
pass
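A minimal Visualizer sketch that writes the analysis result to a text file; the import path is assumed from the file above, and the result type matches the hypothetical analyzer sketched earlier in this commit.

from typing import List, Tuple

from parsoda.model.function.visualizer import Visualizer  # assumed import path, matching the file above


class WriteTopKeys(Visualizer[List[Tuple[str, int]]]):
    """Hypothetical visualizer: writes (key, count) pairs to a semicolon-separated text file."""

    def __init__(self, output_path: str = "top_keys.txt"):
        self.__output_path = output_path

    def visualize(self, result: List[Tuple[str, int]]) -> None:
        with open(self.__output_path, "w") as output:
            for key, count in result:
                output.write("{};{}\n".format(key, count))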
119 changes: 106 additions & 13 deletions parsoda/model/social_data_app.py
@@ -44,49 +44,124 @@ def __init__(

self.__reduce_result_length = reduce_result_length

def get_app_name(self):
def get_app_name(self)->str:
"""Gets the referred application name
Returns:
str: the app name
"""
return self.__app_name

def get_driver(self):
def get_driver(self)->ParsodaDriver:
"""Gets the driver used by the application
Returns:
ParsodaDriver: the driver object
"""
return self.__driver

def get_partitions(self):
def get_partitions(self)->int:
"""Gets the number of partitions used during execution
Returns:
int: number of partitions
"""
return self.__partitions

def get_chunk_size(self):
def get_chunk_size(self)->int:
"""Gets the data chunk size, i.e. the partitoin size, used during execution
Returns:
int: data chunck size
"""
return self.__chunk_size

def get_crawling_time(self):
def get_crawling_time(self)->float:
"""Gets the time spent on crawling
Returns:
float: the crawling time in seconds
"""
return self.__crawling_time

def get_filter_time(self):
def get_filter_time(self)->float:
"""Gets the time spent on filtering
Returns:
float: the filter time in seconds
"""
return self.__filter_time

def get_map_time(self):
def get_map_time(self)->float:
"""Gets the time spent on mapping
Returns:
float: the map time in seconds
"""
return self.__map_time

def get_split_time(self):
def get_split_time(self)->float:
"""Gets the time spent on splitting
Returns:
float: the split time in seconds
"""
return self.__split_time

def get_reduce_time(self):
def get_reduce_time(self)->float:
"""Gets the time spent on reduction
Returns:
float: the reduce time in seconds
"""
return self.__reduce_time

def get_analysis_time(self):
def get_analysis_time(self)->float:
"""Gets the time spent on analysis
Returns:
float: the analysis time in seconds
"""
return self.__analysis_time

def get_visualization_time(self):
def get_visualization_time(self)->float:
"""Gets the time spent on visualization
Returns:
float: the visualization time in seconds
"""
return self.__visualization_time

def get_total_execution_time(self):
def get_parallel_execution_time(self)->float:
"""Gets the time spent on parallel execution, i.e. the time spent from filtering to reduction.
Returns:
float: the parallel execution time in seconds
"""
return self.__filter_to_reduce_time

def get_total_execution_time(self):
def get_total_execution_time(self)->float:
"""Gets the time spent on execution, from filtering to visualization, excluding the crawling step
Returns:
float: the total execution time in seconds
"""
return self.__total_execution_time

def get_total_time(self):
"""Gets the total time spent for completing the application, from crawling to visualization, i.e. the response time
Returns:
float: the total response time
"""
return self.__total_time

def get_reduce_result_length(self):
"""Gets the number of items obtained by executing the reduction. This value can be used for debbugging purposes and for testing the correctness of the parallel execution.
Returns:
float: the length of the reduction result
"""
return self.__reduce_result_length
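As a usage sketch of the getters documented above (the report class name and the way an instance is obtained are not shown in this diff, so the 'report' parameter is left untyped and the helper is hypothetical):

def print_report_timings(report) -> None:
    # Hypothetical helper: 'report' is any object exposing the getters documented above.
    print("App name:       {}".format(report.get_app_name()))
    print("Partitions:     {}".format(report.get_partitions()))
    print("Crawling time:  {:.3f} s".format(report.get_crawling_time()))
    print("Parallel time:  {:.3f} s".format(report.get_parallel_execution_time()))
    print("Total time:     {:.3f} s".format(report.get_total_time()))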

def __repr__(self):
@@ -117,6 +192,14 @@ def __str__(self):
"| Reduce result length: " + str(self.__reduce_result_length) + "\n"

def to_csv_line(self, separator: str = ";") -> str:
"""Creates a CSV (Comma Separated Value) line for this report, by using the specified separator
Args:
separator (str, optional): The values separator. Defaults to ";".
Returns:
str: the CSV line
"""
return \
str(self.__app_name)+separator+\
str(self.__partitions)+separator+\
@@ -134,6 +217,16 @@ def to_csv_line(self, separator: str = ";") -> str:
str(self.__reduce_result_length)

def to_csv_titles(self, separator: str = ";") -> str:
"""Creates a CSV (Comma Separated Value) header line, by using the specified separator.
The returned line contains just the standard titles of report columns.
It can be used for writing the first header line of a CSV file that would store more than one execution report.
Args:
separator (str, optional): The values separator. Defaults to ";".
Returns:
str: the columns titles in a CSV line
"""
return \
"App Name"+separator+\
"Partitions"+separator+\
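A hedged usage sketch for the two CSV methods above: appending one line per execution to a shared CSV file and writing the header only when the file is created. The 'report' object and the file name are hypothetical.

import os


def append_report_to_csv(report, csv_path: str = "reports.csv") -> None:
    # Hypothetical helper: write the header line only once, then one line per report.
    is_new_file = not os.path.exists(csv_path)
    with open(csv_path, "a") as csv_file:
        if is_new_file:
            csv_file.write(report.to_csv_titles() + "\n")
        csv_file.write(report.to_csv_line() + "\n")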
