Commit

Merge branch 'release' into release-github
# Conflicts:
#	pm4py/algo/transformation/__init__.py
#	pm4py/meta.py
#	pm4py/objects/conversion/dfg/converter.py
#	pm4py/objects/conversion/dfg/variants/__init__.py
#	pm4py/objects/random_variables/__init__.py
fit-sebastiaan-van-zelst committed Oct 1, 2021
2 parents 68a207d + 9470264 commit 5155b41
Showing 32 changed files with 844 additions and 62 deletions.
85 changes: 65 additions & 20 deletions CHANGELOG.md
@@ -1,37 +1,81 @@
# PM4Py Changelog

## PM4Py 2.2.14 (2021.10.01)

### Fixed

* 706d42c0
  * bug fix paths filter for Pandas
* c5ecaa4f
  * bug fix numeric attribute filter XES (custom case attribute glue)

### Deprecated

### Changed

### Added

* 8ba67034
  * added random variables that are able to check gamma and log normal distributions
* 1d22d99d
  * added dfg -> petri net translation that has unique labels (routing is performed by invisible transitions)
* 004ec93f
  * add support for log-level fitness in the alignment output
* 56efe270
  * add fitness value for the dfg-based alignments
* d9da1ab8
  * add raw performance values for the elements of the performance-based dfg
* 0eeda19d
  * when visualizing a dfg without a log attached to it, the incoming edges are used to count the occurrences of the activities
* 03ee6b8e
  * allow counting of occurrences of activities/open cases/resource activities in a given time range
* ae5a3973
  * add various new filtering functionalities

### Other

* ac00be2f
  * added the specification of Python 3.9.x among the supported versions
  * no longer suggesting Python 3.6.x

---

## PM4Py 2.2.13.1 (2021.09.21)

### Fixed

* 816fb4ad
  * fixed a bug in the Pandas case size filter (the constraints were not applied correctly).
* 40f142c4
  * fixed a bug in the format_dataframe function (columns were duplicated if already existing with the same name).
* 00d1a7de
  * reverted stream converter to old variant (in a slightly slower but safer way).

### Removed

### Deprecated

### Changed

* 991a09d4
  * introduce a time limit in the DFG playout.
* ae5d2a07
  * return the state of the process tree along with the alignment for the process tree alignments.
* 8b77384f
  * refactoring of the calculation of the fitness for Petri net alignments (scattered code).

### Added

### Other

* d58d34fd
  * upgraded Dockerfile to Python 3.9
* 50114175
  * resolved issue with the upcoming Python 3.10 release
* 89314905
  * security issue in requirements

---

## PM4Py 2.2.13 (2021.09.03)

@@ -42,25 +86,27 @@
### Deprecated

### Changed

* 5723df7b
  * xes exporter now reports on xes features and xmlns
* 3b632548
  * graphviz based visualizations now expose background color as a parameter

### Added

* 0592157b
  * new dfg playout including performance specification
* 85739ba0
  * allow pandas df to be used as an iterable for streaming simulation
* 2fa9993f
  * path filter that filters the cases of an event log where there is at least one occurrence of the provided path
    occurring in a given time range.
* a7ee73a8
  * added filter based on rework detection
* c03b6188
  * add petri net, reset/inhibitor net and data petri net semantics

### Other

---

@@ -73,7 +119,7 @@
* e88a6546
  * https://github.com/pm4py/pm4py-core/issues/249
* 84511628
  * fix minor bug in the calculation of the handover and subcontracting metrics.

### Removed

@@ -84,8 +130,7 @@
* 01fd0402
  * The ```pm4py.view_petri_net()``` method now uses ```None``` as a default initial and final marking.
* 72ed7d0d
  * Improved performance of variant discovery of dataframes.

### Added

@@ -99,7 +144,7 @@
* 4b594228
  * add support for adding decision points to data petri nets.
* 9261270e
  * add support for performance dfg discovery in ```pm4py.discover_performance_dfg()```.

### Other

2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -26,7 +26,7 @@
# The short X.Y version
version = '2.2'
# The full version, including alpha/beta/rc tags
release = '2.2.14'

# -- General configuration ---------------------------------------------------

16 changes: 16 additions & 0 deletions examples/log_to_int_tree_open_paths.py
@@ -0,0 +1,16 @@
import pm4py
import os
from pm4py.algo.transformation.log_to_interval_tree import algorithm as log_to_interval_tree


def execute_script():
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    tree = log_to_interval_tree.apply(log, variant=log_to_interval_tree.Variants.OPEN_PATHS)
    # see how many paths are open at the timestamp 1319616410
    print(len(tree[1319616410]))
    # read the detailed information about the source and target event of each path
    print(tree[1319616410])


if __name__ == "__main__":
    execute_script()
4 changes: 3 additions & 1 deletion pm4py/__init__.py
@@ -34,7 +34,9 @@
    discover_performance_dfg
from pm4py.filtering import filter_start_activities, filter_end_activities, filter_attribute_values, filter_variants, \
    filter_variants_percentage, filter_directly_follows_relation, filter_time_range, filter_trace_attribute, \
    filter_eventually_follows_relation, filter_event_attribute_values, filter_trace_attribute_values, \
    filter_between, filter_case_size, filter_case_performance, filter_activities_rework, filter_paths_performance, \
    filter_variants_by_coverage_percentage, filter_variants_top_k
from pm4py.hof import filter_log, filter_trace, sort_trace, sort_log
from pm4py.meta import __name__, __version__, __doc__, __author__, __author_email__, \
__maintainer__, __maintainer_email__
@@ -506,6 +506,8 @@ def apply_trace(trace, list_nets, parameters=None):
    cost1 = cost // utils.STD_MODEL_LOG_MOVE_COST
    fitness = 1.0 - cost1 / (best_worst_cost + len(trace))
    res["fitness"] = fitness
    res["bwc"] = (best_worst_cost + len(trace)) * utils.STD_MODEL_LOG_MOVE_COST

    return res
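The per-trace fitness and best-worst-cost (bwc) computation in the hunk above can be sketched in isolation. This is a minimal sketch, assuming pm4py's standard log/model move cost of 10000; the function name is illustrative, not part of the pm4py API:

```python
STD_MODEL_LOG_MOVE_COST = 10000  # assumed standard cost of a log/model move, as in pm4py


def trace_fitness_and_bwc(cost, best_worst_cost, trace_len):
    # normalize the raw alignment cost by the standard move cost
    cost1 = cost // STD_MODEL_LOG_MOVE_COST
    fitness = 1.0 - cost1 / (best_worst_cost + trace_len)
    # bwc: cost of the worst case, in which the whole trace and the
    # shortest model path are explained by log/model moves only
    bwc = (best_worst_cost + trace_len) * STD_MODEL_LOG_MOVE_COST
    return fitness, bwc
```

Storing `bwc` alongside `fitness` is what later enables the log-level fitness added elsewhere in this commit, since the per-trace denominators can be summed.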


8 changes: 8 additions & 0 deletions pm4py/algo/conformance/alignments/dfg/variants/classic.py
@@ -132,12 +132,20 @@ def apply_log(log, dfg, sa, ea, parameters=None):
    aligned_traces = []
    align_dict = {}

    al_empty_cost = __apply_list_activities([], dfg, sa, ea, parameters=parameters)["cost"]

    for trace in log:
        trace_act = tuple(x[activity_key] for x in trace)
        if trace_act in align_dict:
            aligned_traces.append(align_dict[trace_act])
        else:
            log_move_cost_function = exec_utils.get_param_value(Parameters.LOG_MOVE_COST_FUNCTION, parameters,
                                                                {x: align_utils.STD_MODEL_LOG_MOVE_COST for x in
                                                                 trace_act})
            trace_bwc_cost = sum(log_move_cost_function[x] for x in trace_act)
            al_tr = __apply_list_activities(trace_act, dfg, sa, ea, parameters=parameters)
            al_tr["fitness"] = 1.0 - al_tr["cost"] / (al_empty_cost + trace_bwc_cost)
            al_tr["bwc"] = al_empty_cost + trace_bwc_cost
            align_dict[trace_act] = al_tr
            aligned_traces.append(align_dict[trace_act])
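The dfg-based variant derives each trace's bwc from the cost of aligning the empty trace plus the log-move cost of every activity in the trace. A minimal sketch of that arithmetic, assuming the standard move cost; the function name is illustrative:

```python
STD_MODEL_LOG_MOVE_COST = 10000  # assumed standard move cost, as in pm4py


def dfg_trace_fitness(al_cost, al_empty_cost, trace_act, log_move_cost_function=None):
    # by default, every activity of the trace carries the standard log-move cost
    if log_move_cost_function is None:
        log_move_cost_function = {a: STD_MODEL_LOG_MOVE_COST for a in trace_act}
    # worst case: delete the whole trace (log moves) and replay the
    # cheapest model path on the empty trace (al_empty_cost)
    trace_bwc = sum(log_move_cost_function[a] for a in trace_act)
    bwc = al_empty_cost + trace_bwc
    fitness = 1.0 - al_cost / bwc
    return fitness, bwc
```

Exposing the cost function as a parameter lets callers weight log moves per activity rather than uniformly.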

@@ -91,6 +91,7 @@ def apply(log1: EventLog, log2: EventLog, parameters: Optional[Dict[Union[str, P
                (align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) / (len(log1[index]) + best_worst_cost))
        else:
            align['fitness'] = 0
        align["bwc"] = (len(log1[index]) + best_worst_cost) * align_utils.STD_MODEL_LOG_MOVE_COST

    return aligned_traces

2 changes: 2 additions & 0 deletions pm4py/algo/conformance/alignments/petri_net/algorithm.py
@@ -152,6 +152,8 @@ def apply_trace(trace, petri_net, initial_marking, final_marking, parameters=Non
    """

    ali["fitness"] = fitness
    # returning also the best worst cost, for log fitness computation
    ali["bwc"] = ltrace_bwc

    return ali

2 changes: 2 additions & 0 deletions pm4py/algo/discovery/dfg/adapters/pandas/df_statistics.py
@@ -124,6 +124,8 @@ def get_dfg_graph(df, measure="frequency", activity_key="concept:name", case_id_
        dfg_performance = {}
        for key in dfg_performance_mean:
            dfg_performance[key] = {"mean": dfg_performance_mean[key], "median": dfg_performance_median[key], "max": dfg_performance_max[key], "min": dfg_performance_min[key], "sum": dfg_performance_sum[key], "stdev": dfg_performance_std[key]}
    elif perf_aggregation_key == "raw_values":
        dfg_performance = directly_follows_grouping.apply(list).to_dict()
    else:
        dfg_performance = directly_follows_grouping.agg(perf_aggregation_key).to_dict()
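The new `raw_values` aggregation keeps the full list of observed durations per edge instead of a single summary statistic (the pandas adapter does this via `groupby(...).apply(list).to_dict()`). A minimal pure-Python sketch of the same grouping, with hypothetical input data:

```python
from collections import defaultdict


def performance_dfg(observations, aggregation="raw_values"):
    # observations: iterable of ((source_activity, target_activity), seconds)
    grouping = defaultdict(list)
    for edge, seconds in observations:
        grouping[edge].append(seconds)
    if aggregation == "raw_values":
        # keep every observed duration per edge
        return dict(grouping)
    if aggregation == "mean":
        return {edge: sum(v) / len(v) for edge, v in grouping.items()}
    raise ValueError("unsupported aggregation: " + aggregation)
```

Keeping the raw values lets downstream consumers compute any statistic (or plot distributions) without re-reading the log.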

2 changes: 2 additions & 0 deletions pm4py/algo/discovery/dfg/variants/performance.py
@@ -108,6 +108,8 @@ def performance(log: Union[EventLog, EventStream], parameters: Optional[Dict[Uni
            ret[key] = stdev(ret0[key]) if len(ret0[key]) > 1 else 0
        elif aggregation_measure == "sum":
            ret[key] = sum(ret0[key])
        elif aggregation_measure == "raw_values":
            ret[key] = ret0[key]
        elif aggregation_measure == "all":
            ret[key] = {"median": median(ret0[key]), "min": min(ret0[key]), "max": max(ret0[key]),
                        "stdev": stdev(ret0[key]) if len(ret0[key]) > 1 else 0, "sum": sum(ret0[key]), "mean": mean(ret0[key])}
10 changes: 8 additions & 2 deletions pm4py/algo/evaluation/replay_fitness/variants/alignment_based.py
@@ -57,23 +57,29 @@ def evaluate(aligned_traces: typing.ListAlignments, parameters: Optional[Dict[Un
    no_traces = len([x for x in aligned_traces if x is not None])
    no_fit_traces = 0
    sum_fitness = 0.0
    sum_bwc = 0.0
    sum_cost = 0.0

    for tr in aligned_traces:
        if tr is not None:
            if tr["fitness"] == 1.0:
                no_fit_traces = no_fit_traces + 1
            sum_fitness += tr["fitness"]
            sum_bwc += tr["bwc"]
            sum_cost += tr["cost"]

    perc_fit_traces = 0.0
    average_fitness = 0.0
    log_fitness = 0.0

    if no_traces > 0:
        perc_fit_traces = (100.0 * float(no_fit_traces)) / (float(no_traces))
        average_fitness = float(sum_fitness) / float(no_traces)
        log_fitness = 1.0 - float(sum_cost) / float(sum_bwc)

    return {"percFitTraces": perc_fit_traces, "averageFitness": average_fitness,
            "percentage_of_fitting_traces": perc_fit_traces,
            "average_trace_fitness": average_fitness, "log_fitness": log_fitness}


def apply(log: EventLog, petri_net: PetriNet, initial_marking: Marking, final_marking: Marking, align_variant=alignments.DEFAULT_VARIANT, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Dict[str, float]:
10 changes: 6 additions & 4 deletions pm4py/algo/filtering/log/attributes/attributes_filter.py
@@ -27,7 +27,7 @@
from pm4py.util import exec_utils
from pm4py.util import xes_constants as xes
from pm4py.util.constants import PARAMETER_CONSTANT_ATTRIBUTE_KEY, PARAMETER_CONSTANT_ACTIVITY_KEY
from pm4py.util.constants import PARAMETER_CONSTANT_CASEID_KEY, PARAMETER_KEY_CASE_GLUE
from pm4py.util.xes_constants import DEFAULT_NAME_KEY
from copy import copy
import deprecation
@@ -39,6 +39,7 @@ class Parameters(Enum):
    ATTRIBUTE_KEY = PARAMETER_CONSTANT_ATTRIBUTE_KEY
    ACTIVITY_KEY = PARAMETER_CONSTANT_ACTIVITY_KEY
    CASE_ID_KEY = PARAMETER_CONSTANT_CASEID_KEY
    PARAMETER_KEY_CASE_GLUE = PARAMETER_KEY_CASE_GLUE
    DECREASING_FACTOR = "decreasingFactor"
    POSITIVE = "positive"
    STREAM_FILTER_KEY1 = "stream_filter_key1"
@@ -73,6 +74,7 @@ def apply_numeric(log: EventLog, int1: float, int2: float, parameters: Optional[
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    case_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, xes.DEFAULT_TRACEID_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    case_attribute_prefix = exec_utils.get_param_value(Parameters.PARAMETER_KEY_CASE_GLUE, parameters, constants.CASE_ATTRIBUTE_PREFIX)
    # stream_filter_key is helpful to filter on cases containing an event with an attribute
    # in the specified value set, but such events shall have an activity in particular.

@@ -106,7 +108,7 @@ def apply_numeric(log: EventLog, int1: float, int2: float, parameters: Optional[
                            attributes=log.attributes, extensions=log.extensions, classifiers=log.classifiers,
                            omni_present=log.omni_present, properties=log.properties)

    all_cases_ids = set(x[case_attribute_prefix + case_key] for x in stream)

    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions, classifiers=log.classifiers,
                            omni_present=log.omni_present, properties=log.properties)
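The fix above makes the case attribute glue (by default the `case:` prefix) configurable instead of hard-coded. A small sketch of the idea, with hypothetical event dicts; the helper name is illustrative:

```python
CASE_ATTRIBUTE_PREFIX = "case:"  # pm4py's default case-attribute glue


def collect_case_ids(stream, case_key="concept:name", prefix=CASE_ATTRIBUTE_PREFIX):
    # in a converted event stream, the trace attributes are flattened
    # onto each event under prefix + attribute name
    return set(event[prefix + case_key] for event in stream)
```

With the hard-coded `"case:"`, logs converted with a custom glue would raise a `KeyError`; passing the prefix through the parameters avoids that.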
@@ -157,7 +159,7 @@ def apply_numeric_events(log: EventLog, int1: float, int2: float, parameters: Op
        list(filter(lambda x: attribute_key in x and (x[attribute_key] < int1 or x[attribute_key] > int2), stream)),
        attributes=log.attributes, extensions=log.extensions, classifiers=log.classifiers,
        omni_present=log.omni_present, properties=log.properties)
    filtered_log = log_converter.apply(stream, parameters=conversion_parameters)

    return filtered_log

@@ -201,7 +203,7 @@ def apply_events(log: EventLog, values: List[str], parameters: Optional[Dict[Uni
                            extensions=log.extensions, classifiers=log.classifiers,
                            omni_present=log.omni_present, properties=log.properties)

    filtered_log = log_converter.apply(stream, parameters=conversion_parameters)

    return filtered_log

