-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathResultVisualizer.py
163 lines (144 loc) · 6.34 KB
/
ResultVisualizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
from ErrorAnalyzer import ErrorAnalyzer
import pandas as pd
from utils import create_heatmap
class ResultVisualizer:
    """
    Class to visualize or print the results of the ErrorAnalyzer with different plots.

    The ``plot_*`` methods draw the analyzer's results (pandas bar plots or
    heatmaps via ``create_heatmap``); the ``print_*`` methods write a textual
    report to stdout. No method returns a value.
    """

    # Heatmap tick labels are cut to this many characters to stay readable.
    _LABEL_LENGTH = 20

    def __init__(self, analyzer: ErrorAnalyzer):
        """
        Remember the analyzer whose results are visualized.

        :param analyzer: object every plot/print method reads its data from.
        """
        self.analyzer: ErrorAnalyzer = analyzer

    def print_general_information(self):
        """
        Print information about the number of errors in the different categories.

        The analyzer is expected to return the three counts in the order
        (graalpy, both, cpython).
        """
        (
            graalpy_error_count,
            both_error_count,
            cpython_error_count,
        ) = self.analyzer.general_information()
        print(f"Graalpy error count: {graalpy_error_count}")
        print(f"Both error count: {both_error_count}")
        print(f"Cpython error count: {cpython_error_count}")

    def plot_hist_error_types(self):
        """
        Plot a histogram about the distribution of the error types.
        """
        # rot=0.0 keeps the tick labels horizontal for this plot.
        self._plot_counts(self.analyzer.count_error_types(), "Error types", rot=0.0)

    def print_top_error_types(self, top=10):
        """
        Print the top error types.

        :param top: maximum number of entries to print.
        """
        print(f"--- TOP {top} ERROR TYPES ---")
        self._print_top_error(self.analyzer.count_error_types(), top)

    def plot_hist_error_messages(self, minimum=0):
        """
        Plot a histogram about the distribution of the error messages.

        :param minimum: messages that occur fewer times than this are left out.
        """
        counts = self._at_least(self.analyzer.count_error_messages(), minimum)
        self._plot_counts(counts, "Error messages")

    def print_top_error_messages(self, top=10):
        """
        Print the top error messages.

        :param top: maximum number of entries to print.
        """
        print(f"--- TOP {top} ERROR MESSAGES ---")
        self._print_top_error(self.analyzer.count_error_messages(), top)

    def plot_hist_last_stacktrace_lines(self, minimum=0):
        """
        Plot a histogram about the distribution of the last stacktrace lines.

        :param minimum: lines that occur fewer times than this are left out.
        """
        counts = self._at_least(self.analyzer.count_last_lines(), minimum)
        self._plot_counts(counts, "Last lines")

    def print_top_error_last_stacktrace_lines(self, top=10):
        """
        Print the top last stacktrace lines.

        :param top: maximum number of entries to print.
        """
        print(f"--- TOP {top} LAST LINES ---")
        self._print_top_error(self.analyzer.count_last_lines(), top)

    def plot_hist_packages(self):
        """
        Plot a histogram about the distribution of the packages.
        """
        self._plot_counts(self.analyzer.count_packages(), "Packages")

    def print_everything(self):
        """
        Print all graalpy error documents, each followed by a blank line.
        """
        for error_document in self.analyzer.graalpy_error_documents:
            print(error_document)
            print()

    def plot_tfidf_error_messages(self):
        """
        Plot a heatmap about the similarity of the error messages.
        """
        errors, similarity = self.analyzer.calculate_similarity_messages()
        self._plot_similarity(errors, similarity, "TF-IDF Error Messages")

    def plot_tfidf_error_stacktraces(self):
        """
        Plot a heatmap about the similarity of the complete stacktraces.
        """
        errors, similarity = self.analyzer.calculate_similarity_stacktraces()
        self._plot_similarity(errors, similarity, "TF-IDF Stacktraces")

    def plot_tfidf_last_stacktrace_lines(self):
        """
        Plot a heatmap about the similarity of the last stacktraces lines.
        """
        errors, similarity = self.analyzer.calculate_similarity_last_stacktrace_lines()
        self._plot_similarity(errors, similarity, "TF-IDF Last Stacktrace Line")

    def print_tfidf_error_stacktraces(self, bottom_limit, top_limit, top=10):
        """
        Print the top similar stacktraces.

        :param bottom_limit: exclusive lower bound on the reported similarity.
        :param top_limit: exclusive upper bound on the reported similarity.
        :param top: maximum number of pairs to print.
        """
        print("--- SIMILAR ERROR STACKTRACES ---")
        errors, similarity = self.analyzer.calculate_similarity_stacktraces()
        self._print_tfidf(similarity, errors, bottom_limit, top_limit, top)

    def print_tfidf_last_stacktrace_lines(self, bottom_limit, top_limit, top=10):
        """
        Print the top similar last stacktraces lines.

        :param bottom_limit: exclusive lower bound on the reported similarity.
        :param top_limit: exclusive upper bound on the reported similarity.
        :param top: maximum number of pairs to print.
        """
        print("--- SIMILAR LAST STACKTRACE LINE ---")
        errors, similarity = self.analyzer.calculate_similarity_last_stacktrace_lines()
        self._print_tfidf(similarity, errors, bottom_limit, top_limit, top)

    def _print_tfidf(self, similarity, errors, bottom_limit, top_limit, top):
        """
        Helpful function to print the top similar stacktraces or last stacktrace lines.

        Every unordered pair of distinct errors is considered exactly once.
        Pairs whose similarity lies strictly between the two limits are
        printed, most similar first, up to ``top`` pairs.

        :param similarity: 2-D indexable where ``similarity[i][j]`` scores
            ``errors[i]`` against ``errors[j]``. Assumed to be symmetric;
            TODO confirm against ErrorAnalyzer.
        :param errors: error texts, indexed like the rows of ``similarity``.
        :param bottom_limit: exclusive lower bound on the similarity.
        :param top_limit: exclusive upper bound on the similarity.
        :param top: maximum number of pairs to print.
        """
        interesting_pairs = []
        for i, row in enumerate(similarity):
            # Start right of the diagonal: skips the self-comparison (i, i)
            # and the mirrored duplicate (j, i) of every pair.
            for j in range(i + 1, len(row)):
                if bottom_limit < row[j] < top_limit:
                    interesting_pairs.append((i, j))

        # Sort by similarity, most similar pair first.
        ranked_pairs = sorted(
            interesting_pairs,
            key=lambda pair: similarity[pair[0]][pair[1]],
            reverse=True,
        )
        for i, j in ranked_pairs[:top]:
            print(f"Similarity: {similarity[i][j]}")
            print(f"Error 1: {errors[i]}")
            print("--------------------")
            print(f"Error 2: {errors[j]}")
            print("####################")

    def _print_top_error(self, error_counts, top):
        """
        Helpful function to print the top error types, messages or last stacktrace lines.

        :param error_counts: object offering ``most_common(n)``, e.g. a
            ``collections.Counter``.
        :param top: maximum number of entries to print.
        """
        for error, count in error_counts.most_common(top):
            print(f"#{count} - {error}")

    @staticmethod
    def _at_least(counts, minimum):
        """
        Return a plain dict with the entries of ``counts`` whose count is >= ``minimum``.
        """
        return {key: count for key, count in counts.items() if count >= minimum}

    @staticmethod
    def _plot_counts(counts, title, **plot_kwargs):
        """
        Draw a bar plot with one bar per key of ``counts``.

        :param counts: mapping from label to count.
        :param title: title of the plot.
        :param plot_kwargs: extra keyword arguments passed to ``DataFrame.plot``.
        """
        frame = pd.DataFrame.from_dict(counts, orient="index")
        frame.plot(subplots=True, kind="bar", title=title, **plot_kwargs)

    def _plot_similarity(self, errors, similarity, title):
        """
        Draw a similarity heatmap, labelling the axes with shortened error texts.

        :param errors: error texts; each is cut to ``_LABEL_LENGTH`` characters.
        :param similarity: similarity matrix passed through to ``create_heatmap``.
        :param title: title of the heatmap.
        """
        labels = [error[: self._LABEL_LENGTH] for error in errors]
        create_heatmap(similarity, labels, title)