-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtij.py
380 lines (308 loc) · 14.1 KB
/
tij.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
def conversion(path):
"""
Converts a tij.dat file into a np.array.
:param path: path of the tij.dat file
:type path: str
:return: np.array of the tij data
:rtype: np.array
"""
df = pd.read_csv(path, sep='\t')
tij_array = df.to_numpy()
return tij_array
def unique(ar):
"""
This function gives each unique value of an array and the number of occurrence of the value
:param ar: Array that is studied
:type ar: np.array
:return: Unique values of ar and the number of occurrences
:rtype: tuple of np.array
"""
values, counts = np.unique(ar, return_counts=True)
return values, counts
def common(ar1, ar2):
"""
This functions returns the common rows of ar1 and ar2
:param ar1: First array
:type ar1: np.array
:param ar2: Second array
:type ar2: np.array
:return: array of common rows
:rtype: np.array
"""
common_array = np.array([x for x in set(tuple(x) for x in ar1) & set(tuple(x) for x in ar2)])
return common_array
def lost(ar1, ar2):
"""
This function finds the rows that are in ar1 but not in ar2. These rows are called the lost rows.
:param ar1: First array
:type ar1: np.array
:param ar2: Second array
:type ar2: np.array
:return: array of the lost rows
:rtype: np.array
"""
set1 = {tuple(x) for x in ar1}
set2 = {tuple(x) for x in ar2}
lost_set = (set1 ^ set2) & set1
if len(lost_set) != 0:
lost_array = np.array(list(lost_set))
else:
lost_array = np.empty((0, 2), dtype=int)
return lost_array
def new(ar1, ar2):
"""
This function finds the rows that are in ar2 but not in ar1. These rows are called the new rows.
:param ar1: First array
:type ar1: np.array
:param ar2: Second array
:type ar2: np.array
:return: array of the lost rows
:rtype: np.array
"""
set1 = {tuple(x) for x in ar1}
set2 = {tuple(x) for x in ar2}
new_set = set2 - set1
if len(new_set) != 0:
new_array = np.array(list(new_set))
else:
new_array = np.empty((0, 2), dtype=int)
return new_array
def add_time(time, couples, timeline_array):
"""
This function adds
:param time:
:param couples:
:param timeline_array:
:return:
"""
for elt in couples:
i = elt[0]
j = elt[1]
if i < j:
timeline_array[i, j].append(time)
else:
timeline_array[j, i].append(time)
return timeline_array
def timeline(tij_array, dt):
"""
This function returns an array of timelines of interactions between all the particles. A timeline between particle
i and j has the following form [t1, t2, t3, t4 ...] with all the odd elements the time of the beginning of an
interaction and all the even elements the time of the end of an interaction. As the interaction between i and j is
strictly the same as the interaction between j and i the array should be symmetric, with all the diagonal elements
equal to 0 (no interaction between i and i). In our case the array is strictly upper triangular (no need to keep in
memory all the elements).
:param tij_array: Array of the tij elements, that are needed to create the timeline array
:type tij_array: np.array
:param dt: Increment of time for each step
:type dt: float or int
:return: Array of timelines.
:rtype: np.array of lists
"""
time_array, counts = unique(tij_array[:, 0])
ij_array = tij_array[:, 1:]
ij_array = np.int64(ij_array)
i_min = np.min(ij_array)
i_max = np.max(ij_array)
ij_array = ij_array - i_min
timeline_size = (i_max - i_min + 1,) * 2
timeline_array = np.frompyfunc(list, 0, 1)(np.empty(timeline_size, dtype=object))
count = counts[0]
couples = ij_array[0:count]
old_time = time_array[0]
timeline_array = add_time(old_time, couples, timeline_array)
for step, time in enumerate(time_array[1:]):
if time - old_time > dt:
timeline_array = add_time(old_time + dt, couples, timeline_array)
couples = []
new_count = count + counts[step + 1]
couples1 = ij_array[count: new_count, :]
new_couples = new(couples, couples1)
lost_couples = lost(couples, couples1)
if new_couples.size > 0:
timeline_array = add_time(time, new_couples, timeline_array)
if lost_couples.size > 0:
timeline_array = add_time(old_time + dt, lost_couples, timeline_array)
couples = couples1
count = new_count
old_time = time
return timeline_array
def quantities_calculator(timeline_array, dec=1):
"""
Calculates 4 different quantities - contact time, inter-contact time, number of contacts and weight - that are
needed to compare and validate different models with real data.
:param timeline_array: Array of timelines.
:type timeline_array: np.array of lists
:param dec: decimals to which we around the quantities. Default is equal to 1
:type dec: int, optional
"""
contact_time_array = []
inter_contact_time_array = []
number_contact_array = []
link_weight_array = []
for elt in timeline_array:
for elt1 in elt:
if len(elt1) % 2 == 1:
elt1.pop()
if len(elt1) > 0:
number_contact_array.append(len(elt1) // 2)
contact_time = [b - a for a, b in tuple(zip(elt1, elt1[1:]))[::2]]
contact_time_array.extend(contact_time)
link_weight_array.append(sum(contact_time))
inter_contact_time = [b - a for a, b in tuple(zip(elt1[1:], elt1[2:]))[::2]]
inter_contact_time_array.extend(inter_contact_time)
contact_time_array, inter_contact_time_array = np.array(contact_time_array), np.array(inter_contact_time_array)
number_contact_array, link_weight_array = np.array(number_contact_array, dtype=int), np.array(link_weight_array)
contact_time_array = np.around(contact_time_array, decimals=dec)
inter_contact_time_array = np.around(inter_contact_time_array, decimals=dec)
link_weight_array = np.around(link_weight_array, decimals=dec)
return contact_time_array, inter_contact_time_array, number_contact_array, link_weight_array
def regroup_data(ar):
"""
This function regroups the quantities with the same value and calculates the number of occurrence of the value.
The results are then put in a array where for all i, the first element of row i is value i and the second element
of row i is its number of occurrences.
:param ar: Array of all the values, of shape (n, )
:type ar: np.array
:return: array of shape (n', 2) of values and counts
:rtype: np.array
"""
values, counts = unique(ar)
return np.concatenate((values.reshape((-1, 1)), counts.reshape((-1, 1))), axis=1)
def representation(quantities, title, scale='linear'):
"""
Represents 4 different quantities - contact time, inter-contact time, number of contacts and weight - in histograms.
:param quantities: tuple of the 4 quantities that are represented
:type quantities: tuple of np.arrays
:param title: Title of the figure
:type title: str
:param scale: Scale of the plot. Can be 'linear' (default), 'log' or 'semi-log'
:type scale: str, optional
"""
fig = make_subplots(rows=2, cols=2)
index = [[1, 1], [1, 2], [2, 1], [2, 2]]
if scale == 'log':
scale_x, scale_y = scale, scale
elif scale == 'linear':
scale_x, scale_y = scale, scale
else:
scale_x, scale_y = 'linear', 'log'
# Update xaxis properties
fig.update_xaxes(title_text="Contact duration", type=scale_x, row=1, col=1)
fig.update_xaxes(title_text="Intercontact duration", type=scale_x, row=1, col=2)
fig.update_xaxes(title_text="Number of contacts", type=scale_x, row=2, col=1)
fig.update_xaxes(title_text="weight", type=scale_x, row=2, col=2)
# Update yaxis properties
fig.update_yaxes(title_text="Distribution of contact duration", type=scale_y, row=1, col=1)
fig.update_yaxes(title_text="Distribution of intercontact duration", type=scale_y, row=1, col=2)
fig.update_yaxes(title_text="Distribution of number of contacts", type=scale_y, row=2, col=1)
fig.update_yaxes(title_text="Distribution of weight", type=scale_y, row=2, col=2)
for i, data in enumerate(quantities):
a = index[i][0]
b = index[i][1]
if scale == 'log':
counts, bins = np.histogram(data, bins=np.logspace(np.log10(np.min(data - 0.5)),
np.log10(np.max(data + 0.5))), density=True)
else:
counts, bins = np.histogram(data, bins='auto', density=True)
bins = 0.5 * (bins[:-1] + bins[1:])
fig.add_trace(go.Scatter(x=bins, y=counts, mode='markers', showlegend=False), row=a, col=b)
fig.show()
def make_hist(quantities, title, scale='linear'):
"""
Represents 4 different quantities - contact time, inter-contact time, number of contacts and weight - in histograms.
:param quantities: tuple of the 4 quantities that are represented
:type quantities: tuple of np.arrays
:param title: Title of the figure
:type title: str
:param scale: Scale of the plot. Can be 'linear' (default), 'log' or 'semi-log'
:type scale: str, optional
"""
fig = make_subplots(rows=2, cols=2)
index = [[1, 1], [1, 2], [2, 1], [2, 2]]
if scale == 'log':
scale_x, scale_y = scale, scale
elif scale == 'linear':
scale_x, scale_y = scale, scale
else:
scale_x, scale_y = 'linear', 'log'
# Update x axis properties
fig.update_xaxes(title_text="Contact duration", type=scale_x, row=1, col=1)
fig.update_xaxes(title_text="Inter contact duration", type=scale_x, row=1, col=2)
fig.update_xaxes(title_text="Number of contacts", type=scale_x, row=2, col=1)
fig.update_xaxes(title_text="weight", type=scale_x, row=2, col=2)
# Update y axis properties
fig.update_yaxes(title_text="Contact duration distribution", type=scale_y, row=1, col=1)
fig.update_yaxes(title_text="Inter contact duration distribution", type=scale_y, row=1, col=2)
fig.update_yaxes(title_text="Number of contacts distribution", type=scale_y, row=2, col=1)
fig.update_yaxes(title_text="Weight distribution", type=scale_y, row=2, col=2)
for i, data in enumerate(quantities):
a = index[i][0]
b = index[i][1]
if scale == 'log':
counts, bins = np.histogram(data, bins=np.logspace(np.log10(min(data - 0.5)), np.log10(max(data + 0.5))),
density=True)
else:
counts, bins = np.histogram(data, bins='auto', density=True)
bins = 0.5 * (bins[:-1] + bins[1:])
fig.add_trace(go.Histogram(x=bins, y=counts, showlegend=False), row=a, col=b)
fig.show()
def compare_quantities(quantities_array, label_array, title='Comparison tij data', scale='linear'):
fig = make_subplots(rows=2, cols=2)
index = [[1, 1], [1, 2], [2, 1], [2, 2]]
colors = ['rgb(31, 119, 180)', 'rgb(255, 127, 14)',
'rgb(44, 160, 44)', 'rgb(214, 39, 40)',
'rgb(148, 103, 189)', 'rgb(140, 86, 75)',
'rgb(227, 119, 194)', 'rgb(127, 127, 127)',
'rgb(188, 189, 34)', 'rgb(23, 190, 207)']
markers = ['star-triangle-up', 'circle', 'x', 'diamond']
if scale == 'log':
scale_x, scale_y = scale, scale
elif scale == 'linear':
scale_x, scale_y = scale, scale
else:
scale_x, scale_y = 'linear', 'log'
# Update xaxis properties
fig.update_xaxes(title_text="Contact duration", type=scale_x, row=1, col=1)
fig.update_xaxes(title_text="Intercontact duration", type=scale_x, row=1, col=2)
fig.update_xaxes(title_text="Number of contacts", type=scale_x, row=2, col=1)
fig.update_xaxes(title_text="weight", type=scale_x, row=2, col=2)
# Update yaxis properties
fig.update_yaxes(title_text="Contact duration distribution", type=scale_y, row=1, col=1)
fig.update_yaxes(title_text="Inter contact duration distribution", type=scale_y, row=1, col=2)
fig.update_yaxes(title_text="Number of contacts distribution", type=scale_y, row=2, col=1)
fig.update_yaxes(title_text="Weight distribution", type=scale_y, row=2, col=2)
for j, data in reversed(list(enumerate(quantities_array))):
data_label = label_array[j]
for i in range(4):
a = index[i][0]
b = index[i][1]
data = quantities_array[j][i]
if scale == 'log':
counts, bins = np.histogram(data, bins=np.logspace(np.log10(np.min(data - 0.5)),
np.log10(np.max(data + 0.5))), density=True)
else:
counts, bins = np.histogram(data, bins='auto', density=True)
bins = np.array([(elt + bins[i + 1]) / 2 for i, elt in enumerate(bins[:-1])])
non_null_index = np.where(counts != 0)[0]
bins, counts = bins[non_null_index], counts[non_null_index]
if j == 0:
if i == 0:
fig.add_trace(
go.Scatter(x=bins, y=counts, mode='lines', marker={'color': colors[j]}, fillcolor=colors[j],
name=data_label), row=a, col=b)
else:
fig.add_trace(
go.Scatter(x=bins, y=counts, mode='lines', marker={'color': colors[j]}, fillcolor=colors[j],
name=data_label, showlegend=False), row=a, col=b)
else:
if i == 0:
fig.add_trace(go.Scatter(x=bins, y=counts, marker={'color': colors[j], 'symbol': markers[j - 1]},
name=data_label, mode='markers'), row=a, col=b)
else:
fig.add_trace(go.Scatter(x=bins, y=counts, marker={'color': colors[j], 'symbol': markers[j - 1]},
name=data_label, mode='markers', showlegend=False), row=a, col=b)
fig.show()