16
16
17
17
use std:: sync:: { atomic:: AtomicBool , Arc , Mutex } ;
18
18
19
- use bloom:: U64BloomFilter ;
20
19
use rustc_hash:: FxHashMap ;
21
20
22
- use super :: { DhtTable as _, Mapper , Meta , ShortestPathJob , ShortestPathTables } ;
21
+ use super :: {
22
+ updated_nodes:: { UpdatedNodes , UpdatedNodesKind } ,
23
+ worker:: ShortestPathWorker ,
24
+ DhtTable as _, Mapper , Meta , ShortestPathJob , ShortestPathTables ,
25
+ } ;
23
26
use crate :: {
24
27
ampc:: {
25
28
dht:: { U64Min , UpsertAction } ,
26
29
DhtConn ,
27
30
} ,
28
- webgraph,
31
+ webgraph:: { self , query } ,
29
32
webpage:: html:: links:: RelFlags ,
30
33
} ;
31
34
@@ -84,7 +87,7 @@ impl ShortestPathMapper {
84
87
85
88
fn map_batch (
86
89
batch : & [ webgraph:: SmallEdge ] ,
87
- new_changed_nodes : & Mutex < U64BloomFilter > ,
90
+ new_changed_nodes : & Mutex < UpdatedNodes > ,
88
91
round_had_changes : & AtomicBool ,
89
92
dht : & DhtConn < ShortestPathTables > ,
90
93
) {
@@ -93,11 +96,94 @@ impl ShortestPathMapper {
93
96
94
97
for ( node, action) in updates {
95
98
if action. is_changed ( ) {
96
- new_changed_nodes. insert_u128 ( node. as_u128 ( ) ) ;
99
+ new_changed_nodes. add ( node) ;
97
100
round_had_changes. store ( true , std:: sync:: atomic:: Ordering :: Relaxed ) ;
98
101
}
99
102
}
100
103
}
104
+
105
+ fn relax_all_edges (
106
+ worker : & ShortestPathWorker ,
107
+ changed_nodes : & UpdatedNodes ,
108
+ new_changed_nodes : & Mutex < UpdatedNodes > ,
109
+ round_had_changes : & AtomicBool ,
110
+ dht : & DhtConn < ShortestPathTables > ,
111
+ ) {
112
+ let pool = rayon:: ThreadPoolBuilder :: new ( ) . build ( ) . unwrap ( ) ;
113
+ pool. scope ( |s| {
114
+ let mut batch = Vec :: with_capacity ( BATCH_SIZE ) ;
115
+
116
+ for edge in worker. graph ( ) . page_edges ( ) {
117
+ if edge. rel_flags . intersects ( * SKIPPED_REL ) {
118
+ continue ;
119
+ }
120
+
121
+ if changed_nodes. contains ( edge. from ) {
122
+ batch. push ( edge) ;
123
+ }
124
+
125
+ if batch. len ( ) >= BATCH_SIZE {
126
+ let update_batch = batch. clone ( ) ;
127
+ s. spawn ( move |_| {
128
+ Self :: map_batch ( & update_batch, new_changed_nodes, round_had_changes, dht)
129
+ } ) ;
130
+ batch. clear ( ) ;
131
+ }
132
+ }
133
+
134
+ if !batch. is_empty ( ) {
135
+ Self :: map_batch ( & batch, new_changed_nodes, round_had_changes, dht) ;
136
+ }
137
+ } ) ;
138
+ }
139
+
140
+ fn relax_exact_edges (
141
+ worker : & ShortestPathWorker ,
142
+ changed_nodes : & UpdatedNodes ,
143
+ exact_changed_nodes : & [ webgraph:: NodeID ] ,
144
+ new_changed_nodes : & Mutex < UpdatedNodes > ,
145
+ round_had_changes : & AtomicBool ,
146
+ dht : & DhtConn < ShortestPathTables > ,
147
+ ) {
148
+ let mut batch = Vec :: with_capacity ( BATCH_SIZE ) ;
149
+
150
+ let pool = rayon:: ThreadPoolBuilder :: new ( ) . build ( ) . unwrap ( ) ;
151
+
152
+ pool. scope ( |s| {
153
+ for node in exact_changed_nodes {
154
+ for edge in worker
155
+ . graph ( )
156
+ . search ( & query:: ForwardlinksQuery :: new ( * node) )
157
+ . unwrap_or_default ( )
158
+ {
159
+ if edge. rel_flags . intersects ( * SKIPPED_REL ) {
160
+ continue ;
161
+ }
162
+
163
+ if changed_nodes. contains ( edge. from ) {
164
+ batch. push ( edge) ;
165
+ }
166
+
167
+ if batch. len ( ) >= BATCH_SIZE {
168
+ let update_batch = batch. clone ( ) ;
169
+ s. spawn ( move |_| {
170
+ Self :: map_batch (
171
+ & update_batch,
172
+ new_changed_nodes,
173
+ round_had_changes,
174
+ dht,
175
+ )
176
+ } ) ;
177
+ batch. clear ( ) ;
178
+ }
179
+ }
180
+ }
181
+ } ) ;
182
+
183
+ if !batch. is_empty ( ) {
184
+ Self :: map_batch ( & batch, new_changed_nodes, round_had_changes, dht) ;
185
+ }
186
+ }
101
187
}
102
188
103
189
impl Mapper for ShortestPathMapper {
@@ -112,47 +198,41 @@ impl Mapper for ShortestPathMapper {
112
198
match self {
113
199
ShortestPathMapper :: RelaxEdges => {
114
200
let round_had_changes = Arc :: new ( AtomicBool :: new ( false ) ) ;
115
- let pool = rayon:: ThreadPoolBuilder :: new ( ) . build ( ) . unwrap ( ) ;
116
-
117
- let new_changed_nodes = Arc :: new ( Mutex :: new ( U64BloomFilter :: empty_from (
118
- & worker. changed_nodes ( ) . lock ( ) . unwrap ( ) ,
119
- ) ) ) ;
120
-
121
- pool. scope ( |s| {
122
- let mut changed_nodes = worker. changed_nodes ( ) . lock ( ) . unwrap ( ) ;
123
- changed_nodes. insert_u128 ( job. source . as_u128 ( ) ) ;
124
-
125
- let mut batch = Vec :: with_capacity ( BATCH_SIZE ) ;
126
-
127
- for edge in worker. graph ( ) . page_edges ( ) {
128
- if edge. rel_flags . intersects ( * SKIPPED_REL ) {
129
- continue ;
130
- }
131
-
132
- if changed_nodes. contains_u128 ( edge. from . as_u128 ( ) ) {
133
- batch. push ( edge) ;
134
- }
135
-
136
- if batch. len ( ) >= BATCH_SIZE {
137
- let update_batch = batch. clone ( ) ;
138
- let update_new_changed_nodes = new_changed_nodes. clone ( ) ;
139
- let update_round_had_changes = round_had_changes. clone ( ) ;
140
- s. spawn ( move |_| {
141
- Self :: map_batch (
142
- & update_batch,
143
- & update_new_changed_nodes,
144
- & update_round_had_changes,
145
- dht,
146
- )
147
- } ) ;
148
- batch. clear ( ) ;
149
- }
150
- }
151
201
152
- if !batch. is_empty ( ) {
153
- Self :: map_batch ( & batch, & new_changed_nodes, & round_had_changes, dht) ;
202
+ let mut changed_nodes = worker. changed_nodes ( ) . lock ( ) . unwrap ( ) ;
203
+ changed_nodes. add ( job. source ) ;
204
+
205
+ let new_changed_nodes =
206
+ Arc :: new ( Mutex :: new ( UpdatedNodes :: empty_from ( & changed_nodes) ) ) ;
207
+
208
+ match changed_nodes. kind ( ) {
209
+ UpdatedNodesKind :: Exact => {
210
+ let exact_changed_nodes: Vec < _ > = changed_nodes
211
+ . as_exact ( )
212
+ . unwrap ( )
213
+ . clone ( )
214
+ . into_iter ( )
215
+ . collect ( ) ;
216
+
217
+ Self :: relax_exact_edges (
218
+ worker,
219
+ & changed_nodes,
220
+ & exact_changed_nodes,
221
+ & new_changed_nodes,
222
+ & round_had_changes,
223
+ dht,
224
+ ) ;
154
225
}
155
- } ) ;
226
+ UpdatedNodesKind :: Sketch => {
227
+ Self :: relax_all_edges (
228
+ worker,
229
+ & changed_nodes,
230
+ & new_changed_nodes,
231
+ & round_had_changes,
232
+ dht,
233
+ ) ;
234
+ }
235
+ }
156
236
157
237
dht. next ( )
158
238
. changed_nodes
@@ -169,10 +249,10 @@ impl Mapper for ShortestPathMapper {
169
249
let all_changed_nodes: Vec < _ > =
170
250
dht. next ( ) . changed_nodes . iter ( ) . map ( |( _, v) | v) . collect ( ) ;
171
251
let mut changed_nodes =
172
- U64BloomFilter :: empty_from ( & worker. changed_nodes ( ) . lock ( ) . unwrap ( ) ) ;
252
+ UpdatedNodes :: empty_from ( & worker. changed_nodes ( ) . lock ( ) . unwrap ( ) ) ;
173
253
174
- for bloom in all_changed_nodes {
175
- changed_nodes. union ( bloom . clone ( ) ) ;
254
+ for other in & all_changed_nodes {
255
+ changed_nodes = changed_nodes . union ( other ) ;
176
256
}
177
257
178
258
* worker. changed_nodes ( ) . lock ( ) . unwrap ( ) = changed_nodes;
0 commit comments