Skip to content

Commit 1912829

Browse files
authored
Merge pull request #207 from vbarrielle/faster_ldl
Faster ldl decomposition
2 parents 7078c44 + 121f1d7 commit 1912829

File tree

1 file changed

+25
-10
lines changed

1 file changed

+25
-10
lines changed

sprs-ldl/src/lib.rs

+25-10
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,8 @@ where
484484
I: SpIndex,
485485
PStorage: Deref<Target = [I]>,
486486
{
487+
assert!(y_workspace.len() == mat.outer_dims());
488+
assert!(diag.len() == mat.outer_dims());
487489
let outer_it = mat.outer_iterator_papt(perm.view());
488490
for (k, (_, vec)) in outer_it.enumerate() {
489491
// compute the nonzero pattern of the kth row of L
@@ -500,7 +502,7 @@ where
500502
y_workspace[inner_ind] = y_workspace[inner_ind] + val;
501503
let mut i = inner_ind;
502504
pattern_workspace.clear_left();
503-
while flag_workspace[i].index() != k {
505+
while flag_workspace[i].index_unchecked() != k {
504506
pattern_workspace.push_left(I::from_usize(i));
505507
flag_workspace[i] = I::from_usize(k);
506508
i = parents.get_parent(i).expect("enforced by ldl_symbolic");
@@ -513,24 +515,37 @@ where
513515
diag[k] = y_workspace[k];
514516
y_workspace[k] = N::zero();
515517
'pattern: for &i in pattern_workspace.iter_right() {
516-
let i = i.index();
518+
let i = i.index_unchecked();
517519
let yi = y_workspace[i];
518520
y_workspace[i] = N::zero();
519-
let p2 = l_colptr[i] + l_nz[i];
520-
for p in l_colptr[i].index()..p2.index() {
521+
let p2 = (l_colptr[i] + l_nz[i]).index();
522+
let r0 = l_colptr[i].index()..p2;
523+
let r1 = l_colptr[i].index()..p2; // Hack because not Copy
524+
for (y_index, lx) in l_indices[r0].iter().zip(l_data[r1].iter()) {
521525
// we cannot go inside this loop before something has actually
522526
// be written into l_indices[l_colptr[i]..p2] so this
523527
// read is actually not into garbage
524528
// actually each iteration of the 'pattern loop adds writes the
525529
// value in l_indices that will be read on the next iteration
526530
// TODO: can some design change make this fact more obvious?
527-
let y_index = l_indices[p].index();
528-
y_workspace[y_index] = y_workspace[y_index] - l_data[p] * yi;
531+
// This means we always know it will fit in an usize
532+
let y_index = y_index.index_unchecked();
533+
// Safety: `y_index` can take the values taken by `k`, so
534+
// it is in `0..mat.outer_dims()`, and we have asserted
535+
// that `y_workspace.len() == mat.outer_dims()`.
536+
unsafe {
537+
let yw = y_workspace.get_unchecked_mut(y_index);
538+
*yw = *yw - *lx * yi;
539+
}
529540
}
530-
let l_ki = yi / diag[i];
531-
diag[k] = diag[k] - l_ki * yi;
532-
l_indices[p2.index()] = I::from_usize(k);
533-
l_data[p2.index()] = l_ki;
541+
// Safety: i and k are <= `mat.outer_dims()` and we have asserted
542+
// that `diag.len() == mat.outer_dims()`.
543+
let di = *unsafe { diag.get_unchecked(i) };
544+
let dk = unsafe { diag.get_unchecked_mut(k) };
545+
let l_ki = yi / di;
546+
*dk = *dk - l_ki * yi;
547+
l_indices[p2] = I::from_usize(k);
548+
l_data[p2] = l_ki;
534549
l_nz[i] += I::one();
535550
}
536551
if diag[k] == N::zero() {

0 commit comments

Comments
 (0)