From 93399e338924b16664c47722910a4c66aaf8e605 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Wed, 13 Sep 2023 15:38:52 +0200 Subject: [PATCH 1/4] Fix GPU clustering Commit #6554009 messed up clustering on the GPU, which requires that for all array ops, both arrays are on the same device. Fix this. --- vamb/cluster.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/vamb/cluster.py b/vamb/cluster.py index 71a58c99..5e67169b 100644 --- a/vamb/cluster.py +++ b/vamb/cluster.py @@ -245,11 +245,9 @@ def _init_histogram_kept_mask(self, N: int) -> tuple[_Tensor, _Tensor]: kept_mask = _torch.ones(N, dtype=_torch.bool) if self.cuda: - histogram = _torch.empty(_ceil(_XMAX / _DELTA_X), dtype=_torch.float).cuda() kept_mask = kept_mask.cuda() - else: - histogram = _torch.empty(_ceil(_XMAX / _DELTA_X)) + histogram = _torch.empty(_ceil(_XMAX / _DELTA_X)) return histogram, kept_mask def __init__( @@ -279,8 +277,10 @@ def __init__( _normalize(torch_matrix, inplace=True) # Move to GPU + torch_lengths = _torch.Tensor(lengths) if cuda: torch_matrix = torch_matrix.cuda() + torch_lengths = torch_lengths.cuda() self.maxsteps: int = maxsteps self.minsuccesses: int = minsuccesses @@ -293,7 +293,7 @@ def __init__( self.indices = _torch.arange(len(matrix)) self.order = _np.argsort(lengths)[::-1] self.order_index = 0 - self.lengths = _torch.Tensor(lengths) + self.lengths = torch_lengths self.n_emitted_clusters = 0 self.n_remaining_points = len(torch_matrix) self.peak_valley_ratio = 0.1 @@ -330,13 +330,16 @@ def __next__(self) -> Cluster: def pack(self): "Remove all used points from the matrix and indices, and reset kept_mask." if self.cuda: + cpu_kept_mask = self.kept_mask.cpu() self.matrix = _vambtools.torch_inplace_maskarray( - self.matrix.cpu(), self.kept_mask + self.matrix.cpu(), cpu_kept_mask ).cuda() + self.indices = self.indices[cpu_kept_mask] + else: _vambtools.torch_inplace_maskarray(self.matrix, self.kept_mask) + self.indices = self.indices[self.kept_mask] - self.indices = self.indices[self.kept_mask] self.lengths = self.lengths[self.kept_mask] self.kept_mask.resize_(len(self.matrix)) self.kept_mask[:] = 1 @@ -475,15 +478,17 @@ def find_threshold( # We need to make a histogram of only the unclustered distances - when run on GPU # these have not been removed and we must use the kept_mask if self.cuda: - picked_distances = distances[self.kept_mask] + picked_distances = distances[self.kept_mask].cpu() + picked_lengths = self.lengths[self.kept_mask].cpu() else: picked_distances = distances + picked_lengths = self.lengths _torch.histogram( input=picked_distances, bins=len(self.histogram), range=(0.0, _XMAX), out=((self.histogram, self.histogram_edges)), - weight=self.lengths, + weight=picked_lengths, ) # TODO: Decide: Should we remove the self point? This might create an invalid initial peak. # On the other hand, if it's large, the peak is valid... @@ -669,8 +674,17 @@ def _sample_medoid( """ distances = _calc_distances(matrix, medoid) - cluster = _smaller_indices(distances, kept_mask, threshold, cuda) - local_density = (lengths[cluster] * (threshold - distances[cluster])).sum().item() + + if cuda: + within_threshold = (distances <= threshold) & kept_mask + cluster = _torch.nonzero(within_threshold).flatten().cpu() + else: + within_threshold = distances.numpy() <= threshold + cluster = _torch.from_numpy(within_threshold.nonzero()[0]) + + closeness = threshold - distances[within_threshold] + local_density = (lengths[within_threshold] * closeness).sum().item() + return cluster, distances, local_density From d39bc34e23e6ccca668345627e2ab87a4283080f Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Wed, 13 Sep 2023 16:35:29 +0200 Subject: [PATCH 2/4] Reduce data movement to CPU --- vamb/cluster.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/vamb/cluster.py b/vamb/cluster.py index 5e67169b..694d5b4f 100644 --- a/vamb/cluster.py +++ b/vamb/cluster.py @@ -478,11 +478,18 @@ def find_threshold( # We need to make a histogram of only the unclustered distances - when run on GPU # these have not been removed and we must use the kept_mask if self.cuda: - picked_distances = distances[self.kept_mask].cpu() - picked_lengths = self.lengths[self.kept_mask].cpu() + below_xmax = (distances <= _XMAX) & self.kept_mask + picked_distances = distances[below_xmax].cpu() + picked_lengths = self.lengths[below_xmax].cpu() else: - picked_distances = distances - picked_lengths = self.lengths + below_xmax = (distances <= _XMAX) + picked_distances = distances[below_xmax] + picked_lengths = self.lengths[below_xmax] + + # TODO: https://github.com/pytorch/pytorch/issues/69519 + # Currently, this function does not run on GPU. This means we must + # copy over the lengths and distances to CPU each time, which is very slow. + # If the issue is resolved, there can be large speedups on GPU _torch.histogram( input=picked_distances, bins=len(self.histogram), @@ -490,8 +497,6 @@ def find_threshold( out=((self.histogram, self.histogram_edges)), weight=picked_lengths, ) - # TODO: Decide: Should we remove the self point? This might create an invalid initial peak. - # On the other hand, if it's large, the peak is valid... # When the peak_valley_ratio is too high, we need to return something to not get caught # in an infinite loop. From b621f54193a84061ec4ab4b8e971291c839be598 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Thu, 14 Sep 2023 09:49:02 +0200 Subject: [PATCH 3/4] Format --- vamb/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vamb/cluster.py b/vamb/cluster.py index 694d5b4f..18c18c2e 100644 --- a/vamb/cluster.py +++ b/vamb/cluster.py @@ -482,7 +482,7 @@ def find_threshold( picked_distances = distances[below_xmax].cpu() picked_lengths = self.lengths[below_xmax].cpu() else: - below_xmax = (distances <= _XMAX) + below_xmax = distances <= _XMAX picked_distances = distances[below_xmax] picked_lengths = self.lengths[below_xmax] From e0a86fdfdf1394fdde13e7d85d57e5d74e6137df Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Thu, 14 Sep 2023 11:25:03 +0200 Subject: [PATCH 4/4] Fix: Skip emitted medoids --- vamb/cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vamb/cluster.py b/vamb/cluster.py index 18c18c2e..32172f7a 100644 --- a/vamb/cluster.py +++ b/vamb/cluster.py @@ -384,6 +384,7 @@ def get_next_seed(self) -> int: if ( new_index >= len(self.indices) or self.indices[new_index].item() != order + or (self.cuda and not self.kept_mask[new_index].item()) ): self.order[i] = -1 continue