-
Notifications
You must be signed in to change notification settings - Fork 51
/
density.py
68 lines (54 loc) · 2.54 KB
/
density.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from sklearn.covariance import LedoitWolf
from sklearn.neighbors import KernelDensity
import torch
class Density:
    """Interface stub for density estimators working on embedding batches.

    Both methods raise ``NotImplementedError``; concrete estimators are
    expected to provide their own ``fit`` and ``predict``.
    """

    def fit(self, embeddings):
        """Fit the estimator to ``embeddings``; not implemented here."""
        raise NotImplementedError()

    def predict(self, embeddings):
        """Score ``embeddings`` with the fitted estimator; not implemented here."""
        raise NotImplementedError()
class GaussianDensityTorch(object):
    """Gaussian density estimation similar to the implementation used by Rippel et al.

    ``fit`` estimates a mean vector and a Ledoit-Wolf shrunk precision
    (inverse covariance) matrix; ``predict`` returns the Mahalanobis distance
    of each embedding to that Gaussian.

    The code of Rippel et al. can be found here:
    https://github.com/ORippler/gaussian-ad-mvtec.
    """

    def fit(self, embeddings):
        """Estimate ``self.mean`` and ``self.inv_cov`` from ``embeddings``.

        Args:
            embeddings: 2-D floating point tensor with one feature vector per
                row (the shape required by ``mahalanobis_distance``).
        """
        self.mean = torch.mean(embeddings, dim=0)
        # sklearn operates on host NumPy arrays, so give it a detached CPU copy.
        precision = LedoitWolf().fit(embeddings.detach().cpu().numpy()).precision_
        # Store the precision matrix on the same device and in the same dtype
        # as the mean. Pinning it to the CPU in the default dtype would leave
        # the two statistics incompatible for GPU or float64 embeddings.
        self.inv_cov = torch.as_tensor(
            precision, dtype=embeddings.dtype, device=embeddings.device
        )

    def predict(self, embeddings):
        """Return the Mahalanobis distance of every embedding to the fitted Gaussian.

        Args:
            embeddings: 2-D tensor of shape (batch, features).

        Returns:
            1-D tensor with one distance per row of ``embeddings``.
        """
        # Bring the query batch and the fitted statistics to one device and
        # one (promoted) dtype; the einsum below does not mix dtypes/devices.
        # ``Tensor.to`` returns the tensor itself when nothing has to change.
        common = torch.promote_types(embeddings.dtype, self.mean.dtype)
        common = torch.promote_types(common, self.inv_cov.dtype)
        device = embeddings.device
        return self.mahalanobis_distance(
            embeddings.to(dtype=common),
            self.mean.to(device=device, dtype=common),
            self.inv_cov.to(device=device, dtype=common),
        )

    @staticmethod
    def mahalanobis_distance(
        values: torch.Tensor, mean: torch.Tensor, inv_covariance: torch.Tensor
    ) -> torch.Tensor:
        """Compute the batched mahalanobis distance.

        values is a batch of feature vectors.
        mean is either the mean of the distribution to compare, or a second
        batch of feature vectors.
        inv_covariance is the inverse covariance of the target distribution.

        Returns a 1-D tensor holding one distance per row of ``values``.

        from https://github.com/ORippler/gaussian-ad-mvtec/blob/4e85fb5224eee13e8643b684c8ef15ab7d5d016e/src/gaussian/model.py#L308
        """
        # Shape checks: (batch, features) values, (features,) or
        # (batch, features) mean, square (features, features) matrix.
        assert values.dim() == 2
        assert 1 <= mean.dim() <= 2
        assert len(inv_covariance.shape) == 2
        assert values.shape[1] == mean.shape[-1]
        assert mean.shape[-1] == inv_covariance.shape[0]
        assert inv_covariance.shape[0] == inv_covariance.shape[1]

        if mean.dim() == 1:  # Distribution mean.
            mean = mean.unsqueeze(0)
        x_mu = values - mean  # batch x features

        # Same as dist = x_mu.t() * inv_covariance * x_mu batch wise
        dist = torch.einsum("im,mn,in->i", x_mu, inv_covariance, x_mu)
        return dist.sqrt()
class GaussianDensitySklearn():
    """Li et al. use sklearn for density estimation.

    This implementation uses sklearn's KernelDensity module with a Gaussian
    kernel for fitting and predicting.

    Args:
        bandwidth: kernel bandwidth forwarded to ``KernelDensity``. Defaults
            to 1, the value that used to be hard-coded in ``fit``.
    """

    def __init__(self, bandwidth=1):
        self.bandwidth = bandwidth

    def fit(self, embeddings):
        """Fit a Gaussian kernel density estimate to ``embeddings``."""
        # The bandwidth is used exactly as configured; no search or
        # cross-validation is performed here.
        self.kde = KernelDensity(
            kernel='gaussian', bandwidth=self.bandwidth
        ).fit(embeddings)

    def predict(self, embeddings):
        """Return the negated ``score_samples`` output of the fitted KDE.

        Scores are negated so they fit to the class labels for the auc
        calculation.
        """
        return -self.kde.score_samples(embeddings)