
Commit 8ec13c1

Included evaluation metrics based on cosine similarities - see metrics folder and README.md
1 parent 4df1292 commit 8ec13c1

11 files changed: +313 -9 lines changed

README.md

+10 -2

@@ -240,11 +240,19 @@ For a detailed explanation of everything, please refer to the supplementary of o
 ### Evaluation Metrics
-
+**Metrics based on Euclidean Distances**
 * **Recall@k**: Include R@1 e.g. with `e_recall@1` into the list of evaluation metrics `--evaluation_metrics`.
 * **Normalized Mutual Information (NMI)**: Include with `nmi`.
-* **F1**: include with `nmi`.
+* **F1**: Include with `f1`.
 * **mAP (class-averaged)**: Include standard mAP at Recall with `mAP_lim`. You may also include `mAP_1000` for mAP limited to Recall@1000, and `mAP_c` limited to mAP at Recall@Max_Num_Samples_Per_Class. Note that all of these are heavily correlated.
+
+**Metrics based on Cosine Similarities** *(not included by default)*
+* **Cosine Recall@k**: Cosine-similarity variant of Recall@k. Include with `c_recall@k` in `--evaluation_metrics`.
+* **Cosine Normalized Mutual Information (NMI)**: Include with `c_nmi`.
+* **Cosine F1**: Include with `c_f1`.
+* **Cosine mAP (class-averaged)**: Include cosine-similarity mAP at Recall variants with `c_mAP_lim`. You may also include `c_mAP_1000` for mAP limited to Recall@1000, and `c_mAP_c` limited to mAP at Recall@Max_Num_Samples_Per_Class.
+
+**Embedding Space Metrics**
 * **Spectral Variance**: This metric refers to the spectral decay metric used in our ICML paper. Include it with `rho_spectrum@1`. To exclude the `k` largest spectral values for a more robust estimate, simply include `rho_spectrum@k+1`. Adding `rho_spectrum@0` logs the whole singular value distribution, and `rho_spectrum@-1` computes KL(q,p) instead of KL(p,q).
 * **Mean Intraclass Distance**: Include the mean intraclass distance via `dists@intra`.
 * **Mean Interclass Distance**: Include the mean interclass distance via `dists@inter`.
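For context, a typical call that mixes the default Euclidean metrics with the new cosine variants could look like the line below. Only `--evaluation_metrics` and the metric names come from the README above; the script name `main.py` and the `--dataset` flag are assumptions about the surrounding repository, not part of this commit.

python main.py --dataset cub200 --evaluation_metrics e_recall@1 nmi f1 mAP_lim c_recall@1 c_nmi c_f1 c_mAP_lim dists@intra dists@inter rho_spectrum@1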

metrics/__init__.py

+72 -4

@@ -1,13 +1,16 @@
-from metrics import e_recall, dists, rho_spectrum
-from metrics import nmi, f1, mAP, mAP_c, mAP_1000, mAP_lim
+from metrics import e_recall, nmi, f1, mAP, mAP_c, mAP_1000, mAP_lim
+from metrics import dists, rho_spectrum
+from metrics import c_recall, c_nmi, c_f1, c_mAP_c, c_mAP_1000, c_mAP_lim
 import numpy as np
 import faiss
 import torch
+from sklearn.preprocessing import normalize
 from tqdm import tqdm
 import copy
 
 
 def select(metricname, opt):
+    #### Metrics based on euclidean distances
     if 'e_recall' in metricname:
         k = int(metricname.split('@')[-1])
         return e_recall.Metric(k)
@@ -23,6 +26,25 @@ def select(metricname, opt):
         return mAP_1000.Metric()
     elif metricname=='f1':
         return f1.Metric()
+
+    #### Metrics based on cosine similarity
+    elif 'c_recall' in metricname:
+        k = int(metricname.split('@')[-1])
+        return c_recall.Metric(k)
+    elif metricname=='c_nmi':
+        return c_nmi.Metric()
+    elif metricname=='c_mAP':
+        return c_mAP.Metric()
+    elif metricname=='c_mAP_c':
+        return c_mAP_c.Metric()
+    elif metricname=='c_mAP_lim':
+        return c_mAP_lim.Metric()
+    elif metricname=='c_mAP_1000':
+        return c_mAP_1000.Metric()
+    elif metricname=='c_f1':
+        return c_f1.Metric()
+
+    #### Generic Embedding space metrics
     elif 'dists' in metricname:
         mode = metricname.split('@')[-1]
         return dists.Metric(mode)
@@ -91,9 +113,12 @@ def compute_standard(self, opt, model, dataloader, evaltypes, device, **kwargs):
 
         import time
         for evaltype in evaltypes:
-            features = np.vstack(feature_colls[evaltype]).astype('float32')
+            features = np.vstack(feature_colls[evaltype]).astype('float32')
+            features_cosine = normalize(features, axis=1)
 
             start = time.time()
+
+            """============ Compute k-Means ==============="""
             if 'kmeans' in self.requires:
                 ### Set CPU Cluster index
                 cluster_idx = faiss.IndexFlatL2(features.shape[-1])
@@ -106,13 +131,36 @@ def compute_standard(self, opt, model, dataloader, evaltypes, device, **kwargs):
                 kmeans.train(features, cluster_idx)
                 centroids = faiss.vector_float_to_array(kmeans.centroids).reshape(n_classes, features.shape[-1])
 
+            if 'kmeans_cosine' in self.requires:
+                ### Set CPU Cluster index
+                cluster_idx = faiss.IndexFlatL2(features_cosine.shape[-1])
+                if res is not None: cluster_idx = faiss.index_cpu_to_gpu(res, 0, cluster_idx)
+                kmeans = faiss.Clustering(features_cosine.shape[-1], n_classes)
+                kmeans.niter = 20
+                kmeans.min_points_per_centroid = 1
+                kmeans.max_points_per_centroid = 1000000000
+                ### Train Kmeans
+                kmeans.train(features_cosine, cluster_idx)
+                centroids_cosine = faiss.vector_float_to_array(kmeans.centroids).reshape(n_classes, features_cosine.shape[-1])
+                ### Project the cosine-space centroids back onto the unit sphere
+                centroids_cosine = normalize(centroids_cosine, axis=1)
 
+            """============ Compute Cluster Labels ==============="""
             if 'kmeans_nearest' in self.requires:
                 faiss_search_index = faiss.IndexFlatL2(centroids.shape[-1])
                 if res is not None: faiss_search_index = faiss.index_cpu_to_gpu(res, 0, faiss_search_index)
                 faiss_search_index.add(centroids)
                 _, computed_cluster_labels = faiss_search_index.search(features, 1)
 
+            if 'kmeans_nearest_cosine' in self.requires:
+                faiss_search_index = faiss.IndexFlatIP(centroids_cosine.shape[-1])
+                if res is not None: faiss_search_index = faiss.index_cpu_to_gpu(res, 0, faiss_search_index)
+                faiss_search_index.add(centroids_cosine)
+                _, computed_cluster_labels_cosine = faiss_search_index.search(features_cosine, 1)
+
+            """============ Compute Nearest Neighbours ==============="""
             if 'nearest_features' in self.requires:
                 faiss_search_index = faiss.IndexFlatL2(features.shape[-1])
                 if res is not None: faiss_search_index = faiss.index_cpu_to_gpu(res, 0, faiss_search_index)
@@ -122,18 +170,38 @@ def compute_standard(self, opt, model, dataloader, evaltypes, device, **kwargs):
                 _, k_closest_points = faiss_search_index.search(features, int(max_kval+1))
                 k_closest_classes = target_labels.reshape(-1)[k_closest_points[:,1:]]
 
+            if 'nearest_features_cosine' in self.requires:
+                faiss_search_index = faiss.IndexFlatIP(features_cosine.shape[-1])
+                if res is not None: faiss_search_index = faiss.index_cpu_to_gpu(res, 0, faiss_search_index)
+                faiss_search_index.add(normalize(features_cosine, axis=1))
+
+                max_kval = np.max([int(x.split('@')[-1]) for x in self.metric_names if 'recall' in x])
+                _, k_closest_points_cosine = faiss_search_index.search(normalize(features_cosine, axis=1), int(max_kval+1))
+                k_closest_classes_cosine = target_labels.reshape(-1)[k_closest_points_cosine[:,1:]]
+
             ###
             if self.pars.evaluate_on_gpu:
-                features = torch.from_numpy(features).to(self.pars.device)
+                features = torch.from_numpy(features).to(self.pars.device)
+                features_cosine = torch.from_numpy(features_cosine).to(self.pars.device)
 
             start = time.time()
             for metric in self.list_of_metrics:
                 input_dict = {}
                 if 'features' in metric.requires: input_dict['features'] = features
                 if 'target_labels' in metric.requires: input_dict['target_labels'] = target_labels
+
                 if 'kmeans' in metric.requires: input_dict['centroids'] = centroids
                 if 'kmeans_nearest' in metric.requires: input_dict['computed_cluster_labels'] = computed_cluster_labels
                 if 'nearest_features' in metric.requires: input_dict['k_closest_classes'] = k_closest_classes
+
+                if 'features_cosine' in metric.requires: input_dict['features_cosine'] = features_cosine
+                if 'kmeans_cosine' in metric.requires: input_dict['centroids_cosine'] = centroids_cosine
+                if 'kmeans_nearest_cosine' in metric.requires: input_dict['computed_cluster_labels_cosine'] = computed_cluster_labels_cosine
+                if 'nearest_features_cosine' in metric.requires: input_dict['k_closest_classes_cosine'] = k_closest_classes_cosine
+
                 computed_metrics[evaltype][metric.name] = metric(**input_dict)
 
             extra_infos[evaltype] = {'features':features, 'target_labels':target_labels,
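The diff above hinges on one property: after `normalize(features, axis=1)` every row has unit length, so the inner product computed by `faiss.IndexFlatIP` equals the cosine similarity, and the same faiss machinery used for the Euclidean metrics can serve the cosine variants. A minimal standalone sketch of that equivalence (not part of the commit; synthetic data only):

import numpy as np
import faiss
from sklearn.preprocessing import normalize

features = np.random.randn(200, 128).astype('float32')
features_cosine = normalize(features, axis=1)               # unit-length rows

index = faiss.IndexFlatIP(features_cosine.shape[-1])        # inner-product index
index.add(features_cosine)
sims, nn = index.search(features_cosine, 6)                  # column 0 is the query itself

# For unit vectors, inner product == cosine similarity.
brute = features_cosine @ features_cosine.T
np.fill_diagonal(brute, -np.inf)                             # ignore self-matches
assert np.allclose(sims[:, 0], 1.0, atol=1e-4)               # self-similarity is ~1
assert np.allclose(sims[:, 1], brute.max(axis=1), atol=1e-4) # best true cosine neighbour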

metrics/c_f1.py

+92 (new file)

import numpy as np
from scipy.special import comb, binom
import torch

class Metric():
    def __init__(self, **kwargs):
        self.requires = ['kmeans_cosine', 'kmeans_nearest_cosine', 'features_cosine', 'target_labels']
        self.name = 'c_f1'

    def __call__(self, target_labels, computed_cluster_labels_cosine, features_cosine, centroids_cosine):
        import time
        start = time.time()
        if isinstance(features_cosine, torch.Tensor):
            features_cosine = features_cosine.detach().cpu().numpy()
        d = np.zeros(len(features_cosine))
        for i in range(len(features_cosine)):
            d[i] = np.linalg.norm(features_cosine[i,:] - centroids_cosine[computed_cluster_labels_cosine[i],:])

        start = time.time()
        labels_pred = np.zeros(len(features_cosine))
        for i in np.unique(computed_cluster_labels_cosine):
            index = np.where(computed_cluster_labels_cosine == i)[0]
            ind = np.argmin(d[index])
            cid = index[ind]
            labels_pred[index] = cid

        start = time.time()
        N = len(target_labels)

        # cluster n_labels
        avail_labels = np.unique(target_labels)
        n_labels = len(avail_labels)

        # count the number of objects in each cluster
        count_cluster = np.zeros(n_labels)
        for i in range(n_labels):
            count_cluster[i] = len(np.where(target_labels == avail_labels[i])[0])

        # build a mapping from item_id to item index
        keys = np.unique(labels_pred)
        num_item = len(keys)
        values = range(num_item)
        item_map = dict()
        for i in range(len(keys)):
            item_map.update([(keys[i], values[i])])

        # count the number of objects of each item
        count_item = np.zeros(num_item)
        for i in range(N):
            index = item_map[labels_pred[i]]
            count_item[index] = count_item[index] + 1

        # compute True Positive (TP) plus False Positive (FP)
        # tp_fp = 0
        tp_fp = comb(count_cluster, 2).sum()
        # for k in range(n_labels):
        #     if count_cluster[k] > 1:
        #         tp_fp = tp_fp + comb(count_cluster[k], 2)

        # compute True Positive (TP)
        tp = 0
        start = time.time()
        for k in range(n_labels):
            member = np.where(target_labels == avail_labels[k])[0]
            member_ids = labels_pred[member]
            count = np.zeros(num_item)
            for j in range(len(member)):
                index = item_map[member_ids[j]]
                count[index] = count[index] + 1
            # for i in range(num_item):
            #     if count[i] > 1:
            #         tp = tp + comb(count[i], 2)
            tp += comb(count, 2).sum()
        # False Positive (FP)
        fp = tp_fp - tp

        # Compute False Negative (FN)
        count = comb(count_item, 2).sum()
        # count = 0
        # for j in range(num_item):
        #     if count_item[j] > 1:
        #         count = count + comb(count_item[j], 2)
        fn = count - tp

        # compute F measure
        P = tp / (tp + fp)
        R = tp / (tp + fn)
        beta = 1
        F = (beta*beta + 1) * P * R / (beta*beta * P + R)
        return F
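What `c_f1.py` returns is the pairwise F-measure over co-cluster membership: a pair of samples counts as a positive when both share a ground-truth class, and the predicted side is the cosine k-means assignment (relabelled to the id of the sample closest to each centroid, which leaves the pair counts unchanged). As a hedged cross-check, not part of the commit, the same number should be reproducible with scikit-learn's pair counting, since the F-measure is symmetric in which side plays the precision or recall role (requires scikit-learn >= 0.24 for `pair_confusion_matrix`):

import numpy as np
from sklearn.metrics.cluster import pair_confusion_matrix

def pairwise_f1(labels_true, labels_pred):
    # pair_confusion_matrix counts ordered pairs, hence the division by 2.
    (tn, fp), (fn, tp) = pair_confusion_matrix(labels_true, labels_pred) / 2
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)

labels_true = np.array([0, 0, 0, 1, 1, 2, 2, 2])   # ground-truth classes
labels_pred = np.array([0, 0, 1, 1, 1, 2, 2, 0])   # (cosine) cluster assignments
print(pairwise_f1(labels_true, labels_pred))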

metrics/c_mAP_1000.py

+37 (new file)

import torch
import numpy as np
import faiss


class Metric():
    def __init__(self, **kwargs):
        self.requires = ['features_cosine', 'target_labels']
        self.name = 'c_mAP_1000'

    def __call__(self, target_labels, features_cosine):
        labels, freqs = np.unique(target_labels, return_counts=True)
        R = 1000

        faiss_search_index = faiss.IndexFlatIP(features_cosine.shape[-1])
        if isinstance(features_cosine, torch.Tensor):
            features_cosine = features_cosine.detach().cpu().numpy()
            res = faiss.StandardGpuResources()
            faiss_search_index = faiss.index_cpu_to_gpu(res, 0, faiss_search_index)
        faiss_search_index.add(features_cosine)
        nearest_neighbours = faiss_search_index.search(features_cosine, int(R+1))[1][:,1:]

        target_labels = target_labels.reshape(-1)
        nn_labels = target_labels[nearest_neighbours]

        avg_r_precisions = []
        for label, freq in zip(labels, freqs):
            rows_with_label = np.where(target_labels==label)[0]
            for row in rows_with_label:
                n_recalled_samples = np.arange(1, R+1)
                target_label_occ_in_row = nn_labels[row,:]==label
                cumsum_target_label_freq_row = np.cumsum(target_label_occ_in_row)
                avg_r_pr_row = np.sum(cumsum_target_label_freq_row*target_label_occ_in_row/n_recalled_samples)/freq
                avg_r_precisions.append(avg_r_pr_row)

        return np.mean(avg_r_precisions)
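`c_mAP_1000` and the `c_mAP_c` / `c_mAP_lim` variants below differ only in the retrieval depth R; the inner loop is the same in all three: for each query, precision is accumulated at every rank where a same-class sample is retrieved, and the sum is divided by the class frequency. A toy illustration of that per-query term (not from the commit):

import numpy as np

freq = 4                                    # gallery samples sharing the query's class
rel = np.array([1, 0, 1, 1, 0, 0, 1, 0])    # nn_labels[row, :] == label, self-match already removed
ranks = np.arange(1, len(rel) + 1)

ap_at_r = np.sum(np.cumsum(rel) * rel / ranks) / freq
print(ap_at_r)                              # (1/1 + 2/3 + 3/4 + 4/7) / 4 ~= 0.747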

metrics/c_mAP_c.py

+37 (new file)

import torch
import numpy as np
import faiss


class Metric():
    def __init__(self, **kwargs):
        self.requires = ['features_cosine', 'target_labels']
        self.name = 'c_mAP_c'

    def __call__(self, target_labels, features_cosine):
        labels, freqs = np.unique(target_labels, return_counts=True)
        R = np.max(freqs)

        faiss_search_index = faiss.IndexFlatIP(features_cosine.shape[-1])
        if isinstance(features_cosine, torch.Tensor):
            features_cosine = features_cosine.detach().cpu().numpy()
            res = faiss.StandardGpuResources()
            faiss_search_index = faiss.index_cpu_to_gpu(res, 0, faiss_search_index)
        faiss_search_index.add(features_cosine)
        nearest_neighbours = faiss_search_index.search(features_cosine, int(R+1))[1][:,1:]

        target_labels = target_labels.reshape(-1)
        nn_labels = target_labels[nearest_neighbours]

        avg_r_precisions = []
        for label, freq in zip(labels, freqs):
            rows_with_label = np.where(target_labels==label)[0]
            for row in rows_with_label:
                n_recalled_samples = np.arange(1, freq+1)
                target_label_occ_in_row = nn_labels[row,:freq]==label
                cumsum_target_label_freq_row = np.cumsum(target_label_occ_in_row)
                avg_r_pr_row = np.sum(cumsum_target_label_freq_row*target_label_occ_in_row/n_recalled_samples)/freq
                avg_r_precisions.append(avg_r_pr_row)

        return np.mean(avg_r_precisions)

metrics/c_mAP_lim.py

+38 (new file)

import torch
import numpy as np
import faiss


class Metric():
    def __init__(self, **kwargs):
        self.requires = ['features_cosine', 'target_labels']
        self.name = 'c_mAP_lim'

    def __call__(self, target_labels, features_cosine):
        labels, freqs = np.unique(target_labels, return_counts=True)
        ## Account for faiss-limit at k=1023
        R = min(1023, len(features_cosine))

        faiss_search_index = faiss.IndexFlatIP(features_cosine.shape[-1])
        if isinstance(features_cosine, torch.Tensor):
            features_cosine = features_cosine.detach().cpu().numpy()
            res = faiss.StandardGpuResources()
            faiss_search_index = faiss.index_cpu_to_gpu(res, 0, faiss_search_index)
        faiss_search_index.add(features_cosine)
        nearest_neighbours = faiss_search_index.search(features_cosine, int(R+1))[1][:,1:]

        target_labels = target_labels.reshape(-1)
        nn_labels = target_labels[nearest_neighbours]

        avg_r_precisions = []
        for label, freq in zip(labels, freqs):
            rows_with_label = np.where(target_labels==label)[0]
            for row in rows_with_label:
                n_recalled_samples = np.arange(1, R+1)
                target_label_occ_in_row = nn_labels[row,:]==label
                cumsum_target_label_freq_row = np.cumsum(target_label_occ_in_row)
                avg_r_pr_row = np.sum(cumsum_target_label_freq_row*target_label_occ_in_row/n_recalled_samples)/freq
                avg_r_precisions.append(avg_r_pr_row)

        return np.mean(avg_r_precisions)

metrics/c_nmi.py

+10 (new file)

from sklearn import metrics

class Metric():
    def __init__(self, **kwargs):
        self.requires = ['kmeans_nearest_cosine', 'target_labels']
        self.name = 'c_nmi'

    def __call__(self, target_labels, computed_cluster_labels_cosine):
        NMI = metrics.cluster.normalized_mutual_info_score(computed_cluster_labels_cosine.reshape(-1), target_labels.reshape(-1))
        return NMI
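`c_nmi.py` is a thin wrapper around scikit-learn's NMI applied to the cosine k-means assignments; the score only compares grouping structure, so cluster ids can be permuted freely. A toy call (not from the commit):

from sklearn import metrics

print(metrics.cluster.normalized_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0]))  # 1.0, same grouping with relabelled clusters
print(metrics.cluster.normalized_mutual_info_score([0, 0, 1, 1], [0, 1, 0, 1]))  # 0.0, statistically independent grouping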

metrics/c_recall.py

+11 (new file)

import numpy as np

class Metric():
    def __init__(self, k, **kwargs):
        self.k = k
        self.requires = ['nearest_features_cosine', 'target_labels']
        self.name = 'c_recall@{}'.format(k)

    def __call__(self, target_labels, k_closest_classes_cosine, **kwargs):
        recall_at_k = np.sum([1 for target, recalled_predictions in zip(target_labels, k_closest_classes_cosine) if target in recalled_predictions[:self.k]])/len(target_labels)
        return recall_at_k
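`c_recall.Metric(k)` mirrors `e_recall`, just consuming the cosine neighbour lists: a query counts as a hit if its class appears among the classes of its k nearest retrieved samples. A small usage sketch (not from the commit; it assumes the repository's `metrics` package is importable):

import numpy as np
from metrics import c_recall

metric = c_recall.Metric(k=2)
target_labels = np.array([0, 1, 2])
k_closest_classes_cosine = np.array([[0, 1, 2],    # hit at rank 1
                                     [2, 1, 0],    # hit at rank 2
                                     [0, 1, 1]])   # class 2 not in the top 2 -> miss
print(metric(target_labels, k_closest_classes_cosine))   # 2/3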

metrics/e_recall.py

+1 -1

@@ -6,6 +6,6 @@ def __init__(self, k, **kwargs):
         self.requires = ['nearest_features', 'target_labels']
         self.name = 'e_recall@{}'.format(k)
 
-    def __call__(self, target_labels, k_closest_classes):
+    def __call__(self, target_labels, k_closest_classes, **kwargs):
         recall_at_k = np.sum([1 for target, recalled_predictions in zip(target_labels, k_closest_classes) if target in recalled_predictions[:self.k]])/len(target_labels)
         return recall_at_k
