6 changes: 6 additions & 0 deletions clipeval/eval_all.py
@@ -16,6 +16,12 @@
("slip", "clipeval.slip.eval_slip"),
("xm3600", "clipeval.xm3600.eval_xm3600"),
("cvqa", "clipeval.cvqa.eval_cvqa"),
("zero_shot_classification_dollar_street", "clipeval.zero_shot_classification.eval_dollar_street"),
("zero_shot_classification_GeoDE", "clipeval.zero_shot_classification.eval_GeoDE"),
("zero_shot_classification_GLDv2", "clipeval.zero_shot_classification.eval_GLDv2"),
("few_shot_geo_localization_dollar_street", "clipeval.few_shot_geo_localization.eval_dollar_street"),
("few_shot_geo_localization_GeoDE", "clipeval.few_shot_geo_localization.eval_GeoDE"),
("few_shot_geo_localization_xm3600", "clipeval.few_shot_geo_localization.eval_xm3600"),
]
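The dispatcher in eval_all.py is not part of this hunk; a minimal sketch of how such a (name, module path) registry is typically consumed, assuming each listed module exposes the same main(model, preprocess_val, tokenizer, result_json) entry point as the new files below (run_all and results_dir are hypothetical names):

import importlib

def run_all(model, preprocess_val, tokenizer, results_dir, evals):
    # evals is the (name, module_path) list above; each module writes its own result JSON
    for name, module_path in evals:
        module = importlib.import_module(module_path)
        module.main(model, preprocess_val, tokenizer, f"{results_dir}/{name}.json")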


105 changes: 105 additions & 0 deletions clipeval/few_shot_geo_localization/eval_GeoDE.py
@@ -0,0 +1,105 @@
import torch
import json
from PIL import Image
from tqdm import tqdm
import pandas as pd
import numpy as np

import sys
if "external/big_vision" not in sys.path:
    sys.path.append("external/big_vision")
# or directly copy the functions from https://github.com/google-research/big_vision/blob/main/big_vision/evaluators/fewshot_lsr.py

from big_vision.evaluators.fewshot_lsr import _precompute_cache, _eig_fewshot_acc_fn

data_dir = 'data/geode/'
GROUP_KEY = 'ip_country'

# Evaluation Function
def evaluate(model, preprocess_val):
    geo_df = pd.read_csv(data_dir + 'index.csv')
    geo_df = geo_df.sample(frac=1).reset_index(drop=True)  # shuffle
    train_df = geo_df.iloc[:20000]
    test_df = geo_df.iloc[20000:]
    print("done loading data", len(geo_df), len(train_df), len(test_df))

    batch_size = 16
    device = torch.cuda.current_device()

    ## train one classification probe per shot count
    classification_probes = []
    country_ids_list = []  # one list per n_shot; in theory identical, but kept separately because some GeoDE countries are very rare
    for n_shot in [5, 10, 25]:
        train_sampled = train_df.groupby(GROUP_KEY, group_keys=False).apply(lambda x: x.sample(n=min(len(x), n_shot), random_state=42))
        country_ids = sorted(list(set(train_sampled[GROUP_KEY])))

        df = train_sampled
        with torch.no_grad():
            all_features = []
            all_labels = []
            for start in tqdm(range(0, len(df), batch_size)):
                end = min(start + batch_size, len(df))
                batch_imgs = []
                for i in range(start, end):
                    data = df.iloc[i]
                    try:
                        batch_imgs.append(Image.open(data_dir + 'images/' + data['file_path']).convert("RGB"))
                        all_labels.append(country_ids.index(data[GROUP_KEY]))
                    except OSError:
                        print(f"missing image {data['file_path']}")

                if not batch_imgs:  # every image in this batch was missing
                    continue
                images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
                image_embs = model.encode_image(images)
                image_embs /= image_embs.norm(dim=-1, keepdim=True)

                all_features.append(image_embs)

            all_features = torch.cat(all_features, dim=0)
            print(all_features.shape)

        classification_probes.append(_precompute_cache(all_features.cpu().numpy(), all_labels, len(set(all_labels))))
        country_ids_list.append(country_ids)

    ## start eval
    n = 0
    correct = [0] * len(classification_probes)

    with torch.no_grad():
        for local_start in tqdm(range(0, len(test_df), batch_size)):
            local_end = min(local_start + batch_size, len(test_df))
            batch_imgs = []
            country_labels = []

            for i in range(local_start, local_end):
                data = test_df.iloc[i]
                try:
                    batch_imgs.append(Image.open(data_dir + 'images/' + data['file_path']).convert("RGB"))
                    country_labels.append(data[GROUP_KEY])
                except OSError:
                    print(f"missing image {data['file_path']}")

            if not batch_imgs:  # every image in this batch was missing
                continue
            images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
            image_features = model.encode_image(images)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            for ind, cache in enumerate(classification_probes):
                labels = [country_ids_list[ind].index(c) if c in country_ids_list[ind] else -1 for c in country_labels]
                if labels.count(-1) > 0:
                    print(f"WARNING: {labels.count(-1)} of {len(labels)} test samples have a country not seen in the training set.")
                correct[ind] += _eig_fewshot_acc_fn(cache, image_features.cpu().numpy(), labels, 2.0 ** 10).item()

            n += len(country_labels)

    print(f"few-shot [5, 10, 25] geo-localization on GeoDE: correct={correct}, total={n}, acc={np.array(correct)/n}")
    return correct, n

def parse_results(results, result_json):
    with open(result_json) as f:
        result = json.load(f)
    print("few-shot geo-localization GeoDE:", result['acc'])
    results['few_shot_geo_loc_GeoDE'] = result['acc']

def main(model, preprocess_val, tokenizer, result_json):
    correct, n = evaluate(model, preprocess_val)
    with open(result_json, "w") as f:
        json.dump({"correct": correct, "total": n, "acc": (np.array(correct)/n).tolist()}, f)
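For orientation, _precompute_cache and _eig_fewshot_acc_fn come from big_vision's least-squares few-shot evaluator; conceptually they amount to fitting a closed-form ridge-regression probe on the frozen image features and scoring it on a test batch (the library version additionally caches an eigendecomposition so several regularisation strengths can be evaluated cheaply). A rough NumPy sketch of the idea, not the library implementation:

import numpy as np

def ridge_probe(train_feats, train_labels, num_classes, l2=2.0 ** 10):
    # train_feats: (n, d) L2-normalised features; train_labels: list of int class ids
    targets = -np.ones((len(train_labels), num_classes))
    targets[np.arange(len(train_labels)), train_labels] = 1.0  # +/-1 one-vs-rest targets
    d = train_feats.shape[1]
    # closed-form ridge solution: (X^T X + l2 * I)^-1 X^T Y
    return np.linalg.solve(train_feats.T @ train_feats + l2 * np.eye(d), train_feats.T @ targets)

def probe_correct(weights, test_feats, test_labels):
    preds = (test_feats @ weights).argmax(axis=1)
    return int((preds == np.asarray(test_labels)).sum())  # count of correct predictions in the batch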
96 changes: 96 additions & 0 deletions clipeval/few_shot_geo_localization/eval_dollar_street.py
@@ -0,0 +1,96 @@
import torch
import json
from PIL import Image
from tqdm import tqdm
import pandas as pd
import numpy as np

import sys
if "external/big_vision" not in sys.path:
    sys.path.append("external/big_vision")
# or directly copy the functions from https://github.com/google-research/big_vision/blob/main/big_vision/evaluators/fewshot_lsr.py

from big_vision.evaluators.fewshot_lsr import _precompute_cache, _eig_fewshot_acc_fn


data_dir = 'data/DollarStreet/dataset_dollarstreet/'

# Evaluation Function
def evaluate(model, preprocess_val):
    train_df = pd.read_csv(data_dir + 'images_v2_imagenet_train.csv')
    test_df = pd.read_csv(data_dir + 'images_v2_imagenet_test.csv')
    print("done loading data", len(train_df), len(test_df))

    batch_size = 16
    device = torch.cuda.current_device()

    ## train one classification probe per shot count
    classification_probes = []
    country_ids_list = []  # one list per n_shot; in theory identical, but kept separately just in case
    for n_shot in [5, 10, 25]:
        train_sampled = train_df.groupby('country.id', group_keys=False).apply(lambda x: x.sample(n=min(len(x), n_shot), random_state=42))
        country_ids = sorted(list(set(train_sampled['country.id'])))

        df = train_sampled
        with torch.no_grad():
            all_features = []
            all_labels = []
            for start in tqdm(range(0, len(df), batch_size)):
                end = min(start + batch_size, len(df))
                batch_imgs = []
                for i in range(start, end):
                    data = df.iloc[i]
                    batch_imgs.append(Image.open(data_dir + data['imageRelPath']).convert("RGB"))
                    all_labels.append(country_ids.index(data['country.id']))

                images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
                image_embs = model.encode_image(images)
                image_embs /= image_embs.norm(dim=-1, keepdim=True)

                all_features.append(image_embs)

            all_features = torch.cat(all_features, dim=0)
            print(all_features.shape)

        classification_probes.append(_precompute_cache(all_features.cpu().numpy(), all_labels, len(set(all_labels))))
        country_ids_list.append(country_ids)

    ## start eval
    n = 0
    correct = [0] * len(classification_probes)

    with torch.no_grad():
        for local_start in tqdm(range(0, len(test_df), batch_size)):
            local_end = min(local_start + batch_size, len(test_df))
            batch_imgs = []
            country_labels = []

            for i in range(local_start, local_end):
                data = test_df.iloc[i]
                batch_imgs.append(Image.open(data_dir + data['imageRelPath']).convert("RGB"))
                country_labels.append(data['country.id'])

            images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
            image_features = model.encode_image(images)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            for ind, cache in enumerate(classification_probes):
                labels = [country_ids_list[ind].index(c) for c in country_labels]
                correct[ind] += _eig_fewshot_acc_fn(cache, image_features.cpu().numpy(), labels, 2.0 ** 10).item()

            n += len(country_labels)

    print(f"few-shot [5, 10, 25] geo-localization on DollarStreet: correct={correct}, total={n}, acc={np.array(correct)/n}")
    return correct, n

def parse_results(results, result_json):
    with open(result_json) as f:
        result = json.load(f)
    print("few-shot geo-localization dollar street:", result['acc'])
    results['few_shot_geo_loc_dollar_street'] = result['acc']

def main(model, preprocess_val, tokenizer, result_json):
    correct, n = evaluate(model, preprocess_val)
    with open(result_json, "w") as f:
        json.dump({"correct": correct, "total": n, "acc": (np.array(correct)/n).tolist()}, f)
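A tiny illustration of the per-country n-shot sampling used in all three scripts: groups with fewer than n_shot rows keep everything they have rather than being dropped (the column names below follow the DollarStreet CSV):

import pandas as pd

toy = pd.DataFrame({'country.id': ['US'] * 8 + ['PE'] * 2,
                    'imageRelPath': [f'img_{i}.jpg' for i in range(10)]})
sampled = toy.groupby('country.id', group_keys=False).apply(lambda x: x.sample(n=min(len(x), 5), random_state=42))
print(sampled['country.id'].value_counts())  # US gets 5 rows, PE keeps its 2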
107 changes: 107 additions & 0 deletions clipeval/few_shot_geo_localization/eval_xm3600.py
@@ -0,0 +1,107 @@
import torch
import json
from PIL import Image
from tqdm import tqdm
import pandas as pd
import numpy as np

import sys
if "external/big_vision" not in sys.path:
    sys.path.append("external/big_vision")
# or directly copy the functions from https://github.com/google-research/big_vision/blob/main/big_vision/evaluators/fewshot_lsr.py

from big_vision.evaluators.fewshot_lsr import _precompute_cache, _eig_fewshot_acc_fn

data_dir = 'data/XM3600/'
GROUP_KEY = 'image/locale'

# Evaluation Function
def evaluate(model, preprocess_val):
    with open(data_dir + 'captions.jsonl', 'r') as f:
        data = [{k: v for k, v in json.loads(line).items() if k in ['image/key', 'image/locale']} for line in f]
    df = pd.DataFrame(data)
    df = df.sample(frac=1).reset_index(drop=True)  # shuffle
    train_df = df.iloc[:1800]
    test_df = df.iloc[1800:]
    print("done loading data", len(df), len(train_df), len(test_df))

    batch_size = 16
    device = torch.cuda.current_device()

    ## train one classification probe per shot count
    classification_probes = []
    country_ids_list = []  # one list per n_shot; in theory identical, but kept separately because some locales are rare
    for n_shot in [5, 10, 25]:
        train_sampled = train_df.groupby(GROUP_KEY, group_keys=False).apply(lambda x: x.sample(n=min(len(x), n_shot), random_state=42))
        country_ids = sorted(list(set(train_sampled[GROUP_KEY])))

        df = train_sampled
        with torch.no_grad():
            all_features = []
            all_labels = []
            for start in tqdm(range(0, len(df), batch_size)):
                end = min(start + batch_size, len(df))
                batch_imgs = []
                for i in range(start, end):
                    data = df.iloc[i]
                    try:
                        batch_imgs.append(Image.open(data_dir + f"images/{data['image/key']}.jpg").convert("RGB"))
                        all_labels.append(country_ids.index(data[GROUP_KEY]))
                    except OSError:
                        print(f"missing image {data['image/key']}")

                if not batch_imgs:  # every image in this batch was missing
                    continue
                images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
                image_embs = model.encode_image(images)
                image_embs /= image_embs.norm(dim=-1, keepdim=True)

                all_features.append(image_embs)

            all_features = torch.cat(all_features, dim=0)
            print(all_features.shape)

        classification_probes.append(_precompute_cache(all_features.cpu().numpy(), all_labels, len(set(all_labels))))
        country_ids_list.append(country_ids)

    ## start eval
    n = 0
    correct = [0] * len(classification_probes)

    with torch.no_grad():
        for local_start in tqdm(range(0, len(test_df), batch_size)):
            local_end = min(local_start + batch_size, len(test_df))
            batch_imgs = []
            country_labels = []

            for i in range(local_start, local_end):
                data = test_df.iloc[i]
                try:
                    batch_imgs.append(Image.open(data_dir + f"images/{data['image/key']}.jpg").convert("RGB"))
                    country_labels.append(data[GROUP_KEY])
                except OSError:
                    print(f"missing image {data['image/key']}")

            if not batch_imgs:  # every image in this batch was missing
                continue
            images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
            image_features = model.encode_image(images)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            for ind, cache in enumerate(classification_probes):
                labels = [country_ids_list[ind].index(c) if c in country_ids_list[ind] else -1 for c in country_labels]
                if labels.count(-1) > 0:
                    print(f"WARNING: {labels.count(-1)} of {len(labels)} test samples have a locale not seen in the training set.")
                correct[ind] += _eig_fewshot_acc_fn(cache, image_features.cpu().numpy(), labels, 2.0 ** 10).item()

            n += len(country_labels)

    print(f"few-shot [5, 10, 25] geo-localization on XM3600: correct={correct}, total={n}, acc={np.array(correct)/n}")
    return correct, n

def parse_results(results, result_json):
    with open(result_json) as f:
        result = json.load(f)
    print("few-shot geo-localization XM3600:", result['acc'])
    results['few_shot_geo_loc_xm3600'] = result['acc']

def main(model, preprocess_val, tokenizer, result_json):
    correct, n = evaluate(model, preprocess_val)
    with open(result_json, "w") as f:
        json.dump({"correct": correct, "total": n, "acc": (np.array(correct)/n).tolist()}, f)
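A minimal standalone driver for any of the three modules above might look like the following; open_clip is an assumption here (the scripts only require a model exposing encode_image plus the matching validation transform, and the tokenizer argument is unused by these evaluations):

import open_clip  # assumption: any CLIP-style model with encode_image() and a val transform works
from clipeval.few_shot_geo_localization import eval_xm3600

model, _, preprocess_val = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
tokenizer = open_clip.get_tokenizer('ViT-B-32')
model = model.cuda().eval()
eval_xm3600.main(model, preprocess_val, tokenizer, 'results/few_shot_geo_localization_xm3600.json')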