Skip to content

Commit

Permalink
Major Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
rafay-pk committed May 29, 2023
1 parent 9120fa6 commit a9f3469
Show file tree
Hide file tree
Showing 25 changed files with 25,512 additions and 36 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Development
data/folders/
data/database.db

eval/dataset
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
27 changes: 27 additions & 0 deletions eval/face-recog-cnn-1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
drstrange (1).jpg:person_51498
drstrange (10).jpg:person_94487
drstrange (11).jpg:person_94487
drstrange (12).jpg:person_94487
drstrange (13).jpg:person_68981
drstrange (14).jpg:person_47168
drstrange (15).jpg:person_94487, person_63672, person_93678
drstrange (16).jpg:
drstrange (17).jpg:person_63672
drstrange (18).jpg:
drstrange (19).jpg:person_51498, person_93678
drstrange (2).jpg:person_51498
drstrange (20).jpg:person_94487
drstrange (21).jpg:person_94487
drstrange (22).jpg:person_94487
drstrange (23).jpg:person_79475
drstrange (24).jpg:person_93678
drstrange (25).jpg:person_68981
drstrange (26).jpg:person_68981, person_19140, person_81322, person_35790
drstrange (27).jpg:person_62945
drstrange (3).jpg:person_94487
drstrange (4).jpg:
drstrange (5).jpg:person_44472
drstrange (6).jpg:
drstrange (7).jpg:person_68981, person_68981, person_72238
drstrange (8).jpg:person_68981
drstrange (9).jpg:person_64669, person_94487, person_35790, person_61740
49 changes: 49 additions & 0 deletions eval/face-recognition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Evaluate face detection + greedy clustering over a folder of images.

For every image in `path`, detect faces with the CNN model, encode them,
and cluster encodings into `unique_people` by comparing each new encoding
against the representatives seen so far.  Writes one line per file to
face-recog.txt in the form: `<filename>:<person_a>, <person_b>, ...`.
"""
import os, face_recognition, numpy as np, random, dlib, cv2
from PIL import Image

path = r'C:\Users\Rafay\Downloads\drstrange'

file_tags = {}       # filename -> list of person labels found in it
unique_people = {}   # person label -> representative face encoding
faces_detected = 0

for file in os.listdir(path):
    print(f'Processing File:{file}')
    file_tags[file] = []
    full_path = os.path.join(path, file)
    image = face_recognition.load_image_file(full_path)
    # CNN model: slower but more accurate than the default HOG detector.
    locations = face_recognition.face_locations(image, 1, model='cnn')
    encodings = face_recognition.face_encodings(image, locations, num_jitters=1, model='small')
    # A `for` over an empty list is a no-op, so no explicit emptiness check needed.
    for encoding in encodings:
        faces_detected += 1
        print('Detected Person')
        unique = list(unique_people.values())
        result = face_recognition.compare_faces(unique, encoding)
        check = np.count_nonzero(result)
        if check == 0:
            # Unseen face: register under a random 5-digit label.
            # NOTE(review): labels could collide for >>100 people — acceptable
            # for this small eval set, but verify before reuse at scale.
            name = f'person_{random.randint(10000, 99999)}'
            unique_people[name] = encoding
            print('Added New Person')
        elif check == 1:
            name = list(unique_people.keys())[result.index(True)]
        else:
            # Multiple candidates matched: pick the closest known encoding.
            distances = face_recognition.face_distance(unique, encoding)
            name = list(unique_people.keys())[np.argmin(distances)]
        file_tags[file].append(name)

log_file = 'face-recog.txt'
# `with` guarantees the log is flushed and closed even if a write fails.
with open(log_file, 'w') as log:
    for filename, tags in file_tags.items():
        log.write(f'{filename}:{", ".join(tags)}\n')

# BUG FIX: corrected 'Detcted' -> 'Detected' and 'poeple' -> 'people'.
print(f'Detected {faces_detected} faces and {len(unique_people)} people')
1 change: 1 addition & 0 deletions eval/hashtable.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"C:/Users/Rafay/OneDrive/Pictures\\Screenshot 2022-12-18 133723.png":
77 changes: 77 additions & 0 deletions eval/image-captioning-eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Score generated image captions against Flickr8k reference captions.

Loads reference captions (image -> list of captions) and generated
captions (one per image), normalizes both, then reports corpus-mean
BLEU, METEOR, CIDEr and SPICE scores.
"""
import csv, numpy as np

ref_captions_fp = "C:/Users/Rafay/repos/image-search/eval/dataset/flickr8k/captions.csv"
gen_captions_fp = "C:/Users/Rafay/repos/image-search/eval/matrix.csv"


def _read_caption_csv(fp, image_col, caption_col):
    """Read a captions CSV into {image_name: [caption, ...]}."""
    grouped = {}
    # newline='' is the documented way to open files for the csv module.
    with open(fp, "r", newline='') as f:
        for row in csv.DictReader(f):
            grouped.setdefault(row[image_col], []).append(row[caption_col])
    return grouped


ref_dict = _read_caption_csv(ref_captions_fp, "image", "caption")
print("CSV file converted to Python dictionary successfully.")

references = {}
for filename, captions in ref_dict.items():
    normalized = []
    for caption in captions:
        if ' .' in caption:
            # Drop the tokenized trailing " ." (space + period).
            # BUG FIX: the original sliced with two *different* index()
            # calls — index(' .') and index('.') — which corrupted any
            # caption containing a period before the trailing " .".
            idx = caption.index(' .')
            caption = caption[:idx] + caption[idx + 2:]
        # Lowercase the first character to match the generated captions.
        normalized.append(caption[:1].lower() + caption[1:])
    references[filename] = normalized

hypothesis = _read_caption_csv(gen_captions_fp, "filename", "column_name")

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score

bleu_scores = []
meteor_scores = []
chen_and_cherry = SmoothingFunction()
for file in ref_dict.keys():
    ref = [x.split(' ') for x in references[file]]
    # Only the first generated caption per image is scored.
    hyp = hypothesis[file][0].split(' ')
    if hyp != ['']:  # skip images with an empty generated caption
        bleu_scores.append(sentence_bleu(ref, hyp, smoothing_function=chen_and_cherry.method2))
        meteor_scores.append(meteor_score(ref, hyp))

print(f'BLEU: Computed Scores for {len(bleu_scores)} generated captions with a mean score of {np.mean(bleu_scores)}')
print(f'METEOR: Computed Scores for {len(meteor_scores)} generated captions with a mean score of {np.mean(meteor_scores)}')

from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice

# CIDEr/SPICE take dicts; filter out images whose generated caption is empty.
hyp = {filename: caption for filename, caption in hypothesis.items() if caption != ['']}
ref = {filename: references[filename] for filename in hyp}

cider_scorer = Cider()
spice_scorer = Spice()
cider_score, _ = cider_scorer.compute_score(ref, hyp)
spice_score, _ = spice_scorer.compute_score(ref, hyp)
print(f'CIDER: Computed Scores for {len(hyp)} generated captions with a mean score of {cider_score}')
print(f'SPICE: Computed Scores for {len(hyp)} generated captions with a mean score of {spice_score}')
76 changes: 76 additions & 0 deletions eval/image-hash-add-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import sqlite3, os, imagehash, numpy as np
from PIL import Image
from difflib import SequenceMatcher


def list_files(folder):
    """Recursively collect every file under *folder*, returned as
    forward-slash paths (backslashes in walked roots are normalized)."""
    collected = []
    for root, _, filenames in os.walk(folder):
        normalized_root = root.replace('\\', '/')
        for name in filenames:
            collected.append(f"{normalized_root}/{name}")
    return collected


conn = sqlite3.connect(r'C:\Users\Rafay\repos\image-search\data\database.db')
cursor = conn.cursor()


def add_data(folder_path):
    """Hash every image under *folder_path*, store the hashes in Files,
    then group perceptually-similar images into the Collections table.

    Similarity heuristic: Files rows are scanned in id (insertion)
    order and two consecutive hashes count as similar when their
    SequenceMatcher ratio exceeds 0.7; overlapping pairs are merged
    into collections.

    NOTE(review): the scraped source lost indentation — the grouping
    steps are assumed to live inside add_data (they only make sense
    after the inserts); confirm against the original file.
    """
    extensions = ('.png', '.jpg')

    for file in list_files(folder_path):
        # str.endswith accepts a tuple of suffixes — no per-extension loop.
        if file.endswith(extensions):
            # BUG FIX: list_files already returns paths rooted at
            # folder_path; the original os.path.join(folder_path, file)
            # only worked on Windows, where joining two drive-absolute
            # paths happens to return the second one.
            img_hash = imagehash.phash(Image.open(file))
            print(f'{file}:{img_hash}')
            cursor.execute('INSERT OR IGNORE INTO Files (path, hash) VALUES (?, ?)',
                           (file, str(img_hash)))

    conn.commit()

    cursor.execute('SELECT f.id, f.hash FROM Files f;')
    # np.array stringifies the ids, hence the int() casts below.
    data = np.array(cursor.fetchall())
    similar = []

    for i in range(len(data) - 1):
        if SequenceMatcher(None, data[i][1], data[i + 1][1]).ratio() > 0.7:
            # BUG FIX: parameterized queries instead of f-string SQL, and
            # fetchone() results unwrapped to plain path strings (the
            # original passed 1-tuples around and relied on `(file)`
            # accidentally being a valid bindings sequence).
            cursor.execute('SELECT f.path FROM Files f WHERE f.id = ?', (int(data[i][0]),))
            path1 = cursor.fetchone()[0]
            cursor.execute('SELECT f.path FROM Files f WHERE f.id = ?', (int(data[i + 1][0]),))
            path2 = cursor.fetchone()[0]
            similar.append([path1, path2])

    for pair in similar:
        print(pair)
    print('merging')

    # Merge overlapping pairs into larger groups; `deleted` keeps the
    # moving index aligned with the shrinking list.
    deleted = 0
    for i in range(len(similar) - 1):
        i = i - deleted
        curr_group = set(similar[i])
        next_group = set(similar[i + 1])
        if curr_group & next_group:
            similar[i] = list(curr_group | next_group)
            del similar[i + 1]
            deleted += 1

    for group in similar:
        print(group)

    cursor.execute('DROP TABLE IF EXISTS Collections;')

    cursor.execute('''
    CREATE TABLE IF NOT EXISTS Collections (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        collection_id INTEGER,
        file_id INTEGER
    );
    ''')

    for collection_id, collection in enumerate(similar, start=1):
        for path in collection:
            cursor.execute('SELECT f.id FROM Files f WHERE f.path = ?', (path,))
            file_id = cursor.fetchone()[0]
            cursor.execute('INSERT OR IGNORE INTO Collections (collection_id, file_id) VALUES (?, ?)',
                           (collection_id, file_id))

    conn.commit()


add_data('C:/Users/Rafay/OneDrive/Pictures')
15 changes: 15 additions & 0 deletions eval/image-hash-create.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Create (or recreate) the Files table used by the image-hash eval scripts."""
import sqlite3

# Create a connection to the database
conn = sqlite3.connect(r'C:\Users\Rafay\repos\image-search\data\database.db')
cursor = conn.cursor()
cursor.execute('DROP TABLE IF EXISTS Files;')
# Create the table to store image data; path is UNIQUE so a re-run of the
# import script can use INSERT OR IGNORE safely.
cursor.execute('''
CREATE TABLE IF NOT EXISTS Files (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    path TEXT UNIQUE NOT NULL,
    hash TEXT NOT NULL
);
''')
# Index hashes so similarity scans can read rows in hash order quickly.
# (DROP TABLE above also dropped any previous idx_hash, so plain CREATE is safe.)
cursor.execute('create index idx_hash on Files(hash asc);')
# BUG FIX: the original never committed or closed.  Since Python 3.6 the
# sqlite3 module wraps DDL in a transaction too, so without an explicit
# commit the schema could be rolled back when the connection is
# garbage-collected.
conn.commit()
conn.close()
7 changes: 7 additions & 0 deletions eval/image-hash-get-collections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Dump every row of the Collections table for quick inspection."""
import sqlite3

# NOTE(review): sibling eval scripts write to data/database.db — confirm
# 'image_data.db' is the intended database file here.
connection = sqlite3.connect('image_data.db')
db_cursor = connection.cursor()

db_cursor.execute('SELECT * FROM Collections')
print(db_cursor.fetchall())
110 changes: 110 additions & 0 deletions eval/image-hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Group near-duplicate images in a folder by perceptual hash.

Hashes every .png/.jpg directly inside `folder_path`, sorts files by the
hex form of their hash so similar hashes become neighbours, flags
adjacent files whose hash distance is below 5 as similar pairs, then
merges overlapping pairs into larger groups.
"""
import imagehash, os
from PIL import Image

folder_path = 'C:/Users/Rafay/OneDrive/Pictures'
# str.endswith accepts a tuple of suffixes — replaces any([...]) loop.
extensions = ('.png', '.jpg')
table = {}

for file in os.listdir(folder_path):
    if file.endswith(extensions):
        path = os.path.join(folder_path, file)
        table[path] = imagehash.phash(Image.open(path))

# Sorting by the hex string clusters similar hashes next to each other,
# so only adjacent entries need comparing below.
table = dict(sorted(table.items(), key=lambda item: str(item[1])))
paths = list(table.keys())
hashes = list(table.values())
similar = []

for i in range(len(hashes) - 1):
    # ImageHash subtraction yields the Hamming distance between hashes.
    diff = hashes[i + 1] - hashes[i]
    if diff < 5:
        similar.append([paths[i], paths[i + 1]])

# Plain loops for side-effect printing (list comprehensions were being
# used purely for their side effects).
for pair in similar:
    print(pair)

# Merge overlapping pairs into larger groups; `deleted` keeps the moving
# index aligned with the shrinking list.
deleted = 0
for i in range(len(similar) - 1):
    i = i - deleted
    curr_group = set(similar[i])
    next_group = set(similar[i + 1])
    if curr_group.intersection(next_group):
        similar[i] = list(curr_group.union(next_group))
        del similar[i + 1]
        deleted += 1

print('\nLists Merged\n')
for group in similar:
    print(group)

# import imagehash
# import os
# from PIL import Image
# import sqlite3
#
#
# def file_op(f):
# return f.replace('\\', '/')
#
# # Create a connection to the database
# conn = sqlite3.connect('image_data.db')
# cursor = conn.cursor()
# cursor.execute('DROP TABLE IF EXISTS images;')
# # Create the table to store image data
# cursor.execute('''
# CREATE TABLE IF NOT EXISTS images (
# id INTEGER PRIMARY KEY AUTOINCREMENT,
# path TEXT,
# hash INTEGER
# );
# ''')
# cursor.execute('create index idx_hash on images(hash asc);')
# # Folder path and extensions
# folder_path = 'C:/Users/Rafay/OneDrive/Pictures'
# extensions = {'.png', '.jpg'}
# count = 0
# # Populate the table with image data
# for file in [f'{file_op(root)}/{f}' for root, dir, file in os.walk(folder_path) for f in file]:
# if any([file.endswith(ext) for ext in extensions]):
# path = os.path.join(folder_path, file)
# img = Image.open(path)
# count += 1
# img_hash = imagehash.phash(img, hash_size=16)
#
# # Insert image data into the table
# cursor.execute('INSERT INTO images (path, hash) VALUES (?, ?)', (path, str(img_hash)))
#
# # Commit the changes to the database
# conn.commit()
#
# # Retrieve similar images using SQL queries
# cursor.execute('''
# SELECT a.path, b.path
# FROM images a
# JOIN images b ON a.hash - b.hash = 0
# WHERE a.id = b.id + 1
# ''')
# similar = cursor.fetchall()
#
# # Print the similar image pairs
# # for pair in similar:
# # print(pair)
#
# # Merge similar image lists
# deleted = 0
# for i in range(len(similar) - 1):
# i = i - deleted
# curr_list = set(similar[i])
# next_list = set(similar[i + 1])
# if curr_list.intersection(next_list):
# similar[i] = list(curr_list.union(next_list))
# del similar[i + 1]
# deleted += 1
#
# print('\nLists Merged\n')
# for lst in similar:
# print(lst)
#
# # Close the database connection
# conn.close()
#
#
# print(f'OriginalFiles={count}, Classified={sum(len(sublist) for sublist in similar)}')
Loading

0 comments on commit a9f3469

Please sign in to comment.