Skip to content

Commit

Permalink
Major Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
rafay-pk committed May 29, 2023
1 parent 9120fa6 commit a9f3469
Show file tree
Hide file tree
Showing 25 changed files with 25,512 additions and 36 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Development
data/folders/
data/database.db

eval/dataset
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
27 changes: 27 additions & 0 deletions eval/face-recog-cnn-1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
drstrange (1).jpg:person_51498
drstrange (10).jpg:person_94487
drstrange (11).jpg:person_94487
drstrange (12).jpg:person_94487
drstrange (13).jpg:person_68981
drstrange (14).jpg:person_47168
drstrange (15).jpg:person_94487, person_63672, person_93678
drstrange (16).jpg:
drstrange (17).jpg:person_63672
drstrange (18).jpg:
drstrange (19).jpg:person_51498, person_93678
drstrange (2).jpg:person_51498
drstrange (20).jpg:person_94487
drstrange (21).jpg:person_94487
drstrange (22).jpg:person_94487
drstrange (23).jpg:person_79475
drstrange (24).jpg:person_93678
drstrange (25).jpg:person_68981
drstrange (26).jpg:person_68981, person_19140, person_81322, person_35790
drstrange (27).jpg:person_62945
drstrange (3).jpg:person_94487
drstrange (4).jpg:
drstrange (5).jpg:person_44472
drstrange (6).jpg:
drstrange (7).jpg:person_68981, person_68981, person_72238
drstrange (8).jpg:person_68981
drstrange (9).jpg:person_64669, person_94487, person_35790, person_61740
49 changes: 49 additions & 0 deletions eval/face-recognition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Evaluate face detection + greedy clustering over a folder of images.

For every image in `path`, detect faces with the CNN model, encode them,
and cluster encodings into `unique_people` by comparing each new encoding
against the representatives seen so far.  Writes one line per file to
face-recog.txt in the form: `<filename>:<person_a>, <person_b>, ...`.
"""
import os, face_recognition, numpy as np, random, dlib, cv2
from PIL import Image

path = r'C:\Users\Rafay\Downloads\drstrange'

file_tags = {}       # filename -> list of person labels found in it
unique_people = {}   # person label -> representative face encoding
faces_detected = 0

for file in os.listdir(path):
    print(f'Processing File:{file}')
    file_tags[file] = []
    full_path = os.path.join(path, file)
    image = face_recognition.load_image_file(full_path)
    # CNN model: slower but more accurate than the default HOG detector.
    locations = face_recognition.face_locations(image, 1, model='cnn')
    encodings = face_recognition.face_encodings(image, locations, num_jitters=1, model='small')
    # A `for` over an empty list is a no-op, so no explicit emptiness check needed.
    for encoding in encodings:
        faces_detected += 1
        print('Detected Person')
        unique = list(unique_people.values())
        result = face_recognition.compare_faces(unique, encoding)
        check = np.count_nonzero(result)
        if check == 0:
            # Unseen face: register under a random 5-digit label.
            # NOTE(review): labels could collide for >>100 people — acceptable
            # for this small eval set, but verify before reuse at scale.
            name = f'person_{random.randint(10000, 99999)}'
            unique_people[name] = encoding
            print('Added New Person')
        elif check == 1:
            name = list(unique_people.keys())[result.index(True)]
        else:
            # Multiple candidates matched: pick the closest known encoding.
            distances = face_recognition.face_distance(unique, encoding)
            name = list(unique_people.keys())[np.argmin(distances)]
        file_tags[file].append(name)

log_file = 'face-recog.txt'
# `with` guarantees the log is flushed and closed even if a write fails.
with open(log_file, 'w') as log:
    for filename, tags in file_tags.items():
        log.write(f'{filename}:{", ".join(tags)}\n')

# BUG FIX: corrected 'Detcted' -> 'Detected' and 'poeple' -> 'people'.
print(f'Detected {faces_detected} faces and {len(unique_people)} people')
1 change: 1 addition & 0 deletions eval/hashtable.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"C:/Users/Rafay/OneDrive/Pictures\\Screenshot 2022-12-18 133723.png":
77 changes: 77 additions & 0 deletions eval/image-captioning-eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Score generated image captions against Flickr8k reference captions.

Loads reference captions (image -> list of captions) and generated
captions (one per image), normalizes both, then reports corpus-mean
BLEU, METEOR, CIDEr and SPICE scores.
"""
import csv, numpy as np

ref_captions_fp = "C:/Users/Rafay/repos/image-search/eval/dataset/flickr8k/captions.csv"
gen_captions_fp = "C:/Users/Rafay/repos/image-search/eval/matrix.csv"


def _read_caption_csv(fp, image_col, caption_col):
    """Read a captions CSV into {image_name: [caption, ...]}."""
    grouped = {}
    # newline='' is the documented way to open files for the csv module.
    with open(fp, "r", newline='') as f:
        for row in csv.DictReader(f):
            grouped.setdefault(row[image_col], []).append(row[caption_col])
    return grouped


ref_dict = _read_caption_csv(ref_captions_fp, "image", "caption")
print("CSV file converted to Python dictionary successfully.")

references = {}
for filename, captions in ref_dict.items():
    normalized = []
    for caption in captions:
        if ' .' in caption:
            # Drop the tokenized trailing " ." (space + period).
            # BUG FIX: the original sliced with two *different* index()
            # calls — index(' .') and index('.') — which corrupted any
            # caption containing a period before the trailing " .".
            idx = caption.index(' .')
            caption = caption[:idx] + caption[idx + 2:]
        # Lowercase the first character to match the generated captions.
        normalized.append(caption[:1].lower() + caption[1:])
    references[filename] = normalized

hypothesis = _read_caption_csv(gen_captions_fp, "filename", "column_name")

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score

bleu_scores = []
meteor_scores = []
chen_and_cherry = SmoothingFunction()
for file in ref_dict.keys():
    ref = [x.split(' ') for x in references[file]]
    # Only the first generated caption per image is scored.
    hyp = hypothesis[file][0].split(' ')
    if hyp != ['']:  # skip images with an empty generated caption
        bleu_scores.append(sentence_bleu(ref, hyp, smoothing_function=chen_and_cherry.method2))
        meteor_scores.append(meteor_score(ref, hyp))

print(f'BLEU: Computed Scores for {len(bleu_scores)} generated captions with a mean score of {np.mean(bleu_scores)}')
print(f'METEOR: Computed Scores for {len(meteor_scores)} generated captions with a mean score of {np.mean(meteor_scores)}')

from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice

# CIDEr/SPICE take dicts; filter out images whose generated caption is empty.
hyp = {filename: caption for filename, caption in hypothesis.items() if caption != ['']}
ref = {filename: references[filename] for filename in hyp}

cider_scorer = Cider()
spice_scorer = Spice()
cider_score, _ = cider_scorer.compute_score(ref, hyp)
spice_score, _ = spice_scorer.compute_score(ref, hyp)
print(f'CIDER: Computed Scores for {len(hyp)} generated captions with a mean score of {cider_score}')
print(f'SPICE: Computed Scores for {len(hyp)} generated captions with a mean score of {spice_score}')
76 changes: 76 additions & 0 deletions eval/image-hash-add-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import sqlite3, os, imagehash, numpy as np
from PIL import Image
from difflib import SequenceMatcher


def list_files(folder):
    """Recursively collect every file under *folder*, returned as
    forward-slash paths (backslashes in walked roots are normalized)."""
    collected = []
    for root, _, filenames in os.walk(folder):
        normalized_root = root.replace('\\', '/')
        for name in filenames:
            collected.append(f"{normalized_root}/{name}")
    return collected


conn = sqlite3.connect(r'C:\Users\Rafay\repos\image-search\data\database.db')
cursor = conn.cursor()


def add_data(folder_path):
    """Hash every image under *folder_path*, store the hashes in Files,
    then group perceptually-similar images into the Collections table.

    Similarity heuristic: Files rows are scanned in id (insertion)
    order and two consecutive hashes count as similar when their
    SequenceMatcher ratio exceeds 0.7; overlapping pairs are merged
    into collections.

    NOTE(review): the scraped source lost indentation — the grouping
    steps are assumed to live inside add_data (they only make sense
    after the inserts); confirm against the original file.
    """
    extensions = ('.png', '.jpg')

    for file in list_files(folder_path):
        # str.endswith accepts a tuple of suffixes — no per-extension loop.
        if file.endswith(extensions):
            # BUG FIX: list_files already returns paths rooted at
            # folder_path; the original os.path.join(folder_path, file)
            # only worked on Windows, where joining two drive-absolute
            # paths happens to return the second one.
            img_hash = imagehash.phash(Image.open(file))
            print(f'{file}:{img_hash}')
            cursor.execute('INSERT OR IGNORE INTO Files (path, hash) VALUES (?, ?)',
                           (file, str(img_hash)))

    conn.commit()

    cursor.execute('SELECT f.id, f.hash FROM Files f;')
    # np.array stringifies the ids, hence the int() casts below.
    data = np.array(cursor.fetchall())
    similar = []

    for i in range(len(data) - 1):
        if SequenceMatcher(None, data[i][1], data[i + 1][1]).ratio() > 0.7:
            # BUG FIX: parameterized queries instead of f-string SQL, and
            # fetchone() results unwrapped to plain path strings (the
            # original passed 1-tuples around and relied on `(file)`
            # accidentally being a valid bindings sequence).
            cursor.execute('SELECT f.path FROM Files f WHERE f.id = ?', (int(data[i][0]),))
            path1 = cursor.fetchone()[0]
            cursor.execute('SELECT f.path FROM Files f WHERE f.id = ?', (int(data[i + 1][0]),))
            path2 = cursor.fetchone()[0]
            similar.append([path1, path2])

    for pair in similar:
        print(pair)
    print('merging')

    # Merge overlapping pairs into larger groups; `deleted` keeps the
    # moving index aligned with the shrinking list.
    deleted = 0
    for i in range(len(similar) - 1):
        i = i - deleted
        curr_group = set(similar[i])
        next_group = set(similar[i + 1])
        if curr_group & next_group:
            similar[i] = list(curr_group | next_group)
            del similar[i + 1]
            deleted += 1

    for group in similar:
        print(group)

    cursor.execute('DROP TABLE IF EXISTS Collections;')

    cursor.execute('''
    CREATE TABLE IF NOT EXISTS Collections (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        collection_id INTEGER,
        file_id INTEGER
    );
    ''')

    for collection_id, collection in enumerate(similar, start=1):
        for path in collection:
            cursor.execute('SELECT f.id FROM Files f WHERE f.path = ?', (path,))
            file_id = cursor.fetchone()[0]
            cursor.execute('INSERT OR IGNORE INTO Collections (collection_id, file_id) VALUES (?, ?)',
                           (collection_id, file_id))

    conn.commit()


add_data('C:/Users/Rafay/OneDrive/Pictures')
15 changes: 15 additions & 0 deletions eval/image-hash-create.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Create (or recreate) the Files table used by the image-hash eval scripts."""
import sqlite3

# Create a connection to the database
conn = sqlite3.connect(r'C:\Users\Rafay\repos\image-search\data\database.db')
cursor = conn.cursor()
cursor.execute('DROP TABLE IF EXISTS Files;')
# Create the table to store image data; path is UNIQUE so a re-run of the
# import script can use INSERT OR IGNORE safely.
cursor.execute('''
CREATE TABLE IF NOT EXISTS Files (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    path TEXT UNIQUE NOT NULL,
    hash TEXT NOT NULL
);
''')
# Index hashes so similarity scans can read rows in hash order quickly.
# (DROP TABLE above also dropped any previous idx_hash, so plain CREATE is safe.)
cursor.execute('create index idx_hash on Files(hash asc);')
# BUG FIX: the original never committed or closed.  Since Python 3.6 the
# sqlite3 module wraps DDL in a transaction too, so without an explicit
# commit the schema could be rolled back when the connection is
# garbage-collected.
conn.commit()
conn.close()
7 changes: 7 additions & 0 deletions eval/image-hash-get-collections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Dump every row of the Collections table for quick inspection."""
import sqlite3

# NOTE(review): sibling eval scripts write to data/database.db — confirm
# 'image_data.db' is the intended database file here.
connection = sqlite3.connect('image_data.db')
db_cursor = connection.cursor()

db_cursor.execute('SELECT * FROM Collections')
print(db_cursor.fetchall())
110 changes: 110 additions & 0 deletions eval/image-hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Group near-duplicate images in a folder by perceptual hash.

Hashes every .png/.jpg directly inside `folder_path`, sorts files by the
hex form of their hash so similar hashes become neighbours, flags
adjacent files whose hash distance is below 5 as similar pairs, then
merges overlapping pairs into larger groups.
"""
import imagehash, os
from PIL import Image

folder_path = 'C:/Users/Rafay/OneDrive/Pictures'
# str.endswith accepts a tuple of suffixes — replaces any([...]) loop.
extensions = ('.png', '.jpg')
table = {}

for file in os.listdir(folder_path):
    if file.endswith(extensions):
        path = os.path.join(folder_path, file)
        table[path] = imagehash.phash(Image.open(path))

# Sorting by the hex string clusters similar hashes next to each other,
# so only adjacent entries need comparing below.
table = dict(sorted(table.items(), key=lambda item: str(item[1])))
paths = list(table.keys())
hashes = list(table.values())
similar = []

for i in range(len(hashes) - 1):
    # ImageHash subtraction yields the Hamming distance between hashes.
    diff = hashes[i + 1] - hashes[i]
    if diff < 5:
        similar.append([paths[i], paths[i + 1]])

# Plain loops for side-effect printing (list comprehensions were being
# used purely for their side effects).
for pair in similar:
    print(pair)

# Merge overlapping pairs into larger groups; `deleted` keeps the moving
# index aligned with the shrinking list.
deleted = 0
for i in range(len(similar) - 1):
    i = i - deleted
    curr_group = set(similar[i])
    next_group = set(similar[i + 1])
    if curr_group.intersection(next_group):
        similar[i] = list(curr_group.union(next_group))
        del similar[i + 1]
        deleted += 1

print('\nLists Merged\n')
for group in similar:
    print(group)

# import imagehash
# import os
# from PIL import Image
# import sqlite3
#
#
# def file_op(f):
# return f.replace('\\', '/')
#
# # Create a connection to the database
# conn = sqlite3.connect('image_data.db')
# cursor = conn.cursor()
# cursor.execute('DROP TABLE IF EXISTS images;')
# # Create the table to store image data
# cursor.execute('''
# CREATE TABLE IF NOT EXISTS images (
# id INTEGER PRIMARY KEY AUTOINCREMENT,
# path TEXT,
# hash INTEGER
# );
# ''')
# cursor.execute('create index idx_hash on images(hash asc);')
# # Folder path and extensions
# folder_path = 'C:/Users/Rafay/OneDrive/Pictures'
# extensions = {'.png', '.jpg'}
# count = 0
# # Populate the table with image data
# for file in [f'{file_op(root)}/{f}' for root, dir, file in os.walk(folder_path) for f in file]:
# if any([file.endswith(ext) for ext in extensions]):
# path = os.path.join(folder_path, file)
# img = Image.open(path)
# count += 1
# img_hash = imagehash.phash(img, hash_size=16)
#
# # Insert image data into the table
# cursor.execute('INSERT INTO images (path, hash) VALUES (?, ?)', (path, str(img_hash)))
#
# # Commit the changes to the database
# conn.commit()
#
# # Retrieve similar images using SQL queries
# cursor.execute('''
# SELECT a.path, b.path
# FROM images a
# JOIN images b ON a.hash - b.hash = 0
# WHERE a.id = b.id + 1
# ''')
# similar = cursor.fetchall()
#
# # Print the similar image pairs
# # for pair in similar:
# # print(pair)
#
# # Merge similar image lists
# deleted = 0
# for i in range(len(similar) - 1):
# i = i - deleted
# curr_list = set(similar[i])
# next_list = set(similar[i + 1])
# if curr_list.intersection(next_list):
# similar[i] = list(curr_list.union(next_list))
# del similar[i + 1]
# deleted += 1
#
# print('\nLists Merged\n')
# for lst in similar:
# print(lst)
#
# # Close the database connection
# conn.close()
#
#
# print(f'OriginalFiles={count}, Classified={sum(len(sublist) for sublist in similar)}')
Loading

0 comments on commit a9f3469

Please sign in to comment.