-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_single_annotation_images.py
118 lines (91 loc) · 4.32 KB
/
get_single_annotation_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import json
from pathlib import Path
from typing import List
def import_annotations(file_path: str) -> dict:
"""
Import annotations from a JSON file
:param str file_path: Path to COCO annotations JSON file
:return dict: JSON data imported into dictionary
"""
json_file = Path(file_path)
with open(json_file) as annotations:
json_contents = json.load(annotations)
return json_contents
def extract_image_ids(json_contents: dict) -> set:
    """
    Grabs IDs of images with only one annotation

    :param dict json_contents: Imported COCO JSON data
    :return set: Set of IDs of images that have exactly one annotation
    :raises KeyError: If "annotations" or an annotation's "image_id" is missing
    """
    # Sets give constant-time membership tests, so the scan stays linear in
    # the number of annotations.
    single_annotation_ids = set()
    multiple_annotation_ids = set()
    for annotation in json_contents["annotations"]:
        image_id = annotation["image_id"]
        if image_id in multiple_annotation_ids:
            # Already known to have more than one annotation.
            continue
        if image_id in single_annotation_ids:
            # Second sighting: the image no longer qualifies.
            single_annotation_ids.remove(image_id)
            multiple_annotation_ids.add(image_id)
        else:
            single_annotation_ids.add(image_id)
    return single_annotation_ids
def get_links(id_set: set | list, json_contents: dict) -> List[tuple]:
"""
Gets the links associated with images in an ID set or list
:param set | list id_set: Set or list of image IDs to grab
:param dict json_contents: Imported JSON contents
:return List[tuple]: A list of tuples in the format (jpg_link, png_link)
"""
images = []
for image in json_contents["images"]:
if image["id"] in id_set:
jpg_image = image["flickr_640_url"]
png_image = image["flickr_url"]
images.append((jpg_image, png_image))
return images
def get_subcategory_proportions(id_set: set | list, json_contents: dict) -> dict:
"""
Returns the proportions of every img class within the single-class imgs
:param set | list id_set: Set or list of image IDs to grab
:param dict json_contents: Imported JSON contents
:return dict: A descending sorted dict of image superclass as the key and proportion as the value
"""
# Get image category name from "categories", "annotations" within annotations.json
subcategory_props = dict()
category_id_to_subcategory = {category["id"]: category["supercategory"] for category in json_contents["categories"]}
for annotation in json_contents["annotations"]:
image_id = annotation["image_id"]
category_id = annotation["category_id"]
if image_id in id_set:
# Update supercategory_frequencies
supercategory = category_id_to_subcategory[category_id]
subcategory_props[supercategory] = subcategory_props.get(supercategory, 0) + 1
for key, value in subcategory_props.items():
subcategory_props[key] = value / len(id_set)
subcategory_props = dict(sorted(subcategory_props.items(), key=lambda item: item[1], reverse=True))
return subcategory_props
def generate_annotations_json_from_ids(id_set: set | list, json_contents: dict) -> dict:
output_json = {"info": json_contents["info"],
"scene_annotations": json_contents["scene_annotations"],
"licenses": json_contents["licenses"], "categories": json_contents["categories"],
"scene_categories": json_contents["scene_categories"], "annotations": [],
"images": []}
for image in json_contents["images"]:
if image["id"] in id_set:
output_json["images"].append(image)
for annotation in json_contents["annotations"]:
if annotation["image_id"] in id_set:
output_json["annotations"].append(annotation)
return output_json
if __name__ == "__main__":
    # Load the dataset and collect the IDs of images with a single annotation.
    json_data = import_annotations("./data/annotations.json")
    image_ids = extract_image_ids(json_data)

    # Usage examples -- uncomment the ones you need.
    #
    # IDs and links of the images with one annotation:
    #     print(image_ids)
    #     print(get_links(image_ids, json_data))
    #
    # Proportions of supercategories among the one-annotation images:
    #     print(get_subcategory_proportions(image_ids, json_data))
    #
    # Annotations JSON restricted to the one-annotation images:
    #     correct_annotations = generate_annotations_json_from_ids(image_ids, json_data)
    #     print(correct_annotations)