-
Notifications
You must be signed in to change notification settings - Fork 5
/
vocToCoco_unity_synthetic.py
140 lines (118 loc) · 4.44 KB
/
vocToCoco_unity_synthetic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/python
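# No extra install needed: XML parsing uses the standard-library
# xml.etree.ElementTree imported below (lxml is not imported by this script).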
import sys
import os
import json
import xml.etree.ElementTree as ET
import glob
import pickle
# First annotation id handed out by convert(); ids count up from here within
# a single convert() call.
START_BOUNDING_BOX_ID = 1
# Category name -> category id. convert() uses this mapping whenever it is not
# None; when it is None, convert() derives the mapping from the XML files via
# get_categories() instead.
PRE_DEFINE_CATEGORIES = {'Pistol': 1}
def get(root, name):
    """Return the result of ``root.findall(name)``.

    Args:
        root: Element to search (anything exposing ``findall``).
        name: Tag name or path passed straight to ``findall``.

    Returns:
        Whatever ``findall`` yields for ``name`` -- for ElementTree elements
        this is a list, empty when nothing matches.
    """
    return root.findall(name)
def get_and_check(root, name, length):
    """Look up ``name`` under ``root`` and validate the number of matches.

    Args:
        root: Element to search (anything exposing ``findall`` and ``tag``).
        name: Tag name or path passed to ``root.findall``.
        length: Expected number of matches. A value ``<= 0`` disables the
            count check.

    Returns:
        The single matching element when ``length == 1``; otherwise the
        full list of matches.

    Raises:
        ValueError: If nothing matches, or if ``length`` is positive and the
            number of matches differs from it.
    """
    matches = root.findall(name)
    if not matches:
        raise ValueError("Can not find %s in %s." % (name, root.tag))

    found = len(matches)
    count_is_enforced = length > 0
    if count_is_enforced and found != length:
        raise ValueError(
            "The size of %s is supposed to be %d, but is %d."
            % (name, length, found)
        )

    # A requested length of exactly one unwraps the list for convenience.
    return matches[0] if length == 1 else matches
def get_categories(xml_files):
    """Build a ``{class_name: index}`` mapping from a set of XML files.

    Every file is parsed and, for each ``object`` element directly under the
    document root, the text of that element's first child is taken as the
    class name. Distinct names are sorted and numbered from 0.

    Args:
        xml_files: Iterable of paths (or file objects) accepted by
            ``ET.parse``.

    Returns:
        Dict mapping each distinct class name to its position in the sorted
        list of names.
    """
    unique_names = set()
    for xml_path in xml_files:
        document_root = ET.parse(xml_path).getroot()
        # The first child of <object> is read as the class name.
        unique_names.update(
            obj[0].text for obj in document_root.findall("object")
        )
    return {label: index for index, label in enumerate(sorted(unique_names))}
def convert(xml_files, json_file):
    """Convert VOC-style XML annotation files into one COCO-style JSON file.

    For every XML file (processed in sorted order) one entry is added to
    ``images`` and one entry per kept ``object`` element is added to
    ``annotations``. Objects named ``'Knife'`` are skipped and every other
    object is relabelled ``'Pistol'``. The image file name (last path
    component) is used both as the image ``id`` and as each annotation's
    ``image_id``.

    Args:
        xml_files: Iterable of XML annotation file paths.
        json_file: Destination path of the JSON file. Its parent directory
            is created when missing; a bare file name (no directory part)
            is written to the current working directory.

    Raises:
        ValueError: If a file has more than one ``path`` element, or a
            required element is missing or has the wrong multiplicity
            (raised by ``get_and_check``).
        AssertionError: If a bounding box has non-positive width or height
            (only while assertions are enabled).
    """
    json_dict = {"images": [], 'categories': [], "annotations": [], "type": "instances"}
    if PRE_DEFINE_CATEGORIES is not None:
        # NOTE: this is the module-level dict itself, not a copy, so a
        # category added below stays registered for later convert() calls.
        categories = PRE_DEFINE_CATEGORIES
    else:
        categories = get_categories(xml_files)
    bnd_id = START_BOUNDING_BOX_ID

    for xml_file in sorted(xml_files):
        root = ET.parse(xml_file).getroot()

        # Prefer <path>; fall back to <filename> when <path> is absent.
        path = get(root, "path")
        if len(path) == 1:
            filename = os.path.basename(path[0].text)
        elif len(path) == 0:
            filename = get_and_check(root, "filename", 1).text
        else:
            raise ValueError("%d paths found in %s" % (len(path), xml_file))
        # os.path.basename only splits on the host OS separator, so strip
        # any backslash-separated directories explicitly as well.
        fname = filename.split('\\')[-1]

        for obj in get(root, "object"):
            category = get_and_check(obj, "name", 1).text
            # TODO: Change to match edgecase dataset
            if category == 'Knife':
                continue
            category = 'Pistol'
            if category not in categories:
                # Use the next free id. len(categories) would collide with
                # an existing id when ids do not start at 0
                # (e.g. {'SomeClass': 1} -> new id 1).
                categories[category] = max(categories.values(), default=-1) + 1
            category_id = categories[category]

            bndbox = get_and_check(obj, "bndbox", 1)
            # xmin/ymin are shifted down by one; xmax/ymax are kept as read.
            xmin = int(float(get_and_check(bndbox, "xmin", 1).text)) - 1
            ymin = int(float(get_and_check(bndbox, "ymin", 1).text)) - 1
            xmax = int(float(get_and_check(bndbox, "xmax", 1).text))
            ymax = int(float(get_and_check(bndbox, "ymax", 1).text))
            assert xmax > xmin, "xmax (%d) <= xmin (%d) in %s" % (xmax, xmin, xml_file)
            assert ymax > ymin, "ymax (%d) <= ymin (%d) in %s" % (ymax, ymin, xml_file)
            # abs() only has an effect when assertions are disabled (-O).
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)

            json_dict["annotations"].append({
                "area": o_width * o_height,
                "iscrowd": 0,
                "image_id": fname,
                "bbox": [xmin, ymin, o_width, o_height],
                "category_id": category_id,
                "ignore": 0,
                "segmentation": [],
                "id": bnd_id,
            })
            bnd_id += 1

        size = get_and_check(root, "size", 1)
        width = int(float(get_and_check(size, "width", 1).text))
        height = int(float(get_and_check(size, "height", 1).text))
        json_dict["images"].append({
            "file_name": fname,
            "height": height,
            "width": width,
            "id": fname,
        })

    for cate, cid in categories.items():
        json_dict["categories"].append(
            {"supercategory": "none", "id": cid, "name": cate}
        )

    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when json_file actually has a directory component.
    out_dir = os.path.dirname(json_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(json_file, "w") as f:
        json.dump(json_dict, f)
if __name__ == "__main__":
    # Convert every dataset split: gather the XML annotations found directly
    # inside each split folder and write the combined COCO file next to them.
    # Uses the module-level ``glob`` import instead of re-importing the
    # function under the same name, which would rebind the module.
    for folder in [
        '/media/datos/shared_datasets/unity_syntectic_victory/split-500',
        '/media/datos/shared_datasets/unity_syntectic_victory/split-1000',
        '/media/datos/shared_datasets/unity_syntectic_victory/split-2500',
        '/media/datos/shared_datasets/unity_syntectic_victory/split-5000',
    ]:
        allFiles = glob.glob(os.path.join(folder, '*.xml'))
        fname = os.path.join(folder, 'split_coco.json')
        print("Number of xml files: {}".format(len(allFiles)))
        convert(allFiles, fname)
        print("Success: {}".format(fname))