Create predictions only for non-labeled images #51

Open · wants to merge 4 commits into master
6 changes: 5 additions & 1 deletion .gitignore
@@ -117,4 +117,8 @@ models/*.h5
tag/*.csv

# TF exported graph files
.pb
*.pb
#train/ppf_inference_graphs/frozen_inference_graph.pb

# ignore configs
work_config.ini
2 changes: 1 addition & 1 deletion tag/download_vott_json.py
@@ -376,7 +376,7 @@ def create_vott_json(file_location, num_rows, user_folders, pick_max, image_loc,
str(csv_file_loc / "totag.csv"))
file_date = [(blob.name, blob.properties.last_modified) for blob in
block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)]
ideal_class_balance = config_file["ideal_class_balance"].split(",")

if file_date:
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x: x[1])[0],
str(csv_file_loc / "tagging.csv"))
2 changes: 2 additions & 0 deletions test/.gitignore
@@ -0,0 +1,2 @@
*.jpg
*.gz
6 changes: 6 additions & 0 deletions test/tagged.csv
@@ -0,0 +1,6 @@
filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence
st1026.png,knot,0.69150746,0.7407868,0.3375946,0.39474854,512,488,board_images_png,0.9990602,0.54169416
st1026.png,knot,0.29255274,0.37531677,0.41773036,0.48604906,512,488,board_images_png,0.74185294,0.54169416
st1026.png,knot,0.29603952,0.35703427,0.40142354,0.49790853,512,488,board_images_png,0.54169416,0.54169416
st1578.png,knot,0.54391885,0.60184073,0.7846939,0.85633487,512,488,board_images_png,0.9994636,0.9942725
st1578.png,knot,0.60079277,0.6762777,0.36906424,0.4369791,512,488,board_images_png,0.9942725,0.9942725
16 changes: 15 additions & 1 deletion test/test_create_predictions.py
@@ -29,7 +29,7 @@ def setUp(self):
opener = urllib.request.URLopener()
opener.retrieve(url, model_file)

def tearDown(self):
def tearDown(self):
if os.path.exists("untagged.csv"):
os.remove("untagged.csv")
if os.path.exists("tagged_preds.csv"):
@@ -66,6 +66,20 @@ def test_get_suggestions(self):
user_folders=True)
self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_source.csv'), True, "generated untagged.csv is correct")

def test_get_suggestions_pretagged(self):
classes = 'knot,defect'
cur_detector = TFDetector(classes.split(','), 'model_knots.pb')
image_dir = "test_workdir_train"
untagged_output = 'untagged.csv'
tagged_output = 'tagged_preds.csv'
cur_tagged = "tagged.csv"
cur_tagging = None
get_suggestions(cur_detector, image_dir, untagged_output, tagged_output, cur_tagged, cur_tagging,
filetype="*.png", min_confidence=0.5,
user_folders=True)
self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_source_exclude_tagged.csv'), True,
"generated untagged.csv is correct")


if __name__ == '__main__':
unittest.main()
4 changes: 4 additions & 0 deletions test/untagged_source_exclude_tagged.csv
@@ -0,0 +1,4 @@
filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence
st1194.png,knot,0.6518282,0.70353997,0.7374667,0.80387944,512,488,board_images_png,0.99921286,0.99921286
st1611.png,knot,0.65116334,0.7139255,0.86043906,0.9666604,512,488,board_images_png,0.99822897,0.9488958
st1611.png,knot,0.07768918,0.1141083,0.332125,0.36988598,512,488,board_images_png,0.9488958,0.9488958
87 changes: 64 additions & 23 deletions train/create_predictions.py
@@ -7,6 +7,8 @@
from collections import defaultdict
import numpy as np

CV2_COLOR_LOAD_FLAG = 1

NUM_CHANNELS=3
FOLDER_LOCATION=8

@@ -17,6 +19,7 @@

#name,prediction[CLASS_IDX],prediction[XMIN_IDX],prediction[XMAX_IDX],prediction[YMIN_IDX],prediction[YMAX_IDX],height,width,folder,prediction[BOX_CONFID_IDX], confidence
BOX_CONFID_IDX = 0
FILENAME_LOCATION = 0
CLASS_IDX = 1
XMIN_IDX = 3
XMAX_IDX = 5
@@ -65,10 +68,48 @@ def make_csv_output(all_predictions: List[List[List[int]]], all_names: List[str]
prediction[CLASS_IDX], prediction[XMIN_IDX], prediction[XMAX_IDX],
prediction[YMIN_IDX], prediction[YMAX_IDX], height, width,
prediction[BOX_CONFID_IDX], confidence])
print(untagged_output, tagged_output)

def get_images_for_prediction(subdir, filetype, already_tagged_this_folder, image_size):
'''
Walks through the given directory of images and constructs ndarrays of image data that will be
used to get the model's predictions (for subsequent review by human annotators).
Images that have already been reviewed (tagged) are excluded from the list.
:param subdir: local directory containing the images
:param filetype: extension of the images
:param already_tagged_this_folder: list of image names that have already been tagged and can therefore be skipped
:param image_size: target image size
:return: the function returns 3 values
- all_images_this_folder -- ndarray of the resized image data
- all_names_this_folder -- names of the images that will get pre-tagged
- all_sizes_this_folder -- original sizes of the images
'''
all_image_files = []
cur_image_names = list(subdir.rglob(filetype))
print("Total image names: ", len(cur_image_names))
all_image_files += [str(image_name) for image_name in cur_image_names]
foldername = subdir.stem
all_nonfilt_names = [(foldername, filename.name) for filename in cur_image_names]

# check if images have been tagged already
print("Already tagged {0} images in folder {1}.".format(len(already_tagged_this_folder), foldername))
all_names_this_folder = [t for t in all_nonfilt_names if t[1] not in already_tagged_this_folder]
print("{0} images are taken for prediction from folder {1}".format(len(all_names_this_folder), foldername))

all_filt_imagepaths = [filepath for filepath in cur_image_names if filepath.name not in already_tagged_this_folder]

# Reversed because numpy is row-major
all_sizes_this_folder = [cv2.imread(str(image_path), CV2_COLOR_LOAD_FLAG).shape[:2] for image_path in
all_filt_imagepaths]
all_images_this_folder = np.zeros((len(all_names_this_folder), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8)
for curindex, image_path in enumerate(all_filt_imagepaths):
all_images_this_folder[curindex] = cv2.resize(cv2.imread(str(image_path), CV2_COLOR_LOAD_FLAG), image_size)

return all_images_this_folder, all_names_this_folder, all_sizes_this_folder

def get_suggestions(detector, basedir: str, untagged_output: str,
tagged_output: str, cur_tagged: str, cur_tagging: str, min_confidence: float =.2,
image_size: Tuple=(1000,750), filetype: str="*.jpg", minibatchsize: int=50,
image_size: Tuple=(1000,750), filetype: str="*.jpg", minibatchsize: int=16,
user_folders: bool=True):
'''Gets suggestions from a given detector and uses them to generate VOTT tags

@@ -81,9 +122,9 @@ def get_suggestions(detector, basedir: str, untagged_output: str,
arguments to the Detector class
'''
basedir = Path(basedir)
CV2_COLOR_LOAD_FLAG = 1
all_predictions = []
all_images = None
all_tagged = []
all_names = []
if user_folders:
# TODO: Cross reference with ToTag
# download latest tagging and tagged
@@ -99,25 +140,26 @@ def get_suggestions(detector, basedir: str, untagged_output: str,
all_tagged.extend(list(reader))
already_tagged = defaultdict(set)
for row in all_tagged:
already_tagged[row[FOLDER_LOCATION]].add(row[0])
already_tagged[row[FOLDER_LOCATION]].add(row[FILENAME_LOCATION])
print("Already tagged {0} images.".format(sum(map(len, already_tagged.values()))))
subdirs = [subfile for subfile in basedir.iterdir() if subfile.is_dir()]
print("subdirs: ", subdirs)
all_names = []
all_image_files = []
all_sizes = []
for subdir in subdirs:
cur_image_names = list(subdir.rglob(filetype))
print("Total image names: ", len(cur_image_names))
all_image_files += [str(image_name) for image_name in cur_image_names]
foldername = subdir.stem
all_names += [(foldername, filename.name) for filename in cur_image_names]
# Reversed because numpy is row-major
all_sizes = [cv2.imread(image, CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files]
all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8)
for curindex, image in enumerate(all_image_files):
all_images[curindex] = cv2.resize(cv2.imread(image, CV2_COLOR_LOAD_FLAG), image_size)
already_tagged_this_folder = already_tagged[foldername]
all_images_this_folder, all_names_this_folder, all_sizes_this_folder =\
get_images_for_prediction(subdir, filetype, already_tagged_this_folder, image_size)

all_names += all_names_this_folder
if all_images is None:
all_images = all_images_this_folder
else:
all_images = np.concatenate((all_images,all_images_this_folder), axis=0)
Collaborator:
I would suggest either getting rid of the np.concatenate or doing it at the end instead of at each folder.
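A minimal sketch of the suggestion above, assuming get_images_for_prediction keeps its current return values (illustrative only, not part of this PR): gather the per-folder batches in a list and call np.concatenate once after the loop, so the accumulated array is not reallocated for every folder.

import numpy as np

# Illustrative sketch of the reviewer's suggestion: collect per-folder
# results and concatenate a single time after the loop.
image_batches, all_names, all_sizes = [], [], []
for subdir in subdirs:
    already_tagged_this_folder = already_tagged[subdir.stem]
    images, names, sizes = get_images_for_prediction(
        subdir, filetype, already_tagged_this_folder, image_size)
    image_batches.append(images)
    all_names += names
    all_sizes += sizes
# One concatenation instead of one per folder; fall back to an empty batch
# if every image in every folder has already been tagged.
if image_batches:
    all_images = np.concatenate(image_batches, axis=0)
else:
    all_images = np.zeros((0, *reversed(image_size), NUM_CHANNELS), dtype=np.uint8)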

all_sizes += all_sizes_this_folder

print("Shape of all_images: ", all_images.shape)
all_predictions = detector.predict(all_images, min_confidence=min_confidence)
all_predictions = detector.predict(all_images, min_confidence=min_confidence, batch_size=minibatchsize)
else:
with open(cur_tagged, 'r') as file:
reader = csv.reader(file)
Expand All @@ -127,13 +169,11 @@ def get_suggestions(detector, basedir: str, untagged_output: str,
reader = csv.reader(file)
next(reader, None)
already_tagged |= {row[0] for row in reader}
all_image_files = list(basedir.rglob(filetype))
all_names = [filename.name for filename in all_image_files]
all_sizes = [cv2.imread(str(image), CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files]
all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8)
for curindex, image in enumerate(all_image_files):
all_images[curindex] = cv2.resize(cv2.imread(str(image), CV2_COLOR_LOAD_FLAG), image_size)
all_predictions = detector.predict(all_images, batch_size=2, min_confidence=min_confidence)

already_tagged_this_folder = already_tagged
all_images, all_names, all_sizes = \
get_images_for_prediction(basedir, filetype, already_tagged_this_folder, image_size)
all_predictions = detector.predict(all_images, batch_size=1, min_confidence=min_confidence)
make_csv_output(all_predictions, all_names, all_sizes, untagged_output, tagged_output, already_tagged, user_folders)

if __name__ == "__main__":
@@ -169,6 +209,7 @@ def get_suggestions(detector, basedir: str, untagged_output: str,
else:
classes = config_file["classes"].split(",")
model = str(Path(config_file["inference_output_dir"])/"frozen_inference_graph.pb")
print("using model: ", model)
if file_date:
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], "tagged.csv")
cur_tagged = "tagged.csv"