From e56712f99daae4912a49793c0a8e94030e5066ac Mon Sep 17 00:00:00 2001 From: JasonWildMe Date: Mon, 30 Mar 2026 22:02:57 -0700 Subject: [PATCH 1/5] Allow Wildbook to set custom image UUIDs via add_images API Instead of always computing UUIDs from file content hashes, callers can now pass image_uuid_list to /api/image/json/ to provide their own UUIDs. This lets Wildbook maintain consistent MediaAsset UUIDs across systems. Co-Authored-By: Claude Opus 4.6 --- wbia/control/manual_image_funcs.py | 14 ++++++++++++++ wbia/web/apis_json.py | 15 ++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/wbia/control/manual_image_funcs.py b/wbia/control/manual_image_funcs.py index 276bd2ab5e..277141ea99 100644 --- a/wbia/control/manual_image_funcs.py +++ b/wbia/control/manual_image_funcs.py @@ -381,6 +381,7 @@ def add_images( ensure_unique=False, ensure_loadable=True, ensure_exif=True, + image_uuid_list=None, **kwargs, ): r""" @@ -460,6 +461,19 @@ def add_images( logger.info('Using cache_uri_dict = {}'.format(ut.repr3(cache_uri_dict))) + # Override hash-computed UUIDs with caller-provided ones + if image_uuid_list is not None: + assert len(image_uuid_list) == len(params_list), ( + 'image_uuid_list length %d != gpath_list length %d' + % (len(image_uuid_list), len(params_list)) + ) + new_params_list = [] + for custom_uuid, (gpath_, params_) in zip(image_uuid_list, params_list): + if params_ is not None and custom_uuid is not None: + params_ = (custom_uuid,) + params_[1:] + new_params_list.append((gpath_, params_)) + params_list = new_params_list + # debug = False if debug: diff --git a/wbia/web/apis_json.py b/wbia/web/apis_json.py index dfe68f3cae..6596a1d8c2 100644 --- a/wbia/web/apis_json.py +++ b/wbia/web/apis_json.py @@ -59,6 +59,7 @@ def add_imagesets_json( def add_images_json( ibs, image_uri_list, + image_uuid_list=None, image_unixtime_list=None, image_gps_lat_list=None, image_gps_lon_list=None, @@ -210,7 +211,6 @@ def _verify(list_, tag, length, allow_none=False): kwargs['sanitize'] = kwargs.get('sanitize', False) depricated_list = [ - 'image_uuid_list', 'image_width_list', 'image_height_list', 'image_orig_name_list', @@ -236,6 +236,19 @@ def _verify(list_, tag, length, allow_none=False): # Rectify values image_uri_list = _rectify_uri(image_uri_list, None, expected_length, str) image_uri_list = _verify(image_uri_list, 'image_uri_list', expected_length) + + if image_uuid_list is not None: + import uuid as uuid_module + + image_uuid_list = [ + uuid_module.UUID(u) if isinstance(u, str) else u for u in image_uuid_list + ] + assert len(image_uuid_list) == expected_length, ( + 'image_uuid_list length %d != image_uri_list length %d' + % (len(image_uuid_list), expected_length) + ) + kwargs['image_uuid_list'] = image_uuid_list + gid_list = ibs.add_images(image_uri_list, **kwargs) # NOQA if image_unixtime_list is not None: From 2e61684f07da33b94a077e6145118f48092e8af3 Mon Sep 17 00:00:00 2001 From: JasonWildMe Date: Mon, 30 Mar 2026 22:05:47 -0700 Subject: [PATCH 2/5] Fix review issues: replace asserts with ValueError, add warnings - Replace assert with raise ValueError for API input validation (asserts are stripped by python -O and produce 500 instead of 400) - Wrap UUID parsing in try/except for malformed strings - Log warning when custom UUID is dropped due to failed image load - Document that None elements mean "use hash-computed UUID" Co-Authored-By: Claude Opus 4.6 --- wbia/control/manual_image_funcs.py | 18 ++++++++++++------ wbia/web/apis_json.py | 19 ++++++++++++------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/wbia/control/manual_image_funcs.py b/wbia/control/manual_image_funcs.py index 277141ea99..0ebda1781e 100644 --- a/wbia/control/manual_image_funcs.py +++ b/wbia/control/manual_image_funcs.py @@ -461,15 +461,21 @@ def add_images( logger.info('Using cache_uri_dict = {}'.format(ut.repr3(cache_uri_dict))) - # Override hash-computed UUIDs with caller-provided ones + # Override hash-computed UUIDs with caller-provided ones. + # A None element means "use the hash-computed UUID for this image". if image_uuid_list is not None: - assert len(image_uuid_list) == len(params_list), ( - 'image_uuid_list length %d != gpath_list length %d' - % (len(image_uuid_list), len(params_list)) - ) + if len(image_uuid_list) != len(params_list): + raise ValueError( + 'image_uuid_list length %d != params_list length %d' + % (len(image_uuid_list), len(params_list)) + ) new_params_list = [] for custom_uuid, (gpath_, params_) in zip(image_uuid_list, params_list): - if params_ is not None and custom_uuid is not None: + if params_ is None and custom_uuid is not None: + logger.warning( + 'Custom UUID %s dropped: image failed to load' % (custom_uuid,) + ) + elif params_ is not None and custom_uuid is not None: params_ = (custom_uuid,) + params_[1:] new_params_list.append((gpath_, params_)) params_list = new_params_list diff --git a/wbia/web/apis_json.py b/wbia/web/apis_json.py index 6596a1d8c2..255b67ac77 100644 --- a/wbia/web/apis_json.py +++ b/wbia/web/apis_json.py @@ -240,13 +240,18 @@ def _verify(list_, tag, length, allow_none=False): if image_uuid_list is not None: import uuid as uuid_module - image_uuid_list = [ - uuid_module.UUID(u) if isinstance(u, str) else u for u in image_uuid_list - ] - assert len(image_uuid_list) == expected_length, ( - 'image_uuid_list length %d != image_uri_list length %d' - % (len(image_uuid_list), expected_length) - ) + if len(image_uuid_list) != expected_length: + raise ValueError( + 'image_uuid_list length %d != image_uri_list length %d' + % (len(image_uuid_list), expected_length) + ) + try: + image_uuid_list = [ + uuid_module.UUID(u) if isinstance(u, str) else u + for u in image_uuid_list + ] + except (ValueError, AttributeError) as ex: + raise ValueError('Invalid UUID in image_uuid_list: %s' % (ex,)) kwargs['image_uuid_list'] = image_uuid_list gid_list = ibs.add_images(image_uri_list, **kwargs) # NOQA From 187051f24d8683d9f160367977b92fc9f7b7fe68 Mon Sep 17 00:00:00 2001 From: Wild Me Date: Thu, 9 Apr 2026 17:34:47 -0700 Subject: [PATCH 3/5] More gracefully handle 0-size annots --- wbia/core_annots.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/wbia/core_annots.py b/wbia/core_annots.py index c1f2203b38..4ad78f6abd 100644 --- a/wbia/core_annots.py +++ b/wbia/core_annots.py @@ -353,11 +353,35 @@ def compute_chip(depc, aid_list, config=None): bbox_list = ibs.get_annot_bboxes(aid_list) theta_list = ibs.get_annot_thetas(aid_list) - result_list = gen_chip_configure_and_compute( - ibs, gid_list, aid_list, bbox_list, theta_list, config - ) - for result in result_list: - yield result + # Filter out annotations with zero-area bounding boxes. We must still + # yield one result per input aid so the depcache stays aligned, so yield + # None for invalid annotations (the depcache's filter_Nones handles them). + bbox_sizes = ut.take_column(bbox_list, [2, 3]) + valid_flags = [w != 0 and h != 0 for (w, h) in bbox_sizes] + invalid_aids = ut.compress(aid_list, [not f for f in valid_flags]) + if len(invalid_aids) > 0: + logger.warning( + 'Skipping %d annotations with zero-area bounding boxes: %r' + % (len(invalid_aids), invalid_aids) + ) + + valid_gids = ut.compress(gid_list, valid_flags) + valid_aids = ut.compress(aid_list, valid_flags) + valid_bboxes = ut.compress(bbox_list, valid_flags) + valid_thetas = ut.compress(theta_list, valid_flags) + + # Build a lookup from aid -> result for valid annotations + valid_results = {} + if len(valid_aids) > 0: + result_list = gen_chip_configure_and_compute( + ibs, valid_gids, valid_aids, valid_bboxes, valid_thetas, config + ) + for aid, result in zip(valid_aids, result_list): + valid_results[aid] = result + + # Yield in original order: real result for valid aids, None for invalid + for aid in aid_list: + yield valid_results.get(aid, None) logger.info('Done Preprocessing Chips') From 2d3d1ae4453c65ea81f5952a89810ac4f17f04b2 Mon Sep 17 00:00:00 2001 From: JasonWildMe Date: Thu, 9 Apr 2026 18:35:33 -0700 Subject: [PATCH 4/5] Skip annotations with missing chip paths in classifier/labeler When an annotation references a missing or broken image, the chip generation returns None for its path. Previously this caused a TypeError crash in the densenet/efficientnet DataLoader. Now we filter out null paths, log a warning with the affected annotation IDs, and return safe defaults (score=0.0, species=UNKNOWN) so the batch can complete. Affects: compute_classifications (densenet), compute_labels_annotations (efficientnet and densenet labeler paths). Co-Authored-By: Claude Opus 4.6 --- wbia/core_annots.py | 107 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 7 deletions(-) diff --git a/wbia/core_annots.py b/wbia/core_annots.py index 4ad78f6abd..00b4fdb286 100644 --- a/wbia/core_annots.py +++ b/wbia/core_annots.py @@ -1860,7 +1860,37 @@ def compute_classifications(depc, aid_list, config=None): chip_filepath_list = depc.get_property( 'chips', aid_list, 'img', config=config2, read_extern=False, ensure=True ) - result_list = densenet.test(chip_filepath_list, **config) # yield detections + + # Filter out annotations with missing chip paths + valid_indices = set() + skipped_aids = [] + for i, p in enumerate(chip_filepath_list): + if p is not None: + valid_indices.add(i) + else: + skipped_aids.append(aid_list[i]) + if len(valid_indices) < len(chip_filepath_list): + logger.warning( + 'Skipping %d/%d annotations with missing chip paths: aids=%r' + % (len(skipped_aids), len(chip_filepath_list), skipped_aids) + ) + valid_chip_list = [chip_filepath_list[i] for i in sorted(valid_indices)] + else: + valid_chip_list = chip_filepath_list + + if valid_chip_list: + valid_results = list(densenet.test(valid_chip_list, **config)) + else: + valid_results = [] + + # Reconstruct full result list with defaults for skipped annotations + valid_iter = iter(valid_results) + result_list = [] + for i in range(len(chip_filepath_list)): + if i in valid_indices: + result_list.append(next(valid_iter)) + else: + result_list.append((0.0, 'UNKNOWN')) else: raise ValueError( 'specified classifier algo is not supported in config = {!r}'.format(config) @@ -2043,11 +2073,42 @@ def compute_labels_annotations(depc, aid_list, config=None): chip_filepath_list = depc.get_property( 'chips', aid_list, 'img', config=config_, read_extern=False, ensure=True ) + + # Filter out annotations with missing chip paths + valid_indices = set() + skipped_aids = [] + for i, p in enumerate(chip_filepath_list): + if p is not None: + valid_indices.add(i) + else: + skipped_aids.append(aid_list[i]) + if len(valid_indices) < len(chip_filepath_list): + logger.warning( + 'Skipping %d/%d annotations with missing chip paths: aids=%r' + % (len(skipped_aids), len(chip_filepath_list), skipped_aids) + ) + valid_chip_list = [chip_filepath_list[i] for i in sorted(valid_indices)] + else: + valid_chip_list = chip_filepath_list + config = dict(config) config['classifier_weight_filepath'] = config['labeler_weight_filepath'] - result_gen = efficientnet.test_dict( - chip_filepath_list, return_dict=True, **config - ) + + if valid_chip_list: + valid_results = list(efficientnet.test_dict( + valid_chip_list, return_dict=True, **config + )) + else: + valid_results = [] + + default_label = (0.0, 'UNKNOWN', None, None, 0.0, {}) + valid_iter = iter(valid_results) + result_gen = [] + for i in range(len(chip_filepath_list)): + if i in valid_indices: + result_gen.append(next(valid_iter)) + else: + result_gen.append(default_label) elif config['labeler_algo'] in ['densenet']: from wbia.algo.detect import densenet @@ -2061,11 +2122,43 @@ def compute_labels_annotations(depc, aid_list, config=None): chip_filepath_list = depc.get_property( 'chips', aid_list, 'img', config=config_, read_extern=False, ensure=True ) + + # Filter out annotations with missing chip paths + valid_indices = set() + skipped_aids = [] + for i, p in enumerate(chip_filepath_list): + if p is not None: + valid_indices.add(i) + else: + skipped_aids.append(aid_list[i]) + if len(valid_indices) < len(chip_filepath_list): + logger.warning( + 'Skipping %d/%d annotations with missing chip paths: aids=%r' + % (len(skipped_aids), len(chip_filepath_list), skipped_aids) + ) + valid_chip_list = [chip_filepath_list[i] for i in sorted(valid_indices)] + else: + valid_chip_list = chip_filepath_list + config = dict(config) config['classifier_weight_filepath'] = config['labeler_weight_filepath'] - result_gen = densenet.test_dict( - chip_filepath_list, return_dict=True, **config - ) + + if valid_chip_list: + valid_results = list(densenet.test_dict( + valid_chip_list, return_dict=True, **config + )) + else: + valid_results = [] + + # Reconstruct full results with defaults for skipped annotations + default_label = (0.0, 'UNKNOWN', None, None, 0.0, {}) + valid_iter = iter(valid_results) + result_gen = [] + for i in range(len(chip_filepath_list)): + if i in valid_indices: + result_gen.append(next(valid_iter)) + else: + result_gen.append(default_label) else: labeler_weight_filepath = config['labeler_weight_filepath'] labeler_weight_filepath = labeler_weight_filepath.strip() From 2e52aa7bfea6b7d9c2f438c9bbd24a2624795ae0 Mon Sep 17 00:00:00 2001 From: JasonWildMe Date: Thu, 9 Apr 2026 23:01:33 -0700 Subject: [PATCH 5/5] Return None instead of fake defaults for missing chip paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review caught two issues with the initial defensive check: 1. Fake (0.0, 'UNKNOWN') results get cached by the depcache, turning a transient missing-image problem into a persistent wrong prediction that survives image repair. 2. Empty probs dict {} breaks downstream evaluation code that expects real label keys. Fix: yield None for annotations with missing chips. The depcache's filter_Nones mechanism already handles this — the row is skipped without caching anything, so a retry after fixing the image will produce a real prediction. Co-Authored-By: Claude Opus 4.6 --- wbia/core_annots.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/wbia/core_annots.py b/wbia/core_annots.py index 00b4fdb286..e0b5eafb62 100644 --- a/wbia/core_annots.py +++ b/wbia/core_annots.py @@ -1861,7 +1861,8 @@ def compute_classifications(depc, aid_list, config=None): 'chips', aid_list, 'img', config=config2, read_extern=False, ensure=True ) - # Filter out annotations with missing chip paths + # Filter out annotations with missing chip paths; yield None for + # skipped rows so the depcache skips them without caching a fake result. valid_indices = set() skipped_aids = [] for i, p in enumerate(chip_filepath_list): @@ -1883,14 +1884,13 @@ def compute_classifications(depc, aid_list, config=None): else: valid_results = [] - # Reconstruct full result list with defaults for skipped annotations valid_iter = iter(valid_results) result_list = [] for i in range(len(chip_filepath_list)): if i in valid_indices: result_list.append(next(valid_iter)) else: - result_list.append((0.0, 'UNKNOWN')) + result_list.append(None) else: raise ValueError( 'specified classifier algo is not supported in config = {!r}'.format(config) @@ -2101,14 +2101,13 @@ def compute_labels_annotations(depc, aid_list, config=None): else: valid_results = [] - default_label = (0.0, 'UNKNOWN', None, None, 0.0, {}) valid_iter = iter(valid_results) result_gen = [] for i in range(len(chip_filepath_list)): if i in valid_indices: result_gen.append(next(valid_iter)) else: - result_gen.append(default_label) + result_gen.append(None) elif config['labeler_algo'] in ['densenet']: from wbia.algo.detect import densenet @@ -2150,15 +2149,13 @@ def compute_labels_annotations(depc, aid_list, config=None): else: valid_results = [] - # Reconstruct full results with defaults for skipped annotations - default_label = (0.0, 'UNKNOWN', None, None, 0.0, {}) valid_iter = iter(valid_results) result_gen = [] for i in range(len(chip_filepath_list)): if i in valid_indices: result_gen.append(next(valid_iter)) else: - result_gen.append(default_label) + result_gen.append(None) else: labeler_weight_filepath = config['labeler_weight_filepath'] labeler_weight_filepath = labeler_weight_filepath.strip()