diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000000..d55bcc6706 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,10 @@ +{ + "permissions": { + "allow": [ + "Bash(git log:*)", + "Bash(gh pr:*)", + "Bash(mvn compile:*)", + "Bash(mvn test:*)" + ] + } +} diff --git a/frontend/src/locale/de.json b/frontend/src/locale/de.json index 779b61b988..d856b32087 100644 --- a/frontend/src/locale/de.json +++ b/frontend/src/locale/de.json @@ -647,6 +647,11 @@ "ENCOUNTER_ANNOTATION": "Begegnungsanmerkung", "COCO_FORMAT": "COCO-Format", "COCO_FORMAT_DESCRIPTION": "Exportieren Sie Annotationen im COCO-Format für KI/ML-Training. Enthält Bilder und Begrenzungsrahmen.", + "COCO_PHASE_PREPARING": "Wird vorbereitet...", + "COCO_PHASE_IMAGES": "Bilder werden heruntergeladen", + "COCO_PHASE_MANIFEST": "Manifest wird erstellt...", + "COCO_PHASE_PACKAGING": "ZIP wird gepackt...", + "COCO_RETRY_DOWNLOAD": "Download wiederholen", "EXPORT_ZIP_FILE": "ZIP-Datei exportieren", "THIS_ENCOUNTER": "Diese Begegnung", "CLICK_ANNOTATION_TO_SEE_MATCH_RESULTS": "Klicken Sie auf eine Anmerkung, um die Übereinstimmungsergebnisse zu sehen.", diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index c942603f8f..352552ee76 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -647,6 +647,11 @@ "ENCOUNTER_ANNOTATION": "Encounter Annotation", "COCO_FORMAT": "COCO Format", "COCO_FORMAT_DESCRIPTION": "Export annotations in COCO format for AI/ML training. 
Includes images and bounding boxes.", + "COCO_PHASE_PREPARING": "Preparing...", + "COCO_PHASE_IMAGES": "Downloading images", + "COCO_PHASE_MANIFEST": "Building manifest...", + "COCO_PHASE_PACKAGING": "Packaging ZIP...", + "COCO_RETRY_DOWNLOAD": "Retry Download", "EXPORT_ZIP_FILE": "Export ZIP File", "THIS_ENCOUNTER": "This Encounter", "CLICK_ANNOTATION_TO_SEE_MATCH_RESULTS": "Click on an annotation to view match results", diff --git a/frontend/src/locale/es.json b/frontend/src/locale/es.json index bfd47a3b52..787521f82f 100644 --- a/frontend/src/locale/es.json +++ b/frontend/src/locale/es.json @@ -646,6 +646,11 @@ "ENCOUNTER_ANNOTATION": "Anotación de encuentro", "COCO_FORMAT": "Formato COCO", "COCO_FORMAT_DESCRIPTION": "Exportar anotaciones en formato COCO para entrenamiento de IA/ML. Incluye imágenes y cuadros delimitadores.", + "COCO_PHASE_PREPARING": "Preparando...", + "COCO_PHASE_IMAGES": "Descargando imágenes", + "COCO_PHASE_MANIFEST": "Creando manifiesto...", + "COCO_PHASE_PACKAGING": "Empaquetando ZIP...", + "COCO_RETRY_DOWNLOAD": "Reintentar descarga", "EXPORT_ZIP_FILE": "Exportar archivo ZIP", "THIS_ENCOUNTER": "Este encuentro", "CLICK_ANNOTATION_TO_SEE_MATCH_RESULTS": "Haz clic en la anotación para ver los resultados de coincidencia", diff --git a/frontend/src/locale/fr.json b/frontend/src/locale/fr.json index 46ccf1e5bf..36d858acbc 100644 --- a/frontend/src/locale/fr.json +++ b/frontend/src/locale/fr.json @@ -646,6 +646,11 @@ "ENCOUNTER_ANNOTATION": "Annotation de rencontre", "COCO_FORMAT": "Format COCO", "COCO_FORMAT_DESCRIPTION": "Exporter les annotations au format COCO pour l'entraînement IA/ML. 
Inclut les images et les boîtes englobantes.", + "COCO_PHASE_PREPARING": "Préparation...", + "COCO_PHASE_IMAGES": "Téléchargement des images", + "COCO_PHASE_MANIFEST": "Création du manifeste...", + "COCO_PHASE_PACKAGING": "Création de l'archive ZIP...", + "COCO_RETRY_DOWNLOAD": "Réessayer le téléchargement", "EXPORT_ZIP_FILE": "Exporter le fichier ZIP", "THIS_ENCOUNTER": "Cette rencontre", "CLICK_ANNOTATION_TO_SEE_MATCH_RESULTS": "Cliquez sur l’annotation pour voir les résultats de correspondance", diff --git a/frontend/src/locale/it.json b/frontend/src/locale/it.json index 8cb1997a62..679b3995b5 100644 --- a/frontend/src/locale/it.json +++ b/frontend/src/locale/it.json @@ -646,6 +646,11 @@ "ENCOUNTER_ANNOTATION": "Annotazione dell'incontro", "COCO_FORMAT": "Formato COCO", "COCO_FORMAT_DESCRIPTION": "Esporta le annotazioni in formato COCO per l'addestramento AI/ML. Include immagini e bounding box.", + "COCO_PHASE_PREPARING": "Preparazione...", + "COCO_PHASE_IMAGES": "Download delle immagini", + "COCO_PHASE_MANIFEST": "Creazione del manifest...", + "COCO_PHASE_PACKAGING": "Creazione del file ZIP...", + "COCO_RETRY_DOWNLOAD": "Riprova download", "EXPORT_ZIP_FILE": "Esporta file ZIP", "THIS_ENCOUNTER": "Questo incontro", "CLICK_ANNOTATION_TO_SEE_MATCH_RESULTS": "Clicca sull'annotazione per vedere i risultati della corrispondenza", diff --git a/frontend/src/pages/SearchPages/components/ExportModal.jsx b/frontend/src/pages/SearchPages/components/ExportModal.jsx index 6acc928e49..bfc6ec2121 100644 --- a/frontend/src/pages/SearchPages/components/ExportModal.jsx +++ b/frontend/src/pages/SearchPages/components/ExportModal.jsx @@ -11,7 +11,7 @@ import { Spinner, Alert, } from "react-bootstrap"; -import { useState } from "react"; +import { useState, useRef, useCallback, useEffect } from "react"; import { FormattedMessage } from "react-intl"; const downloadFunction = async (url, setLoading) => { @@ -75,6 +75,26 @@ export default function ExportDialog({ open, setOpen, searchQueryId }) { }); const [error, setError] = 
useState(null); + const [cocoProgress, setCocoProgress] = useState(null); + const [cocoJobId, setCocoJobId] = useState(null); + const cocoPollingRef = useRef(null); + + // Clean up polling interval on unmount (e.g., modal close) + useEffect(() => { + return () => { + if (cocoPollingRef.current) { + clearInterval(cocoPollingRef.current); + cocoPollingRef.current = null; + } + }; + }, []); + + // Auto-expire the retry button after 1 hour (matches server-side job TTL) + useEffect(() => { + if (!cocoJobId) return; + const timer = setTimeout(() => setCocoJobId(null), 60 * 60 * 1000); + return () => clearTimeout(timer); + }, [cocoJobId]); const setLoading = (key, value) => { setLoadingStates((prev) => ({ ...prev, [key]: value })); @@ -90,6 +110,117 @@ export default function ExportDialog({ open, setOpen, searchQueryId }) { } }; + const handleCocoExport = useCallback(async () => { + setError(null); + setLoading("cocoFormat", true); + setCocoProgress(null); + + try { + // Start the async export job + const startUrl = `/EncounterSearchExportCOCO?action=start&searchQueryId=${searchQueryId}&regularQuery=true`; + const startResp = await fetch(startUrl); + const startData = await startResp.json(); + if (!startResp.ok || !startData.jobId) { + throw new Error(startData.error || "Failed to start export"); + } + const { jobId } = startData; + + // Poll for progress (timeout after 2 hours for very large exports) + const MAX_POLL_MS = 2 * 60 * 60 * 1000; + const pollStart = Date.now(); + let consecutiveErrors = 0; + const result = await new Promise((resolve, reject) => { + cocoPollingRef.current = setInterval(async () => { + try { + if (Date.now() - pollStart > MAX_POLL_MS) { + clearInterval(cocoPollingRef.current); + cocoPollingRef.current = null; + reject(new Error("Export timed out after 2 hours")); + return; + } + + const statusResp = await fetch( + `/EncounterSearchExportCOCO?action=status&jobId=${jobId}`, + ); + if (!statusResp.ok) { + throw new Error(`Status check failed (HTTP 
${statusResp.status})`); + } + const status = await statusResp.json(); + consecutiveErrors = 0; + + if (status.totalImages > 0 || status.phase) { + setCocoProgress(status); + } + + if (status.status === "complete") { + clearInterval(cocoPollingRef.current); + cocoPollingRef.current = null; + resolve(jobId); + } else if (status.status === "error") { + clearInterval(cocoPollingRef.current); + cocoPollingRef.current = null; + reject(new Error(status.error || "Export failed")); + } + } catch (e) { + consecutiveErrors++; + // Tolerate up to 3 transient network errors before giving up + if (consecutiveErrors >= 3) { + clearInterval(cocoPollingRef.current); + cocoPollingRef.current = null; + reject(e); + } + } + }, 3000); + }); + + // Trigger native browser download — no buffering in JS memory. + // The Content-Disposition: attachment header tells the browser to + // save the file without navigating away from the page. + setCocoJobId(result); + triggerCocoDownload(result); + } catch (err) { + console.error("COCO export error:", err); + setError(`Failed to export: ${err.message}`); + } finally { + if (cocoPollingRef.current) { + clearInterval(cocoPollingRef.current); + cocoPollingRef.current = null; + } + setLoading("cocoFormat", false); + setCocoProgress(null); + } + }, [searchQueryId]); + + const triggerCocoDownload = useCallback(async (jobId) => { + // Pre-flight check: verify the job is still available before triggering + // the native download (which can't surface HTTP errors in-app). + try { + const checkResp = await fetch( + `/EncounterSearchExportCOCO?action=status&jobId=${jobId}`, + ); + if (!checkResp.ok) { + setCocoJobId(null); + setError("Export is no longer available. Please run a new export."); + return; + } + const checkData = await checkResp.json(); + if (checkData.status !== "complete") { + setCocoJobId(null); + setError("Export is no longer available. 
Please run a new export."); + return; + } + } catch { + // Network error on pre-flight — try the download anyway + } + + const a = document.createElement("a"); + a.href = `/EncounterSearchExportCOCO?action=download&jobId=${jobId}`; + a.download = "wildbook-coco-export.zip"; + document.body.appendChild(a); + a.click(); + a.remove(); + }, []); + const scrollToSection = (sectionId) => { setActiveSection(sectionId); const element = document.getElementById(sectionId); @@ -249,17 +380,12 @@ export default function ExportDialog({ open, setOpen, searchQueryId }) { -
+
+ {cocoJobId && !loadingStates.cocoFormat && ( + + )}
diff --git a/src/main/java/org/ecocean/export/EncounterCOCOExportFile.java b/src/main/java/org/ecocean/export/EncounterCOCOExportFile.java index d931e079ac..0e44fbe906 100644 --- a/src/main/java/org/ecocean/export/EncounterCOCOExportFile.java +++ b/src/main/java/org/ecocean/export/EncounterCOCOExportFile.java @@ -3,6 +3,7 @@ import org.ecocean.Annotation; import org.ecocean.Encounter; import org.ecocean.MarkedIndividual; +import org.ecocean.MultiValue; import org.ecocean.media.MediaAsset; import org.ecocean.shepherd.core.Shepherd; import org.json.JSONArray; @@ -25,183 +26,318 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; +/** + * Generates a COCO-format ZIP export from a list of encounters. + * + * Design: all JDO/database data is eagerly extracted into plain data structures + * during {@link #extractData()}, so the caller can close the DB transaction + * before calling {@link #writeTo(OutputStream)} which does long-running HTTP + * image downloads. + */ public class EncounterCOCOExportFile { private static final Logger log = Logger.getLogger(EncounterCOCOExportFile.class.getName()); private static final int BUFFER_SIZE = 8192; private static final int CONNECTION_TIMEOUT_MS = 30000; private static final int READ_TIMEOUT_MS = 60000; - private final List encounters; - private final Shepherd shepherd; + // Progress tracking for async exports + private volatile int totalImages; + private volatile int processedImages; + private volatile int failedImages; + private volatile String phase = "preparing"; + + // --- Plain data extracted from JDO objects --- + // Populated by extractData(), consumed by writeTo() + private Map mediaAssetUrls; // uuid -> webURL + private Map mediaAssetIsJpeg; // uuid -> true if JPEG + private Map mediaAssetMeta; // uuid -> metadata for JSON + private Map mediaAssetToImageId; // uuid -> sequential image ID + private Map categoryMap; // iaClass -> category ID + private Map individualIdMap; // individual ID -> sequential 
int + private List annotationDataList; // flattened annotation records + + /** Immutable snapshot of image metadata needed for the COCO JSON. */ + static class ImageMeta { + final String uuid; + final int width; + final int height; + final String dateTime; // nullable + final Double latitude; // nullable + final Double longitude; // nullable + + ImageMeta(MediaAsset ma) { + this.uuid = ma.getUUID(); + this.width = (int) ma.getWidth(); + this.height = (int) ma.getHeight(); + this.dateTime = ma.getDateTime() != null ? ma.getDateTime().toString() : null; + this.latitude = ma.getLatitude(); + this.longitude = ma.getLongitude(); + } + } + /** Immutable snapshot of one annotation's data needed for the COCO JSON. */ + static class AnnotationData { + final String annotationId; + final String mediaAssetUuid; + final String iaClass; + final int[] bbox; + final String viewpoint; // nullable + final double theta; + final String individualId; // nullable + final String individualDisplayName; // nullable + final List allNames; // list of [key, value] pairs + final String locationId; // nullable + final String encounterId; + + AnnotationData(Annotation ann, Encounter enc) { + this.annotationId = ann.getId(); + this.mediaAssetUuid = ann.getMediaAsset().getUUID(); + this.iaClass = ann.getIAClass().trim(); + this.bbox = ann.getBbox(); + this.viewpoint = ann.getViewpoint(); + this.theta = ann.getTheta(); + this.locationId = enc.getLocationID(); + this.encounterId = enc.getCatalogNumber(); + + MarkedIndividual ind = enc.getIndividual(); + if (ind != null && ind.getId() != null) { + this.individualId = ind.getId(); + this.individualDisplayName = ind.getDisplayName(); + this.allNames = extractNames(ind); + } else { + this.individualId = null; + this.individualDisplayName = null; + this.allNames = Collections.emptyList(); + } + } + + private static List extractNames(MarkedIndividual ind) { + List result = new ArrayList<>(); + MultiValue names = ind.getNames(); + if (names == null) return 
result; + Set keys = names.getKeys(); + if (keys == null) return result; + for (String key : keys) { + List vals = names.getValuesByKey(key); + if (vals == null) continue; + for (String val : vals) { + result.add(new String[]{key, val}); + } + } + return result; + } + } + + /** + * Constructs the export file and eagerly extracts all data from JDO objects. + * After this constructor returns, the caller may close the DB transaction. + */ public EncounterCOCOExportFile(List encounters, Shepherd shepherd) { - this.encounters = encounters; - this.shepherd = shepherd; + extractData(encounters); } - public void writeTo(OutputStream outputStream) throws IOException { - // Collect data - Map mediaAssetMap = collectUniqueMediaAssets(); - Map categoryMap = buildCategoryMap(); - Map individualIdMap = buildIndividualIdMap(); + public int getTotalImages() { return totalImages; } + public int getProcessedImages() { return processedImages; } + public int getFailedImages() { return failedImages; } + public String getPhase() { return phase; } + + /** + * Eagerly extracts all needed data from JDO-managed objects into plain + * data structures. Must be called while the DB transaction is still active. 
+ */ + private void extractData(List encounters) { + // Collect unique media assets and their metadata/URLs/content types + mediaAssetUrls = new LinkedHashMap<>(); + mediaAssetIsJpeg = new LinkedHashMap<>(); + mediaAssetMeta = new LinkedHashMap<>(); + for (Encounter enc : encounters) { + if (enc.getAnnotations() == null) continue; + for (Annotation ann : enc.getAnnotations()) { + if (!isValidAnnotation(ann)) continue; + MediaAsset ma = ann.getMediaAsset(); + String uuid = ma.getUUID(); + if (!mediaAssetUrls.containsKey(uuid)) { + URL url = ma.webURL(); + mediaAssetUrls.put(uuid, url); + mediaAssetMeta.put(uuid, new ImageMeta(ma)); + mediaAssetIsJpeg.put(uuid, detectIsJpeg(ma, url)); + } + } + } - // Assign image IDs - Map mediaAssetToImageId = new LinkedHashMap<>(); + // Assign sequential image IDs + mediaAssetToImageId = new LinkedHashMap<>(); int imageId = 1; - for (String uuid : mediaAssetMap.keySet()) { + for (String uuid : mediaAssetUrls.keySet()) { mediaAssetToImageId.put(uuid, imageId++); } - // Build JSON arrays - JSONArray imagesArray = new JSONArray(); - for (Map.Entry entry : mediaAssetMap.entrySet()) { - MediaAsset ma = entry.getValue(); - int imgId = mediaAssetToImageId.get(entry.getKey()); - imagesArray.put(buildImageObject(ma, imgId)); + // Build category map + Set iaClasses = new LinkedHashSet<>(); + for (Encounter enc : encounters) { + if (enc.getAnnotations() == null) continue; + for (Annotation ann : enc.getAnnotations()) { + String iaClass = ann.getIAClass(); + if (iaClass != null && !iaClass.trim().isEmpty()) { + iaClasses.add(iaClass.trim()); + } + } + } + categoryMap = new LinkedHashMap<>(); + int catId = 1; + for (String iaClass : iaClasses) { + categoryMap.put(iaClass, catId++); + } + + // Build individual ID map + Set individualIds = new TreeSet<>(); + for (Encounter enc : encounters) { + MarkedIndividual ind = enc.getIndividual(); + if (ind != null && ind.getId() != null) { + individualIds.add(ind.getId()); + } + } + individualIdMap = new 
LinkedHashMap<>(); + int idx = 0; + for (String indId : individualIds) { + individualIdMap.put(indId, idx++); } - JSONArray annotationsArray = new JSONArray(); - int annotationId = 1; + // Extract all annotation data + annotationDataList = new ArrayList<>(); for (Encounter enc : encounters) { if (enc.getAnnotations() == null) continue; for (Annotation ann : enc.getAnnotations()) { if (!isValidAnnotation(ann)) continue; - MediaAsset ma = ann.getMediaAsset(); - int imgId = mediaAssetToImageId.get(ma.getUUID()); - annotationsArray.put(buildAnnotationObject(ann, annotationId++, imgId, - categoryMap, individualIdMap, enc)); + annotationDataList.add(new AnnotationData(ann, enc)); } } + } - // Build complete COCO JSON - JSONObject coco = new JSONObject(); - coco.put("info", buildInfo(individualIdMap)); - coco.put("licenses", buildLicenses()); - coco.put("categories", buildCategories(categoryMap)); - coco.put("images", imagesArray); - coco.put("annotations", annotationsArray); - - // Write ZIP + /** + * Writes the COCO ZIP to the given output stream. This method performs + * long-running HTTP image downloads and does NOT require a DB transaction. + */ + public void writeTo(OutputStream outputStream) throws IOException { + // Write ZIP: images first, then JSON manifest so it only references + // images that were actually written successfully. 
try (ZipOutputStream zipOut = new ZipOutputStream(outputStream)) { - // Write annotations.json - log.info("COCO Export: Writing annotations JSON..."); - byte[] jsonBytes = coco.toString(2).getBytes(StandardCharsets.UTF_8); - ZipEntry jsonEntry = new ZipEntry("coco/annotations/instances.json"); - zipOut.putNextEntry(jsonEntry); - zipOut.write(jsonBytes); - zipOut.closeEntry(); - zipOut.flush(); - - // Write images with streaming to minimize memory usage - int totalImages = mediaAssetMap.size(); - int processedImages = 0; - int failedImages = 0; + totalImages = mediaAssetUrls.size(); + processedImages = 0; + failedImages = 0; + phase = "images"; + Set exportedUuids = new LinkedHashSet<>(); log.info("COCO Export: Starting export of " + totalImages + " images..."); - for (Map.Entry entry : mediaAssetMap.entrySet()) { - MediaAsset ma = entry.getValue(); - processedImages++; + for (Map.Entry entry : mediaAssetUrls.entrySet()) { + String uuid = entry.getKey(); + URL url = entry.getValue(); try { - boolean success = writeImageToZip(ma, zipOut); - if (!success) { + boolean success = writeImageToZip(uuid, url, zipOut); + if (success) { + exportedUuids.add(uuid); + } else { failedImages++; } - // Progress logging every 100 images - if (processedImages % 100 == 0) { - log.info("COCO Export: Processed " + processedImages + "/" + totalImages + - " images (" + failedImages + " failed)"); - } } catch (Exception e) { failedImages++; - log.warning("COCO Export: Failed to export image " + ma.getUUID() + ": " + e.getMessage()); + log.warning("COCO Export: Failed to export image " + uuid + + ": " + e.getMessage()); } + processedImages++; + if (processedImages % 100 == 0) { + log.info("COCO Export: Processed " + processedImages + "/" + totalImages + + " images (" + failedImages + " failed)"); + } + } + + // Build JSON arrays using only successfully exported images + phase = "manifest"; + JSONArray imagesArray = new JSONArray(); + for (String uuid : exportedUuids) { + ImageMeta meta = 
mediaAssetMeta.get(uuid); + int imgId = mediaAssetToImageId.get(uuid); + imagesArray.put(buildImageObject(meta, imgId)); } + JSONArray annotationsArray = new JSONArray(); + int annotationId = 1; + for (AnnotationData ad : annotationDataList) { + if (!exportedUuids.contains(ad.mediaAssetUuid)) continue; + int imgId = mediaAssetToImageId.get(ad.mediaAssetUuid); + annotationsArray.put(buildAnnotationObject(ad, annotationId++, imgId)); + } + + JSONObject coco = new JSONObject(); + coco.put("info", buildInfo()); + coco.put("licenses", buildLicenses()); + coco.put("categories", buildCategories()); + coco.put("images", imagesArray); + coco.put("annotations", annotationsArray); + + // Write annotations.json as the last entry + phase = "packaging"; + log.info("COCO Export: Writing annotations JSON..."); + byte[] jsonBytes = coco.toString(2).getBytes(StandardCharsets.UTF_8); + ZipEntry jsonEntry = new ZipEntry("coco/annotations/instances.json"); + zipOut.putNextEntry(jsonEntry); + zipOut.write(jsonBytes); + zipOut.closeEntry(); + zipOut.finish(); - log.info("COCO Export: Completed. " + (processedImages - failedImages) + "/" + totalImages + + log.info("COCO Export: Completed. " + exportedUuids.size() + "/" + totalImages + " images exported successfully, " + failedImages + " failed."); } } /** * Writes an image directly to the ZipOutputStream using streaming. - * This avoids loading entire images into memory, critical for large exports. 
* * @return true if image was written successfully, false if skipped */ - private boolean writeImageToZip(MediaAsset ma, ZipOutputStream zipOut) throws IOException { - URL url = ma.webURL(); + private boolean writeImageToZip(String uuid, URL url, ZipOutputStream zipOut) throws IOException { if (url == null) { - log.fine("COCO Export: Skipping image " + ma.getUUID() + " - no URL available"); + log.fine("COCO Export: Skipping image " + uuid + " - no URL available"); return false; } - String contentType = detectContentType(url); - boolean isJpeg = isJpegContentType(contentType); + boolean isJpeg = Boolean.TRUE.equals(mediaAssetIsJpeg.get(uuid)); - ZipEntry imgEntry = new ZipEntry("coco/images/" + ma.getUUID() + ".jpg"); + ZipEntry imgEntry = new ZipEntry("coco/images/" + uuid + ".jpg"); zipOut.putNextEntry(imgEntry); try { if (isJpeg) { - // Stream JPEG directly - most memory efficient streamImageDirectly(url, zipOut); } else { - // Non-JPEG: must decode and re-encode (uses more memory but unavoidable) convertAndWriteImage(url, zipOut); } } finally { zipOut.closeEntry(); - zipOut.flush(); // Keep data flowing to client + zipOut.flush(); } return true; } /** - * Detects the content type of a URL, handling redirects. + * Determines if a MediaAsset is JPEG using DB metadata (no HTTP request). + * Falls back to URL path extension if metadata is unavailable. 
*/ - private String detectContentType(URL url) { - try { - URLConnection conn = url.openConnection(); - conn.setConnectTimeout(CONNECTION_TIMEOUT_MS); - conn.setReadTimeout(READ_TIMEOUT_MS); - - if (conn instanceof HttpURLConnection) { - ((HttpURLConnection) conn).setRequestMethod("HEAD"); - ((HttpURLConnection) conn).setInstanceFollowRedirects(true); - } - - String contentType = conn.getContentType(); - - if (conn instanceof HttpURLConnection) { - ((HttpURLConnection) conn).disconnect(); - } - - return contentType; - } catch (Exception e) { - // Fall back to checking file extension + private static boolean detectIsJpeg(MediaAsset ma, URL url) { + String[] mimeType = ma.getMimeType(); + if (mimeType != null && mimeType.length >= 2) { + String minor = mimeType[1].split(";")[0].trim(); + return "jpeg".equals(minor) || "jpg".equals(minor); + } + // Fall back to URL path extension + if (url != null) { String path = url.getPath().toLowerCase(); - if (path.endsWith(".jpg") || path.endsWith(".jpeg")) { - return "image/jpeg"; - } - return null; + return path.endsWith(".jpg") || path.endsWith(".jpeg"); } + return false; } - /** - * Checks if content type indicates JPEG. - */ - private boolean isJpegContentType(String contentType) { - if (contentType == null) return false; - contentType = contentType.toLowerCase(); - return contentType.contains("image/jpeg") || contentType.contains("image/jpg"); - } - - /** - * Streams image bytes directly from URL to ZipOutputStream without decoding. - * This is the most memory-efficient method for JPEG images. - */ private void streamImageDirectly(URL url, ZipOutputStream zipOut) throws IOException { URLConnection conn = url.openConnection(); conn.setConnectTimeout(CONNECTION_TIMEOUT_MS); @@ -224,10 +360,6 @@ private void streamImageDirectly(URL url, ZipOutputStream zipOut) throws IOExcep } } - /** - * Converts a non-JPEG image to JPEG and writes to ZipOutputStream. - * This requires loading the image into memory but handles PNG, GIF, etc. 
- */ private void convertAndWriteImage(URL url, ZipOutputStream zipOut) throws IOException { URLConnection conn = url.openConnection(); conn.setConnectTimeout(CONNECTION_TIMEOUT_MS); @@ -243,16 +375,21 @@ private void convertAndWriteImage(URL url, ZipOutputStream zipOut) throws IOExce throw new IOException("Failed to decode image"); } - // For images with alpha channel (PNG), convert to RGB for JPEG if (image.getColorModel().hasAlpha()) { BufferedImage rgbImage = new BufferedImage( image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_RGB); - rgbImage.createGraphics().drawImage(image, 0, 0, java.awt.Color.WHITE, null); + java.awt.Graphics2D g2d = rgbImage.createGraphics(); + try { + g2d.drawImage(image, 0, 0, java.awt.Color.WHITE, null); + } finally { + g2d.dispose(); + } image = rgbImage; } - // Write directly to zip stream - ImageIO.write(image, "jpg", zipOut); + java.io.ByteArrayOutputStream imgBuf = new java.io.ByteArrayOutputStream(); + ImageIO.write(image, "jpg", imgBuf); + zipOut.write(imgBuf.toByteArray()); } finally { if (conn instanceof HttpURLConnection) { ((HttpURLConnection) conn).disconnect(); @@ -260,174 +397,49 @@ private void convertAndWriteImage(URL url, ZipOutputStream zipOut) throws IOExce } } - /** - * Collects all unique MediaAssets from valid annotations. - * An annotation is valid if it has a non-null/non-empty iaClass and a valid bbox. - */ - private Map collectUniqueMediaAssets() { - Map mediaAssetMap = new LinkedHashMap<>(); - for (Encounter enc : encounters) { - if (enc.getAnnotations() == null) continue; - for (Annotation ann : enc.getAnnotations()) { - if (!isValidAnnotation(ann)) continue; - MediaAsset ma = ann.getMediaAsset(); - if (ma != null && ma.getUUID() != null) { - mediaAssetMap.putIfAbsent(ma.getUUID(), ma); - } - } - } - return mediaAssetMap; - } - - /** - * Builds a map from iaClass to sequential category ID. 
- */ - private Map buildCategoryMap() { - Set iaClasses = new LinkedHashSet<>(); - for (Encounter enc : encounters) { - if (enc.getAnnotations() == null) continue; - for (Annotation ann : enc.getAnnotations()) { - String iaClass = ann.getIAClass(); - if (iaClass != null && !iaClass.trim().isEmpty()) { - iaClasses.add(iaClass.trim()); - } - } - } - Map categoryMap = new LinkedHashMap<>(); - int id = 1; - for (String iaClass : iaClasses) { - categoryMap.put(iaClass, id++); - } - return categoryMap; - } - - /** - * Builds a map from MarkedIndividual UUID to sequential integer ID. - */ - private Map buildIndividualIdMap() { - Set individualIds = new TreeSet<>(); // TreeSet for sorted, deterministic order - for (Encounter enc : encounters) { - MarkedIndividual ind = enc.getIndividual(); - if (ind != null && ind.getId() != null) { - individualIds.add(ind.getId()); - } - } - Map idMap = new LinkedHashMap<>(); - int idx = 0; - for (String indId : individualIds) { - idMap.put(indId, idx++); - } - return idMap; - } - - /** - * Checks if an annotation is valid for COCO export. - */ private boolean isValidAnnotation(Annotation ann) { if (ann == null) return false; String iaClass = ann.getIAClass(); if (iaClass == null || iaClass.trim().isEmpty()) return false; int[] bbox = ann.getBbox(); if (bbox == null || bbox.length < 4) return false; - if (bbox[2] <= 0 || bbox[3] <= 0) return false; // width/height must be positive + if (bbox[2] <= 0 || bbox[3] <= 0) return false; MediaAsset ma = ann.getMediaAsset(); if (ma == null || ma.getUUID() == null) return false; return true; } - /** - * Builds the "info" section of COCO JSON. 
- */ - private JSONObject buildInfo(Map individualIdMap) { - JSONObject info = new JSONObject(); - info.put("description", "Wildbook COCO Export"); - info.put("version", "1.0"); - info.put("date_created", ZonedDateTime.now().format(DateTimeFormatter.ISO_INSTANT)); - info.put("contributor", "Wildbook"); + // --- JSON builders using plain data (no JDO access) --- - JSONObject idMapping = new JSONObject(); - for (Map.Entry entry : individualIdMap.entrySet()) { - idMapping.put(entry.getKey(), entry.getValue()); - } - info.put("individual_id_mapping", idMapping); - return info; - } - - /** - * Builds the "licenses" section of COCO JSON. - */ - private JSONArray buildLicenses() { - JSONArray licenses = new JSONArray(); - JSONObject license = new JSONObject(); - license.put("id", 1); - license.put("name", "See Wildbook Terms"); - license.put("url", ""); - licenses.put(license); - return licenses; - } - - /** - * Builds the "categories" section of COCO JSON. - */ - private JSONArray buildCategories(Map categoryMap) { - JSONArray categories = new JSONArray(); - for (Map.Entry entry : categoryMap.entrySet()) { - JSONObject cat = new JSONObject(); - cat.put("id", entry.getValue()); - cat.put("name", entry.getKey()); - cat.put("supercategory", "animal"); - categories.put(cat); - } - return categories; - } - - /** - * Builds an image object for COCO JSON. - */ - private JSONObject buildImageObject(MediaAsset ma, int imageId) { + private JSONObject buildImageObject(ImageMeta meta, int imageId) { JSONObject img = new JSONObject(); img.put("id", imageId); - img.put("file_name", ma.getUUID() + ".jpg"); - img.put("width", (int) ma.getWidth()); - img.put("height", (int) ma.getHeight()); - img.put("uuid", ma.getUUID()); - - // Always include all fields - use null for missing values so every record - // has the same schema (enables NaN counting in EDA instead of probing for missing keys) - img.put("date_captured", ma.getDateTime() != null ? 
ma.getDateTime().toString() : JSONObject.NULL); - Double lat = ma.getLatitude(); - Double lon = ma.getLongitude(); - img.put("gps_lat_captured", lat != null ? lat : JSONObject.NULL); - img.put("gps_lon_captured", lon != null ? lon : JSONObject.NULL); + img.put("file_name", meta.uuid + ".jpg"); + img.put("width", meta.width); + img.put("height", meta.height); + img.put("uuid", meta.uuid); + img.put("date_captured", meta.dateTime != null ? meta.dateTime : JSONObject.NULL); + img.put("gps_lat_captured", meta.latitude != null ? meta.latitude : JSONObject.NULL); + img.put("gps_lon_captured", meta.longitude != null ? meta.longitude : JSONObject.NULL); return img; } - /** - * Builds an annotation object for COCO JSON. - */ - private JSONObject buildAnnotationObject(Annotation ann, int annotationId, int imageId, - Map categoryMap, - Map individualIdMap, - Encounter enc) { + private JSONObject buildAnnotationObject(AnnotationData ad, int annotationId, int imageId) { JSONObject annJson = new JSONObject(); annJson.put("id", annotationId); annJson.put("image_id", imageId); + annJson.put("category_id", categoryMap.get(ad.iaClass)); - String iaClass = ann.getIAClass().trim(); - annJson.put("category_id", categoryMap.get(iaClass)); - - int[] bbox = ann.getBbox(); + int[] bbox = ad.bbox; JSONArray bboxArray = new JSONArray(); - bboxArray.put(bbox[0]); // x - bboxArray.put(bbox[1]); // y - bboxArray.put(bbox[2]); // width - bboxArray.put(bbox[3]); // height + bboxArray.put(bbox[0]); + bboxArray.put(bbox[1]); + bboxArray.put(bbox[2]); + bboxArray.put(bbox[3]); annJson.put("bbox", bboxArray); - int area = bbox[2] * bbox[3]; - annJson.put("area", area); + annJson.put("area", bbox[2] * bbox[3]); - // Segmentation: rectangle polygon from bbox JSONArray segmentation = new JSONArray(); JSONArray polygon = new JSONArray(); int x = bbox[0], y = bbox[1], w = bbox[2], h = bbox[3]; @@ -439,38 +451,70 @@ private JSONObject buildAnnotationObject(Annotation ann, int annotationId, int i 
annJson.put("segmentation", segmentation); annJson.put("iscrowd", 0); + annJson.put("uuid", ad.annotationId); + annJson.put("viewpoint", ad.viewpoint != null ? ad.viewpoint : JSONObject.NULL); + annJson.put("theta", ad.theta); - // Custom fields - annJson.put("uuid", ann.getId()); - - String viewpoint = ann.getViewpoint(); - annJson.put("viewpoint", viewpoint != null ? viewpoint : JSONObject.NULL); - - annJson.put("theta", ann.getTheta()); - - // Individual info - MarkedIndividual ind = enc.getIndividual(); JSONArray individualIds = new JSONArray(); - if (ind != null && ind.getId() != null && individualIdMap.containsKey(ind.getId())) { - individualIds.put(individualIdMap.get(ind.getId())); - annJson.put("individual_uuid", ind.getId()); - String displayName = ind.getDisplayName(); - if (displayName != null) { - annJson.put("name", displayName); - } else { - annJson.put("name", JSONObject.NULL); + if (ad.individualId != null && individualIdMap.containsKey(ad.individualId)) { + individualIds.put(individualIdMap.get(ad.individualId)); + annJson.put("individual_uuid", ad.individualId); + annJson.put("name", ad.individualDisplayName != null ? ad.individualDisplayName : JSONObject.NULL); + JSONArray allNames = new JSONArray(); + for (String[] kv : ad.allNames) { + JSONObject entry = new JSONObject(); + entry.put("key", kv[0]); + entry.put("value", kv[1]); + allNames.put(entry); } + annJson.put("all_names", allNames); } else { annJson.put("individual_uuid", JSONObject.NULL); annJson.put("name", JSONObject.NULL); + annJson.put("all_names", new JSONArray()); } annJson.put("individual_ids", individualIds); - // Encounter info - String locationId = enc.getLocationID(); - annJson.put("location_id", locationId != null ? locationId : JSONObject.NULL); - annJson.put("encounter_id", enc.getCatalogNumber()); + annJson.put("location_id", ad.locationId != null ? 
ad.locationId : JSONObject.NULL); + annJson.put("encounter_id", ad.encounterId); return annJson; } + + private JSONObject buildInfo() { + JSONObject info = new JSONObject(); + info.put("description", "Wildbook COCO Export"); + info.put("version", "1.0"); + info.put("date_created", ZonedDateTime.now().format(DateTimeFormatter.ISO_INSTANT)); + info.put("contributor", "Wildbook"); + + JSONObject idMapping = new JSONObject(); + for (Map.Entry entry : individualIdMap.entrySet()) { + idMapping.put(entry.getKey(), entry.getValue()); + } + info.put("individual_id_mapping", idMapping); + return info; + } + + private JSONArray buildLicenses() { + JSONArray licenses = new JSONArray(); + JSONObject license = new JSONObject(); + license.put("id", 1); + license.put("name", "See Wildbook Terms"); + license.put("url", ""); + licenses.put(license); + return licenses; + } + + private JSONArray buildCategories() { + JSONArray categories = new JSONArray(); + for (Map.Entry entry : categoryMap.entrySet()) { + JSONObject cat = new JSONObject(); + cat.put("id", entry.getValue()); + cat.put("name", entry.getKey()); + cat.put("supercategory", "animal"); + categories.put(cat); + } + return categories; + } } diff --git a/src/main/java/org/ecocean/ia/Task.java b/src/main/java/org/ecocean/ia/Task.java index d840c5a561..b38d4b7a78 100644 --- a/src/main/java/org/ecocean/ia/Task.java +++ b/src/main/java/org/ecocean/ia/Task.java @@ -502,6 +502,14 @@ public String getStatus(Shepherd myShepherd) { status = "completed"; } else if (logs.toString().indexOf("score") > -1) { status = "completed"; + } else if (islObj.optJSONObject("status") != null && + islObj.optJSONObject("status").optJSONObject("error") != null && + islObj.optJSONObject("status").optJSONObject("error").optBoolean( + "emptyTargetAnnotations", false)) { + // No target annotations to match against is a terminal state, not a failure. + // Treating it as completed lets import progress reach 100%. 
+ System.out.println("[Task.getStatus] emptyTargetAnnotations detected for task " + getId() + ", marking completed"); + status = "completed"; } else if (islObj.toString().indexOf("HTTP error code") > -1) { status = "error"; } else if (!islObj.optString("queueStatus").equals("")) { @@ -511,7 +519,7 @@ public String getStatus(Shepherd myShepherd) { status = "queuing"; } // if(islObj.optString("queueStatus").equals("queued")){sendIdentify=false;} - // if(status.equals("waiting to queue"))System.out.println("islObj: "+islObj.toString()); + if(status.equals("waiting to queue"))System.out.println("[Task.getStatus DEBUG] waiting to queue, islObj: "+islObj.toString()); } return status; } diff --git a/src/main/java/org/ecocean/servlet/export/EncounterSearchExportCOCO.java b/src/main/java/org/ecocean/servlet/export/EncounterSearchExportCOCO.java index 913a3055c5..30debce003 100644 --- a/src/main/java/org/ecocean/servlet/export/EncounterSearchExportCOCO.java +++ b/src/main/java/org/ecocean/servlet/export/EncounterSearchExportCOCO.java @@ -14,15 +14,45 @@ import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.nio.file.Files; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.UUID; import java.util.Vector; +import java.util.concurrent.ConcurrentHashMap; +/** + * Async COCO export servlet. Three actions: + * ?action=start — kicks off background export, returns JSON {"jobId":"..."} + * ?action=status&jobId=... — returns JSON with progress/completion + * ?action=download&jobId=... — streams the completed ZIP file + * + * Legacy (no action param) falls back to synchronous export for backwards compatibility. 
+ */ public class EncounterSearchExportCOCO extends HttpServlet { + private static final long MAX_JOB_AGE_MS = 60 * 60 * 1000; // 1 hour + + private static final Map jobs = new ConcurrentHashMap<>(); + + static class ExportJob { + final String jobId; + final long createdAt = System.currentTimeMillis(); + volatile String status = "running"; // running | complete | error + volatile String errorMessage; + volatile File tempFile; + volatile EncounterCOCOExportFile exportFile; + + ExportJob(String jobId) { this.jobId = jobId; } + } + public void init(ServletConfig config) throws ServletException { super.init(config); } @@ -35,22 +65,240 @@ public void doGet(HttpServletRequest request, HttpServletResponse response) public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + String action = request.getParameter("action"); + if ("start".equals(action)) { + handleStart(request, response); + } else if ("status".equals(action)) { + handleStatus(request, response); + } else if ("download".equals(action)) { + handleDownload(request, response); + } else { + handleSynchronous(request, response); + } + } + + private void handleStart(HttpServletRequest request, HttpServletResponse response) + throws IOException { + purgeStaleJobs(); + String context = ServletUtilities.getContext(request); Shepherd myShepherd = new Shepherd(context); - myShepherd.setAction("EncounterSearchExportCOCO.class"); - + myShepherd.setAction("EncounterSearchExportCOCO.start"); myShepherd.beginDBTransaction(); + // Collect encounter IDs on the request thread (JDO objects stay local) + List encounterIds; try { - // Process query EncounterQueryResult queryResult = EncounterQueryProcessor.processQuery( myShepherd, request, "year descending, month descending, day descending"); Vector rEncounters = queryResult.getResult(); - - // Filter hidden encounters HiddenEncReporter hiddenData = new HiddenEncReporter(rEncounters, request, myShepherd); - // Convert to 
list, excluding hidden + encounterIds = new ArrayList<>(); + for (Object obj : rEncounters) { + Encounter enc = (Encounter) obj; + if (!hiddenData.contains(enc)) { + encounterIds.add(enc.getCatalogNumber()); + } + } + } catch (Exception e) { + e.printStackTrace(); + sendJson(response, 500, "{\"error\":\"Query failed: " + + escapeJson(e.getMessage()) + "\"}"); + return; + } finally { + myShepherd.rollbackDBTransaction(); + myShepherd.closeDBTransaction(); + } + + String jobId = UUID.randomUUID().toString(); + ExportJob job = new ExportJob(jobId); + jobs.put(jobId, job); + + // Background thread opens its own Shepherd, extracts data, closes the + // DB transaction, then does the long-running image I/O without a DB connection. + List encIds = encounterIds; + Thread exportThread = new Thread(() -> { + // Phase 1: extract all JDO data while transaction is alive + Shepherd bgShepherd = new Shepherd(context); + bgShepherd.setAction("EncounterSearchExportCOCO.export-" + jobId); + bgShepherd.beginDBTransaction(); + try { + List encounters = new ArrayList<>(); + for (String id : encIds) { + Encounter enc = bgShepherd.getEncounter(id); + if (enc != null) encounters.add(enc); + } + // Constructor eagerly extracts all JDO data into plain objects + job.exportFile = new EncounterCOCOExportFile(encounters, bgShepherd); + } catch (Throwable t) { + t.printStackTrace(); + job.status = "error"; + job.errorMessage = t.getMessage() != null ? 
t.getMessage() : t.getClass().getName(); + return; + } finally { + bgShepherd.rollbackDBTransaction(); + bgShepherd.closeDBTransaction(); + } + + // Phase 2: write ZIP (image downloads + JSON manifest) — no DB needed + try { + File tmpDir = new File(CommonConfiguration.getUploadTmpDir(context)); + if (!tmpDir.exists()) tmpDir.mkdirs(); + File tempFile = File.createTempFile("wildbook-coco-export-", ".zip", tmpDir); + tempFile.deleteOnExit(); + try (FileOutputStream fos = new FileOutputStream(tempFile)) { + job.exportFile.writeTo(fos); + } + job.tempFile = tempFile; + job.status = "complete"; + } catch (Throwable t) { + t.printStackTrace(); + job.status = "error"; + job.errorMessage = t.getMessage() != null ? t.getMessage() : t.getClass().getName(); + } + }, "coco-export-" + jobId); + exportThread.setDaemon(true); + exportThread.start(); + + sendJson(response, 200, "{\"jobId\":\"" + jobId + "\"}"); + } + + private void handleStatus(HttpServletRequest request, HttpServletResponse response) + throws IOException { + String jobId = request.getParameter("jobId"); + ExportJob job = (jobId != null) ? 
jobs.get(jobId) : null; + if (job == null) { + sendJson(response, 404, "{\"error\":\"Job not found\"}"); + return; + } + StringBuilder json = new StringBuilder(); + json.append("{\"jobId\":\"").append(job.jobId).append("\""); + json.append(",\"status\":\"").append(job.status).append("\""); + if (job.exportFile != null) { + json.append(",\"totalImages\":").append(job.exportFile.getTotalImages()); + json.append(",\"processedImages\":").append(job.exportFile.getProcessedImages()); + json.append(",\"failedImages\":").append(job.exportFile.getFailedImages()); + json.append(",\"phase\":\"").append(job.exportFile.getPhase()).append("\""); + } + if (job.errorMessage != null) { + json.append(",\"error\":\"").append(escapeJson(job.errorMessage)).append("\""); + } + json.append("}"); + sendJson(response, 200, json.toString()); + } + + private static final java.util.logging.Logger log = + java.util.logging.Logger.getLogger(EncounterSearchExportCOCO.class.getName()); + + private void handleDownload(HttpServletRequest request, HttpServletResponse response) + throws IOException { + String jobId = request.getParameter("jobId"); + ExportJob job = (jobId != null) ? 
jobs.get(jobId) : null; + if (job == null) { + log.warning("COCO Download: job not found, jobId=" + jobId); + sendJson(response, 404, "{\"error\":\"Job not found\"}"); + return; + } + if (!"complete".equals(job.status) || job.tempFile == null || !job.tempFile.exists()) { + log.warning("COCO Download: job not ready, jobId=" + jobId + + " status=" + job.status + " tempFile=" + job.tempFile + + " exists=" + (job.tempFile != null && job.tempFile.exists())); + sendJson(response, 400, "{\"error\":\"Export not ready\"}"); + return; + } + + long fileLength = job.tempFile.length(); + long start = 0; + long end = fileLength - 1; + + String rangeHeader = request.getHeader("Range"); + log.info("COCO Download: jobId=" + jobId + " fileLength=" + fileLength + + " Range=" + rangeHeader + " method=" + request.getMethod()); + + if (rangeHeader != null && rangeHeader.startsWith("bytes=")) { + String rangeSpec = rangeHeader.substring(6).trim(); + String[] parts = rangeSpec.split("-", 2); + try { + if (!parts[0].isEmpty()) { + start = Long.parseLong(parts[0]); + } + if (parts.length > 1 && !parts[1].isEmpty()) { + end = Long.parseLong(parts[1]); + } + } catch (NumberFormatException e) { + response.setStatus(HttpServletResponse.SC_REQUESTED_RANGE_NOT_SATISFIABLE); + response.setHeader("Content-Range", "bytes */" + fileLength); + return; + } + if (start < 0 || start > end || start >= fileLength) { + response.setStatus(HttpServletResponse.SC_REQUESTED_RANGE_NOT_SATISFIABLE); + response.setHeader("Content-Range", "bytes */" + fileLength); + return; + } + if (end >= fileLength) { + end = fileLength - 1; + } + long contentLength = end - start + 1; + response.setStatus(HttpServletResponse.SC_PARTIAL_CONTENT); + response.setHeader("Content-Range", + "bytes " + start + "-" + end + "/" + fileLength); + response.setContentLengthLong(contentLength); + log.info("COCO Download: sending 206, start=" + start + " end=" + end + + " contentLength=" + contentLength); + } else { + 
response.setContentLengthLong(fileLength); + log.info("COCO Download: sending 200, full file, length=" + fileLength); + } + + response.setContentType("application/zip"); + response.setHeader("Content-Disposition", + "attachment; filename=\"wildbook-coco-export.zip\""); + response.setHeader("Accept-Ranges", "bytes"); + // Tell nginx to stream directly to the client instead of buffering. + // Without this, nginx buffers up to proxy_max_temp_file_size (default 1GB), + // then stalls Tomcat's writes, eventually timing out and cutting the connection + // — causing browsers to restart the download in an infinite loop. + response.setHeader("X-Accel-Buffering", "no"); + + long bytesSent = 0; + OutputStream out = response.getOutputStream(); + try (RandomAccessFile raf = new RandomAccessFile(job.tempFile, "r")) { + raf.seek(start); + byte[] buffer = new byte[65536]; + long remaining = end - start + 1; + int read; + while (remaining > 0 && (read = raf.read(buffer, 0, + (int) Math.min(buffer.length, remaining))) != -1) { + out.write(buffer, 0, read); + remaining -= read; + bytesSent += read; + } + } catch (IOException e) { + log.warning("COCO Download: connection broken after " + bytesSent + + " bytes (of " + (end - start + 1) + " expected). " + e.getMessage()); + throw e; + } + out.flush(); + log.info("COCO Download: completed, sent " + bytesSent + " bytes for jobId=" + jobId); + } + + /** Synchronous fallback for legacy/non-JS callers. 
*/ + private void handleSynchronous(HttpServletRequest request, HttpServletResponse response) + throws IOException { + + String context = ServletUtilities.getContext(request); + Shepherd myShepherd = new Shepherd(context); + myShepherd.setAction("EncounterSearchExportCOCO.class"); + myShepherd.beginDBTransaction(); + + // Phase 1: extract data while transaction is alive + EncounterCOCOExportFile exportFile; + try { + EncounterQueryResult queryResult = EncounterQueryProcessor.processQuery( + myShepherd, request, "year descending, month descending, day descending"); + Vector rEncounters = queryResult.getResult(); + HiddenEncReporter hiddenData = new HiddenEncReporter(rEncounters, request, myShepherd); List encounters = new ArrayList<>(); for (Object obj : rEncounters) { Encounter enc = (Encounter) obj; @@ -58,30 +306,74 @@ public void doPost(HttpServletRequest request, HttpServletResponse response) encounters.add(enc); } } + exportFile = new EncounterCOCOExportFile(encounters, myShepherd); + } catch (Exception e) { + e.printStackTrace(); + if (!response.isCommitted()) { + sendJson(response, 500, "{\"error\":\"" + escapeJson(e.getMessage()) + "\"}"); + } + return; + } finally { + myShepherd.rollbackDBTransaction(); + myShepherd.closeDBTransaction(); + } - // Set response headers - response.setContentType("application/zip"); - response.setHeader("Content-Disposition", "attachment; filename=\"wildbook-coco-export.zip\""); + // Phase 2: write ZIP (no DB needed) + File tempFile = null; + try { + File tmpDir = new File(CommonConfiguration.getUploadTmpDir(context)); + if (!tmpDir.exists()) tmpDir.mkdirs(); + tempFile = File.createTempFile("wildbook-coco-export-", ".zip", tmpDir); + tempFile.deleteOnExit(); + try (FileOutputStream fos = new FileOutputStream(tempFile)) { + exportFile.writeTo(fos); + } - // Write export + response.setContentType("application/zip"); + response.setHeader("Content-Disposition", + "attachment; filename=\"wildbook-coco-export.zip\""); + 
response.setContentLengthLong(tempFile.length()); OutputStream out = response.getOutputStream(); - EncounterCOCOExportFile exportFile = new EncounterCOCOExportFile(encounters, myShepherd); - exportFile.writeTo(out); + Files.copy(tempFile.toPath(), out); out.flush(); } catch (Exception e) { e.printStackTrace(); - response.setContentType("text/html"); - PrintWriter out = response.getWriter(); - out.println(ServletUtilities.getHeader(request)); - out.println("

Error encountered

"); - out.println("

Error: " + e.getMessage() + "

"); - out.println("

Please let the webmaster know you encountered an error at: EncounterSearchExportCOCO servlet

"); - out.println(ServletUtilities.getFooter(context)); - out.close(); + if (!response.isCommitted()) { + sendJson(response, 500, "{\"error\":\"" + escapeJson(e.getMessage()) + "\"}"); + } } finally { - myShepherd.rollbackDBTransaction(); - myShepherd.closeDBTransaction(); + if (tempFile != null) { + tempFile.delete(); + } + } + } + + private void sendJson(HttpServletResponse response, int status, String json) + throws IOException { + response.setStatus(status); + response.setContentType("application/json"); + response.setCharacterEncoding("UTF-8"); + response.getWriter().write(json); + } + + private static String escapeJson(String s) { + if (s == null) return ""; + return s.replace("\\", "\\\\").replace("\"", "\\\"") + .replace("\n", "\\n").replace("\r", "\\r"); + } + + private void purgeStaleJobs() { + long now = System.currentTimeMillis(); + Iterator> it = jobs.entrySet().iterator(); + while (it.hasNext()) { + ExportJob job = it.next().getValue(); + if ((now - job.createdAt) > MAX_JOB_AGE_MS) { + if (job.tempFile != null && job.tempFile.exists()) { + job.tempFile.delete(); + } + it.remove(); + } } } } diff --git a/src/test/java/org/ecocean/export/EncounterCOCOExportFileTest.java b/src/test/java/org/ecocean/export/EncounterCOCOExportFileTest.java index 97ec570347..b8b119b7a2 100644 --- a/src/test/java/org/ecocean/export/EncounterCOCOExportFileTest.java +++ b/src/test/java/org/ecocean/export/EncounterCOCOExportFileTest.java @@ -24,28 +24,17 @@ class EncounterCOCOExportFileTest { @Test - void testBuildsCOCOStructure() throws Exception { - // Create mock objects + void testManifestExcludesFailedImages() throws Exception { + // When an image URL is null (download fails), the manifest should NOT + // reference that image or its annotations — ensuring consistency. 
Shepherd shepherd = mock(Shepherd.class); - // Create a mock MediaAsset MediaAsset ma = mock(MediaAsset.class); when(ma.getUUID()).thenReturn("test-media-uuid"); when(ma.getWidth()).thenReturn(800.0); when(ma.getHeight()).thenReturn(600.0); - when(ma.webURL()).thenReturn(null); // Skip actual image download in test - - // Create a mock Feature with bbox - Feature feature = mock(Feature.class); - JSONObject params = new JSONObject(); - params.put("x", 100); - params.put("y", 200); - params.put("width", 300); - params.put("height", 400); - params.put("theta", 0.5); - when(feature.getParameters()).thenReturn(params); - - // Create a mock Annotation + when(ma.webURL()).thenReturn(null); // image cannot be fetched + Annotation ann = mock(Annotation.class); when(ann.getId()).thenReturn("test-ann-uuid"); when(ann.getIAClass()).thenReturn("whale_shark"); @@ -54,12 +43,10 @@ void testBuildsCOCOStructure() throws Exception { when(ann.getViewpoint()).thenReturn("left"); when(ann.getTheta()).thenReturn(0.5); - // Create a mock MarkedIndividual MarkedIndividual ind = mock(MarkedIndividual.class); when(ind.getId()).thenReturn("test-individual-uuid"); when(ind.getDisplayName()).thenReturn("Stumpy"); - // Create a mock Encounter Encounter enc = mock(Encounter.class); ArrayList annotations = new ArrayList<>(); annotations.add(ann); @@ -69,19 +56,16 @@ void testBuildsCOCOStructure() throws Exception { List encounters = new ArrayList<>(); encounters.add(enc); - // Run export ByteArrayOutputStream baos = new ByteArrayOutputStream(); EncounterCOCOExportFile exportFile = new EncounterCOCOExportFile(encounters, shepherd); exportFile.writeTo(baos); - // Parse ZIP and extract annotations.json byte[] zipBytes = baos.toByteArray(); assertTrue(zipBytes.length > 0, "Export should produce output"); String jsonContent = extractJsonFromZip(zipBytes); assertNotNull(jsonContent, "Should contain annotations.json"); - // Verify JSON structure JSONObject coco = new JSONObject(jsonContent); 
assertTrue(coco.has("info")); assertTrue(coco.has("licenses")); @@ -89,21 +73,18 @@ void testBuildsCOCOStructure() throws Exception { assertTrue(coco.has("images")); assertTrue(coco.has("annotations")); - // Verify categories + // Image failed to export, so both images and annotations arrays should be empty + assertEquals(0, coco.getJSONArray("images").length(), + "Failed images should be excluded from manifest"); + assertEquals(0, coco.getJSONArray("annotations").length(), + "Annotations for failed images should be excluded from manifest"); + + // Categories are built from encounter data, independent of image success JSONArray categories = coco.getJSONArray("categories"); assertEquals(1, categories.length()); assertEquals("whale_shark", categories.getJSONObject(0).getString("name")); - // Verify annotations - JSONArray anns = coco.getJSONArray("annotations"); - assertEquals(1, anns.length()); - JSONObject annJson = anns.getJSONObject(0); - assertEquals("left", annJson.getString("viewpoint")); - assertEquals(0.5, annJson.getDouble("theta"), 0.001); - assertEquals("test-individual-uuid", annJson.getString("individual_uuid")); - assertEquals("Stumpy", annJson.getString("name")); - - // Verify individual_id_mapping in info + // Individual mapping is also independent of image success JSONObject info = coco.getJSONObject("info"); assertTrue(info.has("individual_id_mapping")); JSONObject mapping = info.getJSONObject("individual_id_mapping");