Merge pull request #497 from VH-Lab/feature-NDI-123-update-document-list

stevevanhooser · web-flow · commit c75dcdb4c091 · 2025-11-01T18:25:06.000-04:00
Add update-checking to listDatasetDocumentsAll
diff --git a/.github/badges/code_issues.svg b/.github/badges/code_issues.svg
@@ -1 +1 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="112.0" height="20"><linearGradient id="smooth" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="round"><rect width="112.0" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#round)"><rect width="74.0" height="20" fill="#555"/><rect x="74.0" width="38.0" height="20" fill="#e05d44"/><rect width="112.0" height="20" fill="url(#smooth)"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="380.0" y="150" fill="#010101" fill-opacity=".3" transform="scale(0.1)" textLength="640.0" lengthAdjust="spacing">code issues</text><text x="380.0" y="140" transform="scale(0.1)" textLength="640.0" lengthAdjust="spacing">code issues</text><text x="920.0" y="150" fill="#010101" fill-opacity=".3" transform="scale(0.1)" textLength="280.0" lengthAdjust="spacing">2246</text><text x="920.0" y="140" transform="scale(0.1)" textLength="280.0" lengthAdjust="spacing">2246</text></g></svg>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="112.0" height="20"><linearGradient id="smooth" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="round"><rect width="112.0" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#round)"><rect width="74.0" height="20" fill="#555"/><rect x="74.0" width="38.0" height="20" fill="#e05d44"/><rect width="112.0" height="20" fill="url(#smooth)"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="380.0" y="150" fill="#010101" fill-opacity=".3" transform="scale(0.1)" textLength="640.0" lengthAdjust="spacing">code issues</text><text x="380.0" y="140" transform="scale(0.1)" textLength="640.0" lengthAdjust="spacing">code issues</text><text x="920.0" y="150" fill="#010101" fill-opacity=".3" transform="scale(0.1)" textLength="280.0" lengthAdjust="spacing">2247</text><text x="920.0" y="140" transform="scale(0.1)" textLength="280.0" lengthAdjust="spacing">2247</text></g></svg>
diff --git a/src/ndi/+ndi/+cloud/+api/+documents/listDatasetDocumentsAll.m b/src/ndi/+ndi/+cloud/+api/+documents/listDatasetDocumentsAll.m
@@ -11,6 +11,16 @@
 %   Name-Value Inputs:
 %       pageSize        - (Optional) The number of results to fetch per API call.
 %                         Default is 1000.
+%       checkForUpdates - (Optional) If true, the function will check for new
+%                         documents that were added while it was running and
+%                         will attempt to retrieve them before returning.
+%                         Default is true.
+%       waitForUpdates  - (Optional) The time in seconds to pause, after new
+%                         documents are detected, before reading the added
+%                         pages. Default is 5.
+%       maximumNumberUpdateReads - (Optional) The maximum number of times the
+%                         function will re-poll for updates to prevent an
+%                         infinite loop. Default is 100.
 %
 %   Outputs:
 %       b            - True if the call succeeded, false otherwise.
@@ -26,12 +36,18 @@
     arguments
         cloudDatasetID (1,1) string
         args.pageSize (1,1) double = 1000
+        args.checkForUpdates (1,1) logical = true
+        args.waitForUpdates (1,1) double = 5
+        args.maximumNumberUpdateReads (1,1) double = 100
     end
 
     % 1. Create an instance of the implementation class.
     api_call = ndi.cloud.api.implementation.documents.ListDatasetDocumentsAll(...
         'cloudDatasetID', cloudDatasetID, ...
-        'pageSize', args.pageSize);
+        'pageSize', args.pageSize, ...
+        'checkForUpdates', args.checkForUpdates, ...
+        'waitForUpdates', args.waitForUpdates, ...
+        'maximumNumberUpdateReads', args.maximumNumberUpdateReads);
     
     % 2. Call the execute method and return its outputs directly.
     [b, answer, apiResponse, apiURL] = api_call.execute();
diff --git a/src/ndi/+ndi/+cloud/+api/+implementation/+documents/ListDatasetDocumentsAll.m b/src/ndi/+ndi/+cloud/+api/+implementation/+documents/ListDatasetDocumentsAll.m
@@ -1,7 +1,14 @@
 classdef ListDatasetDocumentsAll < ndi.cloud.api.call
 %LISTDATASETDOCUMENTSALL Implementation class for retrieving all documents in a dataset.
+%   This class handles the paginated retrieval of all document summaries from a
+%   cloud dataset. It also includes an optional mechanism to check for and fetch
+%   newly-added documents that may appear while the initial list is being read.
+%
     properties
         retries (1,1) double = 10
+        checkForUpdates (1,1) logical = true
+        waitForUpdates (1,1) double = 5
+        maximumNumberUpdateReads (1,1) double = 100
     end
 
     methods
@@ -15,20 +22,38 @@
             %   Optional Name-Value Inputs:
             %       'pageSize'   - The number of results per page (default 1000).
             %       'retries'    - The number of times to retry a failed page read (default 10).
+            %       'checkForUpdates' - Flag to enable checking for new documents (default true).
+            %       'waitForUpdates'  - Pause in seconds before fetching newly detected documents (default 5).
+            %       'maximumNumberUpdateReads' - Limit on update re-polls (default 100).
             %
             arguments
                 args.cloudDatasetID (1,1) string
                 args.pageSize (1,1) double = 1000
                 args.retries (1,1) double = 10
+                args.checkForUpdates (1,1) logical = true
+                args.waitForUpdates (1,1) double = 5
+                args.maximumNumberUpdateReads (1,1) double = 100
             end
             
             this.cloudDatasetID = args.cloudDatasetID;
             this.pageSize = args.pageSize;
             this.retries = args.retries;
+            this.checkForUpdates = args.checkForUpdates;
+            this.waitForUpdates = args.waitForUpdates;
+            this.maximumNumberUpdateReads = args.maximumNumberUpdateReads;
         end
 
         function [b, answer, apiResponse, apiURL] = execute(this)
             %EXECUTE Performs the API call to list all documents.
+            %   This method first determines the total number of pages and then iterates
+            %   through them, fetching each one using the private `fetch_and_append_page`
+            %   helper method.
+            %
+            %   If `checkForUpdates` is true, it then enters a loop to re-check the
+            %   total document count. If new documents have been added, it fetches
+            %   the new pages, de-duplicating results to ensure no duplicates are
+            %   added. This continues until no new documents are found or the
+            %   `maximumNumberUpdateReads` limit is reached.
             %
             %   [B, ANSWER, APIRESPONSE, APIURL] = EXECUTE(THIS)
             %
@@ -53,36 +78,84 @@
             end
         
             numPages = ceil(double(numDocs) / this.pageSize);
+            last_page_read = 0;
 
             for p = 1:numPages
-                page_succeeded = false;
-                for attempt = 1:this.retries
-                    [b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(...
-                        this.cloudDatasetID, 'page', p, 'pageSize', this.pageSize);
-                    
-                    apiURL(end+1) = url_page;
-                    apiResponse(end+1) = resp_page;
+                [b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p, answer, apiResponse, apiURL, false);
+                if ~b_page
+                    b = false;
+                    break;
+                end
+                last_page_read = p;
+            end
+
+            if this.checkForUpdates && b
+                update_reads = 0;
+                [b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID);
+                while b_count && newNumDocs > numDocs && update_reads < this.maximumNumberUpdateReads
+                    pause(this.waitForUpdates);
+                    numDocs = newNumDocs;
+
+                    start_page = max(1, last_page_read);
 
-                    if b_page
-                        if isempty(answer.documents)
-                            answer = ans_page;
-                        else
-                            answer.documents = cat(1, answer.documents, ans_page.documents);
+                    numPages = ceil(double(numDocs) / this.pageSize);
+                    for p_update = start_page:numPages
+                        [b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p_update, answer, apiResponse, apiURL, true);
+                        if ~b_page
+                            b = false;
+                            break;
                         end
-                        page_succeeded = true;
-                        break; % Exit retry loop on success
+                        last_page_read = p_update;
                     end
-                end
-
-                if ~page_succeeded
-                    b = false; % Mark overall operation as failed
-                    break; % Exit the main page loop
+                    if ~b, break; end;
+                    update_reads = update_reads + 1;
+                    [b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID);
                 end
             end
+
             if isfield(answer, 'documents') && isempty(answer.documents) && ~iscell(answer.documents)
                 answer.documents = {};
             end
         end
     end
-end
 
+    methods (Access = private)
+        function [b, answer, apiResponse, apiURL] = fetch_and_append_page(this, page_num, answer, apiResponse, apiURL, deduplicate)
+            %FETCH_AND_APPEND_PAGE Fetch one page of document summaries and append it to ANSWER.
+            %   Requests page PAGE_NUM up to `this.retries` times, stopping at the first
+            %   success; every attempt's URL and response are appended to APIURL and
+            %   APIRESPONSE. B is true only if an attempt succeeded. If DEDUPLICATE is true,
+            %   only documents whose `id` is not already in ANSWER are appended, in page order.
+            b = false;
+            for attempt = 1:this.retries
+                [b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(...
+                    this.cloudDatasetID, 'page', page_num, 'pageSize', this.pageSize);
+
+                apiURL(end+1) = url_page;
+                apiResponse(end+1) = resp_page;
+
+                if b_page
+                    if isempty(answer.documents)
+                        answer = ans_page; % nothing accumulated yet: adopt this page's answer as-is
+                    else
+                        new_docs = ans_page.documents;
+                        if ~isempty(new_docs)
+                            if deduplicate
+                                existing_ids = string({answer.documents.id});
+                                new_ids = string({new_docs.id});
+                                [~, new_indices] = setdiff(new_ids, existing_ids, 'stable'); % keep page order (default sorts by id)
+                                if ~isempty(new_indices)
+                                    answer.documents = cat(1, answer.documents, new_docs(new_indices));
+                                end
+                            else
+                                answer.documents = cat(1, answer.documents, new_docs);
+                            end
+                        end
+                    end
+                    b = true;
+                    break; % Exit retry loop on success
+                end
+            end
+        end
+    end
+end