From cb8794212607dfa4e686276d2fa1d7feae6e23ad Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 1 Nov 2025 21:59:12 +0000 Subject: [PATCH 1/2] feat: Add update-checking to listDatasetDocumentsAll This commit introduces a new feature to the `ndi.cloud.api.documents.listDatasetDocumentsAll` function and its implementation class. The function now includes an optional mechanism to check for and retrieve new documents that may have been added to the dataset while the function was executing. The following new name-value pair arguments have been added: - `checkForUpdates`: (Default `true`) Enables or disables the update-checking feature. - `waitForUpdates`: (Default `5` seconds) The duration to pause before re-checking for new documents. - `maximumNumberUpdateReads`: (Default `100`) A limit to prevent infinite re-reading loops. The implementation now includes a de-duplication mechanism to prevent duplicate entries when re-reading pages that may contain a mix of old and new documents. The code has also been refactored to improve readability and reduce duplication. This commit also includes comprehensive help documentation for the new feature and addresses a critical bug where the function would crash if called on an empty dataset. --- .../+api/+documents/listDatasetDocumentsAll.m | 18 ++- .../+documents/ListDatasetDocumentsAll.m | 113 ++++++++++++++---- 2 files changed, 110 insertions(+), 21 deletions(-) diff --git a/src/ndi/+ndi/+cloud/+api/+documents/listDatasetDocumentsAll.m b/src/ndi/+ndi/+cloud/+api/+documents/listDatasetDocumentsAll.m index 2c1778146..ca4635a6d 100644 --- a/src/ndi/+ndi/+cloud/+api/+documents/listDatasetDocumentsAll.m +++ b/src/ndi/+ndi/+cloud/+api/+documents/listDatasetDocumentsAll.m @@ -11,6 +11,16 @@ % Name-Value Inputs: % pageSize - (Optional) The number of results to fetch per API call. % Default is 1000. 
+% checkForUpdates - (Optional) If true, the function will check for new +% documents that were added while it was running and +% will attempt to retrieve them before returning. +% Default is true. +% waitForUpdates - (Optional) The time in seconds to pause after new +% documents are detected, before fetching them and +% re-checking the document count. Default is 5. +% maximumNumberUpdateReads - (Optional) The maximum number of update +% passes to perform; this cap prevents an infinite loop +% when documents keep arriving. Default is 100. % % Outputs: % b - True if the call succeeded, false otherwise. @@ -26,12 +36,18 @@ arguments cloudDatasetID (1,1) string args.pageSize (1,1) double = 1000 + args.checkForUpdates (1,1) logical = true + args.waitForUpdates (1,1) double = 5 + args.maximumNumberUpdateReads (1,1) double = 100 end % 1. Create an instance of the implementation class. api_call = ndi.cloud.api.implementation.documents.ListDatasetDocumentsAll(... 'cloudDatasetID', cloudDatasetID, ... - 'pageSize', args.pageSize); + 'pageSize', args.pageSize, ... + 'checkForUpdates', args.checkForUpdates, ... + 'waitForUpdates', args.waitForUpdates, ... + 'maximumNumberUpdateReads', args.maximumNumberUpdateReads); % 2. Call the execute method and return its outputs directly. [b, answer, apiResponse, apiURL] = api_call.execute(); diff --git a/src/ndi/+ndi/+cloud/+api/+implementation/+documents/ListDatasetDocumentsAll.m b/src/ndi/+ndi/+cloud/+api/+implementation/+documents/ListDatasetDocumentsAll.m index df9a549f6..7773ed380 100644 --- a/src/ndi/+ndi/+cloud/+api/+implementation/+documents/ListDatasetDocumentsAll.m +++ b/src/ndi/+ndi/+cloud/+api/+implementation/+documents/ListDatasetDocumentsAll.m @@ -1,7 +1,14 @@ classdef ListDatasetDocumentsAll < ndi.cloud.api.call %LISTDATASETDOCUMENTSALL Implementation class for retrieving all documents in a dataset. +% This class handles the paginated retrieval of all document summaries from a +% cloud dataset. 
It also includes an optional mechanism to check for and fetch +% newly-added documents that may appear while the initial list is being read. +% properties retries (1,1) double = 10 + checkForUpdates (1,1) logical = true + waitForUpdates (1,1) double = 5 + maximumNumberUpdateReads (1,1) double = 100 end methods @@ -15,20 +22,38 @@ % Optional Name-Value Inputs: % 'pageSize' - The number of results per page (default 1000). % 'retries' - The number of times to retry a failed page read (default 10). + % 'checkForUpdates' - Flag to enable checking for new documents (default true). + % 'waitForUpdates' - Pause duration in seconds before re-checking (default 5). + % 'maximumNumberUpdateReads' - Limit on update re-polls (default 100). % arguments args.cloudDatasetID (1,1) string args.pageSize (1,1) double = 1000 args.retries (1,1) double = 10 + args.checkForUpdates (1,1) logical = true + args.waitForUpdates (1,1) double = 5 + args.maximumNumberUpdateReads (1,1) double = 100 end this.cloudDatasetID = args.cloudDatasetID; this.pageSize = args.pageSize; this.retries = args.retries; + this.checkForUpdates = args.checkForUpdates; + this.waitForUpdates = args.waitForUpdates; + this.maximumNumberUpdateReads = args.maximumNumberUpdateReads; end function [b, answer, apiResponse, apiURL] = execute(this) %EXECUTE Performs the API call to list all documents. + % This method first determines the total number of pages and then iterates + % through them, fetching each one using the private `fetch_and_append_page` + % helper method. + % + % If `checkForUpdates` is true, it then enters a loop to re-check the + % total document count. If new documents have been added, it fetches + % the new pages, de-duplicating results to ensure no duplicates are + % added. This continues until no new documents are found or the + % `maximumNumberUpdateReads` limit is reached. 
% % [B, ANSWER, APIRESPONSE, APIURL] = EXECUTE(THIS) % @@ -53,36 +78,84 @@ end numPages = ceil(double(numDocs) / this.pageSize); + last_page_read = 0; for p = 1:numPages - page_succeeded = false; - for attempt = 1:this.retries - [b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(... - this.cloudDatasetID, 'page', p, 'pageSize', this.pageSize); - - apiURL(end+1) = url_page; - apiResponse(end+1) = resp_page; + [b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p, answer, apiResponse, apiURL, false); + if ~b_page + b = false; + break; + end + last_page_read = p; + end + + if this.checkForUpdates && b + update_reads = 0; + [b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID); + while b_count && newNumDocs > numDocs && update_reads < this.maximumNumberUpdateReads + pause(this.waitForUpdates); + numDocs = newNumDocs; + + start_page = max(1, last_page_read); - if b_page - if isempty(answer.documents) - answer = ans_page; - else - answer.documents = cat(1, answer.documents, ans_page.documents); + numPages = ceil(double(numDocs) / this.pageSize); + for p_update = start_page:numPages + [b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p_update, answer, apiResponse, apiURL, true); + if ~b_page + b = false; + break; end - page_succeeded = true; - break; % Exit retry loop on success + last_page_read = p_update; end - end - - if ~page_succeeded - b = false; % Mark overall operation as failed - break; % Exit the main page loop + if ~b, break; end; + update_reads = update_reads + 1; + [b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID); end end + if isfield(answer, 'documents') && isempty(answer.documents) && ~iscell(answer.documents) answer.documents = {}; end end end -end + methods (Access = private) + function [b, answer, apiResponse, apiURL] = fetch_and_append_page(this, page_num, answer, apiResponse, apiURL, deduplicate) + 
%FETCH_AND_APPEND_PAGE Fetches a single page of documents and appends them. + % This helper method is responsible for fetching a single page of document + % summaries. It handles the retry logic internally. If `deduplicate` is + % true, it will compare the IDs of the fetched documents with the + % existing documents in `answer` and only append the new ones. + b = false; + for attempt = 1:this.retries + [b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(... + this.cloudDatasetID, 'page', page_num, 'pageSize', this.pageSize); + + apiURL(end+1) = url_page; + apiResponse(end+1) = resp_page; + + if b_page + if isempty(answer.documents) + answer = ans_page; + else + new_docs = ans_page.documents; + if ~isempty(new_docs) + if deduplicate + existing_ids = string({answer.documents.id}); + new_ids = string({new_docs.id}); + [~, new_indices] = setdiff(new_ids, existing_ids); + if ~isempty(new_indices) + answer.documents = cat(1, answer.documents, new_docs(new_indices)); + end + else + answer.documents = cat(1, answer.documents, new_docs); + end + end + end + b = true; + break; % Exit retry loop on success + end + end + end + end +end From 7fdce69b6831cde00dadd9fdbc9a0b3b185fd9ad Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 1 Nov 2025 22:23:52 +0000 Subject: [PATCH 2/2] Update GitHub badges --- .github/badges/code_issues.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/badges/code_issues.svg b/.github/badges/code_issues.svg index 6b2137857..e85af5260 100644 --- a/.github/badges/code_issues.svg +++ b/.github/badges/code_issues.svg @@ -1 +1 @@ -code issuescode issues22422242 \ No newline at end of file +code issuescode issues22472247 \ No newline at end of file