Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src/ndi/+ndi/+cloud/+api/+documents/listDatasetDocumentsAll.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@
% Name-Value Inputs:
% pageSize - (Optional) The number of results to fetch per API call.
% Default is 1000.
% checkForUpdates - (Optional) If true, the function will check for new
% documents that were added while it was running and
% will attempt to retrieve them before returning.
% Default is true.
% waitForUpdates - (Optional) The time in seconds to wait before
% re-checking the document count for updates.
% Default is 5.
% maximumNumberUpdateReads - (Optional) The maximum number of times the
% function will re-poll for updates to prevent an
% infinite loop. Default is 100.
%
% Outputs:
% b - True if the call succeeded, false otherwise.
Expand All @@ -26,12 +36,18 @@
arguments
cloudDatasetID (1,1) string
args.pageSize (1,1) double = 1000
args.checkForUpdates (1,1) logical = true
args.waitForUpdates (1,1) double = 5
args.maximumNumberUpdateReads (1,1) double = 100
end

% 1. Create an instance of the implementation class.
api_call = ndi.cloud.api.implementation.documents.ListDatasetDocumentsAll(...
'cloudDatasetID', cloudDatasetID, ...
'pageSize', args.pageSize);
'pageSize', args.pageSize, ...
'checkForUpdates', args.checkForUpdates, ...
'waitForUpdates', args.waitForUpdates, ...
'maximumNumberUpdateReads', args.maximumNumberUpdateReads);

% 2. Call the execute method and return its outputs directly.
[b, answer, apiResponse, apiURL] = api_call.execute();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
classdef ListDatasetDocumentsAll < ndi.cloud.api.call
%LISTDATASETDOCUMENTSALL Implementation class for retrieving all documents in a dataset.
% This class handles the paginated retrieval of all document summaries from a
% cloud dataset. It also includes an optional mechanism to check for and fetch
% newly-added documents that may appear while the initial list is being read.
%
properties
retries (1,1) double = 10
checkForUpdates (1,1) logical = true
waitForUpdates (1,1) double = 5
maximumNumberUpdateReads (1,1) double = 100
end

methods
Expand All @@ -15,20 +22,38 @@
% Optional Name-Value Inputs:
% 'pageSize' - The number of results per page (default 1000).
% 'retries' - The number of times to retry a failed page read (default 10).
% 'checkForUpdates' - Flag to enable checking for new documents (default true).
% 'waitForUpdates' - Pause duration in seconds before re-checking (default 5).
% 'maximumNumberUpdateReads' - Limit on update re-polls (default 100).
%
arguments
args.cloudDatasetID (1,1) string
args.pageSize (1,1) double = 1000
args.retries (1,1) double = 10
args.checkForUpdates (1,1) logical = true
args.waitForUpdates (1,1) double = 5
args.maximumNumberUpdateReads (1,1) double = 100
end

this.cloudDatasetID = args.cloudDatasetID;
this.pageSize = args.pageSize;
this.retries = args.retries;
this.checkForUpdates = args.checkForUpdates;
this.waitForUpdates = args.waitForUpdates;
this.maximumNumberUpdateReads = args.maximumNumberUpdateReads;
end

function [b, answer, apiResponse, apiURL] = execute(this)
%EXECUTE Performs the API call to list all documents.
% This method first determines the total number of pages and then iterates
% through them, fetching each one using the private `fetch_and_append_page`
% helper method.
%
% If `checkForUpdates` is true, it then re-checks the total document
% count. While the count has grown, it waits `waitForUpdates` seconds and
% re-reads from the last page read onward, appending only documents whose
% IDs are not already present. This continues until the count stops
% growing, a page read fails, or the `maximumNumberUpdateReads` limit is
% reached.
%
% [B, ANSWER, APIRESPONSE, APIURL] = EXECUTE(THIS)
%
Expand All @@ -53,36 +78,84 @@
end

numPages = ceil(double(numDocs) / this.pageSize);
last_page_read = 0;

for p = 1:numPages
page_succeeded = false;
for attempt = 1:this.retries
[b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(...
this.cloudDatasetID, 'page', p, 'pageSize', this.pageSize);

apiURL(end+1) = url_page;
apiResponse(end+1) = resp_page;
[b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p, answer, apiResponse, apiURL, false);
if ~b_page
b = false;
break;
end
last_page_read = p;
end

if this.checkForUpdates && b
update_reads = 0;
[b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID);
while b_count && newNumDocs > numDocs && update_reads < this.maximumNumberUpdateReads
pause(this.waitForUpdates);
numDocs = newNumDocs;

start_page = max(1, last_page_read);

if b_page
if isempty(answer.documents)
answer = ans_page;
else
answer.documents = cat(1, answer.documents, ans_page.documents);
numPages = ceil(double(numDocs) / this.pageSize);
for p_update = start_page:numPages
[b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p_update, answer, apiResponse, apiURL, true);
if ~b_page
b = false;
break;
end
page_succeeded = true;
break; % Exit retry loop on success
last_page_read = p_update;
end
end

if ~page_succeeded
b = false; % Mark overall operation as failed
break; % Exit the main page loop
if ~b, break; end;

Check notice

Code scanning / Code Analyzer

Extra semicolon is unnecessary. Note

Extra semicolon is unnecessary.
update_reads = update_reads + 1;
[b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID);
end
end

if isfield(answer, 'documents') && isempty(answer.documents) && ~iscell(answer.documents)
answer.documents = {};
end
end
end
end

methods (Access = private)
function [b, answer, apiResponse, apiURL] = fetch_and_append_page(this, page_num, answer, apiResponse, apiURL, deduplicate)
    %FETCH_AND_APPEND_PAGE Fetch one page of document summaries and append it to ANSWER.
    %
    %   [B, ANSWER, APIRESPONSE, APIURL] = FETCH_AND_APPEND_PAGE(THIS, PAGE_NUM, ...
    %       ANSWER, APIRESPONSE, APIURL, DEDUPLICATE)
    %
    %   Requests page PAGE_NUM (THIS.pageSize results per page) through
    %   ndi.cloud.api.documents.listDatasetDocuments, trying up to
    %   THIS.retries times and stopping at the first successful attempt.
    %
    %   Inputs:
    %     page_num    - Page number to request.
    %     answer      - Accumulated result; its 'documents' field is read and
    %                   extended. If that field is empty, ANSWER is replaced
    %                   wholesale by the answer returned for this page.
    %     apiResponse - Accumulated responses; one entry is appended per attempt.
    %     apiURL      - Accumulated URLs; one entry is appended per attempt.
    %     deduplicate - If true, only documents whose 'id' is not already
    %                   present in ANSWER.documents are appended.
    %
    %   Outputs:
    %     b           - True if an attempt succeeded, false if all failed.
    %     answer, apiResponse, apiURL - The updated accumulators.

    b = false;
    for attempt = 1:this.retries
        [b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(...
            this.cloudDatasetID, 'page', page_num, 'pageSize', this.pageSize);

        % Every attempt (including failed ones) is recorded, so the final
        % length is not known in advance; growth is bounded by this.retries
        % per call, hence the Code Analyzer preallocation hint is suppressed.
        apiURL(end+1) = url_page; %#ok<AGROW>
        apiResponse(end+1) = resp_page; %#ok<AGROW>

        if ~b_page
            continue; % this attempt failed; try again
        end

        if isempty(answer.documents)
            % Nothing accumulated yet: adopt this page's answer as-is.
            answer = ans_page;
        else
            new_docs = ans_page.documents;
            if ~isempty(new_docs)
                if deduplicate
                    existing_ids = string({answer.documents.id});
                    new_ids = string({new_docs.id});
                    % 'stable' keeps the page's own ordering; the default
                    % ('sorted') would append new documents in ID order,
                    % unlike the non-deduplicating path below.
                    [~, new_indices] = setdiff(new_ids, existing_ids, 'stable');
                    new_docs = new_docs(new_indices);
                end
                if ~isempty(new_docs)
                    answer.documents = cat(1, answer.documents, new_docs);
                end
            end
        end

        b = true;
        break; % Exit retry loop on success
    end
end
end
end