Skip to content

Commit c75dcdb

Browse files
Merge pull request #497 from VH-Lab/feature-NDI-123-update-document-list
Add update-checking to listDatasetDocumentsAll
2 parents b0f407a + 839dbc5 commit c75dcdb

File tree

3 files changed

+111
-22
lines changed

3 files changed

+111
-22
lines changed

.github/badges/code_issues.svg

Lines changed: 1 addition & 1 deletion
Loading

src/ndi/+ndi/+cloud/+api/+documents/listDatasetDocumentsAll.m

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,16 @@
1111
% Name-Value Inputs:
1212
% pageSize - (Optional) The number of results to fetch per API call.
1313
% Default is 1000.
14+
% checkForUpdates - (Optional) If true, the function will check for new
15+
% documents that were added while it was running and
16+
% will attempt to retrieve them before returning.
17+
% Default is true.
18+
% waitForUpdates - (Optional) The time in seconds to wait before
19+
% re-reading pages after new documents are detected.
20+
% Default is 5.
21+
% maximumNumberUpdateReads - (Optional) The maximum number of times the
22+
% function will re-poll for updates to prevent an
23+
% infinite loop. Default is 100.
1424
%
1525
% Outputs:
1626
% b - True if the call succeeded, false otherwise.
@@ -26,12 +36,18 @@
2636
arguments
2737
cloudDatasetID (1,1) string
2838
args.pageSize (1,1) double = 1000
39+
args.checkForUpdates (1,1) logical = true
40+
args.waitForUpdates (1,1) double = 5
41+
args.maximumNumberUpdateReads (1,1) double = 100
2942
end
3043

3144
% 1. Create an instance of the implementation class.
3245
api_call = ndi.cloud.api.implementation.documents.ListDatasetDocumentsAll(...
3346
'cloudDatasetID', cloudDatasetID, ...
34-
'pageSize', args.pageSize);
47+
'pageSize', args.pageSize, ...
48+
'checkForUpdates', args.checkForUpdates, ...
49+
'waitForUpdates', args.waitForUpdates, ...
50+
'maximumNumberUpdateReads', args.maximumNumberUpdateReads);
3551

3652
% 2. Call the execute method and return its outputs directly.
3753
[b, answer, apiResponse, apiURL] = api_call.execute();

src/ndi/+ndi/+cloud/+api/+implementation/+documents/ListDatasetDocumentsAll.m

Lines changed: 93 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
classdef ListDatasetDocumentsAll < ndi.cloud.api.call
22
%LISTDATASETDOCUMENTSALL Implementation class for retrieving all documents in a dataset.
3+
% This class handles the paginated retrieval of all document summaries from a
4+
% cloud dataset. It also includes an optional mechanism to check for and fetch
5+
% newly-added documents that may appear while the initial list is being read.
6+
%
37
properties
48
retries (1,1) double = 10
9+
checkForUpdates (1,1) logical = true
10+
waitForUpdates (1,1) double = 5
11+
maximumNumberUpdateReads (1,1) double = 100
512
end
613

714
methods
@@ -15,20 +22,38 @@
1522
% Optional Name-Value Inputs:
1623
% 'pageSize' - The number of results per page (default 1000).
1724
% 'retries' - The number of times to retry a failed page read (default 10).
25+
% 'checkForUpdates' - Flag to enable checking for new documents (default true).
26+
% 'waitForUpdates' - Pause duration in seconds before re-checking (default 5).
27+
% 'maximumNumberUpdateReads' - Limit on update re-polls (default 100).
1828
%
1929
arguments
2030
args.cloudDatasetID (1,1) string
2131
args.pageSize (1,1) double = 1000
2232
args.retries (1,1) double = 10
33+
args.checkForUpdates (1,1) logical = true
34+
args.waitForUpdates (1,1) double = 5
35+
args.maximumNumberUpdateReads (1,1) double = 100
2336
end
2437

2538
this.cloudDatasetID = args.cloudDatasetID;
2639
this.pageSize = args.pageSize;
2740
this.retries = args.retries;
41+
this.checkForUpdates = args.checkForUpdates;
42+
this.waitForUpdates = args.waitForUpdates;
43+
this.maximumNumberUpdateReads = args.maximumNumberUpdateReads;
2844
end
2945

3046
function [b, answer, apiResponse, apiURL] = execute(this)
3147
%EXECUTE Performs the API call to list all documents.
48+
% This method first determines the total number of pages and then iterates
49+
% through them, fetching each one using the private `fetch_and_append_page`
50+
% helper method.
51+
%
52+
% If `checkForUpdates` is true, it then enters a loop to re-check the
53+
% total document count. If new documents have been added, it fetches
54+
% the new pages, de-duplicating results to ensure no duplicates are
55+
% added. This continues until no new documents are found or the
56+
% `maximumNumberUpdateReads` limit is reached.
3257
%
3358
% [B, ANSWER, APIRESPONSE, APIURL] = EXECUTE(THIS)
3459
%
@@ -53,36 +78,84 @@
5378
end
5479

5580
numPages = ceil(double(numDocs) / this.pageSize);
81+
last_page_read = 0;
5682

5783
for p = 1:numPages
58-
page_succeeded = false;
59-
for attempt = 1:this.retries
60-
[b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(...
61-
this.cloudDatasetID, 'page', p, 'pageSize', this.pageSize);
62-
63-
apiURL(end+1) = url_page;
64-
apiResponse(end+1) = resp_page;
84+
[b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p, answer, apiResponse, apiURL, false);
85+
if ~b_page
86+
b = false;
87+
break;
88+
end
89+
last_page_read = p;
90+
end
91+
92+
if this.checkForUpdates && b
93+
update_reads = 0;
94+
[b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID);
95+
while b_count && newNumDocs > numDocs && update_reads < this.maximumNumberUpdateReads
96+
pause(this.waitForUpdates);
97+
numDocs = newNumDocs;
98+
99+
start_page = max(1, last_page_read);
65100

66-
if b_page
67-
if isempty(answer.documents)
68-
answer = ans_page;
69-
else
70-
answer.documents = cat(1, answer.documents, ans_page.documents);
101+
numPages = ceil(double(numDocs) / this.pageSize);
102+
for p_update = start_page:numPages
103+
[b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p_update, answer, apiResponse, apiURL, true);
104+
if ~b_page
105+
b = false;
106+
break;
71107
end
72-
page_succeeded = true;
73-
break; % Exit retry loop on success
108+
last_page_read = p_update;
74109
end
75-
end
76-
77-
if ~page_succeeded
78-
b = false; % Mark overall operation as failed
79-
break; % Exit the main page loop
110+
if ~b, break; end;
111+
update_reads = update_reads + 1;
112+
[b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID);
80113
end
81114
end
115+
82116
if isfield(answer, 'documents') && isempty(answer.documents) && ~iscell(answer.documents)
83117
answer.documents = {};
84118
end
85119
end
86120
end
87-
end
88121

122+
methods (Access = private)
123+
function [b, answer, apiResponse, apiURL] = fetch_and_append_page(this, page_num, answer, apiResponse, apiURL, deduplicate)
    %FETCH_AND_APPEND_PAGE Fetches a single page of documents and appends them.
    %
    %   [B, ANSWER, APIRESPONSE, APIURL] = FETCH_AND_APPEND_PAGE(THIS, PAGE_NUM, ANSWER, APIRESPONSE, APIURL, DEDUPLICATE)
    %
    %   Requests page PAGE_NUM (using this.pageSize) via
    %   ndi.cloud.api.documents.listDatasetDocuments, retrying up to
    %   this.retries times until one attempt reports success.
    %
    %   Inputs:
    %       page_num    - Page number to request.
    %       answer      - Accumulated result so far; its `documents` field
    %                     is extended with the documents of the new page.
    %       apiResponse - Accumulated responses; one entry is appended for
    %                     EVERY attempt (successful or not).
    %       apiURL      - Accumulated URLs; one entry is appended for EVERY
    %                     attempt (successful or not).
    %       deduplicate - If true, only documents whose `id` is not already
    %                     present in answer.documents are appended.
    %
    %   Outputs:
    %       b           - True if some attempt succeeded, false if all
    %                     this.retries attempts failed.
    %       answer, apiResponse, apiURL - The updated accumulators.
    %
    %   De-duplicated documents are appended in the order in which they
    %   appear on the fetched page (not in sorted-ID order).

    b = false;
    for attempt = 1:this.retries
        [b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(...
            this.cloudDatasetID, 'page', page_num, 'pageSize', this.pageSize);

        % Record every attempt, including failed ones, so that callers
        % can inspect the full request history.
        apiURL(end+1) = url_page;
        apiResponse(end+1) = resp_page;

        if ~b_page
            continue; % This attempt failed; try again.
        end

        if isempty(answer.documents)
            % Nothing accumulated yet: adopt the page's answer wholesale.
            answer = ans_page;
        else
            new_docs = ans_page.documents;
            if ~isempty(new_docs)
                if deduplicate
                    existing_ids = string({answer.documents.id});
                    new_ids = string({new_docs.id});
                    % 'stable' keeps the page's own ordering; without it
                    % setdiff returns indices in sorted-ID order, which
                    % would silently reorder the appended documents.
                    [~, new_indices] = setdiff(new_ids, existing_ids, 'stable');
                    if ~isempty(new_indices)
                        answer.documents = cat(1, answer.documents, new_docs(new_indices));
                    end
                else
                    answer.documents = cat(1, answer.documents, new_docs);
                end
            end
        end

        b = true;
        break; % Exit retry loop on success
    end
end
160+
end
161+
end

0 commit comments

Comments
 (0)