|
1 | 1 | classdef ListDatasetDocumentsAll < ndi.cloud.api.call |
2 | 2 | %LISTDATASETDOCUMENTSALL Implementation class for retrieving all documents in a dataset. |
| 3 | +% This class handles the paginated retrieval of all document summaries from a |
| 4 | +% cloud dataset. It also includes an optional mechanism to check for and fetch |
| 5 | +% newly-added documents that may appear while the initial list is being read. |
| 6 | +% |
3 | 7 | properties |
4 | 8 | retries (1,1) double = 10 |
| 9 | + checkForUpdates (1,1) logical = true |
| 10 | + waitForUpdates (1,1) double = 5 |
| 11 | + maximumNumberUpdateReads (1,1) double = 100 |
5 | 12 | end |
6 | 13 |
|
7 | 14 | methods |
|
15 | 22 | % Optional Name-Value Inputs: |
16 | 23 | % 'pageSize' - The number of results per page (default 1000). |
17 | 24 | % 'retries' - The number of times to retry a failed page read (default 10). |
| 25 | + % 'checkForUpdates' - Flag to enable checking for new documents (default true). |
| 26 | + % 'waitForUpdates' - Pause duration in seconds before re-checking (default 5). |
| 27 | + % 'maximumNumberUpdateReads' - Limit on update re-polls (default 100). |
18 | 28 | % |
19 | 29 | arguments |
20 | 30 | args.cloudDatasetID (1,1) string |
21 | 31 | args.pageSize (1,1) double = 1000 |
22 | 32 | args.retries (1,1) double = 10 |
| 33 | + args.checkForUpdates (1,1) logical = true |
| 34 | + args.waitForUpdates (1,1) double = 5 |
| 35 | + args.maximumNumberUpdateReads (1,1) double = 100 |
23 | 36 | end |
24 | 37 |
|
25 | 38 | this.cloudDatasetID = args.cloudDatasetID; |
26 | 39 | this.pageSize = args.pageSize; |
27 | 40 | this.retries = args.retries; |
| 41 | + this.checkForUpdates = args.checkForUpdates; |
| 42 | + this.waitForUpdates = args.waitForUpdates; |
| 43 | + this.maximumNumberUpdateReads = args.maximumNumberUpdateReads; |
28 | 44 | end |
29 | 45 |
|
30 | 46 | function [b, answer, apiResponse, apiURL] = execute(this) |
31 | 47 | %EXECUTE Performs the API call to list all documents. |
| 48 | + % This method first determines the total number of pages and then iterates |
| 49 | + % through them, fetching each one using the private `fetch_and_append_page` |
| 50 | + % helper method. |
| 51 | + % |
| 52 | + % If `checkForUpdates` is true, it then enters a loop to re-check the |
| 53 | + % total document count. If new documents have been added, it fetches |
| 54 | + % the new pages, de-duplicating results to ensure no duplicates are |
| 55 | + % added. This continues until no new documents are found or the |
| 56 | + % `maximumNumberUpdateReads` limit is reached. |
32 | 57 | % |
33 | 58 | % [B, ANSWER, APIRESPONSE, APIURL] = EXECUTE(THIS) |
34 | 59 | % |
|
53 | 78 | end |
54 | 79 |
|
55 | 80 | numPages = ceil(double(numDocs) / this.pageSize); |
| 81 | + last_page_read = 0; |
56 | 82 |
|
57 | 83 | for p = 1:numPages |
58 | | - page_succeeded = false; |
59 | | - for attempt = 1:this.retries |
60 | | - [b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(... |
61 | | - this.cloudDatasetID, 'page', p, 'pageSize', this.pageSize); |
62 | | - |
63 | | - apiURL(end+1) = url_page; |
64 | | - apiResponse(end+1) = resp_page; |
| 84 | + [b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p, answer, apiResponse, apiURL, false); |
| 85 | + if ~b_page |
| 86 | + b = false; |
| 87 | + break; |
| 88 | + end |
| 89 | + last_page_read = p; |
| 90 | + end |
| 91 | + |
| 92 | + if this.checkForUpdates && b |
| 93 | + update_reads = 0; |
| 94 | + [b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID); |
| 95 | + while b_count && newNumDocs > numDocs && update_reads < this.maximumNumberUpdateReads |
| 96 | + pause(this.waitForUpdates); |
| 97 | + numDocs = newNumDocs; |
| 98 | + |
| 99 | + start_page = max(1, last_page_read); |
65 | 100 |
|
66 | | - if b_page |
67 | | - if isempty(answer.documents) |
68 | | - answer = ans_page; |
69 | | - else |
70 | | - answer.documents = cat(1, answer.documents, ans_page.documents); |
| 101 | + numPages = ceil(double(numDocs) / this.pageSize); |
| 102 | + for p_update = start_page:numPages |
| 103 | + [b_page, answer, apiResponse, apiURL] = this.fetch_and_append_page(p_update, answer, apiResponse, apiURL, true); |
| 104 | + if ~b_page |
| 105 | + b = false; |
| 106 | + break; |
71 | 107 | end |
72 | | - page_succeeded = true; |
73 | | - break; % Exit retry loop on success |
| 108 | + last_page_read = p_update; |
74 | 109 | end |
75 | | - end |
76 | | - |
77 | | - if ~page_succeeded |
78 | | - b = false; % Mark overall operation as failed |
79 | | - break; % Exit the main page loop |
| 110 | + if ~b, break; end; |
| 111 | + update_reads = update_reads + 1; |
| 112 | + [b_count, newNumDocs, ~, ~] = ndi.cloud.api.documents.documentCount(this.cloudDatasetID); |
80 | 113 | end |
81 | 114 | end |
| 115 | + |
82 | 116 | if isfield(answer, 'documents') && isempty(answer.documents) && ~iscell(answer.documents) |
83 | 117 | answer.documents = {}; |
84 | 118 | end |
85 | 119 | end |
86 | 120 | end |
87 | | -end |
88 | 121 |
|
| 122 | + methods (Access = private) |
function [b, answer, apiResponse, apiURL] = fetch_and_append_page(this, page_num, answer, apiResponse, apiURL, deduplicate)
%FETCH_AND_APPEND_PAGE Fetch one page of document summaries and append it.
%
%   [B, ANSWER, APIRESPONSE, APIURL] = FETCH_AND_APPEND_PAGE(THIS, PAGE_NUM, ...
%       ANSWER, APIRESPONSE, APIURL, DEDUPLICATE)
%
%   Requests page PAGE_NUM (with this.pageSize results per page) from
%   ndi.cloud.api.documents.listDatasetDocuments, trying up to this.retries
%   times and stopping at the first successful attempt.
%
%   Inputs:
%     page_num    - Page number to request.
%     answer      - Accumulated result; must have a 'documents' field.
%                   If that field is empty, ANSWER is replaced wholesale by
%                   the fetched page.
%     apiResponse - Accumulated responses; one entry is appended per attempt.
%     apiURL      - Accumulated URLs; one entry is appended per attempt.
%     deduplicate - If true, only documents whose 'id' is not already
%                   present in answer.documents are appended.
%
%   Outputs:
%     b           - True if some attempt succeeded, false if every attempt
%                   failed (or this.retries is less than 1).
%     answer, apiResponse, apiURL - The updated accumulators.
%
%   NOTE(review): the de-duplication path indexes answer.documents and the
%   fetched documents as struct arrays with an 'id' field - confirm against
%   what listDatasetDocuments returns.
    b = false;
    for attempt = 1:this.retries
        [b_page, ans_page, resp_page, url_page] = ndi.cloud.api.documents.listDatasetDocuments(...
            this.cloudDatasetID, 'page', page_num, 'pageSize', this.pageSize);

        % Every attempt is logged, whether or not it succeeded.
        apiURL(end+1) = url_page;
        apiResponse(end+1) = resp_page;

        if ~b_page
            continue; % failed attempt: retry
        end

        if isempty(answer.documents)
            % Nothing accumulated yet: adopt the page's answer as-is.
            answer = ans_page;
        else
            new_docs = ans_page.documents;
            if ~isempty(new_docs) && deduplicate
                existing_ids = string({answer.documents.id});
                new_ids = string({new_docs.id});
                % 'stable' keeps the server's ordering of the new documents;
                % the default would append them sorted by ID string.
                [~, new_indices] = setdiff(new_ids, existing_ids, 'stable');
                new_docs = new_docs(new_indices);
            end
            if ~isempty(new_docs)
                answer.documents = cat(1, answer.documents, new_docs);
            end
        end

        b = true;
        break; % success: stop retrying
    end
end
| 160 | + end |
| 161 | +end |
0 commit comments