URO-352 DTS importer setup for narrative #3677

Open · wants to merge 2 commits into develop
5 changes: 5 additions & 0 deletions kbase-extension/static/kbase/config/staging_upload.json
@@ -52,6 +52,10 @@
"id": "import_specification",
"name": "Import Specification"
},
{
"id": "dts_manifest",
"name": "Data Transfer Service Manifest"
},
{
"id": "decompress",
"name": "Decompress/Unpack"
@@ -62,6 +66,7 @@
"fastq_reads_interleaved",
"fastq_reads_noninterleaved",
"gff_metagenome",
"gff_genome",
"assembly",
"genbank_genome"
],
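Taken together, these two hunks register a "Data Transfer Service Manifest" entry in the importer dropdown and add gff_genome to what appears to be the list of types that can be routed through bulk import. A condensed sketch of how the touched portions of staging_upload.json might read after the change — the surrounding key names (e.g. "dropdown_order", "bulk_import_types") are assumptions for illustration; only the individual entries are taken from the diff, and "..." marks entries not shown here:

    "dropdown_order": [
        ...,
        { "id": "import_specification", "name": "Import Specification" },
        { "id": "dts_manifest", "name": "Data Transfer Service Manifest" },
        { "id": "decompress", "name": "Decompress/Unpack" },
        ...
    ],
    "bulk_import_types": [
        "fastq_reads_interleaved",
        "fastq_reads_noninterleaved",
        "gff_metagenome",
        "gff_genome",
        "assembly",
        "genbank_genome"
    ]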
5 changes: 4 additions & 1 deletion kbase-extension/static/kbase/js/api/StagingServiceClient.js
@@ -21,7 +21,10 @@ define(['RestAPIClient'], (RestAPIClient) => {
rename: { method: 'post', path: 'rename/${path}' },
decompress: { method: 'patch', path: 'decompress/${path}' },
importer_mappings: { method: 'get', path: 'importer_mappings/?${file_list}' },
bulkSpecification: { method: 'get', path: 'bulk_specification/?files=${files}' },
bulkSpecification: {
method: 'get',
path: 'bulk_specification/?files=${files}&${flag}',
},
write_bulk_specification: { method: 'post', path: 'write_bulk_specification/' },
},
});
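The client change extends the bulkSpecification endpoint's path template with an extra ${flag} query segment, presumably so the staging service can be told to parse Data Transfer Service manifests rather than spreadsheet-style specifications. A minimal illustration of how that template might expand once parameters are substituted — the expand helper and the file names below are hypothetical, written only to show the intended URL shape, not the actual RestAPIClient implementation:

    // hypothetical expansion helper, for illustration only
    const expand = (template, params) =>
        template.replace(/\$\{(\w+)\}/g, (_, key) => params[key] ?? '');

    // Spreadsheet (xSV) case: no flag, so the trailing separator carries an empty value
    expand('bulk_specification/?files=${files}&${flag}', {
        files: encodeURIComponent('genomes.csv,reads.tsv'),
        flag: '',
    });
    // -> 'bulk_specification/?files=genomes.csv%2Creads.tsv&'

    // DTS manifest case: the flag is appended as a bare query parameter
    expand('bulk_specification/?files=${files}&${flag}', {
        files: encodeURIComponent('manifest.json'),
        flag: 'dts',
    });
    // -> 'bulk_specification/?files=manifest.json&dts'

The bulk-import setup utility changed in the next file is what supplies that flag: requestBulkImportSpec(dtsFiles, 'dts') passes 'dts' for Data Transfer Service manifests and an empty flag for spreadsheet import specifications.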
@@ -14,7 +14,7 @@ define([

/**
* This makes a call to the Staging Service to fetch information from bulk specification files.
* This then gets processed through `processSpreadsheetFileData` before being returned.
* This then gets processed through `processBulkImportSpecData` before being returned.
* @param {Array[string]} files - array of file path names to treat as import specifications
* @returns Promise that resolves into information that can be used to open a bulk import cell.
* This has the format:
@@ -32,45 +32,15 @@
* }
* }
* @throws Errors.ImportSetupError if an error occurs in either data fetching from the Staging
* Service, or in the initial parsing done by `processSpreadsheetFileData`
* Service, or in the initial parsing done by `processBulkImportSpecData`
*/
function getSpreadsheetFileInfo(files) {
if (!files || files.length === 0) {
return Promise.resolve({});
}
const stagingUrl = Config.url('staging_api_url');
const stagingServiceClient = new StagingServiceClient({
root: stagingUrl,
token: Runtime.make().authToken(),
});

// This is overkill, but a little future proofing. We have to make a GET call with an
// undetermined number of files, so if there are more than we can allow in the URL, gotta break that
// into multiple calls.

// a little cheating here to figure out the length allowance. Maybe it should be in the client?
const maxQueryLength = 2048 - stagingUrl.length - '/bulk_specification/?files='.length;
const bulkSpecProms = [];

while (files.length) {
const fileBatch = [];
let remainingLength = maxQueryLength;
while (
files.length &&
remainingLength - files[0].length - 1 >= 0 // -1 is for the comma
) {
const nextFile = files.shift();
fileBatch.push(nextFile);
remainingLength -= nextFile.length + 1;
}
bulkSpecProms.push(
stagingServiceClient.bulkSpecification({
files: encodeURIComponent(fileBatch.join(',')),
})
);
}
function getBulkImportFileInfo(xsvFiles, dtsFiles) {
// noting here that these are effectively jQuery promises, so we can't just
// make this an async/await function, but need to wrap with Promise.all.
const xsvFileProms = requestBulkImportSpec(xsvFiles);
const dtsFileProms = requestBulkImportSpec(dtsFiles, 'dts');

return Promise.all(bulkSpecProms)
return Promise.all([...xsvFileProms, ...dtsFileProms])
.then((result) => {
// join results of all calls together
const errors = [];
@@ -123,10 +93,54 @@
);
})
.then((result) => {
return processSpreadsheetFileData(result);
return processBulkImportSpecData(result);
});
}

function requestBulkImportSpec(files, flag = '') {
/**
* This returns an array of jQuery Promises, which is what
* the staging service client uses, so it can't be (easily)
* cast into async/await.
*/
if (!files || files.length === 0) {
return [];
}
const stagingUrl = Config.url('staging_api_url');
const stagingServiceClient = new StagingServiceClient({
root: stagingUrl,
token: Runtime.make().authToken(),
});
// This is overkill, but a little future proofing. We have to make a GET call with an
// undetermined number of files, so if there are more than we can allow in the URL, gotta break that
// into multiple calls.

// a little cheating here to figure out the length allowance. Maybe it should be in the client?
const path = '/bulk_specification/?' + (flag ? flag + '&' : '') + 'files=';
const maxQueryLength = 2048 - stagingUrl.length - path.length;
const bulkSpecProms = [];

while (files.length) {
const fileBatch = [];
let remainingLength = maxQueryLength;
while (
files.length &&
remainingLength - files[0].length - 1 >= 0 // -1 is for the comma
) {
const nextFile = files.shift();
fileBatch.push(nextFile);
remainingLength -= nextFile.length + 1;
}
bulkSpecProms.push(
stagingServiceClient.bulkSpecification({
files: encodeURIComponent(fileBatch.join(',')),
flag,
})
);
}
return bulkSpecProms;
}

/**
* This function does some preprocessing on the spreadsheet file data. Specifically,
* those parameters that are static dropdowns or checkboxes need to translate their input
@@ -153,7 +167,7 @@
* TODO: also return the fetched app specs to avoid fetching them twice?
* @param {Object} data
*/
async function processSpreadsheetFileData(data) {
async function processBulkImportSpecData(data) {
// map from given datatype to app id.
// if any data types are missing, record that
// if any data types are not bulk import ready, record that, too.
@@ -390,6 +404,7 @@
const bulkFiles = {};
const singleFiles = [];
const xsvFiles = [];
const dtsFiles = [];
fileInfo.forEach((file) => {
const importType = file.type;
if (bulkIds.has(importType)) {
@@ -404,33 +419,38 @@
bulkFiles[importType].files.push(file.name);
} else if (importType === 'import_specification') {
xsvFiles.push(file.name);
} else if (importType === 'dts_manifest') {
dtsFiles.push(file.name);
} else {
singleFiles.push(file);
}
});
return getSpreadsheetFileInfo(xsvFiles)
.then((result) => {
if (result.types) {
Object.keys(result.types).forEach((dataType) => {
if (!(dataType in bulkFiles)) {
bulkFiles[dataType] = {
appId: uploaders.app_info[dataType].app_id,
files: [],
outputSuffix: uploaders.app_info[dataType].app_output_suffix,
};
}
bulkFiles[dataType].appParameters = result.types[dataType];
});
}
if (Object.keys(bulkFiles).length) {
return Jupyter.narrative.insertBulkImportCell(bulkFiles);
} else {
return Promise.resolve();
}
})
.then(() => {
return initSingleFileUploads(singleFiles);
});
return (
getBulkImportFileInfo(xsvFiles, dtsFiles)
// return getSpreadsheetFileInfo(xsvFiles)
.then((result) => {
if (result.types) {
Object.keys(result.types).forEach((dataType) => {
if (!(dataType in bulkFiles)) {
bulkFiles[dataType] = {
appId: uploaders.app_info[dataType].app_id,
files: [],
outputSuffix: uploaders.app_info[dataType].app_output_suffix,
};
}
bulkFiles[dataType].appParameters = result.types[dataType];
});
}
if (Object.keys(bulkFiles).length) {
return Jupyter.narrative.insertBulkImportCell(bulkFiles);
} else {
return Promise.resolve();
}
})
.then(() => {
return initSingleFileUploads(singleFiles);
})
);
}

return {
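One piece of the new requestBulkImportSpec helper worth calling out is the URL-length batching: a GET request can name an arbitrary number of files, so the loop drains the files array (via shift) and issues one bulk_specification call per batch that fits under the assumed 2048-character URL budget. A standalone sketch of the same splitting idea — the helper name and the example numbers are not part of the PR, and unlike the PR's loop this version does not mutate its input:

    // Split a list of file names into comma-joinable batches that fit a URL budget.
    // Roughly mirrors the loop in requestBulkImportSpec; each batch always gets at least one file.
    function batchFilesForUrl(files, maxQueryLength) {
        const batches = [];
        let current = [];
        let remaining = maxQueryLength;
        for (const file of files) {
            // -1 / +1 account for the comma separating entries in the joined list
            if (current.length && remaining - file.length - 1 < 0) {
                batches.push(current);
                current = [];
                remaining = maxQueryLength;
            }
            current.push(file);
            remaining -= file.length + 1;
        }
        if (current.length) {
            batches.push(current);
        }
        return batches;
    }

    // e.g. batchFilesForUrl(['a.csv', 'b.csv', 'long_manifest_name.json'], 20)
    // -> [['a.csv', 'b.csv'], ['long_manifest_name.json']]

One consequence of the in-place shift in the PR's version is that the caller's xsvFiles and dtsFiles arrays are empty once the requests have been queued, which is worth keeping in mind if those lists were ever needed again afterward.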
6 changes: 3 additions & 3 deletions package-lock.json

Some generated files are not rendered by default.