Skip to content

Commit 238bede

Browse files
committed
wip: Single file manifest generation
2 parents 8c38067 + 6588c0d commit 238bede

File tree

6 files changed

+77
-172
lines changed

6 files changed

+77
-172
lines changed

src/pyflask/curate/curate.py

+49-150
Original file line numberDiff line numberDiff line change
@@ -3705,184 +3705,83 @@ def copytree(src, dst, symlinks=False, ignore=None):
37053705

37063706

37073707
def generate_manifest_file_data(dataset_structure_obj):
    """Build the manifest rows for every high-level folder of a dataset.

    Args:
        dataset_structure_obj: Dataset structure dict. Its "folders" key maps
            each high-level folder name to a nested dict that may contain
            "files" (file name -> file info) and "folders" (sub-folder name ->
            nested dict). A high-level folder containing a "bfpath" key is
            treated as coming from Pennsieve.

    Returns:
        A dict mapping each high-level folder name to a list of rows. When at
        least one "files" dict was visited, the first row is the header
        (standard columns followed by any user-defined extra columns) and each
        following row describes one file, aligned to that header. The list is
        empty when no "files" dict was found.

    Raises:
        KeyError: If a required key ("folders", "description",
            "additional-metadata", "path" or "timestamp") is missing.
        OSError: If a local file referenced by "path" cannot be stat'ed.
    """
    local_timezone = TZLOCAL()

    # Header written as the first row of every high-level folder's manifest.
    standard_manifest_columns = (
        "filename",
        "timestamp",
        "description",
        "file type",
        "entity",
        "data modality",
        "also in dataset",
        "data dictionary path",
        "entity is transitive",
        "Additional Metadata",
    )

    # Extensions that os.path.splitext would cut in half.
    double_extensions = (
        ".ome.tiff",
        ".ome.tif",
        ".ome.tf2",
        ".ome.tf8",
        ".ome.btf",
        ".ome.xml",
        ".brukertiff.gz",
        ".mefd.gz",
        ".moberg.gz",
        ".nii.gz",
        ".mgh.gz",
        ".tar.gz",
        ".bcl.gz",
    )

    def get_name_extension(file_name):
        """Return the extension of file_name, keeping known double extensions whole."""
        for ext in double_extensions:
            if file_name.endswith(ext):
                # ext already is the complete double extension (e.g. ".nii.gz").
                return ext
        return os.path.splitext(file_name)[1]

    def build_file_entry(file_info, header, ds_struct_path, timestamp_entry, file_name):
        """Return one manifest row for a file, aligned to header.

        header is extended in place with any extra column it does not hold yet.
        """
        filename_entry = "/".join([*ds_struct_path, file_name])
        if filename_entry.startswith("/"):
            filename_entry = filename_entry[1:]

        extra_columns = file_info.get("extra_columns") or {}
        for column in extra_columns:
            if column not in header:
                header.append(column)

        # Extra columns go in first so the computed standard values always win
        # when a user-defined column reuses a standard column name.
        values = dict(extra_columns)
        values.update(
            {
                "filename": filename_entry,
                "timestamp": timestamp_entry,
                "description": file_info["description"],
                "file type": get_name_extension(file_name),
                "Additional Metadata": file_info["additional-metadata"],
            }
        )

        # Columns without a value for this file are left blank so every value
        # sits under its own header.
        return [values.get(column, "") for column in header]

    def recursive_folder_traversal(folder, hlf_data_array, ds_struct_path, is_pennsieve):
        """Append a row to hlf_data_array for every file at or below folder."""
        if "files" in folder:
            if not hlf_data_array:
                # Fresh list per high-level folder: the header is mutated when
                # extra columns are discovered.
                hlf_data_array.append(list(standard_manifest_columns))
            header = hlf_data_array[0]

            for item, file_info in folder["files"].items():
                # Do not generate a manifest entry for the manifest file itself.
                if item in ("manifest.xlsx", "manifest.csv"):
                    continue

                if is_pennsieve and file_info.get("type") == "bf":
                    file_name = os.path.basename(item)
                    timestamp_entry = file_info["timestamp"]
                else:
                    local_path_to_file = file_info["path"].replace("\\", "/")
                    if is_pennsieve:
                        file_name = os.path.basename(local_path_to_file)
                    else:
                        # Use the name the file has in the dataset structure,
                        # which differs from the on-disk name after a rename.
                        file_name = item
                    mtime = pathlib.Path(local_path_to_file).stat().st_mtime
                    timestamp_entry = (
                        datetime.fromtimestamp(mtime, tz=local_timezone)
                        .isoformat()
                        .replace(".", ",")
                        .replace("+00:00", "Z")
                    )

                hlf_data_array.append(
                    build_file_entry(file_info, header, ds_struct_path, timestamp_entry, file_name)
                )

        if "folders" in folder:
            for sub_folder_name, sub_folder in folder["folders"].items():
                ds_struct_path.append(sub_folder_name)
                recursive_folder_traversal(sub_folder, hlf_data_array, ds_struct_path, is_pennsieve)
                ds_struct_path.pop()

    hlf_manifest_data = {}

    namespace_logger.info("Generating manifest file data")
    # NOTE(review): this logs the whole dataset structure at info level, which
    # may be very large -- consider lowering it to debug.
    namespace_logger.info(dataset_structure_obj)

    for high_level_folder, hlf_structure in dataset_structure_obj["folders"].items():
        hlf_data_array = []
        # Path from the high-level folder down to the folder being visited.
        relative_structure_path = []

        is_pennsieve = "bfpath" in hlf_structure
        recursive_folder_traversal(
            hlf_structure, hlf_data_array, relative_structure_path, is_pennsieve
        )

        # Rows built before a later extra column was discovered are shorter
        # than the final header; pad them so the table is rectangular.
        if hlf_data_array:
            width = len(hlf_data_array[0])
            for row in hlf_data_array[1:]:
                row.extend([""] * (width - len(row)))

        hlf_manifest_data[high_level_folder] = hlf_data_array

    return hlf_manifest_data

src/renderer/src/components/shared/DatasetTreeViewRenderer/index.jsx

+1
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,5 @@ const DatasetTreeViewRenderer = ({ datasetStructure }) => {
106106
</Stack>
107107
);
108108
};
109+
109110
export default DatasetTreeViewRenderer;

src/renderer/src/components/shared/manifest/ManifestEntitySelector.jsx

+4-3
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@ const ManifestEntitySelector = () => {
99

1010
const handleButtonClick = () => {
1111
console.log("Button clicked!");
12-
setDatasetStructureJSONObj(window.datasetStructureJSONObj);
12+
const datasetStructureJSONObjCopy = JSON.parse(JSON.stringify(window.datasetStructureJSONObj));
13+
setDatasetStructureJSONObj(datasetStructureJSONObjCopy);
1314
};
1415

1516
return (
1617
<FullWidthContainer>
1718
<Button onClick={handleButtonClick}>Set Name</Button>
1819
<Grid>
19-
<Grid.Col span={3}>Entity</Grid.Col>
20-
<Grid.Col span={9}>
20+
<Grid.Col span={4}>Entity</Grid.Col>
21+
<Grid.Col span={8}>
2122
<DatasetTreeViewRenderer datasetStructure={datasetStructureJSONObj} />
2223
</Grid.Col>
2324
</Grid>

src/renderer/src/scripts/guided-mode/guided-curate-dataset.js

+20-19
Original file line numberDiff line numberDiff line change
@@ -1278,7 +1278,7 @@ const savePageChanges = async (pageBeingLeftID) => {
12781278
}
12791279

12801280
if (pageBeingLeftID === "guided-subjects-addition-tab") {
1281-
if (getExistingSubjectNames().length === 0) {
1281+
if (window.getExistingSubjectNames().length === 0) {
12821282
errorArray.push({
12831283
type: "notyf",
12841284
message: "Please add at least one subject",
@@ -1461,7 +1461,7 @@ const savePageChanges = async (pageBeingLeftID) => {
14611461
throw errorArray;
14621462
}
14631463

1464-
const subjects = getExistingSubjectNames();
1464+
const subjects = window.getExistingSubjectNames();
14651465
if (subjects.length === 0) {
14661466
errorArray.push({
14671467
type: "swal",
@@ -5284,7 +5284,7 @@ window.openPage = async (targetPageID) => {
52845284
// The page is unskipped only if the user has not added any subjects,
52855285
// indicated that they will be adding subjects, and the user is not starting from Pennsieve
52865286
if (
5287-
getExistingSubjectNames().length === 0 &&
5287+
window.getExistingSubjectNames().length === 0 &&
52885288
window.sodaJSONObj["starting-point"]["type"] != "bf" &&
52895289
window.sodaJSONObj["button-config"]["dataset-contains-subjects"] === "yes"
52905290
) {
@@ -5373,19 +5373,16 @@ window.openPage = async (targetPageID) => {
53735373
}
53745374

53755375
if (targetPageID === "guided-dataset-structure-review-tab") {
5376-
// Delete the guided high level folders if they are empty
5377-
// They could possibly be empty if the user did not add any subject data
5378-
// These will be added back safely when the user traverses back to the high level folder's page
5379-
for (const folder of guidedHighLevelFolders) {
5380-
const rootFolderPath = window.datasetStructureJSONObj["folders"][folder];
5381-
5376+
// Remove empty guided high-level folders (primary, source, derivative)
5377+
guidedHighLevelFolders.forEach((folder) => {
5378+
const rootFolderPath = window.datasetStructureJSONObj?.folders?.[folder];
53825379
if (rootFolderPath && folderIsEmpty(rootFolderPath)) {
5383-
delete window.datasetStructureJSONObj["folders"][folder];
5380+
delete window.datasetStructureJSONObj?.folders?.[folder];
53845381
}
5385-
}
5382+
});
53865383

53875384
guidedShowTreePreview(
5388-
window.sodaJSONObj["digital-metadata"]["name"],
5385+
window.sodaJSONObj?.["digital-metadata"]?.name,
53895386
"guided-folder-structure-review"
53905387
);
53915388
}
@@ -5432,6 +5429,8 @@ window.openPage = async (targetPageID) => {
54325429
{ timeout: 0 }
54335430
);
54345431
const manifestRes = res.data;
5432+
console.log("manifestRes");
5433+
console.log(manifestRes);
54355434
//loop through each of the high level folders and store their manifest headers and data
54365435
//into the window.sodaJSONObj
54375436

@@ -5463,6 +5462,8 @@ window.openPage = async (targetPageID) => {
54635462
//Rerender the manifest cards
54645463
renderManifestCards();
54655464
}
5465+
if (targetPageID === "guided-manifest-subject-entity-selector-tab") {
5466+
}
54665467

54675468
if (targetPageID === "guided-create-submission-metadata-tab") {
54685469
if (pageNeedsUpdateFromPennsieve(targetPageID)) {
@@ -6843,7 +6844,7 @@ const guidedOpenEntityEditSwal = async (entityName) => {
68436844
let entityPrefix;
68446845

68456846
if (entityName.startsWith("sub-")) {
6846-
preExistingEntities = getExistingSubjectNames();
6847+
preExistingEntities = window.getExistingSubjectNames();
68476848
entityNameSingular = "subject";
68486849
entityPrefix = "sub-";
68496850
}
@@ -11192,7 +11193,7 @@ const addSubjectSpecificationTableRow = () => {
1119211193
}
1119311194
};
1119411195

11195-
const getExistingSubjectNames = () => {
11196+
window.getExistingSubjectNames = () => {
1119611197
// Get all subjects in pools and outside of pools
1119711198
const [subjectsInPools, subjectsOutsidePools] = window.sodaJSONObj.getAllSubjects();
1119811199
// Combine the two arrays
@@ -11489,7 +11490,7 @@ document
1148911490
const sampleName = lowercaseFirstLetter(row["sample id"]);
1149011491

1149111492
// Check to see if the subject already exists
11492-
const subjectAlreadyExists = getExistingSubjectNames().includes(subjectName);
11493+
const subjectAlreadyExists = window.getExistingSubjectNames().includes(subjectName);
1149311494
if (!subjectAlreadyExists) {
1149411495
window.sodaJSONObj.addSubject(subjectName);
1149511496
if (subjectsPool) {
@@ -11569,7 +11570,7 @@ const guidedAddListOfSubjects = async (subjectNameArray, showWarningForExistingS
1156911570
formattedSubjectNameArray.filter((subjectName) => subjectName.length > 0);
1157011571

1157111572
// Get an array of existing subjects to check for duplicates
11572-
const existingSubjects = getExistingSubjectNames();
11573+
const existingSubjects = window.getExistingSubjectNames();
1157311574

1157411575
// Array of the subjects that already exist in the dataset
1157511576
const duplicateSubjects = formattedSubjectNameArray.filter((subjectName) =>
@@ -11647,14 +11648,14 @@ const convertArrayToCommaSeparatedString = (array) => {
1164711648

1164811649
const guidedOpenEntityAdditionSwal = async (entityName) => {
1164911650
// Get a list of the existing entities so we can check for duplicates
11650-
// const subjects = getExistingSubjectNames();
11651+
// const subjects = window.getExistingSubjectNames();
1165111652
let preExistingEntities;
1165211653
let entityNameSingular;
1165311654
let entityPrefix;
1165411655

1165511656
// case when adding subjects
1165611657
if (entityName === "subjects") {
11657-
preExistingEntities = getExistingSubjectNames();
11658+
preExistingEntities = window.getExistingSubjectNames();
1165811659
entityNameSingular = "subject";
1165911660
entityPrefix = "sub-";
1166011661
}
@@ -13376,7 +13377,7 @@ const handleMultipleSubSectionDisplay = async (controlledSectionID) => {
1337613377
// (case for updating a dataset from Pennsieve or old progress files),
1337713378
// Select the option for them
1337813379
if (!sodaJSONObj["button-config"]["subject-addition-method"]) {
13379-
if (getExistingSubjectNames().length > 0) {
13380+
if (window.getExistingSubjectNames().length > 0) {
1338013381
document.getElementById("guided-button-add-subject-structure-manually").click();
1338113382
}
1338213383
}

0 commit comments

Comments
 (0)