diff --git a/.gitignore b/.gitignore index 30efcaf..e0c9a04 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .DS_Store -old/* \ No newline at end of file +old/* +*.jar \ No newline at end of file diff --git a/DataModel/Project/Subject/Core/Subject-core-definition.yaml b/DataModel/Project/Subject/Core/Subject-core-definition.yaml deleted file mode 100644 index 8d4f9e8..0000000 --- a/DataModel/Project/Subject/Core/Subject-core-definition.yaml +++ /dev/null @@ -1,185 +0,0 @@ -Subject-core: - description: Describes information such as age gender, race, and other core details related to the subject - properties: - project_id: - description: Foreign key or unique identifier to map a project - type: string - example: P024 - donor_id: - description: Primary key for a patient - type: string - example: D001 - age: - description: The age of the organism in years - type: integer - format: numeric - example: 35 - lower_limit: 0 - age_unit: - description: Unit in with the age is measured - type: string - example: years - developmental_stage_unit: - description: Represents the unit used for denoting the development stage - type: string - example: stage - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/efo/ - developmental_stage: - description: Represents the stage of development/formation of the embryo - of humans and mice - type: string - example: fetal - enum: - - fetal - - neonatal - - adolescent - - adult - - geriatric - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/efo/, https://www.ebi.ac.uk/ols4/ontologies/uberon - gender: - description: Gender of the organism from which the sample was derived - type: string - example: Female - enum: - - Male - - Female - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/ols4/ontologies/gsso - maritial_status: - description: Marital status at the time of sampling - type: string 
- example: Married - enum: - - Married - - Single - - Divorced - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/ols4/ontologies/snomed/classes/http%253A%252F%252Fsnomed.info%252Fid%252F87915002 - pre_menopause_cycle_type: - description: Type of pre-menopause cycle - type: string - example: Regular, Irregular - enum: - - Regular - - Irregular - pre_menopause_cycle_duration: - description: Duration of the pre-menopause - type: string - example: 28 - lower_limit: 0 - menstruation: - description: Menstruation status of Donor - type: string - example: Menstruating - enum: - - Menstruating - - Not menstruating - occupation: - description: Occupation of the subject - type: string - example: Engineer - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/ols4/ontologies/occo - cohort: - description: Group of individuals included in the study or dataset, which - may have implications for data analysis and interpretation - type: string - example: Study Group A - strain_characteristics: - description: Characteristics of the mouse strain - type: string - example: Wildtype - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/ols4/ontologies/ncit, https://sites.google.com/site/environmentontology/ - strain: - description: The name of the mouse strain - type: string - example: C57BL/6 - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/ols4/ontologies/clo, https://www.ebi.ac.uk/ols4/ontologies/efo, - country_of_origin: - description: Country of origin or nationality of the patient, which may - provide insights into geographic variations in disease incidence or environmental - factors. 
- type: string - example: USA - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/ols4/ontologies/geo, https://www.ebi.ac.uk/ols4/ontologies/envo - race: - description: A geographic ancestral origin category that is assigned to a population group based mainly on physical characteristics that are thought to be distinct and inherent. - type: string - example: Asian - enum: - - African - - African Caribbea - - American Indian or Alaska Native - - Asian - - Asian American - - Australian - - Black or African American - - Caribbean Indian - - Eskimo - - European - - Hispanic or Latino - - Latin American - - Mediterranean - - Middle Eastern or North African - - Multiracial - - Native Hawaiian or Other Pacific Islander - - New Zealander - - North American - - Other Race - - South or Central American Indian - - White - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/ols4/ontologies/ncit/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FNCIT_C17049?lang=en - ethinicity: - description: A social group characterized by a distinctive social and cultural tradition that is maintained from generation to generation. - type: string - example: Yi Chinese - ontology_link: - type: string - description: Link to the ontology file - url: https://www.ebi.ac.uk/ols4/ontologies/ncit/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FNCIT_C16564 - number_of_children: - description: Number of children - type: integer - example: 2 - lower_limit: 0 - pregnancy: - description: Whether preganant? 
- type: boolean - example: True - enum: - - True - - False - cause_of_death: - description: Cause of death - type: string - example: hysterectomy, stroke, Kidney disease, neonatal condition - ontology_link: - type: string - description: Link to the ontology file - url: https://www.disease-ontology.org/, https://hpo.jax.org/ - type: object - version: 1.0.0 \ No newline at end of file diff --git a/DataModel/Project/Subject/Events/Core/Events-core-definition.yaml b/DataModel/Project/Subject/Events/Core/Events-core-definition.yaml deleted file mode 100644 index fb17002..0000000 --- a/DataModel/Project/Subject/Events/Core/Events-core-definition.yaml +++ /dev/null @@ -1,28 +0,0 @@ -Events-core: - description: Lists any significant occurrence or activity that is recorded within - a patient's health record. Contains information like project ID, donor ID, event - ID and event type. - properties: - project_id: - description: Foreign key or unique identifier to map a project - type: string - example: P012 - donor_id: - description: Foreign key or unique identifier for a patient - type: string - example: D001 - event_id: - description: Primary key or unique identifier for a event - type: string - example: E001 - event_type: - description: Describing the events occurence for a patient - type: string - example: Diagnosis - enum: - - Diagnosis - - Treatment - - Laboratory test - - Follow-up - type: object - version: 1.0.0 \ No newline at end of file diff --git a/DataModel/Project/Subject/Events/Follow Up/Follow-up-definition.yaml b/DataModel/Project/Subject/Events/Follow Up/Follow-up-definition.yaml deleted file mode 100644 index 6b7e613..0000000 --- a/DataModel/Project/Subject/Events/Follow Up/Follow-up-definition.yaml +++ /dev/null @@ -1,213 +0,0 @@ -Follow-up: - description: Describes the ongoing monitoring and evaluation of a patient's health - status after an initial diagnosis or treatment. 
- properties: - donor_id: - description: Foreign key or unique identifier to map donor - type: string - example: D001 - event_id: - description: Foreign key or unique identifier to map a patient to an event - type: string - example: EV20201124 - treatment_id: - description: Foreign key or unique identifier to map a treatment event - type: string - example: TR-980743 - follow_up_id: - description: Primary Key or unique identifier for follow-up - type: string - example: FU-001 - date_of_last_follow_up: - description: Date of the most recent follow-up appointment or assessment - in YYYY-MM-DD format - type: string - pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" - example: 2023-06-15 - status_last_follow_up: - description: Vital status of the donor at last follow up - type: string - example: Active - enum: - - Active - - Dead - status_of_remission: - description: Indicates whether a reduction or disappearance of the signs - and symptoms of a disease has occured in a patient - type: string - example: Remission - enum: - - Remission - - Relapse - date_of_death: - description: Date when the patient passed away in YYYY-MM-DD format - type: string - pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" - example: 2023-09-25 - non_fatal_myocardial_infraction: - description: Patient status if they have suffered from a non fatal heart attack - type: boolean - example: True - non_fatal_stroke: - description: Patient status if they have suffered from a stroke - type: boolean - example: False - status_of_progression: - description: Current state of a patients disease in terms of its advancement - or regression - type: string - example: Stable - enum: - - Progressing - - Stable - - Regressing - status_of_recurrence: - description: Presense of relapse of the patients disease - type: string - example: No recurrence - enum: - - No recurrence - - Local recurrence - - Regional recurrence - - Distant recurrence - progression_free_survival: - description: Length 
of time during and after treatment in which a patients - disease does not worsen or progress (in months) - type: integer - format: numeric - example: 12 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: months - encoding: UTF-8 - overall_survival: - description: Length of time from either the date of diagnosis or the start - of treatment for a disease that patients diagnosed with the disease are - still alive (in months) - type: integer - format: numeric - example: 55 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: months - encoding: UTF-8 - relapse_type: - description: A disease stage which is preceded by a remission and characterized - by the return of a manifestation of a disease - type: string - example: Bone Marrow Relapse - relapse_interval: - description: Duration of time between the end of a period of remission and - the occurrence of a relapse in a patients disease (in months) - type: integer - format: numeric - example: 2 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: months - encoding: UTF-8 - method_of_progression_status: - description: Techniques or diagnostic tools used to assess and monitor the - progression status of a disease - type: string - example: Clinical Assessment - anatomic_site_progression_or_recurrence: - description: Anatomic site(s) where a disease has either progressed (worsened) - or recurred (returned) after a period of remission - type: string - example: Liver, Lung - recurrence_tumor_stage: - description: Stage of a tumor at the time it recurs after an initial treatment - and a period of remission - type: string - example: Stage II - recurrence_tumor_staging_system: - description: Tumor staging system used to determine the 
reurrance tumor - stage - type: string - example: AJCC - enum: - - COG liver - - COG renal - - FIGO - - IGCCCG - - AJCC - - MMSE - - Hoehn and Yahr's Staging - - INRG - - INSS - - IRS - - Masaoka - - Enneking-msts - recurrence_tumor_staging_system_version: - description: Version of the tumor staging system used to determine the reurrance - tumor stage - type: string - example: 8th Edition - recurrence_tumor_grade: - description: Classification of a tumor based on the appearance of the cancer - cells under a microscope at the time of its recurrence - type: string - example: Grade 3 - recurrence_tumor_grading_system: - description: Tumor garding system used to determine the reurrance tumor - grade - type: string - example: TNM - enum: - - Enneking_msts - - INPC - - Tertiary Gleason - - Nottingham - recurrence_tumor_grading_version: - description: Version of the tumor grading system used to determine the reurrance - tumor grade - type: string - example: 8th Edition - interval_of_followup: - description: Interval from the primary diagnosis date to the follow-up date, - in days. - type: integer - format: numeric - example: 15 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: days - encoding: UTF-8 - weight_at_followup: - description: Indicate the donors weight, in kilograms (kg), at the time - of follow-up. - type: integer - format: numeric - example: 75 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: kg - encoding: UTF-8 - posttherapy_tumour_staging_system: - description: Specify the tumour staging system used to stage the cancer - after treatment for patients receiving systemic and/or radiation therapy - alone or as a component of their initial treatment, or as neoadjuvant - therapy before planned surgery. 
This may be represented as ypTNM or ycTNM - in the medical report. - type: string - example: Durie-Salmon staging system - enum: - - AJCC 8th edition - - AJCC 7th edition - - AJCC 6th edition - - Ann Arbor staging system - - Binet staging system - - Durie-Salmon staging system - - FIGO staging system - - Lugano staging system - - Rai staging system - - Revised International staging system (R-ISS) - - St Jude staging system - type: object - version: 1.0.0 \ No newline at end of file diff --git a/DataModel/Project/Subject/Events/Investigation/Imaging/Imaging-definition.yaml b/DataModel/Project/Subject/Events/Investigation/Imaging/Imaging-definition.yaml deleted file mode 100644 index 2c8e5fc..0000000 --- a/DataModel/Project/Subject/Events/Investigation/Imaging/Imaging-definition.yaml +++ /dev/null @@ -1,318 +0,0 @@ -Imaging: - description: Describes the various techniques of viewing the inside of the body - to help figure out the causes of an illness or injury and confirm a diagnosis - properties: - event_id: - description: Foreign key or unique identifier to map a patient to an event - type: string - example: EVT-20201 - treatment_id: - description: Foreign key or unique identifier for this treatment - type: string - example: TR-7391493 - diagnosis_id: - description: Foreign key or unique identifier for the specific diagnosis - type: string - example: DGN3457 - specimen_id: - description: Foreign key or unique identifier to map a specimen - type: string - example: SP-7391493 - donor_id: - description: Foreign key or unique identifier to map to donor - type: string - example: DO-056148 - result_id: - description: Primary key or unique identifier for the result from imaging - type: string - example: R-231 - cell_type: - description: The specific type of cell being analyzed or observed in a sample, - such as epithelial cells or lymphocytes - type: string - example: epithelial cells - ontology_link: - type: string - description: Link to the ontology file - url: 
https://obofoundry.org/ontology/cl.html - percentage_cell_infiltration: - description: The percentage of tissue area occupied by infiltrating cells, - indicating the level of immune cell infiltration into the tissue - type: float - format: numeric - example: 12 - lower_limit: 0.0 - upper_limit: 100.0 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: '%' - encoding: UTF-8 - cell_count: - description: The total count of cells within a specified area or volume - of tissue, typically measured per unit area or volume - type: integer - format: numeric - example: 150 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: cells/mm² - encoding: UTF-8 - nucleus_size: - description: The size of the cell nucleus, often measured in micrometers - (μm), which can indicate cellular activity or abnormality - type: float - format: numeric - example: 12.5 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: µm² - encoding: UTF-8 - stain_intensity: - description: The intensity of staining in histological or immunohistochemical - samples, reflecting the abundance or presence of specific molecules or - structures - type: string - enum: - - Staining Intensity 0 - - Staining Intensity 2+ - - Staining Intensity 1+ - - Staining Intensity 4+ - - Staining Intensity 3+ - ontology_link: - type: string - description: Link to the ontology file - url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C127762 - epithelium_thickness: - description: The thickness of epithelial tissue layers, which can vary based - on the tissue type and pathological conditions. 
- type: float - format: numeric - example: 50 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: µm - encoding: UTF-8 - histologic_type: - description: The classification of tissue based on its microscopic structure - and appearance, such as adenocarcinoma or squamous cell carcinoma - type: string - example: Rhabdoid - enum: - - Rhabdoid - - Sarcomatoid - - Unclassified features - microarray_coordinates: - description: The coordinates or location information of spots or features - on a microarray slide used for gene expression analysis - type: string - example: 3C9 - percent_cell_type: - description: The percentage of a specific cell type (e.g., tumor cells, - immune cells) relative to the total cell population in a sample - type: string - example: tumor - percent_necrosis: - description: The percentage of tissue necrosis, indicating the extent of - cell death in a tissue sample - type: float - format: numeric - example: 11.11 - lower_limit: 0.0 - upper_limit: 100.0 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: '%' - encoding: UTF-8 - prostate_chip_count: - description: The number of microarray chips or slides used for analyzing - gene expression or biomarkers in prostate tissue samples - type: integer - format: numeric - example: 12 - tissue_microarray_coordinates: - description: The coordinates or location information of spots or features - on a tissue microarray slide used for molecular analysis - type: string - example: 3C9, 4C9 - is_ffpe: - description: Indicates whether the tissue sample is formalin-fixed paraffin-embedded - (FFPE), which is a common preservation method in histology - type: boolean - tissue_type: - description: The type of tissue or organ from which the sample is obtained, - such as prostate tissue, lung tissue, or lymph node tissue - 
type: string - example: malignant - enum: - - malignant - - benign - - non-cancerous - ihc_score: - description: The scoring system used in immunohistochemistry (IHC) analysis - to assess protein expression levels or cellular markers - type: integer - format: numeric - example: 3 - organ_imaging_site: - description: The anatomical site or organ imaged during medical imaging - procedures, such as the brain, abdomen, or chest - type: string - example: Thorax - type_of_Imaging: - description: The modality or type of medical imaging used, such as CT scan, - MRI, ultrasound, or PET scan - type: string - example: MRI - contrast_agent_used: - description: The type of contrast agent (if any) used during imaging studies - to enhance visualization of structures or abnormalities - type: string - example: gadolinium - date_of_scanning: - description: The date when the imaging study was conducted or when the images - were acquired in YYYY-MM-DD format - type: string - example: 2020-10-21 - imaging_evidence_of_lesion: - description: The presence or evidence of a lesion or abnormality detected - during medical imaging examinations - type: string - example: Bening - enum: - - Bening - - Malignant - lymph_nodes_detected: - description: The number of lymph nodes detected or identified in imaging - studies, which can be important in cancer staging and diagnosis - type: boolean - region_of_lymph_node: - description: The specific anatomical region or location of a lymph node, - often described relative to nearby structures or landmarks - type: string - example: axilla - region_of_lymph_node_length: - description: The length or size measurement of a lymph node in imaging studies, - typically reported in millimeters (mm) - type: integer - format: numeric - example: 22 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: mm - encoding: UTF-8 - region_of_lymph_node_width: - description: The 
width or diameter measurement of a lymph node in imaging - studies, typically reported in millimeters (mm) - type: integer - format: numeric - example: 7 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: mm - encoding: UTF-8 - rads_score: - description: The Radiologic Assessment Determination Score (RADS) used in - radiology to classify imaging findings based on likelihood of malignancy - or other criteria - type: integer - format: numeric - example: 3 - other_abnormalities: - description: Any additional abnormalities or findings noted during imaging - studies that are not specifically categorized in other terms - type: string - ct_system_imaging: - description: The specific imaging system or equipment used for CT scans, - such as CT scanner model or manufacturer - type: string - count_of_series: - description: Count of series generated per imaging study - type: integer - format: numeric - example: 45636 - count_of_images: - description: Count of instances (images) generated per imaging study - type: integer - format: numeric - example: 45637 - radiology_note: - description: Recognition findings described by radiology specialists - type: string - example: Nonspecific patchy ground glass opacification and septal thickening - bilateral upper lobes and superior segment of left lower lobe likely infectious - or inflammatory in etiology; Mild cardiomegaly, diffuse atherosclerosis - radiology_image_id: - description: Unique ID of each image, acting as the primary key of the Radiology - image table - type: integer - format: numeric - example: 235 - radiology_occurrence_id: - description: Unique ID for each image shooting, acting as the primary key - of the Radiology study table - type: integer - format: numeric - example: 765 - radiology_series_id: - description: Unique ID of each series - type: integer - format: numeric - example: 123 - file_path: - description: File 
path of each image files - type: string - body_part_source_value: - description: Value indicating the photographed body part - type: string - example: 0018, 0015 - laterality_concept_id: - description: Image shooting direction (anatomical plane) - type: string - example: R, B, L - ontology_link: - type: string - description: Link to the ontology file - url: https://bioportal.bioontology.org/ontologies/NCIT/?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C25185 - series_type_source_value: - description: Additional source values describing the series - type: string - series_type_concept_id: - description: Value indicating the type of the series - type: string - series_total_number: - description: Number of images constituting each series - type: integer - format: numeric - series_serial_number: - description: Order of images within each series - type: integer - format: numeric - image_resolution_rows: - description: Image resolution (number of horizontal pixels) - type: integer - format: numeric - example: 512 - image_resolution_columns: - description: Image resolution (number of vertical pixels) - type: integer - format: numeric - example: 600 - CT_slice_thickness: - description: Thickness of CT image slide - type: float - format: numeric - example: 1.24 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: mm - encoding: UTF-8 - type: object - version: 1.0.0 \ No newline at end of file diff --git a/DataModel/Project/Subject/Events/Investigation/Laboratory test/Laboratory-test-definition.yaml b/DataModel/Project/Subject/Events/Investigation/Laboratory test/Laboratory-test-definition.yaml deleted file mode 100644 index c9d7571..0000000 --- a/DataModel/Project/Subject/Events/Investigation/Laboratory test/Laboratory-test-definition.yaml +++ /dev/null @@ -1,145 +0,0 @@ -Laboratory-test: - description: Describes the details of the 
laboratory tests taken by a patient and - their interpretation - properties: - donor_id: - description: Foreign key or unique identifier to map donor - type: string - example: DO-31724 - result_id: - description: Primary key or unique identifier to map the result - type: string - example: R-7391493 - event_id: - description: Foreign key or unique identifier to map a patient to an event - type: string - example: EVT-20201124 - specimen_id: - description: Foreign key or unique identifier to map a specimen - type: string - example: SP-7391493 - order_date: - description: Date when the lab test was ordered in YYYY-MM-DD format - type: string - pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" - example: 2021-03-09 - test_type: - description: Type of lab test taken by the patient - type: string - example: Biopsy - enum: - - Lipid panel - - Thyroid function tests - - CBC test - - Creatinine test - - G6PD test - - Liver panel - - Prothrombin time - - Urinalysis - - Basic metabolic panel - - Blood test - - Pregnancy test - - Ham test - - Amniocentesis - - Biopsy - - Estrogen levels test - - HbA1c test - - Magnesium blood test - - Serological test - - Urine tests - test_date: - description: Date when the lab test was performed in YYYY-MM-DD - type: string - pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" - example: 2023-02-11 - test_name: - description: Name or code of lab tests to ensure accurate tracking, analysis, - and interpretation of diagnostic results - type: string - example: CBC - reference_range: - description: Set of values to interpret a patients lab test results - type: float - format: numeric - example: 4000 - lower_limit: 0.0 - interpretation: - description: Interpretation and analysis of lab test results in relation - to current clinical condition. 
- type: string - example: Normal - enum: - - Normal - - Abnormal - - Critical - comments: - description: Additional comments or notes for the tests taken - type: string - example: No abnormalities - panel_name: - description: Group of lab tests that are commonly ordered together to evaluate - a specific aspect of health or diagnose certain conditions - type: string - example: CBC panel - tests_included: - description: List of individual tests or panels included in this panel - type: string - example: WBC, RBC, Platelets - panel_description: - description: Description of the panel or group of related lab tests. - type: string - example: Complete Blood Count with Differential - result_unit_preferred: - description: Standardized unit of measure the test - type: string - example: μg/mL - enum: - - M - - mg - - mg/mL - - mM - - ng/mL - - nM - - percent - - pfu - - pM - - rad - - U/mL - - units - - μg - - μg/kg - - μg/mL - - μM - result_unit_reported: - description: Unit of measure reported in the test - type: string - example: μg/mL - enum: - - M - - mg - - mg/mL - - mM - - ng/mL - - nM - - percent - - pfu - - pM - - rad - - U/mL - - units - - μg - - μg/kg - - μg/mL - - μM - result_value_preferred: - description: Standardized numerical result value - type: float - format: numeric - example: 7 - lower_limit: 0.0 - result_value_reported: - description: Numerical result value as reported in test - type: string - example: 7 milligrams per deciliter - type: object - version: 1.0.0 \ No newline at end of file diff --git a/DataModel/Project/Subject/Exposure/Exposure-definition.yaml b/DataModel/Project/Subject/Exposure/Exposure-definition.yaml deleted file mode 100644 index 3cef000..0000000 --- a/DataModel/Project/Subject/Exposure/Exposure-definition.yaml +++ /dev/null @@ -1,146 +0,0 @@ -Exposure: - description: Describes the various types of exposures, their duration, and other - details related to diet and exercise - properties: - donor_id: - description: Foreign key or 
unique identifier to map donor - type: string - example: D001 - exposure_id: - description: Primary key or unique identifier for exposure - type: string - example: EXP001 - event_id: - description: Foreign key or unique identifier to map a patient to an event - type: string - example: EVT001 - exposure_type: - description: Classification of how individuals come into contact with various - hazardous substances or environmental factors - type: string - example: Coal dust - enum: - - Smoking - - Alcohol - - Asbestos - - Coal dust - - Respirable crystaline silica - - Secondhand smoke as child - - Radon exposure - - Marijuana - - Virus - - Allergen - ontology_link: - type: string - description: Link to the ontology file - url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C157103 - exposure_unit: - description: Unit used to measure exposure - type: string - example: months - exposure_duration: - description: Time associated with exposure - type: integer - format: numeric - example: 17 - lower_limit: 0 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: months - encoding: UTF-8 - exposure_frequency: - description: Frequency of exposure - type: string - example: once - enum: - - once - - multiple - - none - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: day - encoding: UTF-8 - diet: - description: General diet of the donor - type: string - example: Mediterranean - diet_duration: - description: Years associated with the diet - type: float - format: numeric - example: 3 - lower_limit: 0.0 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: years - encoding: UTF-8 - exercise: - 
description: Bodily activity that enhances or maintains physical fitness - and overall health and wellness of the donor - type: string - example: Jogging - exercise_specify: - description: Years associated with the exercise regime - type: integer - format: numeric - example: 10 - lower_limit: 0 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: years - encoding: UTF-8 - time_between_waking_and_first_exposure: - description: Details of first tobacco use in months - type: integer - format: numeric - example: 15 - lower_limit: 0 - units: - description: Unit is a quantity of constant magnitude which is used to - measure the magnitudes of other quantities of the same manner - uom: months - encoding: UTF-8 - exposure_category: - description: Describe the patients current level of exposure as self-reported - by the donor - type: string - example: Occasional (< once a month) - enum: - - Daily - - None - - Not applicable - - Occasional (< once a month) - - Social (> once a month - - < once a week) - - Unknown - - Weekly (>=1x a week) - exercise_frequency: - description: Indicate how many times per week the donor exercises for at - least 30 minutes - type: string - example: Less than once a month - enum: - - Never - - Less than once a month - - 1-3 times a month - - Not applicable - - Once or twice a week - - Most days but not every day - - Every day - - Unknown - exercise_intensity: - description: Indicate the intensity of exercise - type: string - example: Low - No increase in the heart beat and no perspiration - enum: - - Low - No increase in the heart beat and no perspiration - - Moderate - Increase in the heart beat slightly with some light perspiration - - Vigorous - Increase in the heart beat substantially with heavy perspiration - - Not applicable - - Unknown - type: object - version: 1.0.0 \ No newline at end of file diff --git 
a/DataModel/Project/Subject/Events/Consultation/Consultation-definition.yaml b/model/project/arm/arm-definition.md similarity index 100% rename from DataModel/Project/Subject/Events/Consultation/Consultation-definition.yaml rename to model/project/arm/arm-definition.md diff --git a/DataModel/Project/Subject/Events/Referral/Referral-definition.yaml b/model/project/arm/arm-definition.yaml similarity index 100% rename from DataModel/Project/Subject/Events/Referral/Referral-definition.yaml rename to model/project/arm/arm-definition.yaml diff --git a/DataModel/Project/Subject/Events/Treatment/Chemotherapy/Chemotherapy-definition.yaml b/model/project/assay/ctdna/ctdna-definition.yaml similarity index 100% rename from DataModel/Project/Subject/Events/Treatment/Chemotherapy/Chemotherapy-definition.yaml rename to model/project/assay/ctdna/ctdna-definition.yaml diff --git a/DataModel/Project/Subject/Events/Treatment/Cognitive Behavioural Therapy/CBT-definition.yaml b/model/project/assay/cytokine/cytokine-definition.yaml similarity index 100% rename from DataModel/Project/Subject/Events/Treatment/Cognitive Behavioural Therapy/CBT-definition.yaml rename to model/project/assay/cytokine/cytokine-definition.yaml diff --git a/model/project/assay/data_file/data-file-definition.yaml b/model/project/assay/data_file/data-file-definition.yaml new file mode 100644 index 0000000..3b2ef4b --- /dev/null +++ b/model/project/assay/data_file/data-file-definition.yaml @@ -0,0 +1,113 @@ +Data-file: + description: Contains details related to the files associated with the experiment/study. 
+ This may include file type, file name, file size, etc + properties: + file_id: + description: Foreign key or unique identifier assigned to a file + type: string + example: F001 + protocol_id: + description: Foreign key or unique identifier for the protocol + type: string + example: PRT092 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-7391493 + data_category: + description: Broad categorization of the contents of the data file. + type: string + example: Transcriptome Profiling + data_format: + description: Format of the data files + type: string + example: CSV + enum: + - CSV + - HDF5 + - MEX + - TSV + - TXT + data_type: + description: Specific content type of the data file. + type: string + example: Gene Expression Quantification + enum: + - Exon Expression Quantification + - Gene Expression Quantification + - Isoform Expression Quantification + - Splice Junction Quantification + experimental_strategy: + description: The sequencing strategy used to generate the data file. 
+ type: string + example: RNA-Seq + enum: + - m6A MeRIP-Seq + - RNA-Seq + - scRNA-Seq + - Total RNA-Seq + file_name: + description: Name of the file + type: string + example: SRR25637231-trimmed-pair2.fastq + file_size: + description: Size of the file + type: integer + format: numeric + example: 367 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: kb, mb, gb + encoding: UTF-8 + md5sum: + description: An MD5 checksum data file contains MD5 checksums for one or + more files, used to verify file integrity + type: string + matrix_cell_count: + description: Number of cells analyzed in a matrix file + type: integer + format: numeric + example: 2500000 + lower_limit: 0 + upper_limit: 1000000000 + genome_assembly_version: + description: Name of the genome assembly used to generate this file + type: string + example: GRCh38 + enum: + - GRCh38 + - GRCh37 + - GRCm39 + - GRCm38 + - GRCm37 + - Not Applicable + genome_patch_version: + description: Patch version of the genome assembly used to generate this + file + type: string + example: p11 + reference_type: + description: The type of the reference file + type: string + example: Genome sequence + enum: + - Genome sequence + - Transcriptome sequence + - Annotation reference + - Transcriptome index + - Genome sequence index + assembly_type: + description: The assembly type of the genome reference file + type: string + example: Primary assembly + enum: + - Primary assembly + - Complete assembly + - Patch assembly + reference_version: + description: The genome version of the reference file + type: string + example: GencodeV27; Ensembl 87 + type: object + version: 1.0.0 diff --git a/model/project/assay/elisa/elisa-definition.yaml b/model/project/assay/elisa/elisa-definition.yaml new file mode 100644 index 0000000..f26ea6e --- /dev/null +++ b/model/project/assay/elisa/elisa-definition.yaml @@ -0,0 +1,91 @@ +ELISA: + description: 
This assay technique is designed for detecting and quantifying soluble + substances such as peptides, proteins, antibodies, and hormones + properties: + donor_id: + description: Foreign key or unique identifier to map donor + type: string + example: DO-31724 + result_id: + description: Primary key or unique identifier to map the result + type: string + example: R-7391493 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-123456 + analyte_accession: + description: Foreign key or unique identifier to map the analyte + type: string + example: AN-34567 + analyte_preferred: + description: Preferred analyte name + type: string + example: Albumin + analyte_reported: + description: Reported analyte name + type: string + example: ALB + comments: + description: Free text to expand upon details of the assay + type: string + example: Stored at -20°C + unit_preferred: + description: The preferred units for various components and measurements + essential for ensuring accuracy and consistency in the procedure + type: string + example: mg/mL + enum: + - M + - mg + - mg/mL + - mM + - ng/mL + - nM + - percent + - pfu + - pM + - rad + - U/mL + - units + - μg + - μg/kg + - μg/mL + - μM + unit_reported: + description: The reported units for various components and measurements + in the procedure + type: string + example: milligrams per deciliter + value_preferred: + description: Standardized value + type: float + format: numeric + example: 6.5 + lower_limit: 0.0 + value_reported: + description: Reported value + type: float + format: numeric + example: 5.5 + lower_limit: 0.0 + study_time_collected: + description: Time at which the sample was collected during the study + type: float + format: numeric + example: 12.5 + lower_limit: 0.0 + study_time_collected_unit: + description: Unit of time for sample 
collection + type: string + example: hours + enum: + - hours + - min + - sec + type: object + version: 1.0.0 diff --git a/model/project/assay/elispot/elispot-definition.yaml b/model/project/assay/elispot/elispot-definition.yaml new file mode 100644 index 0000000..260e758 --- /dev/null +++ b/model/project/assay/elispot/elispot-definition.yaml @@ -0,0 +1,80 @@ +Elispot: + description: This assay is a highly sensitive and specific technique + used to measure the frequency of cytokine secreting cells at the single cell level + properties: + donor_id: + description: Foreign key or unique identifier to map to donor + type: string + example: DO-31724 + result_id: + description: Foreign key or unique identifier to map a patient to a result + type: string + example: R-7391493 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-13224 + file_id: + description: Foreign key or unique identifier assigned to a file + type: string + example: F-7391493 + analyte_preferred: + description: A specific cytokine or other secreted protein of interest + type: string + example: IL-2 + analyte_reported: + description: A specific substance or protein that is being measured and + reported in the assay results + type: string + example: IL-2 + cell_number_preferred: + description: Refers to the optimal number of cells recommended for use in + the assay to achieve reliable and meaningful results + type: integer + format: numeric + example: 2000 + lower_limit: 0 + cell_number_reported: + description: Refers to the reported number of cells used in the assay + type: integer + format: numeric + example: 1250 + lower_limit: 0 + comments: + description: Free text to expand upon details of the assay + type: string + example: Sample stored at -80°C + spot_number_preferred: + description: Optimal number of spots or wells 
that should be analyzed to + obtain reliable and meaningful data + type: integer + format: numeric + example: 15 + lower_limit: 0 + spot_number_reported: + description: Reported number of spots or wells analyzed to obtain the data + type: integer + format: numeric + example: 10 + lower_limit: 0 + study_time_collected: + description: Specific time point or duration at which samples are collected + for analysis during a study + type: integer + format: numeric + example: 2 + study_time_collected_unit: + description: specific time point or duration unit at which samples are collected + for analysis during a study + type: string + example: hours + enum: + - hours + - min + - sec + type: object + version: 1.0.0 \ No newline at end of file diff --git a/model/project/assay/flow-cytometry/flow-cytometry-definition.yaml b/model/project/assay/flow-cytometry/flow-cytometry-definition.yaml new file mode 100644 index 0000000..179fc67 --- /dev/null +++ b/model/project/assay/flow-cytometry/flow-cytometry-definition.yaml @@ -0,0 +1,119 @@ +Flow-cytometry: + description: This assay provides a well-established method to identify cells in + solution and is most commonly used for evaluating peripheral blood, bone marrow, + and other body fluids + properties: + donor_id: + description: Foreign key or unique identifier to map donor + type: string + example: DO-31724 + result_id: + description: Primary key or unique identifier to map the result + type: string + example: R-7391493 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-123456 + parent_population_preferred: + description: Standardized name of base or parent population preferred term + for population percentage calculation + type: string + example: Lymphoid + ontology_link: + type: string + description: Link to the ontology file + url:
https://obofoundry.org/ontology/cl + parent_population_reported: + description: Reported name of base or parent population preferred term for + population percentage calculation + type: string + example: Lymphoid Cells + ontology_link: + type: string + description: Link to the ontology file + url: https://obofoundry.org/ontology/cl + population_name_preferred: + description: Standardized name or identifier of cell population + type: string + example: B Cells, T cells + ontology_link: + type: string + description: Link to the ontology file + url: https://obofoundry.org/ontology/cl + population_name_reported: + description: Reported name or identifier of cell population + type: string + example: B Lymphocytes, T lymphocyte + ontology_link: + type: string + description: Link to the ontology file + url: https://obofoundry.org/ontology/cl + population_stat_unit_preferred: + description: Standardized statistical unit + type: string + example: percentage + population_stat_unit_reported: + description: Reported statistical unit + type: string + example: '%' + population_statistic_preferred: + description: Standardized statistical value + type: float + format: numeric + example: 12 + lower_limit: 0.0 + upper_limit: 100.0 + population_statistic_reported: + description: Reported statistical value + type: float + format: numeric + example: 65 + lower_limit: 0.0 + upper_limit: 100.0 + population_name_modifier: + description: The value after the ampersand in the alternate population format + type: string + example: High Expression + enum: + - High Expression + - Moderate expression + - Low Expression + parent_population_modifier: + description: The value after the ampersand in the alternate parent population + format + type: string + example: Moderate expression + enum: + - High Expression + - Moderate expression + - Low Expression + population_marker_preferred: + description: Preferred marker + type: string + example: CD19, CD44 + ontology_link: + type: string + description:
Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/gene + population_marker_reported: + description: Reported marker + type: string + example: CD19 Marker, CD44 marker + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/gene + estimated_cell_count: + description: Estimated number of cells in the suspension + type: integer + format: numeric + example: 2100 + lower_limit: 0 + upper_limit: 1000000000 + type: object + version: 1.0.0 diff --git a/DataModel/Project/Subject/Events/Treatment/General Physician/GP-definition.yaml b/model/project/assay/ics/ics-definition.yaml similarity index 100% rename from DataModel/Project/Subject/Events/Treatment/General Physician/GP-definition.yaml rename to model/project/assay/ics/ics-definition.yaml diff --git a/DataModel/Project/Subject/Events/Treatment/Physiotherapy/Physiotherapy-definition.yaml b/model/project/assay/ihc/ihc-definition.yaml similarity index 100% rename from DataModel/Project/Subject/Events/Treatment/Physiotherapy/Physiotherapy-definition.yaml rename to model/project/assay/ihc/ihc-definition.yaml diff --git a/model/project/assay/mutation/mutation-definition.yaml b/model/project/assay/mutation/mutation-definition.yaml new file mode 100644 index 0000000..91a23fa --- /dev/null +++ b/model/project/assay/mutation/mutation-definition.yaml @@ -0,0 +1,656 @@ +Mutation: + description: Contains information regarding mutations, providing detailed information + about each mutation's location, type, impact, and clinical significance + properties: + protocol_id: + description: Foreign key or unique identifier to map to a protocol + type: string + example: PROTOCOL_025 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EVENT_123 + file_id: + description: Foreign key or unique identifier to map a file + type: string + example: FILE_456 + specimen_id: + description: Foreign key or unique identifier to 
map a specimen + type: string + example: SP-7391493 + entrez_gene_id: + description: A unique identifier assigned to genes within the NCBI Gene + database + type: integer + format: numeric + example: 12345 + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/gene/ + ncbi_build: + description: The reference genome used for the alignment GRCh38/GRCh37 + type: string + example: GRCh38 + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/datasets/genome/ + chromosome: + description: The affected chromosome where the gene is located + type: string + example: chr1 + start_position: + description: Mutation start coordinate + type: integer + format: numeric + example: 10000 + end_position: + description: Mutation end coordinate + type: integer + format: numeric + example: 456184 + strand: + description: Genomic strand of the reported allele. Variants should always + be reported on the positive genomic strand. + type: string + example: '+' + enum: + - '-' + - '+' + variant_classification: + description: Translational effect of variant allele. snv,mnv, indel, amplification, + fusion + type: string + example: Missense Mutation + enum: + - Frame Shift Deletion + - Frame Shift Insertion + - In Frame Deletion + - In Frame Insertion + - Missense Mutation + - Nonsense Mutation + - Silent + - Splice Site + - Translation Start Site + - Nonstop Mutation + - 3'UTR + - 3'Flank + - 5'UTR + - 5'Flank + - IGR + - Intron + - RNA + - Targeted_Region + variant_type: + description: Type of mutation. synonymous/non-synonymous. TNP (tri-nucleotide + polymorphism) is analogous to DNP (di-nucleotide polymorphism) but for + three consecutive nucleotides.
ONP (oligo-nucleotide polymorphism) is + analogous to TNP but for consecutive runs of four or more (SNP, DNP, TNP, + ONP, INS, DEL, or Consolidated) + type: string + example: SNP + enum: + - SNP + - Synonymous + - Non-synonymous + reference_allele: + description: The plus strand reference allele at this position. Comparator + allele + type: string + example: A + enum: + - A + - C + - G + - T + tumor_seq_allele1: + description: Nucleotide sequence observed at a specific genomic position + in the tumor sample for one of the alleles + type: string + example: T + enum: + - T + - A + - C + - G + tumor_seq_allele2: + description: Nucleotide sequence observed at a specific genomic position + in the tumor sample for the second allele + type: string + example: C + enum: + - C + - G + - A + - T + dbsnp_rs: + description: SNP ID number, commonly known as an "rsID," which is a unique + identifier assigned to a single nucleotide polymorphism (SNP) by the dbSNP + database + type: string + example: rs123456 + dbsnp_val_status: + description: Validation status of a single nucleotide polymorphism (SNP) + in the dbSNP database + type: boolean + example: True + mutation_status: + description: Germline/Somatic.
Whether a specific genetic variation (germline/somatic) + is present or absent in a given sample + type: boolean + example: True + sequencing_phase: + description: Stage of the sequencing process or the specific phase in the + preparation and execution of a sequencing experiment + type: string + example: Phase 1 + sequence_source: + description: Molecular assay type used to produce the analytes used for + sequencing + type: string + example: Genomic DNA + enum: + - Genomic DNA + - Whole exome + - Whole transcriptome + validation_method: + description: The assay platforms used for the validation call + type: string + example: Sanger sequencing + sequencer: + description: Instrument used to produce primary sequence data + type: string + example: Illumina HiSeq + hgvsc: + description: The coding sequence of the variant in HGVS recommended format + type: string + example: c.123A>T + hgvsp: + description: The protein sequence of the variant in HGVS recommended format. + type: string + example: p.Arg456Ser + hgvsp_short: + description: Same as the HGVSp column, but using 1-letter amino-acid codes + type: string + example: R456S + transcript_id: + description: Ensembl ID of the transcript affected by the variant + type: string + example: ENST0000012345 + exon_number: + description: Specific exon within a gene where a mutation is located + type: integer + format: numeric + example: 3 + t_depth: + description: Read depth across this locus in tumor BAM + type: integer + format: numeric + example: 50 + t_ref_count: + description: Read depth supporting the reference allele in tumor BAM + type: integer + format: numeric + example: 30 + t_alt_count: + description: Read depth supporting the variant allele in tumor BAM + type: integer + format: numeric + example: 20 + n_depth: + description: Read depth across this locus in normal BAM + type: integer + format: numeric + example: 40 + n_ref_count: + description: Read depth supporting the reference allele in normal BAM (cleared + in 
somatic MAF) + type: integer + format: numeric + example: 25 + n_alt_count: + description: Read depth supporting the variant allele in normal BAM (cleared + in somatic MAF) + type: integer + format: numeric + example: 15 + allele: + description: The variant allele used to calculate the consequence + type: string + example: G + enum: + - A + - C + - G + - T + gene: + description: Stable Ensembl ID of affected gene + type: string + example: TP53 + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/gene/ + feature: + description: Stable Ensembl ID of feature (transcript, regulatory, motif) + type: string + example: Exon + enum: + - Exon + - Protein + - Gene + feature_type: + description: Classification of genomic regions based on their function and + the type of sequences they contain + type: string + example: Coding + enum: + - Coding + - non-coding + one_consequence: + description: The single consequence of the canonical transcript in sequence + ontology terms + type: string + example: Missense_variant + consequence: + description: Consequence type of this variant in sequence ontology terms + type: string + example: Predicted to be damaging, VUS, deleterious, Likely pathogenic + cdna_position: + description: Relative position of base pair in the cDNA sequence as a fraction. + A "-" symbol is displayed as the numerator if the variant does not appear + in cDNA + type: integer + format: numeric + example: 345 + cds_position: + description: Relative position of base pair in coding sequence. A "-" symbol + is displayed as the numerator if the variant does not appear in coding + sequence + type: integer + format: numeric + example: 234 + protein_position: + description: Relative position of affected amino acid in protein. 
A "-" + symbol is displayed as the numerator if the variant does not appear in + coding sequence + type: integer + format: numeric + example: 78 + amino_acids: + description: If the variation affects the protein-coding sequence + type: string + example: R/S + codons: + description: The alternative codons with the variant base + type: string + example: CGT/TGT + existing_variation: + description: Known identifier of existing variation + type: string + example: rs123456 + allele_num: + description: Allele number from input; 0 is reference, 1 is first alternate + etc + type: integer + format: numeric + example: 1 + distance: + description: Shortest distance from the variant to transcript + type: integer + format: numeric + example: 500 + transcript_strand: + description: The DNA strand (1 or -1) on which the transcript/feature lies + type: string + example: '-' + enum: + - '-' + - '+' + symbol: + description: A standardized shorthand notation used to uniquely identify + a gene + type: string + example: BRCA1 + symbol_source: + description: The source of the gene symbol + type: string + example: HGNC + hgnc_id: + description: Gene identifier from the HUGO Gene Nomenclature Committee, + if applicable + type: string + example: HGNC:12345 + biotype: + description: Classification of the transcript based on its functional and + structural characteristics + type: string + example: Protein coding + enum: + - Protein coding + - Non coding + - Pseudogene + - Others + canonical: + description: A flag (YES) indicating that the VEP-based canonical transcript, + the longest translation, was used for this gene.
If not, the value is + null + type: boolean + example: True + ccds: + description: CCDS identifier for this transcript, where applicable + type: string + example: CCDS12345 + ensp: + description: The Ensembl protein identifier of the affected transcript + type: string + example: ENSP0000012345 + swissprot: + description: UniProtKB/Swiss-Prot accession + type: string + example: P04637 + trembl: + description: UniProtKB/TrEMBL identifier of protein product + type: string + example: A0A024RBG1 + uniparc: + description: UniParc identifier of protein product + type: string + example: UPI0000123456 + refseq: + description: RefSeq identifier for this transcript + type: string + example: NM_000546 + sift: + description: SIFT prediction and/or score, with both given as prediction + (score) + type: string + example: Tolerated + polyphen: + description: PolyPhen prediction and/or score used to predict the potential + impact of amino acid substitutions on the structure and function of proteins. + The scores range from 0 (probably benign) to 1 (probably damaging) + type: float + format: numeric + example: 0 + lower_limit: 0.0 + upper_limit: 1.0 + exon: + description: The exon number (out of total number) + type: integer + format: numeric + example: 3 + intron: + description: The intron number (out of total number) + type: integer + format: numeric + example: 12 + domains: + description: The source and identifier of any overlapping protein domains + type: string + example: Zinc Finger + enum: + - Zinc Finger + - Cysteine Rich + - RAS GEF + gmaf: + description: Non-reference allele and frequency of existing variant in 1000 + Genomes + type: float + format: numeric + example: 0.005 + afr_maf: + description: Non-reference allele and frequency of existing variant in 1000 + Genomes combined African population + type: float + format: numeric + example: 0.002 + amr_maf: + description: Non-reference allele and frequency of existing variant in 1000 + Genomes combined American population +
type: float + format: numeric + example: 0.001 + asn_maf: + description: Non-reference allele and frequency of existing variant in 1000 + Genomes combined Asian population + type: float + format: numeric + example: 0.004 + eas_maf: + description: Non-reference allele and frequency of existing variant in 1000 + Genomes combined East Asian population + type: float + format: numeric + example: 0.003 + eur_maf: + description: Non-reference allele and frequency of existing variant in 1000 + Genomes combined European population + type: float + format: numeric + example: 0.006 + sas_maf: + description: Non-reference allele and frequency of existing variant in 1000 + Genomes combined South Asian population + type: float + format: numeric + example: 0.007 + aa_maf: + description: Non-reference allele and frequency of existing variant in NHLBI-ESP + African American population + type: float + format: numeric + example: 0.008 + ea_maf: + description: Non-reference allele and frequency of existing variant in NHLBI-ESP + European American population + type: float + format: numeric + example: 0.009 + clin_sig: + description: Clinical significance of variant from dbSNP + type: string + example: Pathogenic + enum: + - Pathogenic + - Likely pathogenic + - Variant of uncertain significance + - Likely benign + somatic: + description: Somatic status of each ID reported under Existing_variation + (0, 1, or null) + type: boolean + example: True + pubmed: + description: Pubmed ID(s) of publications that cite existing variant + type: integer + format: numeric + example: 31666701 + motif_name: + description: The source and identifier of a transcription factor binding + profile aligned at this position + type: string + example: TF_BINDING_SITE + motif_pos: + description: The relative position of the variation in the aligned TFBP + type: integer + format: numeric + example: 100 + high_inf_pos: + description: A flag indicating if the variant falls in a high information + position of a 
transcription factor binding profile (TFBP) (Y, N, or null) + type: boolean + example: True + motif_score_change: + description: The difference in motif score of the reference and variant + sequences for the TFBP + type: float + format: numeric + example: 0.5 + impact: + description: The impact modifier for the consequence type + type: string + example: Moderate + enum: + - Moderate + - Severe + - No effect + pick: + description: Indicates if this block of consequence data was picked by VEPs + type: string + example: High + enum: + - High + - Low + variant_class: + description: Sequence Ontology variant class + type: string + example: SNV + tsl: + description: Transcript support level, which is based on independent RNA + analyses + type: integer + format: numeric + example: 2 + hgvs_offset: + description: Indicates by how many bases the HGVS notations for this variant + have been shifted + type: integer + format: numeric + example: 10 + pheno: + description: Indicates if existing variant is associated with a phenotype, + disease or trait (0, 1, or null) + type: boolean + example: True + minimised: + description: Alleles in this variant have been converted to minimal representation + before consequence calculation (1 or null) + type: boolean + example: False + exac_af: + description: Global Allele Frequency + type: float + format: numeric + example: 0.001 + exac_af_adj: + description: Adjusted Global Allele Frequency + type: float + format: numeric + example: 0.002 + exac_af_afr: + description: African/African American Allele Frequency + type: float + format: numeric + example: 0.0005 + exac_af_amr: + description: American Allele Frequency + type: float + format: numeric + example: 0.0003 + exac_af_eas: + description: East Asian Allele Frequency + type: float + format: numeric + example: 0.0003 + exac_af_fin: + description: Finnish Allele Frequency + type: float + format: numeric + example: 0.0004 + exac_af_nfe: + description: Non-Finnish European Allele Frequency + 
type: float + format: numeric + example: 0.0006 + exac_af_oth: + description: Other Allele Frequency + type: float + format: numeric + example: 0.0001 + exac_af_sas: + description: South Asian Allele Frequency + type: float + format: numeric + example: 0.0007 + gene_pheno: + description: Indicates if gene that the variant maps to is associated with + a phenotype, disease or trait (0, 1, or null) + type: boolean + example: False + filter: + description: Copied from input VCF. This includes filters implemented directly + by the variant caller and other external software used in the DNA-Seq + pipeline + type: string + example: Pass + enum: + - Pass + - Fail + context: + description: The reference allele per VCF specs, and its five flanking base + pairs + type: string + example: TCCATAGTAGCCA + src_vcf_id: + description: GDC UUID for the input VCF file + type: string + example: VCF_123 + tumor_bam_uuid: + description: GDC UUID for the tumor bam file + type: string + example: UUID_123 + normal_bam_uuid: + description: GDC UUID for the normal bam file + type: string + example: UUID_456 + case_id: + description: GDC UUID for the case + type: string + example: CASE_789 + gdc_filter: + description: 'Whether a Genomic Data Commons (GDC) filter is present ' + type: boolean + example: True + cosmic: + description: Whether Overlapping COSMIC variants are present + type: boolean + example: True + mc3_overlap: + description: Indicates whether this region overlaps with an MC3 variant + for the same sample pair + type: boolean + example: True + gdc_validation_status: + description: GDC implementation of validation checks + type: string + example: Validated + gdc_valid_somatic: + description: A specific criterion available in the Genomic Data Commons + (GDC) data portal for filtering somatic mutations that have been validated + type: boolean + example: True + vcf_region: + description: Colon separated string containing the CHROM, POS, ID, REF, + and ALT columns from the VCF file 
(e.g., chrZ:20:rs1234:A:T) + type: string + example: chr1:10000-20000 + vcf_info: + description: INFO column from VCF + type: string + example: AC=3;DP=50;AF=0.06, AC=5;DP=20;AF=0.01 + vcf_format: + description: FORMAT column from VCF + type: string + example: GT:AD:DP:GQ:PL + vcf_tumor_gt: + description: Tumor sample genotype column from VCF + type: string + example: 1/1:0,50:50:99:0,99 + vcf_normal_gt: + description: Normal sample genotype column from VCF + type: string + example: 0/1:0,30:40:60:0,60 + type: object + version: 1.0.0 diff --git a/DataModel/Project/Subject/Events/Treatment/Surgery/Surgery-definition.yaml b/model/project/assay/omics/genomics/genomics-definition.yaml similarity index 100% rename from DataModel/Project/Subject/Events/Treatment/Surgery/Surgery-definition.yaml rename to model/project/assay/omics/genomics/genomics-definition.yaml diff --git a/model/project/assay/omics/proteomics/proteomics-definition.yaml b/model/project/assay/omics/proteomics/proteomics-definition.yaml new file mode 100644 index 0000000..817862f --- /dev/null +++ b/model/project/assay/omics/proteomics/proteomics-definition.yaml @@ -0,0 +1,71 @@ +Proteomics: + description: This proteomics assay analyzes the protein composition of a biological sample using techniques such as mass spectrometry. + properties: + protocol_id: + description: Foreign key or unique identifier to map to a protocol + type: string + example: PRT451 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-7391493 + file_id: + description: Foreign key or unique identifier to map a file + type: string + example: F-7391493 + gene_id: + description: A unique identifier assigned to genes in the NCBI Gene database. 
+ type: integer + format: numeric + example: 51181 + ontology_link: + type: string + description: Link to the ontology file + url: https://geneontology.org/ + gene_name: + description: A standardized, universally accepted shorthand notation used + to uniquely identify genes- HGNC + type: string + example: DCXR + ontology_link: + type: string + description: Link to the ontology file + url: https://genenames.org/ + protein_id: + description: A unique identifier assigned to proteins in a database such as UniProt. + type: string + example: P12345 + ontology_link: + type: string + description: Link to the ontology file + url: https://www.uniprot.org/ + protein_name: + description: A standardized, universally accepted shorthand notation used to uniquely identify proteins. + type: string + example: Hemoglobin subunit alpha + ontology_link: + type: string + description: Link to the ontology file + url: https://www.uniprot.org/ + peptide_sequence: + description: The amino acid sequence of a peptide identified in the sample. + type: string + example: AGCTGTTAGC + modifications: + description: Post-translational modifications identified on the protein or peptide. + type: string + example: Phosphorylation + raw_intensity: + description: Raw intensity value for the protein or peptide in the sample. + type: integer + format: numeric + normalized_intensity: + description: Normalized intensity value for the protein or peptide in the sample. 
+ type: integer + format: numeric + type: object + version: 1.0.0 diff --git a/model/project/assay/omics/transcriptomics/bulk-transcriptomics/bulk-transcriptomics-definition.yaml b/model/project/assay/omics/transcriptomics/bulk-transcriptomics/bulk-transcriptomics-definition.yaml new file mode 100644 index 0000000..59c0fcf --- /dev/null +++ b/model/project/assay/omics/transcriptomics/bulk-transcriptomics/bulk-transcriptomics-definition.yaml @@ -0,0 +1,58 @@ +Bulk-transcriptomics: + description: This RNA sequencing is the method of choice for transcriptomic analysis + of pooled cell populations, tissue sections, or biopsies + properties: + protocol_id: + description: Foreign key or unique identifier to map to a protocol + type: string + example: PRT451 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-7391493 + file_id: + description: Foreign key or unique identifier to map a file + type: string + example: F-7391493 + gene_id: + description: A unique identifier assigned to genes in the NCBI Gene database. 
+ type: integer + format: numeric + example: 51181 + ontology_link: + type: string + description: Link to the ontology file + url: https://geneontology.org/ + gene_name: + description: A standardized, universally accepted shorthand notation used + to uniquely identify genes- HGNC + type: string + example: DCXR + ontology_link: + type: string + description: Link to the ontology file + url: https://genenames.org/ + ensemble_id: + description: A unique identifier assigned to various biological entities + such as genes, transcripts, proteins, and regulatory features within the + Ensembl database + type: string + example: ENSG00000169738 + ontology_link: + type: string + description: Link to the ontology file + url: https://asia.ensembl.org/Help/View?id=285 + raw_counts: + description: Raw count expression value for the gene in the sample + type: integer + format: numeric + normalized_counts: + description: Normalized count expression value for the gene in the sample + type: integer + format: numeric + type: object + version: 1.0.0 diff --git a/model/project/assay/omics/transcriptomics/sc-transcriptomics/sc-transcriptomics-definition.yaml b/model/project/assay/omics/transcriptomics/sc-transcriptomics/sc-transcriptomics-definition.yaml new file mode 100644 index 0000000..4ce985f --- /dev/null +++ b/model/project/assay/omics/transcriptomics/sc-transcriptomics/sc-transcriptomics-definition.yaml @@ -0,0 +1,153 @@ +SC-Transcriptomics: + description: This assay technology analyzes the gene expression profiles of individual + cells, allowing for the identification of distinct cell types, states, and functions + within a heterogeneous population + properties: + protocol_id: + description: Foreign key or unique identifier for the protocol + type: string + example: PRT451 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + file_id: + description: Foreign key or unique identifier to map a file + type: 
string + example: F-7391493 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-7391493 + total_counts: + description: Refers to the total number of RNA molecules (or transcripts) + detected in a single cell + type: float + example: 62409 + lower_limit: 53.0 + upper_limit: 98789.0 + percentage_dropouts: + description: Refers to the proportion of genes that are not detected (i.e., + have zero counts) in a given cell or sample + type: float + example: 35.35 + lower_limit: 14.6 + upper_limit: 99.96 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + total_cells: + description: Number of cells containing the gene, Refers to the total number + of individual cells that have been successfully captured and processed + type: float + format: numeric + example: 13612 + lower_limit: 7.0 + upper_limit: 17982.0 + mean_counts: + description: Average counts per cell for the feature + type: float + example: 2.963672 + lower_limit: 0.0 + upper_limit: 4.5 + median_counts: + description: Median counts per cell for the feature + type: float + example: 1003 + gene_id: + description: A unique identifier assigned to genes in the NCBI Gene database. + type: integer + format: numeric + example: 816464 + ontology_link: + type: string + description: Link to the ontology file + url: https://geneontology.org/ + gene_name: + description: ID of the feature (gene, protein, metabolite etc.) 
being measured + type: string + example: DCXR + ensemble_id: + description: A unique identifier assigned to various biological entities + such as genes, transcripts, proteins, and regulatory features within the + Ensembl database + type: string + example: ENSG00000169738 + ontology_link: + type: string + description: Link to the ontology file + url: https://asia.ensembl.org/Help/View?id=285 + mean: + description: Mean value per gene + type: float + example: 0 + lower_limit: -1.91 + upper_limit: 1.79 + highly_variable_gene: + description: A boolean value that indicates whether a particular gene is + considered highly variable across the cells + type: boolean + example: True + enum: + - True + - False + mitochondrial_gene: + description: A boolean value that indicates whether a particular gene is + considered a mitochondrial gene + type: boolean + example: False + enum: + - True + - False + gene_info: + description: Brief description of the gene + type: string + example: The protein encoded by this gene acts as a homotetramer to catalyze + diacetyl reductase and L-xylulose reductase reactions. The encoded protein + may play a role in the uronate cycle of glucose metabolism and in the + cellular osmoregulation in the proximal renal tubules. Defects in this + gene are a cause of pentosuria. 
Two transcript variants encoding different + isoforms have been found for this gene.[provided by RefSeq, Aug 2010] + mean_expression_in_cell_type: + description: Brief description of the mean expression of gene in cells assigned + a cell type + type: string + example: Epithelial cell of proximal tubule ~ 0.1, kidney collecting duct + principal cell ~ -1.04, parietal epithelial cell ~ -0.38, B cell ~ -0.62, + CD14-positive monocyte ~ -0.95, T cell ~ -0.81, epithelial cell of distal + tubule ~ -0.49, natural killer cell ~ -0.64, kidney proximal straight + tubule epithelial cell ~ 0.45 + mean_expression_in_cluster: + description: Brief description of the mean expression of gene in cells assigned + to a cluster + type: string + example: Clustering not available for this dataset + median_expression_cell_type: + description: Brief description of the median expression of gene in cells + assigned a cell type + type: string + example: Epithelial cell of proximal tubule ~ 0.17, kidney collecting duct + principal cell ~ -0.88, parietal epithelial cell ~ -0.5, B cell ~ -0.5, + CD14-positive monocyte ~ -0.61, T cell ~ -0.51, epithelial cell of distal + tubule ~ -0.17, natural killer cell ~ -0.49, kidney proximal straight + tubule epithelial cell ~ 0.58 + median_expression_in_cluster: + description: Brief description of the median expression of gene in cells + assigned to a cluster + type: string + example: Clustering not available for this dataset + mean_expression_in_group: + description: Brief description of the mean expression of gene in cells belonging + to a group (like disease vs control) + type: string + example: (Grouping by sample ID) GSM4145204 ~ 0.0, GSM4145205 ~ 0.0, GSM4145206 + ~ -0.0 + median_expression_in_group: + description: Brief description of the median expression of gene in cells + belonging to a group + type: string + example: GSM4145204 ~ -0.48, GSM4145205 ~ 0.22, GSM4145206 ~ 0.2 + type: object + version: 1.0.0 diff --git 
a/model/project/assay/omics/transcriptomics/spatial-sc-transcriptomics/sc-spatial-transcriptomics-definition.yaml b/model/project/assay/omics/transcriptomics/spatial-sc-transcriptomics/sc-spatial-transcriptomics-definition.yaml new file mode 100644 index 0000000..f2331b5 --- /dev/null +++ b/model/project/assay/omics/transcriptomics/spatial-sc-transcriptomics/sc-spatial-transcriptomics-definition.yaml @@ -0,0 +1,165 @@ +SC-Spatial-Transcriptomics: + description: This assay technology combines single-cell RNA sequencing with spatial + information, allowing researchers to analyze gene expression profiles while preserving + the spatial context of the cells within a tissue + properties: + protocol_id: + description: Foreign key or unique identifier for the protocol + type: string + example: PRT451 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + file_id: + description: Foreign key or unique identifier to map a file + type: string + example: F-7391493 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-7391493 + total_counts: + description: Refers to the total number of RNA molecules (or transcripts) + detected within a specific spatial location or spot on a tissue section + type: float + format: numeric + example: 745 + lower_limit: 0.0 + upper_limit: 162868.0 + percentage_dropouts: + description: Refers to the proportion of genes that are not detected (i.e., + have zero counts) in a given spatial location or spot within a tissue + section + type: float + example: 57.3 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + total_cells: + description: Refers to the total number of individual cells that are present + within a specific spatial location or spot on a tissue 
section and have + been successfully captured and analyzed + type: float + format: numeric + example: 496 + lower_limit: 0.0 + upper_limit: 1162.0 + mean_counts: + description: Refers to the average number of RNA molecules (or transcripts) + detected per gene across all cells within a specific spatial location + or spot + type: float + example: 0.426 + lower_limit: 0.0 + upper_limit: 140.1618 + gene_id: + description: A unique identifier assigned to genes within the NCBI Gene + database + type: integer + format: numeric + example: 26155 + ontology_link: + type: string + description: Link to the ontology file + url: https://geneontology.org/ + gene_name: + description: A standardized, universally accepted shorthand notation used + to uniquely identify genes - HGNC + type: string + example: NOC2L + ontology_link: + type: string + description: Link to the ontology file + url: https://genenames.org/ + ensemble_id: + description: A unique identifier assigned to various biological entities + such as genes, transcripts, proteins, and regulatory features within the + Ensembl database + type: string + example: ENSG00000188976 + ontology_link: + type: string + description: Link to the ontology file + url: https://asia.ensembl.org/Help/View?id=285 + mean: + description: Mean value per gene + type: float + example: 0.36 + lower_limit: 0.0 + upper_limit: 2.3 + highly_variable_gene: + description: A boolean value that indicates whether a particular gene is + considered highly variable across the cells + type: boolean + example: True + enum: + - True + - False + spatially_variable_gene: + description: A boolean value that indicates whether a particular gene is + considered spatially variable gene + type: boolean + example: True + enum: + - True + - False + mean_expression_in_spatial_domain: + description: Description of the mean expression of gene across spots assigned + to a spatial domain + type: string + example: Spatial domain not computed for this dataset + 
median_expression_in_spatial_domain: + description: Description of the median expression of gene across spots assigned + to a spatial domain + type: string + example: Spatial domain not computed for this dataset + mean_expression_in_cluster: + description: Description of the mean expression of gene in spots assigned + to a cluster + type: string + example: Cluster 5 ~ 0.24, Cluster 1 ~ 0.25, Cluster 4 ~ 0.36, Cluster + 0 ~ 0.46, Cluster 3 ~ 0.4, Cluster 6 ~ 0.39, Cluster 2 ~ 0.39 + median_expression_in_cluster: + description: Description of the median expression of gene in spots assigned + to a cluster + type: string + example: Cluster 5 ~ 0.0, Cluster 1 ~ 0.0, Cluster 4 ~ 0.0, Cluster 0 ~ + 0.48, Cluster 3 ~ 0.36, Cluster 6 ~ 0.0, Cluster 2 ~ 0.0 + mean_expression_in_annotated_region: + description: Description of the mean count of gene across spots assigned + a particular annotation + type: string + example: Normal + stroma + lymphocytes Region ~ 0.27, Stroma + adipose + tissue Region ~ 0.2, Stroma Region ~ 0.35, DCIS Region ~ 0.46, Lymphocytes + Region ~ 0.31, Invasive cancer + lymphocytes Region ~ 0.41 + median_expression_in_annotated_region: + description: Description of the median count of gene across spots assigned + a particular annotation + type: string + example: Normal + stroma + lymphocytes Region ~ 0.0, Stroma + adipose tissue + Region ~ 0.0, Stroma Region ~ 0.0, DCIS Region ~ 0.47, Lymphocytes Region + ~ 0.0, Invasive cancer + lymphocytes Region ~ 0.0 + mitochondrial_gene: + description: A boolean value that indicates whether a particular gene is + considered a mitochondrial gene + type: boolean + example: False + enum: + - True + - False + gene_info: + description: Brief description of the gene + type: string + example: The protein encoded by this gene acts as a homotetramer to catalyze + diacetyl reductase and L-xylulose reductase reactions. 
The encoded protein + may play a role in the uronate cycle of glucose metabolism and in the + cellular osmoregulation in the proximal renal tubules. Defects in this + gene are a cause of pentosuria. Two transcript variants encoding different + isoforms have been found for this gene.[provided by RefSeq, Aug 2010] + type: object + version: 1.0.0 diff --git a/model/project/assay/pcr/pcr-definition.yaml b/model/project/assay/pcr/pcr-definition.yaml new file mode 100644 index 0000000..001cf0b --- /dev/null +++ b/model/project/assay/pcr/pcr-definition.yaml @@ -0,0 +1,92 @@ +PCR: + description: This assay is a widely used molecular biology technique that allows + for the amplification of specific DNA sequences + properties: + donor_id: + description: Foreign key or unique identifier to map to donor + type: string + example: DO-31724 + result_id: + description: Foreign key or unique identifier to map a patient to a result + type: string + example: R-7391493 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + specimen_id: + description: Foreign key or unique identifier to map a specimen + type: string + example: SP-123456 + gene_id: + description: NCBI identification for genes + type: integer + format: numeric + example: 16653 + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/gene + gene_name: + description: Official gene name from species-specific nomenclature committee + type: string + example: Beta-actin + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/gene + gene_symbol_preferred: + description: Preferred symbol for the gene identified + type: string + example: ACTB + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/gene + gene_symbol_reported: + description: Official gene symbol from species-specific nomenclature 
committee + type: string + example: ACTB + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ncbi.nlm.nih.gov/gene + unit_preferred: + description: The preferred units for various components and measurements + essential for ensuring accuracy and consistency in the procedure + type: string + example: mg/mL + enum: + - M + - mg + - mg/mL + - mM + - ng/mL + - nM + - percent + - pfu + - pM + - rad + - U/mL + - units + - μg + - μg/kg + - μg/mL + - μM + unit_reported: + description: The reported units for various components and measurements + in the procedure + type: string + example: milligrams per deciliter + value_preferred: + description: Standardized numerical value + type: float + format: numeric + example: 111.6 + value_reported: + description: Numerical value as reported + type: float + format: numeric + example: 7.2 + type: object + version: 1.0.0 \ No newline at end of file diff --git a/model/project/core/project-core-definition.md b/model/project/core/project-core-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/DataModel/Project/Core/Project-core-definition.yaml b/model/project/core/project-core-definition.yaml similarity index 100% rename from DataModel/Project/Core/Project-core-definition.yaml rename to model/project/core/project-core-definition.yaml diff --git a/model/project/experiment_design/experiment-design-definition.md b/model/project/experiment_design/experiment-design-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/DataModel/Project/Experiment Design/Experiment-design-definition.yaml b/model/project/experiment_design/experiment-design-definition.yaml similarity index 100% rename from DataModel/Project/Experiment Design/Experiment-design-definition.yaml rename to model/project/experiment_design/experiment-design-definition.yaml diff --git a/model/project/files/files-definition.md b/model/project/files/files-definition.md new file mode 100644 index 0000000..e69de29 
diff --git a/model/project/files/files-definition.yaml b/model/project/files/files-definition.yaml new file mode 100644 index 0000000..e69de29 diff --git a/model/project/protocol/protocol-definition.md b/model/project/protocol/protocol-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/DataModel/Project/Protocol/Protocol-definition.yaml b/model/project/protocol/protocol-definition.yaml similarity index 100% rename from DataModel/Project/Protocol/Protocol-definition.yaml rename to model/project/protocol/protocol-definition.yaml diff --git a/model/project/protocol/qc_metrics/qc-metrics-definition.yaml b/model/project/protocol/qc_metrics/qc-metrics-definition.yaml new file mode 100644 index 0000000..25c6040 --- /dev/null +++ b/model/project/protocol/qc_metrics/qc-metrics-definition.yaml @@ -0,0 +1,921 @@ +QC-metrics: + description: Contains details of quality checks done for various procedures and + assays to ensure the accuracy, reliability, and consistency of experimental data + and results. 
+ properties: + protocol_id: + description: Foreign key or unique identifier for the protocol + type: string + example: PRT451 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EV-20201124 + file_id: + description: Foreign key or unique identifier to map a file + type: string + example: F-7391493, F-427255 + RNAseq: + description: A technique used to analyze the quantity and sequences of RNA in a sample, providing insights into gene expression and discovering new transcripts + properties: + encoding: + description: Encoding format of the quality scores + type: string + example: Sanger / Illumina 1.9 + post_aligned_gc_content: + description: GC content of reads after alignment + type: float + format: numeric + example: 44 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_of_reads_aligned_to_human_genome_by_star: + description: Total reads aligned to the human genome using STAR aligner + type: float + format: numeric + example: 100000000 + number_of_reads_aligned_to_exons_5_3_utrs: + description: Number of reads aligned specifically to exons and UTRs + type: float + format: numeric + example: 75000000 + rseqc_infer_experiment: + description: Ratio of reads expected sense to total (sense + anti-sense), + for stranded data + type: float + format: numeric + example: 0.9 + lower_limit: 0.0 + upper_limit: 1.0 + rseqc_percent_of_read_corresponding_to_cds_exon_utr: + description: Percent of reads corresponding to CDS, exon, and UTR regions + type: float + format: numeric + example: 55 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + percent_of_proper_pair_reads_aligned: + 
description: Percentage of reads that are properly paired and aligned + type: float + format: numeric + example: 90 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + duplicate_reads: + description: Number of duplicate reads detected in the sequencing data + type: float + format: numeric + example: 500000000 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + library_complexity: + description: Number of unique DNA fragments present in a library + type: float + format: numeric + example: 1000000 + read_group: + description: Identifier of the read group + type: string + example: RG01 + sequencing_platform: + description: The platform used for sequencing + type: string + example: Illumina HiSeq + total_reads: + description: Total number of reads + type: float + format: numeric + example: 1000000 + mapped_reads: + description: Number of reads mapped to the reference genome + type: float + format: numeric + example: 900000 + average_insert_size: + description: Average size of the inserted sequence + type: float + format: numeric + example: 150 + average_read_length: + description: Average length of the reads + type: float + format: numeric + example: 75 + median_quality_score: + description: Median quality score of reads + type: float + format: numeric + example: 30.5 + passing_filter_reads: + description: Number of reads passing quality filters + type: float + format: numeric + example: 850000 + adapter_content: + description: Proportion of the adapter content + type: float + format: numeric + example: 5 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same 
manner + uom: '%' + encoding: UTF-8 + quality_metric_of: + description: A particular file associated with this quality metric + type: string + example: checksums.text + sequence_counts: + description: Estimated number of sequences per sample, including duplicates + type: integer + format: numeric + example: 500000 + sequence_quality: + description: Mean quality value across each base position in the read (phred + score) + type: integer + format: numeric + example: 30 + lower_limit: 20.0 + upper_limit: 60.0 + per_sequence_quality_score: + description: Number of reads with average quality scores per read + type: integer + format: numeric + example: 28 + lower_limit: 20.0 + upper_limit: 40.0 + per_base_sequence_content: + description: 'Proportion of each DNA base at each position. ex: 25% (A at + position 10)' + type: float + format: numeric + example: 25 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + per_sequence_gc_content: + description: Average GC content in reads + type: float + format: numeric + example: 50 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + per_base_n_content: + description: Percentage of N bases at each position + type: float + format: numeric + example: 1 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + sequence_length_distribution: + description: Most common sequence length across all samples + type: float + format: numeric + example: 63 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of 
other quantities of the same manner + uom: bp + encoding: UTF-8 + sequence_duplication_levels: + description: Relative level of duplication for each sequence + type: float + format: numeric + example: 10 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + overrepresented_sequences: + description: Number of sequences that appear more frequently than expected + type: integer + format: numeric + example: 100 + total_deduplicated_percentage: + description: Percentage of deduplicated sequences + type: float + format: numeric + example: 25.83 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + total_records: + description: Total number of records in the sample + type: integer + format: numeric + example: 662541538 + qc_failed: + description: Number of records that failed quality control + type: integer + format: numeric + example: 0 + optical_pcr_duplicate: + description: Number of duplicate records due to PCR or optical errors + type: integer + format: numeric + example: 0 + non_primary_hits: + description: Number of secondary alignment hits + type: integer + format: numeric + example: 22117310 + unmapped_reads: + description: Number of reads that did not map to the reference genome + type: integer + format: numeric + example: 304568704 + mapq_lt_mapq_cut_non_unique: + description: Reads with mapping quality less than the threshold + type: integer + format: numeric + example: 783066 + mapq_gte_mapq_cut_unique: + description: Reads with mapping quality above the threshold and unique + type: integer + format: numeric + example: 335072458 + read_1: + description: Number of first reads in pairs + type: integer + format: numeric + example: 172232289 + 
read_2: + description: Number of second reads in pairs + type: integer + format: numeric + example: 162840169 + reads_map_to_sense: + description: Reads mapped to the sense strand of the genome + type: integer + format: numeric + example: 168129713 + reads_map_to_antisense: + description: Reads mapped to the antisense strand of the genome + type: integer + format: numeric + example: 166942745 + non_splice_reads: + description: Reads that are not spliced + type: integer + format: numeric + example: 206621110 + splice_reads: + description: Reads that are spliced + type: integer + format: numeric + example: 128451348 + reads_mapped_in_proper_pairs: + description: Reads correctly paired and mapped + type: integer + format: numeric + example: 322356294 + proper_paired_reads_map_to_different_chrom: + description: Properly paired reads that map to different chromosomes + type: integer + format: numeric + example: 0 + unique_percent: + description: Percentage of reads that map uniquely to the genome + type: float + format: numeric + example: 50.57 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + proper_pairs_percent: + description: Percentage of reads that are mapped in proper pairs + type: float + format: numeric + example: 48.65 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + average_input_read_length: + description: Average length of input reads + type: integer + format: numeric + example: 200 + number_of_splices_gt_ag: + description: Number of GT/AG splices + type: integer + format: numeric + example: 10449311 + number_of_reads_mapped_too_many_loci: + description: Reads mapped to more locations than allowed + type: integer + format: 
numeric + example: 2135 + mapping_speed_million_reads_per_hour: + description: Speed of mapping reads per hour + type: float + format: numeric + example: 116.6 + percent_reads_unmapped_too_short: + description: Percentage of reads unmapped due to being too short + type: float + format: numeric + example: 2.75 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_of_splices_non_canonical: + description: Number of non-canonical splices + type: integer + format: numeric + example: 8537 + number_of_reads_mapped_multiple_loci: + description: Reads mapped to multiple locations + type: integer + format: numeric + example: 1224406 + percent_reads_unmapped_many_mismatches: + description: Percentage of reads unmapped due to too many mismatches + type: float + format: numeric + example: 0 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + average_mapped_length: + description: Average length of mapped reads + type: float + format: numeric + example: 198.79 + deletion_rate_per_base: + description: Percentage of deletions per base + type: float + format: numeric + example: 0.01 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + insertion_average_length: + description: Average length of insertions + type: float + format: numeric + example: 1.51 + number_of_splices_gc_ag: + description: Number of GC/AG splices + type: integer + format: numeric + example: 81342 + insertion_rate_per_base: + description: Percentage of insertions per base + type: float + format: numeric + example: 0.01 + 
lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_of_splices_annotated_sjdb: + description: Number of splices annotated in sjdb + type: integer + format: numeric + example: 10428229 + percent_reads_unmapped_other: + description: Percentage of reads unmapped due to other reasons + type: float + format: numeric + example: 0.1 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + uniquely_mapped_reads_percent: + description: Percentage of uniquely mapped reads + type: float + format: numeric + example: 90.64 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + percent_reads_mapped_too_many_loci: + description: Percentage of reads mapped to too many loci + type: float + format: numeric + example: 0.01 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_of_splices_total: + description: Total number of splices + type: integer + format: numeric + example: 10549888 + deletion_average_length: + description: Average length of deletions + type: float + format: numeric + example: 2.1 + uniquely_mapped_reads_number: + description: Number of reads that mapped uniquely + type: integer + format: numeric + example: 17055977 + percent_reads_mapped_multiple_loci: + description: Percentage of reads mapped to multiple loci + type: float + format: numeric + example: 6.51 + lower_limit: 0.0 + upper_limit: 100.0 + units: + 
description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_of_splices_at_ac: + description: Number of AT/AC splices + type: integer + format: numeric + example: 10698 + mismatch_rate_per_base_percent: + description: Mismatch rate per base calculated as a percentage + type: float + format: numeric + example: 0.34 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + read_depth: + description: Total reads used in the analysis + type: integer + format: numeric + example: 18280383 + scRNAseq: + description: A technique used to analyze the gene expression profiles of individual cells, providing insights into cellular diversity, function, and states within a complex tissue + properties: + sequencing_saturation: + description: Percentage of sequencing saturation + type: float + format: numeric + example: 50 + lower_limit: 50.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_of_cells_passing_cell_level_qc: + description: Number of cells passing QC per patient + type: integer + format: numeric + example: 500 + proportion_of_cells_passing_cell_level_qc: + description: Proportion of cells passing QC per sample + type: float + format: numeric + example: 50 + lower_limit: 50.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + median_number_of_reads_per_cell: + description: Median number of reads per cell + type: integer + format: numeric + example: 10000 + lower_limit: 10000 + 
median_number_of_genes_per_cell: + description: Median number of genes identified per cell + type: integer + format: numeric + example: 2000 + lower_limit: 2000 + median_number_of_umi_counts_per_cell: + description: Median number of UMI counts per cell + type: integer + format: numeric + example: 3000 + lower_limit: 3000 + percent_confidently_mapped_reads_in_cells: + description: Percentage of reads confidently mapped in cells + type: float + format: numeric + example: 70 + lower_limit: 70.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_of_umi_counts_per_cell: + description: Number of UMI counts per cell + type: integer + format: numeric + example: 1000 + lower_limit: 1000 + number_of_features_per_cell: + description: Number of features (e.g., genes, transcripts) detected per + cell + type: integer + format: numeric + example: 200 + lower_limit: 200 + percent_reads_in_mitochondrial_genes: + description: Percentage of reads mapped to mitochondrial genes (scRNAseq) + type: float + format: numeric + example: 10 + lower_limit: 0.0 + upper_limit: 20.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + percent_reads_in_haemoglobin_gene: + description: Percentage of reads mapped to haemoglobin genes + type: float + format: numeric + example: 5 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + percent_reads_in_ribosomal_genes: + description: Percentage of reads mapped to ribosomal genes + type: float + format: numeric + example: 25 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same 
manner + uom: '%' + encoding: UTF-8 + ChIP-seq: + description: A technique used to analyze protein-DNA interactions in the genome. It identifies the binding sites of DNA-associated proteins, such as transcription factors, and maps the locations of histone modifications + properties: + usable_fragments: + description: Fragments passing various filters in the ChIP-seq processing + pipeline + type: integer + format: numeric + example: 100000 + relaxed_peaks: + description: Peaks called with low significance, high false positive rate + type: integer + format: numeric + example: 5000 + eCLIP: + description: A technique used to analyze protein-RNA interactions across the transcriptome. It identifies the binding sites of RNA-binding proteins (RBPs) on their target RNAs + properties: + unique_fragments: + description: Fragments uniquely mapping to the genome + type: integer + format: numeric + example: 85000 + usable_reads: + description: Reads uniquely mapping and without duplicates + type: integer + format: numeric + example: 120000 + saturated_peak_detection: + description: Whether 80% of original peaks are detected after downsampling + 50% + type: boolean + example: True + read_clusters: + description: Regions with higher read density than background + type: integer + format: numeric + example: 400 + RNA-seq, RAMPAGE: + description: A specialized technique designed to precisely map transcription start sites (TSS) and quantify gene expression. 
It provides high-resolution data on promoter usage and the structure of the 5' ends of transcripts + properties: + uniquely_mapped_reads: + description: Reads mapping to exactly one location + type: integer + format: numeric + example: 150000 + ChIA-PET: + description: A technique used to study the interactions between different regions of the genome that are mediated by specific proteins + properties: + unique_usable_pets: + description: PETs mapping uniquely and within set distances + type: integer + format: numeric + example: 30000 + multiQC: + description: A bioinformatics tool used to aggregate and visualize quality control (QC) metrics from multiple tools across many samples + properties: + aligned_reads: + description: Total of uniquely mapped and multi-mappers + type: integer + format: numeric + example: 200000 + frip: + description: Mapped reads in peak regions divided by all usable reads + type: float + format: numeric + example: 0.65 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + WGBS: + description: A high-throughput sequencing technique used to analyze DNA methylation patterns across the entire genome at single-base resolution + properties: + coverage: + description: Read length times number of uniquely mapped reads divided by + genome size + type: float + format: numeric + example: 30 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: X + encoding: UTF-8 + DNase-seq: + description: A high-throughput sequencing technique used to identify regions of open chromatin, which are accessible to regulatory proteins and are indicative of regulatory elements such as promoters, enhancers, and other DNA-binding sites + properties: + spot: + description: Fraction of reads in tag-enriched regions + 
type: float + format: numeric + example: 0.8 + lower_limit: 0.0 + upper_limit: 1.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + ATAC-seq: + description: A high-throughput sequencing technique used to study chromatin accessibility, identifying regions of open chromatin, nucleosome positioning, and transcription factor binding sites across the genome + properties: + tss_enrichment_score: + description: Signal to noise ratio at transcription start sites + type: float + format: numeric + example: 6 + RNA Bind-N-Seq: + description: A high-throughput sequencing technique used to identify the binding preferences and specific RNA targets of RNA-binding proteins (RBPs) + properties: + r_value_of_k_mer: + description: Frequency of k-mer in enriched vs. input reads + type: float + format: numeric + example: 1.5 + Imaging: + description: Data obtained from various imaging technologies used to visualize biological structures, tissues, and processes in medical and research settings + properties: + tissue_section_length: + description: Length of tissue section + type: float + format: numeric + example: 3.5 + tissue_section_breadth: + description: Breadth of tissue section + type: float + format: numeric + example: 1.5 + tissue_section_artefacts: + description: Artefacts visible in H&E image of tissue section + type: string + example: Folding + enum: + - Chatter + - Tears + - Folding + - Creasing + - Pen markings + - Air bubbles + - None + tissue_section_thickness: + description: Thickness of the tissue section being examined + type: float + format: numeric + example: 5 + scanning_artefacts: + description: Artefacts introduced in tissue image during scanning + type: string + example: None + enum: + - Scan lines + - Background effects + - None + magnification: + description: Magnification of the instrument used for imaging + type: integer + format: 
numeric + example: 10 + resolution: + description: Smallest spatial distance that can be resolved in the image + type: float + format: numeric + example: 10 + out_of_focus_regions: + description: Whether the tissue image contains out of focus (OOF) regions, + degree of OOF + type: string + example: Global OOF + enum: + - Global OOF + - Regional OOF + percent_stromal_cells: + description: The percent of stromal cells based on the tissue image. + type: float + format: numeric + example: 90 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + percent_necrosis: + description: The percent of necrotic region based on the tissue image. + type: float + format: numeric + example: 80 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + percent_inflam_infiltration: + description: The ratio of inflammatory cells to the gross cell population + seen on a slide. + type: float + format: numeric + example: 10 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_proliferating_cells: + description: The number of proliferating cells based on the tissue image. + type: integer + format: numeric + example: 200 + percent_tumor_cells: + description: The percent of tumor cells based on the tissue image. 
+ type: float + format: numeric + example: 10 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + number_malignant_cells: + description: The number of malignant cells based on tissue image + type: integer + format: numeric + example: 1500 + region_mean_intensity: + description: Mean expression (pixel intensity) of a marker across the imaged + region + type: float + format: numeric + example: 112 + region_stdev: + description: Std deviation in expression (pixel intensity) of a marker across + the imaged region + type: float + format: numeric + example: 18 + signal_area: + description: Area of the imaged region with marker expression above a binarization + threshold - Area with Signal + type: integer + format: numeric + example: 45 + signal_mean_intensity: + description: Mean expression (pixel intensity) of a marker across the signal + area + type: float + format: numeric + example: 50 + signal_stdev: + description: Std Deviation in expression (pixel intensity) of a marker across + the signal area + type: float + format: numeric + example: 55.5 + background_area: + description: Area of the imaged region with marker expression below a binarization + threshold - Area with Background/Noise + type: integer + format: numeric + example: 32 + background_mean_intensity: + description: Mean expression (pixel intensity) of a marker across the background + area + type: float + format: numeric + example: 100 + background_stdev: + description: Std Deviation in expression (pixel intensity) of a marker across + the background area + type: float + format: numeric + example: 11 + SNR: + description: Ratio of mean intensity in signal area to mean intensity in + background area + type: float + format: numeric + example: 1.3 + type: object + version: 1.0.0 diff --git a/model/project/publication/publication-definition.md 
b/model/project/publication/publication-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/DataModel/Project/Publication/Publication-definition.yaml b/model/project/publication/publication-definition.yaml similarity index 100% rename from DataModel/Project/Publication/Publication-definition.yaml rename to model/project/publication/publication-definition.yaml diff --git a/model/project/subject/clinical_history/clinical-history-definition.md b/model/project/subject/clinical_history/clinical-history-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/DataModel/Project/Subject/Clinical History/Clinical-history-definition.yaml b/model/project/subject/clinical_history/clinical-history-definition.yaml similarity index 92% rename from DataModel/Project/Subject/Clinical History/Clinical-history-definition.yaml rename to model/project/subject/clinical_history/clinical-history-definition.yaml index 5816fc1..6152916 100644 --- a/DataModel/Project/Subject/Clinical History/Clinical-history-definition.yaml +++ b/model/project/subject/clinical_history/clinical-history-definition.yaml @@ -1,10 +1,10 @@ Clinical-history: description: Describes the information related to the patient's past symptoms, diagnoses, and treatments properties: - donor_id: - description: Foreign donor ID or unique identifier to map donor information + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. type: string - example: DO-001 + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. 
clinical_history_id: description: Unique identifier for clinical history record type: string diff --git a/model/project/subject/comorbidity/comorbidity-definition.yaml b/model/project/subject/comorbidity/comorbidity-definition.yaml new file mode 100644 index 0000000..3e89cfe --- /dev/null +++ b/model/project/subject/comorbidity/comorbidity-definition.yaml @@ -0,0 +1,86 @@ +Comorbidity: + description: Refers to the presence of one or more additional medical conditions + co-occurring with a primary condition + properties: + donor_id: + description: Foreign key or unique identifier to map donor + type: string + example: D-002 + diagnosis_id: + description: Foreign key or unique identifier to map a diagnosis + type: string + example: DI-034 + comorbidity_id: + description: Primary key or unique identifier to track comorbidity + type: string + example: CMBT-124 + disease_name: + description: Simultaneous disease described for the donor along with the + curated_disease of interest + type: string + example: Diabetes, Hypercholesterolemia + comorbidity_duration: + description: Time duration for this simultaneous disease in years + type: float + format: numeric + example: 8.8 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: years + encoding: UTF-8 + disease_treatment: + description: Specific treatment undergone to treat the comorbid condition + type: string + example: Anti-diabetic drug, statins + disease_treatment_duration: + description: Duration of treatment for the given disease indication in years + type: float + format: numeric + example: 2.5 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: years + encoding: UTF-8 + age_at_diagnosis: + description: Age at which the comorbid condition was identified + type: integer + format: numeric + example: 56 + 
comorbidity_treatment_status: + description: Indicate if the patient is being treated for the comorbidity + (this includes prior malignancies). + type: boolean + example: True + enum: + - True + - False + prior_malignancy: + description: Prior malignancy affecting donor. + type: boolean + example: True + enum: + - True + - False + laterality_of_prior_malignancy: + description: If donor has history of prior malignancy, indicate laterality + of previous diagnosis + type: string + example: Unilateral + enum: + - Bilateral + - Left + - Midline + - Not applicable + - Right + - Unilateral + - Side not specified + - Unknown + comorbidity_type_code: + description: Indicate the code for the comorbidity using the WHO ICD-10 + code classification + type: string + example: E11, B02, K74.60 + type: object + version: 1.0.0 diff --git a/model/project/subject/core/subject-core-definition.md b/model/project/subject/core/subject-core-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/model/project/subject/core/subject-core-definition.yaml b/model/project/subject/core/subject-core-definition.yaml new file mode 100644 index 0000000..c5b2251 --- /dev/null +++ b/model/project/subject/core/subject-core-definition.yaml @@ -0,0 +1,185 @@ +subject-core: + description: Describes information such as age gender, race, and other core details related to the subject + properties: + project_id: + description: the unique id provided to the experimental study or project that is governed by an investigator. + type: string + example: PROJECT ID is an alphanumeric string that is globally unique identifier for the project or study. + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. + type: string + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. 
+ age: + description: The age of the organism in years + type: integer + format: numeric + example: 35 + lower_limit: 0 + age_unit: + description: Unit in which the age is measured + type: string + example: years + developmental_stage_unit: + description: Represents the unit used for denoting the development stage + type: string + example: stage + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/efo/ + developmental_stage: + description: Represents the stage of development/formation of the embryo + of humans and mice + type: string + example: fetal + enum: + - fetal + - neonatal + - adolescent + - adult + - geriatric + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/efo/, https://www.ebi.ac.uk/ols4/ontologies/uberon + gender: + description: Gender of the organism from which the sample was derived + type: string + example: Female + enum: + - Male + - Female + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/gsso + # maritial_status: + # description: Marital status at the time of sampling + # type: string + # example: Married + # enum: + # - Married + # - Single + # - Divorced + # ontology_link: + # type: string + # description: Link to the ontology file + # url: https://www.ebi.ac.uk/ols4/ontologies/snomed/classes/http%253A%252F%252Fsnomed.info%252Fid%252F87915002 + pre_menopause_cycle_type: + description: Type of pre-menopause cycle + type: string + example: Regular, Irregular + enum: + - Regular + - Irregular + pre_menopause_cycle_duration: + description: Duration of the pre-menopause + type: string + example: 28 + lower_limit: 0 + menstruation: + description: Menstruation status of Donor + type: string + example: Menstruating + enum: + - Menstruating + - Not menstruating + # occupation: + # description: Occupation of the subject + # type: string + # example: Engineer + # ontology_link: + # type: 
string + # description: Link to the ontology file + # url: https://www.ebi.ac.uk/ols4/ontologies/occo + cohort: + description: Group of individuals included in the study or dataset, which + may have implications for data analysis and interpretation + type: string + example: Study Group A + strain_characteristics: + description: Characteristics of the mouse strain + type: string + example: Wildtype + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ncit, https://sites.google.com/site/environmentontology/ + strain: + description: The name of the mouse strain + type: string + example: C57BL/6 + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/clo, https://www.ebi.ac.uk/ols4/ontologies/efo, + # country_of_origin: + # description: Country of origin or nationality of the patient, which may + # provide insights into geographic variations in disease incidence or environmental + # factors. + # type: string + # example: USA + # ontology_link: + # type: string + # description: Link to the ontology file + # url: https://www.ebi.ac.uk/ols4/ontologies/geo, https://www.ebi.ac.uk/ols4/ontologies/envo + # race: + # description: A geographic ancestral origin category that is assigned to a population group based mainly on physical characteristics that are thought to be distinct and inherent. 
+ # type: string + # example: Asian + # enum: + # - African + # - African Caribbean + # - American Indian or Alaska Native + # - Asian + # - Asian American + # - Australian + # - Black or African American + # - Caribbean Indian + # - Eskimo + # - European + # - Hispanic or Latino + # - Latin American + # - Mediterranean + # - Middle Eastern or North African + # - Multiracial + # - Native Hawaiian or Other Pacific Islander + # - New Zealander + # - North American + # - Other Race + # - South or Central American Indian + # - White + # ontology_link: + # type: string + # description: Link to the ontology file + # url: https://www.ebi.ac.uk/ols4/ontologies/ncit/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FNCIT_C17049?lang=en + # ethinicity: + # description: A social group characterized by a distinctive social and cultural tradition that is maintained from generation to generation. + # type: string + # example: Yi Chinese + # ontology_link: + # type: string + # description: Link to the ontology file + # url: https://www.ebi.ac.uk/ols4/ontologies/ncit/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FNCIT_C16564 + # number_of_children: + # description: Number of children + # type: integer + # example: 2 + # lower_limit: 0 + # pregnancy: + # description: Whether pregnant? 
+ # type: boolean + # example: True + # enum: + # - True + # - False + # cause_of_death: + # description: Cause of death + # type: string + # example: hysterectomy, stroke, Kidney disease, neonatal condition + # ontology_link: + # type: string + # description: Link to the ontology file + # url: https://www.disease-ontology.org/, https://hpo.jax.org/ + type: object + version: 1.0.0 \ No newline at end of file diff --git a/model/project/subject/events/core/events-core-definition.md b/model/project/subject/events/core/events-core-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/model/project/subject/events/core/events-core-definition.yaml b/model/project/subject/events/core/events-core-definition.yaml new file mode 100644 index 0000000..4ca8eb0 --- /dev/null +++ b/model/project/subject/events/core/events-core-definition.yaml @@ -0,0 +1,70 @@ +Events-core: + description: Lists any significant occurrence or activity that is recorded within + a patient's health record. Contains information like project ID, donor ID, event + ID and event type. + properties: + project_id: + description: The unique id provided to the experimental study or project that is governed by an investigator. + type: string + example: P012 + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. 
+ type: string + example: D001 + event_id: + description: Primary key or unique identifier for an event + type: string + example: E001 + billing_id: + description: Primary key or unique identifier used for billing purposes + type: string + example: B1234 + event_type: + description: Describes the event occurrence for a patient + type: string + example: Diagnosis + enum: + - Encounter + - Treatment + - Procedure + event_date: + description: The date when the event occurred in YYYY-MM-DD format + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: 2024-06-24 + event_time: + description: The time when the event occurred + type: time + example: 14:30:00 + reason: + description: The reason for the event occurrence + type: string + example: Routine check-up + site: + description: The location where the event took place + type: string + example: Clinic A, Room 202 + notes: + description: Additional notes or comments about the event + type: text + example: Patient improved + status: + description: Current status of the event + type: string + example: Completed + enum: + - Pending + - Completed + - Cancelled + duration: + description: Duration of the event in minutes + type: integer + format: numeric + example: 60 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: min + encoding: UTF-8 + type: object + version: 1.0.0 \ No newline at end of file diff --git a/model/project/subject/events/encounter/encounter-definition.yaml b/model/project/subject/events/encounter/encounter-definition.yaml new file mode 100644 index 0000000..e69de29 diff --git a/model/project/subject/events/procedure/procedure-definition.yaml b/model/project/subject/events/procedure/procedure-definition.yaml new file mode 100644 index 0000000..e69de29 diff --git a/DataModel/Project/Subject/Events/Treatment/Core/Treatment-core-definition.yaml 
b/model/project/subject/events/treatment/treatment-definition.yaml similarity index 98% rename from DataModel/Project/Subject/Events/Treatment/Core/Treatment-core-definition.yaml rename to model/project/subject/events/treatment/treatment-definition.yaml index 0dd636b..5449968 100644 --- a/DataModel/Project/Subject/Events/Treatment/Core/Treatment-core-definition.yaml +++ b/model/project/subject/events/treatment/treatment-definition.yaml @@ -1,11 +1,11 @@ -Treatment-core: +treatment: description: Describes the therapeutic agents, therapies, or procedures used to treat a medical condition. properties: - donor_id: - description: Foreign key or unique identifier to map to donor + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. type: string - example: D31724 + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. treatment_id: description: Primary key unique identifier for this treatment type: string diff --git a/model/project/subject/exposure/exposure-definition.md b/model/project/subject/exposure/exposure-definition.md new file mode 100644 index 0000000..e69de29
diff --git a/model/project/subject/exposure/exposure-definition.yaml b/model/project/subject/exposure/exposure-definition.yaml
new file mode 100644
index 0000000..657ad92
--- /dev/null
+++ b/model/project/subject/exposure/exposure-definition.yaml
@@ -0,0 +1,137 @@
+# Exposure schema: one record per exposure instance, linked to a subject via
+# subject_id. Numeric durations carry their unit in the nested units.uom field.
+exposure:
+  description: This object captures details about various types of exposures, including their classification, duration, frequency, and other attributes related to diet and exercise.
+  properties:
+    subject_id:
+      description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor.
+      type: string
+      example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated.
+    exposure_id:
+      description: A primary key that uniquely identifies a specific exposure instance in the dataset.
+      type: string
+      example: "EXP001"
+    event_id:
+      description: A foreign key or unique identifier linking a patient to an event within the study's context.
+      type: string
+      example: "EVT001"
+    exposure_type:
+      description: Classifies how individuals encounter hazardous substances or environmental factors, such as smoking or exposure to asbestos.
+      type: string
+      example: "Coal dust"
+      enum:
+        - Smoking
+        - Alcohol
+        - Asbestos
+        - Coal dust
+        - Respirable crystalline silica
+        - Secondhand smoke as child
+        - Radon exposure
+        - Marijuana
+        - Virus
+        - Allergen
+      ontology_link:
+        type: string
+        description: Link to the ontology file providing further information about exposure types.
+        url: "https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C157103"
+    exposure_unit:
+      description: Specifies the unit used to measure the duration of exposure, such as months or years.
+      type: string
+      example: "months"
+    exposure_duration:
+      description: Represents the time duration associated with the exposure, measured in the specified unit.
+      type: integer
+      format: numeric
+      example: 17
+      lower_limit: 0
+      units:
+        description: Defines the unit of measurement used for specifying exposure duration.
+        uom: "months"
+        encoding: UTF-8
+    exposure_frequency:
+      description: Indicates how often the exposure to the hazardous substance or environmental factor occurs, such as once or multiple times.
+      type: string
+      example: "once"
+      enum:
+        - once
+        - multiple
+        - none
+      units:
+        description: Specifies the frequency unit used to measure exposure frequency, typically in days.
+        uom: "day"
+        encoding: UTF-8
+    diet:
+      description: Describes the general dietary pattern or regimen followed by the donor.
+      type: string
+      example: "Mediterranean"
+    diet_duration:
+      description: Specifies the duration for which the donor has adhered to the described diet, measured in years.
+      type: float
+      format: numeric
+      example: 3.5
+      lower_limit: 0.0
+      units:
+        description: Defines the unit of measurement used for specifying diet duration.
+        uom: "years"
+        encoding: UTF-8
+    exercise:
+      description: Refers to physical activities undertaken by the donor to maintain or improve physical fitness and overall health.
+      type: string
+      example: "Jogging"
+    exercise_specify:
+      description: Indicates the duration for which the donor has engaged in the specified exercise activity, measured in years.
+      type: integer
+      format: numeric
+      example: 10
+      lower_limit: 0
+      units:
+        description: Defines the unit of measurement used for specifying exercise duration.
+        uom: "years"
+        encoding: UTF-8
+    time_between_waking_and_first_exposure:
+      description: Specifies the duration in minutes between waking up and the first exposure to tobacco (if applicable).
+      type: integer
+      format: numeric
+      example: 15
+      lower_limit: 0
+      units:
+        description: Defines the unit of measurement used for specifying the time duration.
+        uom: "minutes"
+        encoding: UTF-8
+    exposure_category:
+      description: Describes the reported level of exposure by the donor, such as daily, weekly, occasional, or none.
+      type: string
+      example: "Occasional (< once a month)"
+      enum:
+        - Daily
+        - None
+        - Not applicable
+        - Occasional (< once a month)
+        - Social (> once a month - < once a week)
+        - Unknown
+        - Weekly (>=1x a week)
+    exercise_frequency:
+      description: Indicates how frequently the donor engages in physical exercise, ranging from never to every day.
+      type: string
+      example: "Less than once a month"
+      enum:
+        - Never
+        - Less than once a month
+        - 1-3 times a month
+        - Not applicable
+        - Once or twice a week
+        - Most days but not every day
+        - Every day
+        - Unknown
+    exercise_intensity:
+      description: Specifies the intensity level of the donor's physical exercise, such as low, moderate, or vigorous.
+      type: string
+      example: "Low - No increase in the heart beat and no perspiration"
+      enum:
+        - Low - No increase in the heart beat and no perspiration
+        - Moderate - Increase in the heart beat slightly with some light perspiration
+        - Vigorous - Increase in the heart beat substantially with heavy perspiration
+        - Not applicable
+        - Unknown
+  type: object
+  version: 1.0.0
diff --git a/model/project/subject/family_history/family-history-definition.md b/model/project/subject/family_history/family-history-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/DataModel/Project/Subject/Family History/Family-history-definition.yaml b/model/project/subject/family_history/family-history-definition.yaml similarity index 94% rename from DataModel/Project/Subject/Family History/Family-history-definition.yaml rename to model/project/subject/family_history/family-history-definition.yaml index b70bc56..90bcf53 100644 --- a/DataModel/Project/Subject/Family History/Family-history-definition.yaml +++ b/model/project/subject/family_history/family-history-definition.yaml @@ -3,10 +3,10 @@ Family-history: This includes a relative's disease and cancer histories, exposure to toxic substances and their durations properties: - donor_id: - description: Foreign key or unique identifier to map donor information + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. type: string - example: DO-31724 + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated.
relationship_age_at_diagnosis: description: Age of the patient's relative at the time of diagnosis type: integer
diff --git a/model/project/subject/measurement/measurement-definition.yaml b/model/project/subject/measurement/measurement-definition.yaml
new file mode 100644
index 0000000..590c872
--- /dev/null
+++ b/model/project/subject/measurement/measurement-definition.yaml
@@ -0,0 +1,66 @@
+# Measurement schema: routine/diagnostic measurements recorded for a donor,
+# each identified by measurement_id and linked to an event via event_id.
+Measurement:
+  description: Contains details of sample measurements related to the routine/diagnostic
+    tests
+  properties:
+    # NOTE(review): sibling definitions in this change use subject_id; confirm
+    # whether donor_id is intended here.
+    donor_id:
+      description: Foreign key or unique identifier to map donor
+      type: string
+      example: D001
+    event_id:
+      description: Foreign key or unique identifier to map a patient to an event
+      type: string
+      example: EV-01012020
+    measurement_id:
+      description: Identifier for the specific measurement
+      type: string
+      example: MS-01012020
+    date_of_measurement:
+      description: Date when the measurement was taken
+      type: date
+      example: 01-01-2020
+    measurement_type:
+      description: Physical measurement that provides medical insights into the
+        donor's health status
+      type: string
+      example: Systolic blood pressure
+      enum:
+        - Tumor weight
+        - Breslow thickness
+        - Body Mass Index
+        - Height
+        - Weight
+        - Waist Circumference
+        - Hip Circumference
+        - Systolic blood pressure
+        - Diastolic blood pressure
+        - Heart rate
+        - Temperature
+        - Respiratory Rate
+    anatomical_site_of_measurement:
+      description: Location where the measurement was taken
+      type: string
+      example: Upper arm
+      ontology_link:
+        type: string
+        description: Link to the ontology file
+        url: https://www.ebi.ac.uk/ols4/ontologies/uberon
+    measurement_result:
+      description: Numeric result of the measurement
+      type: float
+      format: numeric
+      example: 2.2
+      lower_limit: 0.0
+    measurement_result_unit:
+      description: Unit for result of the measurement
+      type: string
+      example: mmHg
+    reference_range:
+      description: Reference range from a normal donor
+      type: string
+      example: 90-120/60-80
+  type: object
+  version: 1.0.0
diff --git a/DataModel/Project/Subject/Diagnosis/Biomarker/Biomarker-definition.yaml b/model/project/subject/observations/diagnosis/biomarker/biomarker-definition.yaml similarity index 90% rename from DataModel/Project/Subject/Diagnosis/Biomarker/Biomarker-definition.yaml rename to model/project/subject/observations/diagnosis/biomarker/biomarker-definition.yaml index d772974..d7dd20e 100644 --- a/DataModel/Project/Subject/Diagnosis/Biomarker/Biomarker-definition.yaml +++ b/model/project/subject/observations/diagnosis/biomarker/biomarker-definition.yaml @@ -2,18 +2,18 @@ Biomarker: description: Describes information regarding protein biomarkers/molecules that serve as indicators of specific processes or conditions of the subject properties: - donor_id: - description: Foreign Key or unique identifier referencing patient + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. type: string - example: DO-31724 + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. result_id: description: Primary Key or unique identifier mapping result type: string example: R-7391493 - event_id: - description: Primary key or unique identifier to map a patient to an event + observation_id: + description: A foreign key or unique identifier linking an observation or data point to an event involving a patient or subject.
type: string - example: EV-20201124 + example: "OBS001" treatment_id: description: Primary key or unique identifier for this treatment type: string diff --git a/DataModel/Project/Subject/Diagnosis/Core/Diagnosis-core-definition.yaml b/model/project/subject/observations/diagnosis/core/diagnosis-core-definition.yaml similarity index 96% rename from DataModel/Project/Subject/Diagnosis/Core/Diagnosis-core-definition.yaml rename to model/project/subject/observations/diagnosis/core/diagnosis-core-definition.yaml index ff796b8..23e6168 100644 --- a/DataModel/Project/Subject/Diagnosis/Core/Diagnosis-core-definition.yaml +++ b/model/project/subject/observations/diagnosis/core/diagnosis-core-definition.yaml @@ -3,14 +3,14 @@ Diagnosis-core: subject n based on clinical evidence and reasoning. Details include type of diseases, methods of diagnosis, classification of diseases and symptoms. properties: - donor_id: - description: Foreign key or unique identifier to map to donor + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. type: string - example: D003 - event_id: - description: Foreign key or unique identifier to map a patient to an event + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. + observation_id: + description: A foreign key or unique identifier linking an observation or data point to an event involving a patient or subject. 
type: string - example: EVT001 + example: "OBS001" diagnosis_id: description: An ID or unique identifier for the specific diagnosis type: string diff --git a/DataModel/Project/Subject/Specimen/Specimen-definition.yaml b/model/project/subject/specimen/Specimen-definition.yaml similarity index 99% rename from DataModel/Project/Subject/Specimen/Specimen-definition.yaml rename to model/project/subject/specimen/Specimen-definition.yaml index c3660e4..c7191ff 100644 --- a/DataModel/Project/Subject/Specimen/Specimen-definition.yaml +++ b/model/project/subject/specimen/Specimen-definition.yaml @@ -5,10 +5,10 @@ Specimen: core: description: Core details related to specimen properties: - donor_id: - description: Foreign key or unique identifier to map to donor + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. type: string - example: D-056 + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. 
diagnosis_id: description: Foreign key or unique identifier to map a project type: string diff --git a/model/project/subject/specimen/specimen-definition.md b/model/project/subject/specimen/specimen-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/model/project/trial/trial-definition.md b/model/project/trial/trial-definition.md new file mode 100644 index 0000000..e69de29 diff --git a/DataModel/Project/Trial/Trial-definition.yaml b/model/project/trial/trial-definition.yaml similarity index 100% rename from DataModel/Project/Trial/Trial-definition.yaml rename to model/project/trial/trial-definition.yaml diff --git a/uml/img/Clinical-history.png b/uml/img/Clinical-history.png new file mode 100644 index 0000000..91a41e7 Binary files /dev/null and b/uml/img/Clinical-history.png differ diff --git a/uml/img/Diagnosis-core.png b/uml/img/Diagnosis-core.png new file mode 100644 index 0000000..fac74dd Binary files /dev/null and b/uml/img/Diagnosis-core.png differ diff --git a/uml/img/Events-core.png b/uml/img/Events-core.png new file mode 100644 index 0000000..a61d1a6 Binary files /dev/null and b/uml/img/Events-core.png differ diff --git a/uml/img/Family-history.png b/uml/img/Family-history.png new file mode 100644 index 0000000..0dfdcfa Binary files /dev/null and b/uml/img/Family-history.png differ diff --git a/uml/img/Project-core.png b/uml/img/Project-core.png new file mode 100644 index 0000000..229a0c3 Binary files /dev/null and b/uml/img/Project-core.png differ diff --git a/uml/img/Protocol.png b/uml/img/Protocol.png new file mode 100644 index 0000000..36f8cdc Binary files /dev/null and b/uml/img/Protocol.png differ diff --git a/uml/img/Publication.png b/uml/img/Publication.png new file mode 100644 index 0000000..3e45957 Binary files /dev/null and b/uml/img/Publication.png differ diff --git a/uml/img/Specimen.png b/uml/img/Specimen.png new file mode 100644 index 0000000..505d7e3 Binary files /dev/null and b/uml/img/Specimen.png differ diff --git 
a/uml/img/experiment-design.png b/uml/img/experiment-design.png new file mode 100644 index 0000000..ee0678c Binary files /dev/null and b/uml/img/experiment-design.png differ diff --git a/uml/img/exposure.png b/uml/img/exposure.png new file mode 100644 index 0000000..39a9076 Binary files /dev/null and b/uml/img/exposure.png differ diff --git a/uml/img/treatment.png b/uml/img/treatment.png new file mode 100644 index 0000000..5f35b87 Binary files /dev/null and b/uml/img/treatment.png differ diff --git a/uml/img/trial.png b/uml/img/trial.png new file mode 100644 index 0000000..ff9f03e Binary files /dev/null and b/uml/img/trial.png differ diff --git a/uml/wsd/Specimen-definition.wsd b/uml/wsd/Specimen-definition.wsd new file mode 100644 index 0000000..cd5b416 --- /dev/null +++ b/uml/wsd/Specimen-definition.wsd @@ -0,0 +1,1410 @@ +@startyaml Specimen + Specimen: + description: Describes the details related to specimen such as tissue, sampling + site, preservation methods and platform + properties: + core: + description: Core details related to specimen + properties: + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. + type: string + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. + diagnosis_id: + description: Foreign key or unique identifier to map a project + type: string + example: DI-034 + event_id: + description: Foreign key or unique identifier to map a patient to an event + type: string + example: EVT-123 + tissue: + description: Group of cells that work together to perform a specific function in an organism. 
Here it would be the tissue from which the samples were derived + type: string + example: Brain + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/bto + sampling_site: + description: The location or area from where the samples are collected. + type: string + example: Chest + donor_sample_type: + description: Refers to the specific kind of biological material collected from a donor for testing, research, or medical procedures + type: string + example: Stem cell + read_index: + description: Refers to the identifier or position of a specific read within a sequence dataset. + type: string + example: read1 + enum: + - read1 + - read2 + - read3 + - read4 + - index1 + - index2 + - single-end + - non-indexed + library_prep_id: + description: A unique ID for the library preparation + type: string + example: tech_rep_group_001 + Cell-line: + description: Details related to the cell-line from which the sample has been derived + properties: + growth_medium: + description: A substance or preparation used to support the growth of microorganisms, cells, or small plants in a laboratory setting + type: string + example: RPMI + culture_environment: + description: Specific conditions under which biological cultures (such as cells, tissues, or microorganisms) are maintained and grown in a laboratory setting + type: string + example: 5% CO2; 37°C + nutritional_state: + description: Specific nutritional conditions and requirements necessary for the optimal growth and maintenance of cells in a culture + type: string + example: starving + storage_method: + description: Procedures and conditions applied to store biological specimens + type: string + example: Liquid nitrogen + storage_duration: + description: Length of time a biological specimen is stored under specified conditions + type: integer + format: numeric + example: 8 + storage_time_unit: + description: Storage duration unit + type: string + example: months + 
autolysis_score: + description: Rating or measurement of the degree of autolysis, which is the process of self-digestion or degradation of cells and tissues by their own enzymes + type: integer + format: numeric + example: 3 + gross_description: + description: Color, size, and other aspects of specimen as visible to naked + eye + type: string + example: Buff, 20um, round + gross_images: + description: List of filenames of photographs of specimen without magnification + type: string + example: xyzDD-MM-YYYY.tiff + ischemic_temperature: + description: Whether specimen experienced warm or cold ischemia + type: string + enum: + - Warm + - Cold + ischemic_time: + description: Duration of time, in minutes, between when the specimen stopped + receiving oxygen and when it was preserved or processed + type: integer + format: numeric + example: 10 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mins + encoding: UTF-8 + microscopic_description: + description: Text that describes how the specimen looks under the microscope and how it compares + with normal cells + type: string + example: Most cells in their natural state, even if fixed and sectioned, + are almost invisible in an ordinary light microscope + microscopic_images: + description: List of filenames of photographs of specimen under microscope + type: string + example: xyzDD-MM-YYYY.tiff + postmortem_interval: + description: Duration of time between when death was declared and when the + specimen was preserved or processed + type: integer + format: numeric + example: 48 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: hours + encoding: UTF-8 + supplier: + description: The supplier of the cell line + type: string + example: HipSci + catalog_number: + description: The supplier catalogue number for the cell line +
type: integer + format: numeric + example: 77650057 + lot_number: + description: The supplier lot or batch number for the cell line + type: string + example: 24.10.14 + catalog_url: + description: The supplier catalogue URL for the cell line + type: string + format: alphanumeric + example: www.phe-culturecollections.org.uk/products/celllines/ipsc/detail.jsp?refId=77650057&collection=ecacc_ipsc + cell_cycle: + description: The cell cycle phase if the cell line is synchronized growing + cells or the phase is known. + type: string + example: S + enum: + - S + - G1 + - M + - Synchronous + type: + description: Population of cells that have been derived from a single cell and are capable of indefinite growth and division in culture + type: string + example: Primary + enum: + - Primary + - Immortalized + - Stem cell + - Stem cell-derived + - Induced pluripotent + - Synthetic + model_organ: + description: Organ for which this cell line is a model + type: string + example: Liver + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/uberon + cell_morphology: + description: Features relating to the morphology of the cells for the cell line + type: string + example: Fibroblastic + growth_conditions: + description: Features relating to the growth and/or maintenance of the cell + lines + type: string + example: 37°C and 5% CO2 with saturating humidity + confluency: + description: The percent a plate surface is covered by cells + type: float + format: numeric + example: 60 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + karyotype: + description: The karyotype of the cell line + type: string + example: sex chromosome complement of XY; del(2) (q11) + date_established: + description: Date when the cell line was established + type: string + pattern:
"^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: '1995-05-10' + disease: + description: Short description of any disease association to the cell type + type: string + example: Pancreatic Cell line derived from patient with T2D for duration + of 6.5 years + genus_species: + description: The scientific binomial name for the species of the cell line + type: string + example: Homo sapiens + publication: + description: A publication that cites the creation of the cell line + type: string + format: alphanumeric + example: https://pubmed.ncbi.nlm.nih.gov/34230000/ + slide: + description: Small, flat, usually rectangular piece of glass or plastic on which a sample of material, such as cells, tissues, or microorganisms, is mounted for microscopic examination + properties: + section_location: + description: Tissue source of the slide + type: string + example: Z= -6, Y= -3, X= -9 + bone_marrow_malignant_cells: + description: The text term used to indicate whether there are malignant + cells in the bone marrow + type: string + enum: + - 'Yes' + - 'No' + - Unknown + - Not Reported + percent_tumor_cells: + description: Numeric value that represents the percentage of infiltration + by tumor cells in a sample + type: float + format: numeric + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + percent_tumor_nuclei: + description: Numeric value to represent the percentage of tumor nuclei in + a malignant neoplasm sample or specimen + type: float + format: numeric + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + prostatic_chips_positive_count: + description: The text term used to describe the number of positive prostatic chips, + which are
generated from transurethral resection of the prostate (TURP) procedures + and are generally used for relieving urinary obstruction due to nodular hyperplasia + of the prostate + type: integer + format: numeric + example: 9 + prostatic_chips_total_count: + description: The text term used to describe the total number of prostatic + chips, which are generated from transurethral resection of the prostate + (TURP) procedures and are generally used for relieving urinary obstruction + due to nodular hyperplasia of the prostate (benign prostatic hyperplasia) + type: integer + format: numeric + example: 18 + prostatic_involvement_percentage: + description: Numeric value that represents the percentage of prostatic involvement + found in a specific tissue sample + type: float + format: numeric + example: 7 + lower_limit: 0.0 + upper_limit: 100.0 + sample: + description: Portion of material taken from a larger sample, typically for the purpose of analysis or examination + properties: + preservation_method: + description: Text term that represents the method used to preserve the sample. + type: string + example: EDTA + enum: + - Cryopreserved + - EDTA + - FFPE + - Fresh + - Frozen + - OCT + - Snap + - Frozen + - Unknown + - Not Reported + specimen_type: + description: The type of a material sample taken from a biological entity + for testing, diagnostic, propagation, treatment or research purposes. 
+ This includes particular types of cellular molecules, cells, tissues, + organs, body fluids, embryos, and body excretory substances + type: string + example: 2D Modified Conditionally Reprogrammed Cells + enum: + - 2D Classical Conditionally Reprogrammed Cells + - 2D Modified Conditionally Reprogrammed Cells + - 3D Air-Liquid Interface Organoid + - 3D Neurosphere + - 3D Organoid + - Adherent Cell Line + - Bone Marrow Components NOS + - Bone Marrow NOS + - Buccal Cells + - Buffy Coat + - Cell + - Control Analyte;Derived Cell Line + - Derived Cell Lines and Sorted Cells + - EBV Immortalized + - Fibroblasts from Bone Marrow + - Granulocytes + - Human Original Cells + - Liquid Suspension Cell Line + - Lymphocytes + - Lymphoid + - Mixed Adherent Suspension + - Mononuclear Cells from Bone Marrow + - Peripheral Blood Components NOS + - Peripheral Blood NOS + - Peripheral Whole Blood + - Plasma + - Pleural Effusion + - Saliva + - Serum + - Solid Tissue + - Sorted Cells + - Sputum + - Whole Bone Marrow + - Unknown + - Not Reported + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C70713 + tissue_type: + description: Text term that represents a description of the kind of tissue + collected with respect to disease status or proximity to tumor tissue + type: string + example: Abnormal + enum: + - Abnormal + - Normal + - Peritumoral + - Tumor + - Unknown + - Not Reported + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C19697 + tumor_descriptor: + description: Text that describes the kind of disease present in the tumor + specimen as related to a specific timepoint. 
+ type: string + example: NOS + enum: + - Metastatic + - New Primary + - NOS + - Premalignant + - Primary + - Recurrence + - Xenograft + - Unknown + - Not Reported + - Not Applicable + biospecimen_anatomic_site: + description: Text term that represents the name of the primary disease site + of the submitted tumor sample + type: string + example: Gland + enum: + - Sacrum + - Salivary Gland + - Scalp + - Scapula + - Sciatic Nerve + - Scrotum + - Seminal Vesicle + - Shoulder + - Sigmoid Colon + - Sinus + - Sinus(es) + - Maxillary + - Skeletal Muscle + - Skin + - Skull + - Small Bowel + - Small Bowel - Mucosa Only + - Small Finger + - Soft Tissue + - Spinal Column + - Spinal Cord + - Spleen + - Splenic Flexure + - Sternum + - Stomach + - Stomach - Mucosa Only + - Subcutaneous Tissue + - Subglottis + - Sublingual Gland + - Submandibular Gland + - Supraglottis + - Synovium + - Temporal Cortex + - Tendon + - Testis + - Thigh + - Thoracic Spine + - Thorax + - Throat + - Thumb + - Thymus + - Thyroid + - Tibia + - Tongue + - Tonsil + - Tonsil (Pharyngeal) + - Trachea / Major Bronchi + - Transverse Colon + - Trunk + - Umbilical Cord + - Ureter + - Urethra + - Urinary Tract + - Uterus + - Uvula + - Vagina + - Vas Deferens + - Vein + - Venous + - Vertebra + - Vulva + - White Blood Cells + - Wrist + - Other + - Unknown + - Not Reported + - Not Allowed To Collect + biospecimen_laterality: + description: For tumors in paired organs, designates the side on which the + specimen was obtained. + type: string + example: Bilateral + enum: + - Bilateral + - Left + - Right + - Unknown + - Not Reported + catalog_reference: + description: HCMI catalog reference number for cancer model + type: string + pattern: "^HCM-CSHL-\\d{4}-[A-Z]\\d{2}$" + example: HCM-CSHL-0459-C17 + current_weight: + description: Numeric value that represents the current weight of the sample, + measured in milligrams. 
+ type: float + format: numeric + example: 76.8 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mg + encoding: UTF-8 + days_to_collection: + description: The number of days from the index date to the date a sample + was collected for a specific study or project. + type: integer + format: numeric + example: 3 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: days + encoding: UTF-8 + days_to_sample_procurement: + description: The number of days from the index date to the date a patient + underwent a procedure (e.g. surgical resection) yielding a sample that + was eventually used for research. + type: integer + format: numeric + example: 4 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: days + encoding: UTF-8 + diagnosis_pathologically_confirmed: + description: The histologic description of tissue or cells confirmed by + a pathology review of frozen or formalin fixed slide(s) completed after + the diagnostic pathology review of the tumor sample used to extract analyte(s). + type: boolean + distance_normal_to_tumor: + description: Text term to signify the distance between the tumor tissue + and the normal control tissue that was procured for matching normal DNA. + type: string + enum: + - Adjacent (< or = 2cm) + - Distal (>2cm) + - Unknown + - Not Reported + distributor_reference: + description: Distributor reference number for cancer model. + type: string + example: 23.11d + freezing_method: + description: Text term that represents the method used for freezing the + sample. + type: string + example: cryopreservation + growth_rate: + description: Rate at which the model grows, measured as hours to time to + split. 
+ type: integer + format: numeric + example: 72 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: hours + encoding: UTF-8 + initial_weight: + description: Numeric value that represents the initial weight of the sample, + measured in milligrams + type: integer + format: numeric + example: 56 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mg + encoding: UTF-8 + intermediate_dimension: + description: Intermediate dimension of the sample, in millimeters. + type: integer + format: numeric + example: 19 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mm + encoding: UTF-8 + longest_dimension: + description: Numeric value that represents the longest dimension of the + sample, measured in millimeters. 
+ type: integer + format: numeric + example: 20 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mm + encoding: UTF-8 + method_of_sample_procurement: + description: The method used to procure the sample used to extract analyte(s) + type: string + example: Autopsy + enum: + - Abdomino-perineal Resection of Rectum + - Anterior Resection of Rectum + - Ascites Drainage + - Aspirate + - Autopsy + - Biopsy + - Blood Draw + - Bone Marrow Aspirate + - Buccal Mucosal Resection + - Core Biopsy + - Cystectomy + - Deep Parotidectomy + - Endo Rectal Tumor Resection + - Endolaryngeal Excision + - Endoscopic Biopsy + - Endoscopic Mucosal Resection (EMR) + - Enucleation + - Excisional Biopsy + - Fine Needle Aspiration + - Full Hysterectomy + - Glossectomy + - Gross Total Resection + - Hand Assisted Laparoscopic Radical Nephrectomy + - Hysterectomy NOS + - Incisional Biopsy + - Indeterminant + - Laparoscopic Biopsy + - Laparoscopic Partial Nephrectomy + - Laparoscopic Radical Nephrectomy + - Laparoscopic Radical Prostatectomy with Robotics + - Laparoscopic Radical Prostatectomy without Robotics + - Laryngopharyngectomy + - Left Hemicolectomy + - Liquid Biopsy + - Lobectomy + - Local Resection (Exoresection + - wall resection) + - Lumpectomy + - Lymph Node Dissection + - Lymphadenectomy + - Mandibulectomy + - Maxillectomy + - Metastasectomy + - Modified Radical Mastectomy + - Needle Biopsy + - Omentectomy + - Oophorectomy + - Open Craniotomy + - Open Partial Nephrectomy + - Open Radical Nephrectomy + - Open Radical Prostatectomy + - Orchiectomy + - Other Surgical Resection + - Palatectomy + - Pan-Procto Colectomy + - Pancreatectomy + - Paracentesis + - Parotidectomy + - NOS + - Partial Hepatectomy + - Partial Laryngectomy + - Partial Maxillectomy + - Partial Nephrectomy + - Peritoneal Lavage + - Pneumonectomy + - Punch Biopsy + - Radical Hysterectomy + - Radical Maxillectomy + - 
Radical Nephrectomy + - Radical Prostatectomy + - Right Hemicolectomy + - Salpingectomy + - Salpingo-oophorectomy + - Sigmoid Colectomy + - Simple Hysterectomy + - Simple Mastectomy + - Subtotal Prostatectomy + - Subtotal Resection + - Superficial Parotidectomy + - Supracervical Hysterectomy + - Supracricoid Laryngectomy + - Supraglottic Laryngectomy + - Surgical Resection + - Thoracentesis + - Thoracoscopic Biopsy + - Tonsillectomy + - Total Colectomy + - Total Hepatectomy + - Total Laryngectomy + - Total Mastectomy + - Total Nephrectomy + - Transoral Laser Excision + - Transplant + - Transurethral resection (TURBT) + - Transurethral Resection (TURP) + - Transverse Colectomy + - Tumor Debulking + - Tumor Resection + - Vertical Hemilaryngectomy + - Wedge Resection + - Whipple Procedure + - Other + - Unknown + - Not Reported + - Not Allowed To Collect + passage_count: + description: Number of passages (splits) between the original tissue and + this model. + type: integer + format: numeric + example: 3 + pathology_report_uuid: + description: UUID of the related pathology report + type: string + pattern: "^UUID\\d+$" + example: UUID42561 + sample_ordinal: + description: A number describing the samples place in an ordered sequence. + type: integer + format: numeric + example: 119 + shortest_dimension: + description: Numeric value that represents the shortest dimension of the + sample, measured in millimeters + type: integer + format: numeric + example: 9 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mm + encoding: UTF-8 + time_between_clamping_and_freezing: + description: Numeric representation of the elapsed time between the surgical + clamping of blood supply and freezing of the sample, measured in minutes. 
+ type: integer + format: numeric + example: 9 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mins + encoding: UTF-8 + time_between_excision_and_freezing: + description: Numeric representation of the elapsed time between the excision + and freezing of the sample, measured in minutes. + type: integer + format: numeric + example: 5 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mins + encoding: UTF-8 + tissue_collection_type: + description: The text term used to describe the type of collection used + to obtain tissue + type: string + enum: + - Prospective + - Retrospective + tumor_code: + description: Diagnostic tumor code of the tissue sample source. + type: string + enum: + - Acute Leukemia of Ambiguous Lineage (ALAL) + - Acute lymphoblastic leukemia (ALL) + - Acute myeloid leukemia (AML) + - Anal Cancer (all types) + - Cervical Cancer (all types) + - Clear cell sarcoma of the kidney (CCSK) + - Ependymoma + - Glioblastoma (GBM) + - Low grade glioma (LGG) + - Medulloblastoma + - Other + - Rhabdoid tumor + - Diffuse Large B-Cell Lymphoma (DLBCL) + - Ewing sarcoma + - Induction Failure AML (AML-IF) + - Lung Cancer (all types) + - Neuroblastoma (NBL) + - Anaplastic large cell lymphoma + - Burkitt lymphoma (BL) + - Non cancerous tissue + - Osteosarcoma (OS) + - Rhabdoid tumor (kidney) (RT) + - Rhabdomyosarcoma + - Soft tissue sarcoma + - Non-rhabdomyosarcoma + - Wilms tumor (WT) + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C177615 + tumor_code_id: + description: BCR-defined id code for the tumor sample. 
+ type: integer + format: numeric + enum: + - 0 + - 1 + - 2 + - 3 + - 4 + - 10 + - 15 + - 20 + - 21 + - 30 + - 40 + - 41 + - 50 + - 51 + - 52 + - 60 + - 61 + - 62 + - 63 + - 64 + - 65 + - 70 + - 71 + - 80 + - 81 + fresh_slicing_method: + description: The method by which fresh tissue was sliced + type: string + example: Vibrotome + final_slicing_method: + description: The method by which the final slice was obtained + type: string + example: Cryosectioning + post_resection_interval: + description: Length of time between surgical resection and fresh slicing + of tissue + type: integer + format: numeric + example: 11 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: min + encoding: UTF-8 + post_resection_interval_unit: + description: The unit of time in which the post resection interval is expressed + type: string + example: day + pre_final_slice_preservation_method: + description: Tissue preservation method used prior to final slicing + type: string + example: freezing + post_final_slicing_interval: + description: Length of time between secondary slicing and hybridization + type: integer + format: numeric + example: 7 + post_final_slicing_interval_unit: + description: The unit of time in which the post final slicing interval is + expressed + type: string + example: day + permeabilisation_time: + description: The permeabilisation time in time units that the tissue was + exposed to + type: integer + format: numeric + example: 12 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mins + encoding: UTF-8 + permeabilisation_time_unit: + description: The unit in which permeabilisation time is expressed + type: string + example: mins + Read group: + description: Used to keep track of the different sources of sequencing data within a single sequencing experiment + 
properties: + experiment_name: + description: Submitter-defined name for the experiment + type: string + example: RNA sequencing + is_paired_end: + description: indicates whether each read in a dataset is part of a paired-end sequencing experiment + type: boolean + enum: + - True + - False + library_name: + description: Name or identifier assigned to a library in a sequencing experiment + type: string + example: Total RNA library + library_selection: + description: Method used to select the DNA fragments that are included in a sequencing library + type: string + example: miRNA Size Fractionation + enum: + - Affinity Enrichment + - Hybrid Selection + - miRNA Size Fractionation + - Other + - PCR + - Poly-T Enrichment + - Random + - rRNA Depletion + library_strategy: + description: Overall approach or method used to create a sequencing library from a biological sample + type: string + example: Bisulfite-Seq + enum: + - ATAC-Seq + - Bisulfite-Seq + - ChIP-Seq + - HiChIP + - m6A MeRIP-Seq + - miRNA-Seq + - RNA-Seq + - scATAC-Seq + - scRNA-Seq + - Targeted Sequencing + - WGS + - WXS + platform: + description: Name of the platform used to obtain data + type: string + example: PacBio + enum: + - Complete Genomics + - Illumina + - Ion Torrent + - LS454 + - Other + - PacBio + - SOLiD + read_group_name: + description: Refers to a set of reads that are generated from a single run + of a sequencing instrument + type: string + example: ID:H0164.2 + read_length: + description: Number of base pairs in a DNA sequence read obtained from a sequencing experiment + type: integer + format: numeric + example: 60 + sequencing_center: + description: Name of the center that provided the sequence files + type: string + example: Donelly, Norwegian + target_capture_kit: + description: Description that can uniquely identify a target capture kit. 
+ Suggested value is a combination of vendor, kit name, and kit version + type: string + enum: + - Custom AmpliSeq Cancer Hotspot GENIE-MDA Augmented Panel v1 - 46 Genes + - Custom GENIE-DFCI OncoPanel - 275 Genes + - Custom GENIE-DFCI Oncopanel - 300 Genes + - Custom GENIE-DFCI Oncopanel - 447 Genes + - Custom HaloPlex DLBCL Panel - 370 Genes + - Custom Ion AmpliSeq Hotspot GENIE-MOSC3 Augmented Panel - 74 Genes + - Custom Large Construct Capture TARGET-OS Panel - 8 Genes + - Custom MSK IMPACT Panel - 341 Genes + - Custom MSK IMPACT Panel - 410 Genes + - Custom MSK IMPACT Panel - 468 Genes + - Custom Myeloid GENIE-VICC Panel - 37 Genes + - Custom Personalis ACEcp VAREPOP-APOLLO Panel v2 + - Custom PGDX SureSelect CancerSelect VAREPOP-APOLLO Panel - 203 Genes + - Custom PGDX SureSelect CancerSelect VAREPOP-APOLLO Panel - 88 Genes + - Custom SeqCap EZ BeatAML Panel - 12.5 Mb + - Custom SeqCap EZ HGSC VCRome v2.1 ER Augmented v1 + - Custom SeqCap EZ HGSC VCRome v2.1 ER Augmented v2 + - Custom SeqCap EZ TARGET-OS Panel - 7.0 Mb + - Custom Solid Tumor GENIE-VICC Panel - 34 Genes + - Custom SureSelect CGCI-BLGSP Panel - 4.6 Mb + - Custom SureSelect CGCI-BLGSP Panel - 7.8 Mb + - Custom SureSelect CGCI-HTMCP-CC KMT2D And Hotspot Panel - 37.0 Kb + - Custom SureSelect CGCI-HTMCP-CC Panel - 19.7 Mb + - Custom SureSelect GENIE-UHN Panel - 555 Genes + - Custom SureSelect Human All Exon v1.1 Plus 3 Boosters + - Custom SureSelect TARGET-AML_NBL_WT Panel - 2.8 Mb + - Custom Twist Broad Exome v1.0 - 35.0 Mb + - Custom Twist Broad PanCancer Panel - 396 Genes + - Custom Twist MP2PRT-WT Panel - 52 Kb + - Foundation Medicine T5a Panel - 322 Genes + - Foundation Medicine T7 Panel - 429 Genes + - Ion AmpliSeq Cancer Hotspot Panel v2 + - Ion AmpliSeq Comprehensive Cancer Panel + - Nextera DNA Exome + - Nextera Rapid Capture Exome v1.2 + - Not Applicable + - SeqCap EZ HGSC VCRome v2.1 + - SeqCap EZ Human Exome v2.0 + - SeqCap EZ Human Exome v3.0 + - SureSelect Human All Exon v3 + - 
SureSelect Human All Exon v4 + - SureSelect Human All Exon v5 + - SureSelect Human All Exon v5 + UTR + - TruSeq Amplicon Cancer Panel + - TruSeq Exome Enrichment - 62 Mb + - TruSeq RNA Exome + - TruSight Myeloid Sequencing Panel + - Twist Human Comprehensive Exome + - xGen Exome Research Panel v1.0 + - Unknown + adapter_name: + description: Name of the sequencing adapter + type: string + example: H7xx + adapter_sequence: + description: Base sequence of the sequencing adapter + type: string + example: TCGCCTTA + base_caller_name: + description: Base calling is the process by which an order of nucleotides + in a template is inferred during a sequencing reaction + type: string + example: Albacore + enum: + - Albacore + - Guppy + - Scrappie + - Flappie + base_caller_version: + description: Version of the base caller + type: string + example: v2.1.10 + chipseq_antibody: + description: The antibody used in the ChIP-Seq assay + type: string + example: abcam ab4729 anti-H3K27ac + chipseq_target: + description: Antibody target of the ChIP-Seq assay + type: string + example: H3K4me1 + enum: + - H3K4me1 + - H3K4me3 + - H3K9me3 + - H3K27me3 + - H3K36me3 + - H3K27ac + - Input Control + - Unknown + days_to_sequencing: + description: Number of days between the date used for index and the date + the read group was sequenced + type: integer + format: numeric + example: 3 + flow_cell_barcode: + description: Unique molecular identifier (barcode) that is added to DNA fragments during library preparation for sequencing + type: string + fragment_maximum_length: + description: Maximum length of the sequenced fragments (e.g., as predicted + by Agilent Bioanalyzer) + type: integer + format: numeric + example: 600 + fragment_mean_length: + description: Mean length of the sequenced fragments (e.g., as predicted + by Agilent Bioanalyzer) + type: integer + format: numeric + example: 550 + fragment_minimum_length: + description: Minimum length of the sequenced fragments (e.g., as predicted + by 
Agilent Bioanalyzer) + type: integer + format: numeric + example: 300 + fragment_standard_deviation_length: + description: Standard deviation of the sequenced fragments length (e.g., + as predicted by Agilent Bioanalyzer) + type: float + format: numeric + example: 10.7 + fragmentation_enzyme: + description: The restriction enzyme used for nucleotide fragmentation + type: string + example: MboI + enum: + - MboI + - Unknown + - Not applicable + includes_spike_ins: + description: Indicates whether spike-ins were included in the library + type: boolean + enum: + - True + - False + instrument_model: + description: Specific model of sequencing instrument used. + type: string + enum: + - 454 GS FLX Titanium + - AB SOLiD 2 + - AB SOLiD 3 + - AB SOLiD 4 + - Complete Genomics + - Illumina Genome Analyzer II + - Illumina Genome + - Analyzer IIx + - Illumina HiSeq 2000 + - Illumina HiSeq 2500 + - Illumina HiSeq 4000 + - Illumina HiSeq X Five + - Illumina HiSeq X Ten + - Illumina MiSeq + - Illumina NextSeq + - Illumina NovaSeq 6000 + - Illumina NovaSeq X + - Ion Torrent PGM + - Ion Torrent Proton + - Ion Torrent S5 + - Other + - PacBio RS + - Unknown + - Not Reported + lane_number: + description: The basic machine unit for sequencing. For Illumina machines, + this reflects the physical lane number. 
Wrong or missing information may + affect analysis results + type: integer + format: numeric + example: 3 + library_preparation_kit_catalog_number: + description: Catalog of Library Preparation Kit + type: integer + format: numeric + example: 20091660 + library_preparation_kit_name: + description: Name of Library Preparation Kit + type: string + example: MicroPlex Library Preparation Kit + library_preparation_kit_vendor: + description: Vendor of Library Preparation Kit + type: string + example: Illumina + library_preparation_kit_version: + description: Version of Library Preparation Kit + type: string + example: v3 + library_strand: + description: The orientation of the RNA fragment can be preserved or lost during library preparation. If the orientation is preserved, it enables you to determine the directionality of the original RNA transcript + type: string + enum: + - Unstranded + - First Stranded + - Second Stranded + - Not Applicable + multiplex_barcode: + description: Individual "barcode" sequences are added to each DNA fragment + during next-generation sequencing (NGS) library preparation. The barcode/index + sequence used. Wrong or missing information may affect analysis results + type: string + number_expect_cells: + description: Expected number of recovered cells in droplet-based single-cell + libraries. + type: integer + format: numeric + example: 4000 + rin: + description: A numerical assessment of the integrity of RNA based on the + entire electrophoretic trace of the RNA sample including the presence + or absence of degradation products. + type: integer + format: numeric + example: 9 + lower_limit: 0 + upper_limit: 10 + sequencing_date: + description: Date of sequencing in YYYY-MM-DD format + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + single_cell_library: + description: Library preparation strategy that distinguishes different single-cell + assays. 
+ type: string + example: Chromium 3' Gene Expression v2 Library + size_selection_range: + description: Range of size selection + type: integer + format: numeric + example: 500 + spike_ins_concentration: + description: Spike-in concentration + type: integer + format: numeric + example: 30 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: ng/µL. + encoding: UTF-8 + spike_ins_fasta: + description: Name of the FASTA file that contains the spike-in sequences + type: string + example: xyz.fasta + to_trim_adapter_sequence: + description: Does the user suggest adapter trimming? + type: boolean + Portion: + description: Sample taken from a larger specimen for analysis or storage + properties: + creation_datetime: + description: The datetime of portion creation encoded as seconds from epoch + type: integer + format: numeric + example: 1715867021 + portion_number: + description: Numeric value that represents the sequential number assigned + to a portion of the sample + type: integer + format: numeric + example: 54 + Analyte: + description: A substance or component that is being measured or analyzed in a specimen + properties: + analyte_type: + description: Text term that represents the kind of molecular specimen analyte. 
+ type: string + enum: + - cfDNA + - DNA + - EBV Immortalized Normal + - FFPE DNA + - FFPE RNA + - GenomePlex (Rubicon) Amplified DNA + - m6A Enriched RNA + - Nuclei RNA + - Repli-G (Qiagen) DNA + - Repli-G Pooled (Qiagen) DNA + - Repli-G X (Qiagen) DNA + - RNA + - Total RNA + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C156434 + a260_a280_ratio: + description: Numeric value that represents the sample ratio of nucleic acid + absorbance at 260 nm and 280 nm, used to determine a measure of DNA purity + type: float + format: numeric + example: 1.8 + amount: + description: Weight in grams or volume in mL + type: integer + format: numeric + example: 2 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: gm + encoding: UTF-8 + analyte_quantity: + description: The quantity in micrograms (μg) of the analyte(s) derived from + the analyte(s) shipped for sequencing and characterization. + type: integer + format: numeric + example: 110 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: μg + encoding: UTF-8 + analyte_volume: + description: The volume in microliters (μl) of the aliquot(s) derived from + the analyte(s) shipped for sequencing and characterization. + type: integer + format: numeric + example: 200 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: μl + encoding: UTF-8 + concentration: + description: Numeric value that represents the concentration of an analyte + or aliquot extracted from the sample or sample portion, measured in milligrams + per milliliter. 
+ type: integer + format: numeric + example: 30 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mg/ml + encoding: UTF-8 + experimental_protocol_type: + description: The type of experiment used to extract the analyte. + type: string + example: Allprep RNA Extraction + enum: + - aDNA Preparation Type + - Allprep FFPE DNA + - Allprep RNA Extraction + - Chemical Lysis DNA Extraction + - Genomplex + - HighPure miRNA (Allprep DNA) FFPE RNA + - mirVana (Allprep DNA) RNA + - nRNA - Melanoma Protocol + - Pre-extracted DNA received by TSS + - Repli-G + - Repli-G X + - Total RNA + normal_tumor_genotype_snp_match: + description: Text term that represents whether or not the genotype of the + normal tumor matches or if the data is not available + type: string + enum: + - Yes + - No + - Unknown + - Not Reported + - Not Allowed To Collect + ribosomal_rna_28s_16s_ratio: + description: The 28S/18S ribosomal RNA band ratio used to assess the quality + of total RNA. 
+ type: float + format: numeric + example: 2.5 + spectrophotometer_method: + description: Name of the method used to determine the concentration of purified + nucleic acid within a solution + type: string + example: Infrared spectrophotometer + well_number: + description: Numeric value that represents the well location within a plate + for the analyte or aliquot from the sample + type: string + example: B8 + Aliquot: + description: Portion of a specimen that has been divided from the original specimen for the purpose of analysis, storage, or distribution + properties: + aliqout_quantity: + description: The quantity in micrograms (μg) of the aliquot(s) derived from + the analyte(s) shipped for sequencing and characterization + type: integer + format: numeric + example: 55 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: μg + encoding: UTF-8 + aliquot_volume: + description: The volume in microliters (μl) of the aliquot(s) derived from + the analyte(s) shipped for sequencing and characterization + type: integer + format: numeric + example: 550 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: μl + encoding: UTF-8 + amount: + description: Weight in grams or volume in mL. + type: float + format: numeric + example: 0.5 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: gm, mL + encoding: UTF-8 + no_matched_normal_low_pass_wgs: + description: There will be no matched normal low pass WGS aliquots for this + case that can be used for variant calling purposes. The GDC may elect + to use a single tumor calling pipeline to process this data. 
+ type: boolean + enum: + - True + - False + no_matched_normal_target_sequencing: + description: There will be no matched normal Targeted Sequencing aliquots + for this case that can be used for variant calling purposes. The GDC may + elect to use a single tumor calling pipeline to process this data. + type: boolean + enum: + - True + - False + no_matched_normal_wgs: + description: There will be no matched normal WGS aliquots for this case + that can be used for variant calling purposes. The GDC may elect to use + a single tumor calling pipeline to process this data. + type: boolean + enum: + - True + - False + no_matched_normal_wxs: + description: There will be no matched normal WXS aliquots for this case + that can be used for variant calling purposes. The GDC may elect to use + a single tumor calling pipeline to process this data. + type: boolean + enum: + - True + - False + selected_normal_low_pass_wgs: + description: Denotes which low-pass WGS normal aliquot the submitter prefers + to use for variant calling. Only one normal per experimental strategy + per case can be selected. + type: boolean + enum: + - True + - False + selected_normal_targeted_sequencing: + description: Denotes which targeted_sequencing normal aliquot the submitter + prefers to use for variant calling. Only one normal per experimental strategy + per case can be selected + type: boolean + enum: + - True + - False + selected_normal_wgs: + description: Denotes which WGS normal aliquot the submitter prefers to use + for variant calling. Only one normal per experimental strategy per case + can be selected. + type: boolean + enum: + - True + - False + selected_normal_wxs: + description: Denotes which WXS normal aliquot the submitter prefers to use + for variant calling. Only one normal per experimental strategy per case + can be selected. 
+ type: boolean + enum: + - True + - False +@endyaml \ No newline at end of file diff --git a/uml/wsd/clinical-history-definition.wsd b/uml/wsd/clinical-history-definition.wsd new file mode 100644 index 0000000..d7bb021 --- /dev/null +++ b/uml/wsd/clinical-history-definition.wsd @@ -0,0 +1,88 @@ +@startyaml Clinical-history: + Clinical-history: + description: Describes the information related to the patient's past symptoms, diagnoses, and treatments + properties: + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. + type: string + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. + clinical_history_id: + description: Unique identifier for clinical history record + type: string + example: CH001 + age_at_diagnosis: + description: Age of the patient at the time of diagnosis + type: integer + format: numeric + example: 45 + lower_limit: 0 + date_of_diagnosis: + description: Date when the diagnosis was made in YYYY-MM-DD format + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: 2023-06-15 + treatment_for_diagnosis: + description: Type of treatment received for the diagnosis + type: string + example: Chemotherapy + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/SNMI?p=classes&conceptid=http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FSNMI%2FG-0005 + start_date_of_symptoms: + description: Start date of symptoms related to diagnosis in YYYY-MM-DD format + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: 2023-05-01 + end_date_of_symptoms: + description: End date of symptoms related to diagnosis in YYYY-MM-DD format + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: 2023-05-20 + symptoms_at_diagnosis: + description: Description of 
symptoms at the time of diagnosis + type: string + example: Fatigue + genetic_disorder: + description: Presence of a genetic disorder + type: boolean + prior_malignancy: + description: History of a prior malignancy + type: boolean + laterality_of_prior_malignancy: + description: Laterality of any prior malignancy + type: string + example: Left + enum: + - Left + - Right + - Bilateral + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/HRAVS?p=classes&conceptid=https%3A%2F%2Fpurl.humanatlas.io%2Fvocab%2Fhravs%23HRAVS_1000651 + allergy_status: + description: Presence of allergies + type: boolean + allergy_type: + description: Type of allergy, if present + type: string + example: Penicillin allergy + previous_treatments: + description: Details of any previous treatments + type: string + example: Surgery, Radiation Therapy + failed_treatments: + description: Details of treatments that were unsuccessful + type: string + example: Immunotherapy + treatment_switch: + description: Information about whether patients were switched from their + randomly assigned treatment onto an alternative + type: string + example: Switched from Drug A to Drug B + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/COGAT?p=classes&conceptid=file%3A%2Fsrv%2Fncbo%2Frepository%2FCOGAT%2F8%2Fcogat.owl%23cnt_4bd1ebae45752 +@endyaml \ No newline at end of file diff --git a/uml/wsd/diagnosis-core-definition.wsd b/uml/wsd/diagnosis-core-definition.wsd new file mode 100644 index 0000000..5e396a5 --- /dev/null +++ b/uml/wsd/diagnosis-core-definition.wsd @@ -0,0 +1,313 @@ +@startyaml Diagnosis-core + Diagnosis-core: + description: Describes the underlying disease process and health condition of a + subject based on clinical evidence and reasoning. Details include type of diseases, + methods of diagnosis, classification of diseases and symptoms. 
+ properties: + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. + type: string + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. + observation_id: + description: A foreign key or unique identifier linking an observation or data point to an event involving a patient or subject. + type: string + example: "OBS001" + diagnosis_id: + description: An ID or unique identifier for the specific diagnosis + type: string + example: DGN001 + disease: + description: Name of the disease or medical condition + type: string + example: Breast Cancer + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/SNOMEDCT?p=classes&conceptid=http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FSNOMEDCT%2F64572001 + disease_type: + description: Classification of diseases into various types based on their + causes, characteristics, and the systems they affect + type: string + example: Infectious Disease + date_of_diagnosis: + description: Date when the diagnosis was made in YYYY-MM-DD format + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: 2023-06-15 + age_at_diagnosis: + description: Age of the patient at the time of diagnosis + type: integer + format: numeric + example: 57 + lower_limit: 0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: years + encoding: UTF-8 + symptoms_described: + description: Subjective experiences or sensations reported by a patient + that indicate a departure from normal function, health, or feeling + type: string + example: pain, tremors, rigidity, bradykinesia, cognitive changes, mood + fluctuations + laterality: + description: Refers to the side of the body that is affected by a medical + condition or 
disease + type: string + example: Unilateral + enum: + - Unilateral + - Bilateral + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C25185 + pathology_review: + description: Whether the diagnosis underwent a detailed examination of tissue, + cell, or body fluid samples to diagnose disease and understand its progression, + etiology, and effects on the body + type: boolean + example: True + pathologic_stage: + description: Stage of the disease based on pathology findings + type: string + example: Stage 0 + enum: + - Stage 0 + - Stage I + - Stage II + - Stage III + - Stage IV + method_of_diagnosis: + description: A systematic methodology for identifying a disease or condition + based on a patient's symptoms, medical history, physical examination, and + various diagnostic tests + type: string + example: Imaging + enum: + - Imaging + - Biopsy + - Blood Test + - Physical Examination + morphology: + description: Cellular structure or specific appearance of cells and tissues + (normal and abnormal) under the light or electron microscope + type: string + example: Adenocarcinoma + disease_classification: + description: Categorizing diseases based on various criteria such as pathophysiology, + clinical manifestations, and affected systems + type: string + example: Invasive + classification_system: + description: System used for classification + type: string + example: TNM System + enum: + - TNM System + - WHO System + - AJCC System + - Bethesda System + classification_system_version: + description: Version of the classification system + type: string + example: 8th Edition, 2020 + diagnosis_stage: + description: Extent of disease in the body, including the size and the spread + type: string + example: Stage 0 + enum: + - Stage 0 + - Stage I + - Stage II + - Stage III + - Stage IV + - Metastatic + staging_system: + 
description: System used for staging the disease + type: string + example: TNM Staging + enum: + - TNM Staging + - AJCC Staging + - Dukes' Staging + - FIGO Staging + - Hoehn and Yahr Scale + staging_system_version: + description: Version of the staging system + type: string + example: 8th Edition, 2020 + disease_grade: + description: Grade assigned to the diagnosis + type: string + example: Grade 1 + enum: + - Grade 1 + - Grade 2 + - Grade 3 + - Grade 4 + - Grade X (if unknown) + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C28076 + grading_system: + description: System used for grading the disease + type: string + example: Gleason Score + enum: + - Gleason Score + - Nottingham Grading System + - Fuhrman Grade + grading_version: + description: Version of the grading system + type: string + example: 4th Edition, 2020 + disease_group: + description: Grouping of similar diagnoses + type: string + example: Hormone Receptor Positive Breast Cancer + margins_involved_site: + description: A situation where disease-causing cells or tissues (such as + cancer cells, infectious agents, or abnormal tissue) are present at the + outer edges (margins) of the tissue that has been surgically removed + type: string + example: The characteristic of the boundary, edges or border of a detected + lesion + anaplasia_present_type: + description: Whether or not cells lose their normal characteristics and + differentiation, appearing more primitive or embryonic + type: string + example: Pleomorphic + enum: + - Pleomorphic + - Low Grade Anaplasia + - High Grade Anaplasia + - Not Applicable + extranodal_involvement: + description: Presence of disease in tissues or organs outside of the primary + lymph nodes + type: boolean + example: True + enum: + - True + - False + enneking_msts_metastasis: + description: Metastasis status 
according to Enneking-MSTS criteria + type: string + example: Metastatic + enum: + - Metastatic + - Non-Metastatic + - Not Applicable + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C140266 + enneking_msts_tumor_site: + description: Site of the tumor according to Enneking-MSTS criteria + type: string + example: Femur + presenting_symptoms: + description: Symptom experienced at time of diagnosis + type: string + example: Lump in breast + gastric_esophageal_junction_involvement: + description: Involvement of the gastric-esophageal junction + type: string + example: True + enum: + - True + - False + gleason_patterns_percent: + description: Percentage of Gleason patterns that can help in determining + the aggressiveness of prostate cancer if present in a tissue sample + type: float + example: 60 + lower_limit: 0.0 + upper_limit: 100.0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + goblet_cells_columnar_mucosa_present: + description: Presence of goblet cells in columnar mucosa + type: string + example: True + enum: + - True + - False + international_prognostic_index: + description: Prognostic tool useful for non-Hodgkin lymphoma samples + type: string + example: 3 + enum: + - low-risk group (0-1) + - low-intermediate-risk group (2) + - high-intermediate-risk group (3) + - high-risk group (4-5) + ishak_fibrosis_score: + description: If liver samples are present, this histopathological grading + system can assess the degree of fibrosis + type: integer + format: numeric + enum: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + lower_limit: 0 + upper_limit: 6 + largest_extrapelvic_peritoneal_focus: + description: Size of the largest extrapelvic peritoneal focus in centimeters + type: 
integer + format: numeric + example: 5 + lower_limit: 0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: cms + encoding: UTF-8 + lymph_nodes_examined_status: + description: Indicate if lymph nodes were examined for metastases. + example: True + enum: + - True + - False + lymph_nodes_examined_method: + description: Method used to examine lymph nodes + type: string + example: Imaging + enum: + - Lymph node dissection/pathological exam + - Physical palpation of patient + - Imaging + number_lymph_nodes_positive: + description: Number of lymph nodes positive + type: integer + format: numeric + example: 1 + lymph_node_examined_site: + description: Sites or areas where lymph nodes were involved + type: string + example: Lymph Nodes Involved - Axillary + enum: + - Lymph Nodes Involved - Axillary + - Lymph Nodes Clear - Inguinal + metastasis_at_diagnosis: + description: Presence of metastasis at the time of diagnosis + example: True + enum: + - True + - False + metastasis_at_diagnosis_site: + description: Site(s) of metastasis at the time of diagnosis + type: string + example: Liver, Lung, Brain +@endyaml \ No newline at end of file diff --git a/uml/wsd/events-core-definition.wsd b/uml/wsd/events-core-definition.wsd new file mode 100644 index 0000000..578d1a2 --- /dev/null +++ b/uml/wsd/events-core-definition.wsd @@ -0,0 +1,27 @@ +@startyaml Events-core + Events-core: + description: Lists any significant occurrence or activity that is recorded within + a patient's health record. Contains information like project ID, donor ID, event + ID and event type. + properties: + project_id: + description: The unique id provided to the experimental study or project that is governed by an investigator. + type: string + example: PROJECT ID is an alphanumeric string that is globally unique identifier for the project or study. 
+ subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. + type: string + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. + event_id: + description: Primary key or unique identifier for an event + type: string + example: E001 + event_type: + description: Describes the type of event occurring for a patient + type: string + example: Diagnosis + enum: + - Encounter + - Treatment + - Procedure +@endyaml \ No newline at end of file diff --git a/uml/wsd/experiment-design-definition.wsd b/uml/wsd/experiment-design-definition.wsd new file mode 100644 index 0000000..7b65fa3 --- /dev/null +++ b/uml/wsd/experiment-design-definition.wsd @@ -0,0 +1,59 @@ +@startyaml experiment-design + experiment-design: + properties: + donor_id: + type: string + example: D001 + experiment_id: + type: string + example: EXPT045 + experiment_type: + type: string + example: Between-Subjects Design + enum: + - Between-Subjects Design + - Within Subject Design + - Cross Sectional Design + - Longitudinal Design + - Cohort Study + - Case-Control Study + - Ecological Study + - Population Study + - Randomized Controlled Trial + - Non-Randomized Controlled Trial + protocol_id: + type: string + example: PRT451 + project_id: + type: string + example: P024 + total_number_of_samples: + type: integer + format: numeric + example: 40 + lower_value: 1 + comparison_groups: + type: string + example: Control group + enum: + - Control group + - Treatment group + - Disease cohort + - Healthy cohort + genetic_modification: + type: string + example: Gene knockout + hypothesis: + type: string + example: Drug X reduces tumor size + timecourse_value: + type: integer + format: numeric + example: 7 + timecourse_unit: + type: string + example: days + timecourse_relevance: + type: string + example: monitor decrease in CRP levels +@endyaml \ No newline at end of file diff --git 
a/uml/wsd/exposure-definition.wsd b/uml/wsd/exposure-definition.wsd new file mode 100644 index 0000000..cb5f0d9 --- /dev/null +++ b/uml/wsd/exposure-definition.wsd @@ -0,0 +1,135 @@ +@startyaml exposure + exposure: + description: This object captures details about various types of exposures, including their classification, duration, frequency, and other attributes related to diet and exercise. + properties: + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. + type: string + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. + exposure_id: + description: A primary key that uniquely identifies a specific exposure instance in the dataset. + type: string + example: "EXP001" + event_id: + description: A foreign key or unique identifier linking a patient to an event within the study's context. + type: string + example: "EVT001" + exposure_type: + description: Classifies how individuals encounter hazardous substances or environmental factors, such as smoking or exposure to asbestos. + type: string + example: "Coal dust" + enum: + - Smoking + - Alcohol + - Asbestos + - Coal dust + - Respirable crystalline silica + - Secondhand smoke as child + - Radon exposure + - Marijuana + - Virus + - Allergen + ontology_link: + type: string + description: Link to the ontology file providing further information about exposure types. + url: "https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C157103" + exposure_unit: + description: Specifies the unit used to measure the duration of exposure, such as months or years. + type: string + example: "months" + exposure_duration: + description: Represents the time duration associated with the exposure, measured in the specified unit. 
+ type: integer + format: numeric + example: 17 + lower_limit: 0 + units: + description: Defines the unit of measurement used for specifying exposure duration. + uom: "months" + encoding: UTF-8 + exposure_frequency: + description: Indicates how often the exposure to the hazardous substance or environmental factor occurs, such as once or multiple times. + type: string + example: "once" + enum: + - once + - multiple + - none + units: + description: Specifies the frequency unit used to measure exposure frequency, typically in days. + uom: "day" + encoding: UTF-8 + diet: + description: Describes the general dietary pattern or regimen followed by the donor. + type: string + example: "Mediterranean" + diet_duration: + description: Specifies the duration for which the donor has adhered to the described diet, measured in years. + type: float + format: numeric + example: 3.5 + lower_limit: 0.0 + units: + description: Defines the unit of measurement used for specifying diet duration. + uom: "years" + encoding: UTF-8 + exercise: + description: Refers to physical activities undertaken by the donor to maintain or improve physical fitness and overall health. + type: string + example: "Jogging" + exercise_specify: + description: Indicates the duration for which the donor has engaged in the specified exercise activity, measured in years. + type: integer + format: numeric + example: 10 + lower_limit: 0 + units: + description: Defines the unit of measurement used for specifying exercise duration. + uom: "years" + encoding: UTF-8 + time_between_waking_and_first_exposure: + description: Specifies the duration in months between waking up and the first exposure to tobacco (if applicable). + type: integer + format: numeric + example: 15 + lower_limit: 0 + units: + description: Defines the unit of measurement used for specifying the time duration. 
+ uom: "months" + encoding: UTF-8 + exposure_category: + description: Describes the reported level of exposure by the donor, such as daily, weekly, occasional, or none. + type: string + example: "Occasional (< once a month)" + enum: + - Daily + - None + - Not applicable + - Occasional (< once a month) + - Social (> once a month - < once a week) + - Unknown + - Weekly (>=1x a week) + exercise_frequency: + description: Indicates how frequently the donor engages in physical exercise, measured in times per week. + type: string + example: "Less than once a month" + enum: + - Never + - Less than once a month + - 1-3 times a month + - Not applicable + - Once or twice a week + - Most days but not every day + - Every day + - Unknown + exercise_intensity: + description: Specifies the intensity level of the donor's physical exercise, such as low, moderate, or vigorous. + type: string + example: "Low - No increase in the heart beat and no perspiration" + enum: + - Low - No increase in the heart beat and no perspiration + - Moderate - Increase in the heart beat slightly with some light perspiration + - Vigorous - Increase in the heart beat substantially with heavy perspiration + - Not applicable + - Unknown +@endyaml \ No newline at end of file diff --git a/uml/wsd/family-history-definition.wsd b/uml/wsd/family-history-definition.wsd new file mode 100644 index 0000000..dd6d643 --- /dev/null +++ b/uml/wsd/family-history-definition.wsd @@ -0,0 +1,143 @@ +@startyaml Family-history + Family-history: + description: Describes the health information about a patient’s close relatives. + This includes a relative's disease and cancer histories, exposure to toxic substances + and their durations + properties: + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. + type: string + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. 
+ relationship_age_at_diagnosis: + description: Age of the patient's relative at the time of diagnosis + type: integer + format: numeric + example: 50 + lower_limit: 0 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: year + encoding: UTF-8 + relationship_gender: + description: Gender of the relative + type: string + example: Male + enum: + - Male + - Female + - Other + relationship_primary_diagnosis: + description: Primary diagnosis for the relative + type: string + example: Diabetes + relative_with_cancer_history: + description: Relationship of the patient with a relative who has cancer history + type: string + example: Father + relatives_with_cancer_history_count: + description: Count of relatives with history of cancer + type: integer + format: numeric + example: 2 + brca_carrier: + description: Indicate whether donor is a carrier of a mutation in a BRCA + gene. A mutation in this gene is associated with an increased risk of + familial breast and ovarian cancer. + type: string + example: BRCA1 + enum: + - BRCA1 + - BRCA2 + - Both BRCA1 and BRCA2 + - Not present + - Not applicable + - Unknown + relatives_vital_status: + description: Relative's last known state of living or deceased + type: string + example: Alive + enum: + - Alive + - Deceased + - Unknown + cause_of_death_of_relatives: + description: Indicate the cause of the death of the relative + type: string + example: COVID + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C81239 + relatives_survival_time: + description: Indicate how long, in days, the relative survived from the + time they were diagnosed with cancer. 
+ type: integer + format: numeric + example: 150 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: days + encoding: UTF-8 + family_line: + description: Identification of the maternal or paternal family line in the + relationship + type: string + example: Maternal + date_of_birth_of_relative: + description: Date of birth of relative in YYYY-MM-DD + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: 1990-08-09 + age_at_death_of_relative: + description: Age at which the relative died + type: integer + format: numeric + example: 87 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: year + encoding: UTF-8 + biomarkers_of_disease_in_relative: + description: Biomarkers of disease present in the patient's relative + type: string + example: EGFR mutation + exposure_type_of_relative: + description: Exposure to agents that could result in a disease condition + type: string + example: Smoking + enum: + - Smoking + - Alcohol + - Asbestos + - Coal dust + - Respirable crystaline silica + - Secondhand smoke as child + - Radon exposure + - Marijuana + - Virus + - Allergen + ontology_link: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C157103 + exposure_unit_of_relative: + description: Unit associated with rate of exposure for the relative + type: string + example: years + exposure_duration_of_relative: + description: Time associated with rate of exposure for the relative + type: integer + format: numeric + example: 30 + lower_limit: 0 + exposure_frequency_of_relative: + description: Frequency of the exposure for the relative + type: string + enum: + - Once + - Multiple + - None +@endyaml \ No 
newline at end of file diff --git a/uml/wsd/project-core-definition.wsd b/uml/wsd/project-core-definition.wsd new file mode 100644 index 0000000..8df6ece --- /dev/null +++ b/uml/wsd/project-core-definition.wsd @@ -0,0 +1,68 @@ +@startyaml Project-core + Project-core: + description: Describes the core details of the project like project ID, summary, + overall design, publication related details and any other supplementary links + associated with the project + properties: + project_id: + description: Primary key or unique identifier for a project + type: string + example: P034 + summary: + description: Summary of the study which provides information about the key objectives and results from the publication + type: string + example: The identification of novel therapeutic strategies to overcome the intrinsic + or acquired resistance to trametinib in mutant KRAS lung adenocarcinoma (LUAD) is a major challenge. + This study analyzes the effects of trametinib in Id1, a key factor involved in the oncogenic KRAS + pathway, and investigates the Id1 role in acquire resistance and synergy with immunotherapy in KRAS-driven + LUAD. Restoring the antitumor immune response by blocking programmed-cell death protein 1 (PD-1) and + programmed-cell death-ligand 1 (PD-L1) pathway represents a major breakthrough in non-small-cell lung + cancer (NSCLC) treatment. Nevertheless, a high proportion of LUAD patients with KRAS alterations remain + refractory to this therapy. 
+ overall_design: + description: Overall design of the experiment as given by the author + type: string + example: In order to investigate the molecular mechanisms involved in the + trametinib-mediated Id1 downregulation, a global transcriptomic profiling + using RNA sequencing analysis was performed on murine CMT167 and KLA lung + cancer cells treated or not with trametinib + donor_derived_dataset: + description: Whether the dataset has been derived from a donor or not + type: boolean + organism: + description: The organism from which the samples were derived + type: string + example: Homo sapiens + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon + cell_line: + description: Specific population of cells that can maintain in vitro culture for an extended period of time and are used to derive samples for an experiment + type: string + pattern: "^CVCL_[0-9]{4}$" + example: CVCL_0030 + ontology_link: + type: string + description: Link to the ontology file + url: https://www.cellosaurus.org/index.html + author_cell_type: + description: Cell types as curated from the publication/source for this experiment + type: string + example: CMT167 cell + ontology_link: + type: string + description: Link to the ontology file + url: https://obofoundry.org/ontology/cl.html + source_link: + description: Link to source from where the data was fetched given as a url + type: string + format: alphanumeric + example: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE236258 + supplementary_links: + description: External link(s) pointing to code, supplementary data files, + or analysis files associated with the project which will not be uploaded + type: array + format: alphanumeric + example: https://github.com/czbiohub/tabula-muris, http://celltag.org/ +@endyaml \ No newline at end of file diff --git a/uml/wsd/protocol-definition.wsd b/uml/wsd/protocol-definition.wsd new file mode 100644 index 0000000..6406515 
--- /dev/null +++ b/uml/wsd/protocol-definition.wsd @@ -0,0 +1,351 @@ +@startyaml Protocol +Protocol: + description: Describes the protocols followed in a study, including details such + as the cell capture system, library protocol, the chemistry kit and sampling technique + used, as well as sequencing related information. + properties: + core: + description: Core properties of the protocol + properties: + donor_id: + description: Foreign key or unique identifier to map to donor + type: string + example: DO-31724 + project_id: + description: Foreign key or unique identifier to map a project + type: string + example: P024 + protocol_id: + description: Primary key or unique identifier for the protocol + type: string + example: PRT451 + protocol_name: + description: Describes the experiment procedure or methods organized in a standardized format that can be executed in a laboratory environment. + type: string + example: Single cell sequencing protocol + protocol_description: + description: Protocols define step-by-step procedures and include a list of the materials needed to perform the experiment. + type: string + example: Prepare Media from stocks, Add 485 µl Clone Medium to ionomycin vial; + add 490 µl Clone Medium to PMA vial, mix and transfer 5µl of diluted PMA to + ionomycin vial yielding the final solution. Collect spleens from mice and + place each in a small petri dish containing 5ml Clone Medium. Isolate + lymphocytes using the syringe puncture method. Wash and lyse rbcs with ACT. + Wash with Clone Medium and resuspend in 2ml Clone Medium. Place exactly + 1ml in one well of a 12 well plate. Add an additional 1ml of Clone Medium + to the well. Can store spleen suspensions in 12 well plate overnight in + refrigerator. Use remaining spleen cells for phenotype FACS if desired - + wash with Staining Buffer to remove Clone Medium. 
+ protocol_core: + description: Core protocol-level information + type: string + example: Prepare Media from stocks, Add 485 µl Clone Medium to ionomycin vial; + add 490 µl Clone Medium to PMA vial, mix and transfer 5µl of diluted PMA + to ionomycin vial yielding the final solution. Collect spleens from mice + and place each in a small petri dish containing 5ml Clone Medium. Isolate + lymphocytes using the syringe puncture method. Wash and lyse rbcs with ACT. + Wash with Clone Medium and resuspend in 2ml Clone Medium. Place exactly + 1ml in one well of a 12 well plate. Add an additional 1ml of Clone Medium + to the well. Can store spleen suspensions in 12 well plate overnight in + refrigerator. Use remaining spleen cells for phenotype FACS if desired - + wash with Staining Buffer to remove Clone Medium. + analysis: + description: Analysis related information of the protocol + properties: + computational_method: + description: A URL to a versioned workflow and versioned execution environment + type: string + format: alphanumeric + matrix_transformation: + description: Information related to protocols that transforms a matrix + type: string + example: r2 + bioinformatics_software: + description: A wide array of computer programs and tools designed to manage, analyze, and interpret biological data + type: string + example: Cellranger + bioinformatics_software_version: + description: Version of software used + type: string + example: v2.0.1 + gene_annotation_version: + description: Ensembl release version accession number or NCBI RefSeq assembly + version used for gene annotation + type: string + example: GCF_000001405.40 + sequencing: + description: Sequence related information of the protocol + properties: + chemistry_kit: + description: Chemistry/Reagent kit used in the protocol + type: string + example: Ion Total RNASeq Kit v2 + library_protocol: + description: Specifies the protocol for preparing sequencing libraries from + single-cell RNA or DNA, outlining the 
steps for library construction and + amplification. + type: string + example: Stage I - Preparation of single-cell lysates + enum: + - Stage I - Preparation of single-cell lysates + - Stage II - Lysate cleanup and reverse transcription of mRNA species + - Stage III - Performing whole transcriptome amplification) and post-PCR cleanup + - Stage IV - Nextera XT sequencing-library construction + - Stage V - Pooling and DNA SPRI bead cleanup + - Stage VI - Sequencing + sampling_technique: + description: The method/procedure used for collecting samples + type: string + example: Biopsy + sequencer: + description: The sequencing platform used to generate single-cell sequencing + data. + type: string + example: Illumina NexSeq 2000 + sequencing_technology: + description: Describes the methodology employed for sequencing, such as single + cell RNA-seq, ATAC-seq, or DNA sequencing. + type: string + example: scRNA-Seq + sample_collection: + description: Sample collection related information of the protocol + properties: + formation_method: + description: Method used to form cell aggregates + type: string + example: suspension cultures + cell_uniformity: + description: Description of the cell aggregates uniformity after formation + type: string + example: Mostly homogenous embryoid bodies of variable cell numbers + method_collection: + description: Method used to collect the biomaterial + type: string + example: Biopsy + ontology: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/NCIT?p=classes&conceptid=http%3A%2F%2Fncicb.nci.nih.gov%2Fxml%2Fowl%2FEVS%2FThesaurus.owl%23C70700 + reagents: + description: A list of purchased reagents used in this protocol + type: array + example: SMART CDS Primer II A, SMARTer II A Oligo, SMARTScribe Reverse Transcriptase + method_differentiation: + description: Method applied to cell culture to induce a specific differentiation + response + type: string + example: Inductive Co-Culture + 
media_differentiation: + description: Culture media used to induce a specific differentiation response + type: string + example: StemPro-34 Serum-Free Medium + small_molecules: + description: Small molecules added to stem cell medium to induce a specific + differentiation response + type: string + example: Retinoic Acid + ontology: + type: string + description: Link to the ontology file + url: https://pubchem.ncbi.nlm.nih.gov/ + target_cell_yield: + description: Percent of target cells obtained after directed differentiation + of origin cell + type: float + format: numeric + example: 95 + upper_limit: 100.0 + lower_limit: 0.0 + target_pathway: + description: Targeted pathway for specific differentiation response + type: string + example: Hedgehog signaling pathway + validation_method: + description: Method used to validate origin cell successfully differentiated + to target cell + type: string + example: Pancreatic Cell DTZ Detection Assay + validation_result: + description: Result confirming successful differentiation to target cell type + type: string + example: CD103 Positive + markers: + description: A list of markers used to enrich for or against certain cells + type: string + example: CD4+ CD8- + ontology: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/LOINC?p=classes&conceptid=http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FLNC%2FLP7783-6 + minimum_size: + description: Minimum cell or organelle size passing selection, in microns + type: float + format: numeric + example: 70 + upper_limit: 10000.0 + lower_limit: 0.01 + units: + description: Unit is a quantity of constant magnitude which is used to measure + the magnitudes of other quantities of the same manner + uom: µm + encoding: UTF-8 + maximum_size: + description: Maximum cell or organelle size passing selection, in microns + type: float + format: numeric + example: 90 + upper_limit: 10000.0 + lower_limit: 0.01 + units: + description: Unit is a 
quantity of constant magnitude which is used to measure + the magnitudes of other quantities of the same manner + uom: µm + encoding: UTF-8 + method_induction: + description: Induction method applied to primary cell culture to induce pluripotent + stem cell generation + type: string + example: lentivirus + reprogramming_factors: + description: Reprogramming factors added to primary cell culture to induce + pluripotency + type: string + example: POU5F1 + ipsc_induction_kit: + description: Kit used to induce pluripotent stem cell generation + type: string + example: Human iPS Cell Reprogramming Episomal Kit + pluripotency_test: + description: Description of how pluripotency was tested in induced pluripotent + stem cells + type: string + example: Teratoma formation + percent_pluripotency: + description: Percent of iPSCs that passed the pluripotency test + type: float + format: numeric + example: 97.2 + units: + description: Unit is a quantity of constant magnitude which is used to measure + the magnitudes of other quantities of the same manner + uom: '%' + encoding: UTF-8 + pluripotency_vector_removed: + description: Whether a viral vector was removed after induction + type: boolean + imaging_preparation_protocol: + description: Image preparation related information of the protocol + properties: + slice_thickness: + description: Thickness of the imaged slice in micrometres + type: integer + format: numeric + example: 14 + units: + description: Unit is a quantity of constant magnitude which is used to measure + the magnitudes of other quantities of the same manner + uom: µm + encoding: UTF-8 + internal_anatomical_structures: + description: Internal (landmark) structures visible in the overview image + that are informative about the broader anatomical context/location of the + sample + type: string + example: M0 + fiducial_marker: + description: Fiducial markers for the alignment of images taken across multiple + rounds of imaging + type: string + example: beads + 
expansion_factor: + description: Factor by which the imaged tissue was expanded in one dimension + type: integer + format: numeric + example: 3 + microscope_setup_description: + description: Description of the microscope setup + type: string + example: Motorized stage (SCAN IM 112 x 74, Marzhauser) + microscopy_technique: + description: The type of microscopy + type: string + example: Fluroscence + ontology: + type: string + description: Link to the ontology file + url: https://bioportal.bioontology.org/ontologies/SNOMEDCT?p=classes&conceptid=http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FSNOMEDCT%2F278289002 + magnification: + description: Magnification of the objective used for imaging + type: string + example: 60x + numerical_aperture: + description: Numerical aperture of the objective + type: float + format: numeric + example: 1.3 + immersion_medium_type: + description: Immersion medium used for imaging + type: string + example: oil + immersion_medium_refractive_index: + description: Refractive index of the immersion medium used for imaging + type: float + format: numeric + example: 1.5 + pixel_size: + description: Pixel size in nanometers + type: integer + format: numeric + example: 109 + units: + description: Unit is a quantity of constant magnitude which is used to measure + the magnitudes of other quantities of the same manner + uom: nm + encoding: UTF-8 + number_of_tiles: + description: Number of XY tiles in the experiment + type: integer + format: numeric + example: 2000 + tile_size_y: + description: Y size of the tile in micrometers + type: integer + format: numeric + example: 100 + units: + description: Unit is a quantity of constant magnitude which is used to measure + the magnitudes of other quantities of the same manner + uom: µm + encoding: UTF-8 + tile_size_x: + description: X size of the tile in micrometers + type: integer + format: numeric + example: 100 + units: + description: Unit is a quantity of constant magnitude which is used to measure + 
the magnitudes of other quantities of the same manner + uom: µm + encoding: UTF-8 + z_stack_step_size: + description: Z-stack step size in nanometers + type: integer + format: numeric + example: 200 + units: + description: Unit is a quantity of constant magnitude which is used to measure + the magnitudes of other quantities of the same manner + uom: nm + encoding: UTF-8 + overlapping_tiles: + description: Whether tiles were collected with overlap + type: boolean + channel: + description: Information about each channel used in the imaging protocol + type: array + example: red + probe: + description: Information about each probe in the imaging experiment + type: array + example: CNA35-OG488 +@endyaml \ No newline at end of file diff --git a/uml/wsd/publication-definition.wsd b/uml/wsd/publication-definition.wsd new file mode 100644 index 0000000..16ac45a --- /dev/null +++ b/uml/wsd/publication-definition.wsd @@ -0,0 +1,51 @@ +@startyaml Publication + Publication: + description: Describes the publication related information such as abstract, authors, PUBMED ID and the publication link. + properties: + project_id: + description: Primary key or unique identifier for a project + type: string + example: P034 + abstract: + description: Abstract of the publication associated with the dataset. + type: string + example: Background - The identification of novel therapeutic strategies to overcome resistance... + authors: + description: List of authors for the associated publication as given in the APA format + type: string + example: Puyalto, A., Rodríguez-Remírez, M., López, I., Macaya, I. + journal: + description: Journal in which the associated study was published + type: string + example: Molecular Cancer + publication_link: + description: Link to the publication associated with the dataset. 
+ type: string + format: alphanumeric + example: https://pubmed.ncbi.nlm.nih.gov/38643157/ + publication_title: + description: Title of the publication associated with the dataset + type: string + example: Trametinib sensitizes KRAS-mutant lung adenocarcinoma tumors to PD-1/PD-L1 axis blockade via Id1 downregulation + publication_year: + description: Year in which the dataset was published + type: integer + format: numeric + example: 2023 + lower_limit: 1900 + units: + description: Unit is a quantity of constant magnitude which is used to measure the magnitudes of other quantities of the same manner + uom: year + encoding: UTF-8 + pubmed_id: + description: Unique PUBMED identifier of the publication associated with the dataset + type: integer + format: numeric + example: 38643157 + total_number_of_cells: + description: Total number of cells present in a dataset + type: integer + format: numeric + example: 150000 + lower_limit: 0.0 +@endyaml \ No newline at end of file diff --git a/uml/wsd/treatment-definition.wsd b/uml/wsd/treatment-definition.wsd new file mode 100644 index 0000000..eb83163 --- /dev/null +++ b/uml/wsd/treatment-definition.wsd @@ -0,0 +1,393 @@ +@startyaml treatment + treatment: + description: Describes the therapeutic agents, therapies, or procedures used to + treat a medical condition. + properties: + subject_id: + description: The unique id provided to the entity being investigated. A subject can be a patient, human or non-human donor. + type: string + example: SUBJECT ID is an alphanumeric string that is globally unique identifier for the entity being investigated. + treatment_id: + description: Primary key unique identifier for this treatment + type: string + example: TR-7391493 + event_id: + description: Foreign key unique identifier to map a patient to an event + type: string + example: EV-20201124 + treatment_name: + description: Name of the treatment given to the samples i.e. 
name of the + chemical/drug/therapy + type: string + example: Carboplatin + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/chebi/ + treatment_start_date: + description: Start date for treatment in YYYY-MM-DD format + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: 2021-02-12 + treatment_end_date: + description: End date for treatment in YYYY-MM-DD format + type: string + pattern: "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" + example: 2022-06-06 + is_treatment_ongoing: + description: Is the treatment ongoing? + type: boolean + enum: + - True + - False + is_primary_treatment: + description: Is the treatment primary? + type: boolean + enum: + - True + - False + treatement_response: + description: This field indicates the type/extent of response to the treatment + type: string + example: Partial Response (PR) + enum: + - Complete Response (CR) + - Partial Response (PR) + - Stable Disease (SD) + - Progressive Disease (PD) + - No Response + treatment_response_criteria_method: + description: This field indicates the criteria used to document response + to the treatment + type: string + example: ELN Dohner AML 2017 Oncology Response Criteria + enum: + - ELN Dohner AML 2017 Oncology Response Criteria + - IWG Cheson AML 2003 Oncology Response Criteria + - iRECIST + - RECIST + - Response Assessment in Neuro-Oncology (RANO) + - Physician Assessed Response Criteria + treatment_type: + description: Treatment types can be chemotherapy, radiotherapy, cognitive + therapy + type: string + example: Chemotherapy + enum: + - Medication + - Chemotherapy + - Radiation Therapy + - Surgery on Primary Tumor + - Surgery on Lymph Node + - Cognitive Behavioral Therapy + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/maxo + treatment_intent: + description: Indicate the purpose of the treatment, or the desired effect + or 
outcome resulting from the treatment + type: string + example: Curative + enum: + - Curative + - Diagnostic + - Forensic + - Guidance + - Palliative + - Preventative + - Screening + - Supportive + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/maxo + treatment_setting: + description: Indicate the treatment setting, which describes the treatment's + purpose in relation to the primary treatment + type: string + example: Adjuvant + enum: + - Adjuvant + - Advanced/Metastatic + - Conditioning + - Induction + - Maintenance + - Mobilization + - Neoadjuvant + - Preventative + - Radiosensitization + - Salvage + treatment_type_detail: + description: Treatment details which capture type of surgery, types of radiotherapy + type: string + example: Biopsy + enum: + - Biopsy + - Resection + - 3D conformal + - Intensity-modulated radiation therapy (IMRT) + - Volumetric modulated radiation therapy (VMAT) + - Image-guided radiation therapy (IGRT) + - Stereotactic radiosurgery (SRS) + - Brachytherapy + - Superficial x-ray radiation therapy (SXRT) + - Intraoperative radiation therapy (IORT) + - Other + - Unknown + ontology_link: + type: string + description: Link to the ontology file + url: https://obi-ontology.org/ + treatment_dosage: + description: Treatment dosage for the drug administered over a defined period.
+ Dosages can be increased or decreased as disease/condition improves or + degrades + type: integer + format: numeric + example: 100 + units: + description: Unit is a quantity of constant magnitude which is used to + measure the magnitudes of other quantities of the same manner + uom: mg, gy + encoding: UTF-8 + treatment_dose_level: + description: Dosage level corresponding to the cohort in a given clinical + trial + type: string + treatment_anatomic_site: + description: Anatomical site of administered drug or chemotherapy + type: string + example: Arm + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/uberon + treatment_anotamical_site_type: + description: Anatomical site type such as primary, lymph node or metastatic + type: string + example: Primary tumor + enum: + - Primary tumor + - Lymph node + - Metastatic + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/pato, https://www.ebi.ac.uk/efo/ + days_per_cycle_of_chemotherapy: + description: Days per cycle of chemotherapy + type: integer + format: numeric + example: 120 + number_of_cycles_of_chemotherapy: + description: The number of cycles of chemotherapy refers to the repeated + administration of chemotherapy drugs over a defined period. + type: integer + format: numeric + example: 4 + lower_limit: 1 + route_of_administration: + description: The route of administration describes how chemotherapy drugs + are delivered into the body + type: string + example: Intravenous (IV) + enum: + - Oral + - Intravenous (IV) + - Subcutaneous (SC) + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ogms + adverse_event_list: + description: Adverse events refer to undesirable or harmful effects experienced + by patients as a result of chemotherapy or drug treatments.
+ type: string + example: Nausea, Vomiting, Anemia, Palpitation + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ogms + adverse_event_treatment: + description: Adverse event treatment involves managing and mitigating the + side effects and complications associated with drug or chemotherapy. + type: string + example: Zofran + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/chebi/ + location_reaction_reported: + description: Location of adverse event + type: string + example: Injection site redness + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ogms + severity_reported: + description: Severity reported of adverse event + type: string + example: Mild + enum: + - Mild + - Moderate + - Severe + - Life Threatening + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ogms + previous_treatments: + description: Previous treatments for the donor + type: string + example: Chemotherapy + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ogms + failed_treatments: + description: Failed treatments of the donor + type: string + example: ALK inhibitors + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ogms + treatment_switch: + description: Treatment switch information for the donor + type: string + example: Patient responded poorly to the allocated treatment hence switched + to a different regimen + drug_class: + description: The class that drug belongs to in ATC Classification + type: string + pattern: "^[A-Z][0-9]{2}[A-Z]{2}[0-9]{2}$" + example: A10BA02 + ontology_link: + type: string + description: Link to the ontology file + url: https://pubchem.ncbi.nlm.nih.gov/classification/#hid=79 +
drug_brand_name: + description: The brand name of the drug in use + type: string + example: Tagrisso + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/chebi/ + drug_name_generic: + description: Chemical composition of the active drug component + type: string + example: Osimertinib + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/chebi/ + treatment_chronology_with_surgery: + description: The order of events for treatment and surgery. + type: string + enum: + - Pre-operative + - Post-operative + - No surgery + ontology_link: + type: string + description: Link to the ontology file + url: https://www.ebi.ac.uk/ols4/ontologies/ogms + line_of_treatment: + description: If treatment is not primary then what line is it? + type: string + example: second-line therapy + response_to_treatment_criteria_method: + description: Indicate the criteria used to assess the donor's response to + the applied treatment regimen. + type: string + example: IWG Cheson AML 2003 Oncology Response Criteria + enum: + - ELN Dohner AML 2017 Oncology Response Criteria + - IWG Cheson AML 2003 Oncology Response Criteria + - iRECIST + - RECIST + - Response Assessment in Neuro-Oncology (RANO) + - Physician Assessed Response Criteria + outcome_of_treatment: + description: Indicate the donor's outcome of the prescribed treatment.
+ type: string + example: Patient choice (stopped or interrupted treatment) + enum: + - Treatment completed as prescribed + - Treatment incomplete due to technical or organizational problems + - Treatment incomplete because patient died + - Patient choice (stopped or interrupted treatment) + - Physician decision (stopped or interrupted treatment) + - Treatment stopped due to lack of efficacy (disease progression) + - Treatment stopped due to acute toxicity + - Other + - Not applicable + - Unknown + toxicity_type: + description: If the treatment was terminated early due to acute toxicity, + indicate whether it was due to hematological toxicity or non-hematological + toxicity. + type: string + example: Hematological + enum: + - Hematological + - Non-hematological + - Not applicable + - Unknown + hematological_toxicity: + description: Indicate the hematological toxicities which caused early termination + of the treatment. + type: string + example: Anemia - Grade 4 + enum: + - Anemia - Grade 3 + - Anemia - Grade 4 + - Anemia - Grade 5 + - Neutropenia - Grade 3 + - Neutropenia - Grade 4 + - Neutropenia - Grade 5 + - Thrombocytopenia - Grade 3 + - Thrombocytopenia - Grade 4 + - Thrombocytopenia - Grade 5 + - Unknown + non-hematological_toxicity: + description: Indicate the non-hematological toxicities which caused early + termination of the treatment + type: string + example: Cardiac disorders - Grade 1 + enum: + - Cardiac disorders - Grade 1 + - Cardiac disorders - Grade 2 + - Cardiac disorders - Grade 3 + - Cardiac disorders - Grade 4 + - Cardiac disorders - Grade 5 + - Dehydration - Grade 1 + - Dehydration - Grade 2 + - Dehydration - Grade 3 + - Dehydration - Grade 4 + - Dehydration - Grade 5 + - Diarrhea - Grade 1 + - Diarrhea - Grade 2 + - Diarrhea - Grade 3 + - Diarrhea - Grade 4 + - Diarrhea - Grade 5 + - Fatigue - Grade 1 + - Fatigue - Grade 2 + - Fatigue - Grade 3 + - Nausea - Grade 1 + - Nausea - Grade 2 + - Nausea - Grade 3 + - Unknown + - Vomiting - Grade 1 + 
- Vomiting - Grade 2 + - Vomiting - Grade 3 + - Vomiting - Grade 4 + - Vomiting - Grade 5 + - Weight loss - Grade 1 + - Weight loss - Grade 2 + - Weight loss - Grade 3 +@endyaml diff --git a/uml/wsd/trial-definition.wsd b/uml/wsd/trial-definition.wsd new file mode 100644 index 0000000..6e5efda --- /dev/null +++ b/uml/wsd/trial-definition.wsd @@ -0,0 +1,135 @@ +@startyaml trial + trial: + properties: + project_id: + type: string + example: P034 + author_trial: + type: string + example: Gil-Bazo, I., Ajona, D. + registry_name: + type: string + example: NCT + registry_trail_id: + type: string + pattern: "^NCT[0-9]{8}$" + example: NCT03131687 + location: + type: string + example: United States + region: + type: string + example: Los Angeles, San Diego + sponsor: + type: string + example: Eli Lilly + trial_year: + type: integer + format: numeric + example: 2023 + lower_limit: 1900 + units: + uom: year + encoding: UTF-8 + trial_design: + type: string + example: The purpose of this study is to evaluate the efficacy of the study drug tirzepatide in participants with type 2 diabetes mellitus.
+ trial_blinding: + type: string + example: double-blind + enum: + - single-blind + - double-blind + primary_purpose: + type: string + example: prevention + enum: + - treatment + - prevention + primary_drug: + type: string + example: Dulaglutide + ontology_link: + type: string + description: Link to the ontology file + url: https://go.drugbank.com/ + trial_duration: + type: float + format: numeric + example: 20 + upper_limit: 100.0 + lower_limit: 0.0 + units: + uom: months + encoding: UTF-8 + trail_phase: + type: string + example: Phase I + enum: + - Phase I + - Phase II + - Phase III + - Phase IV + - Pilot + treatement_duration: + type: integer + format: numeric + example: 20 + upper_limit: 100 + lower_limit: 0 + condition_studied_trial: + type: string + example: Diabetes + intervention_agent: + type: string + example: Drug + objectives: + type: string + example: Change from baseline in HbA1c + sponsoring_organization: + type: string + example: Eli Lilly + target_enrollment: + type: integer + format: numeric + example: 245 + lower_limit: 0 + number_arm: + type: integer + format: numeric + example: 3 + lower_limit: 0 + placebo_control: + type: boolean + principal_investigator: + type: string + example: Thomas J Fahey + recruiting_status: + type: string + example: Recruiting + enum: + - Recruiting + - Non-recruiting + - Completed + - Active but not recruiting + intervention_other_than_drug: + type: string + example: Surgery + observational_studies: + type: boolean + funder_type: + type: string + example: NIH + enum: + - NIH + - Other Federal agency + - Industry + - Others + study_documents: + type: string + example: Study protocol + enum: + - Study protocol + - Statistical analysis plan + - Informed consent forms +@endyaml \ No newline at end of file