Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions scripts/biomedical/NCBI_Taxonomy/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
"download.sh",
"scripts/format_ncbi_taxonomy.py"
],
"source_files": ["input/*.dmp"],
"source_files": [
"input/*.dmp"
],
"import_inputs": [
{
"cleaned_csv": "output/ncbi_taxonomy.csv",
"node_mcf": "output/ncbi_taxonomy_schema_enum.mcf",
"template_mcf" : "tMCFs/ncbi_taxonomy.tmcf"

"template_mcf": "tMCFs/ncbi_taxonomy.tmcf"
}
],
"cron_schedule": "0 0 1 * *"
Expand Down
42 changes: 23 additions & 19 deletions scripts/census_county_business_patterns/manifest.json
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
{
"import_specifications": [
{
"import_name": "CensusCountyBusinessPatterns",
"curator_emails": ["support@datacommons.org"],
"provenance_url": "https://www2.census.gov/programs-surveys/cbp/datasets/{year}/cbp{last_two_digit_of_year}co.zip",
"provenance_description": "County Business Patterns (CBP) is an annual series that provides subnational economic data for establishments with paid employees by industry and employment size.",
"scripts": ["main.py",
"shard_input_csv.sh"
],
"source_files": ["gcs_output/input_files/*"],
"import_inputs": [
"import_specifications": [
{
"template_mcf": "CensusCountyBusinessPatterns.tmcf",
"cleaned_csv": "gcs_output/output/*.csv"
"import_name": "CensusCountyBusinessPatterns",
"curator_emails": [
"support@datacommons.org"
],
"provenance_url": "https://www2.census.gov/programs-surveys/cbp/datasets/{year}/cbp{last_two_digit_of_year}co.zip",
"provenance_description": "County Business Patterns (CBP) is an annual series that provides subnational economic data for establishments with paid employees by industry and employment size.",
"scripts": [
"main.py",
"shard_input_csv.sh"
],
"source_files": [
"gcs_output/input_files/*"
],
"import_inputs": [
{
"template_mcf": "CensusCountyBusinessPatterns.tmcf",
"cleaned_csv": "gcs_output/output/*.csv"
}
],
"cron_schedule": "0 9 5,25 * *"
}
],

"cron_schedule": "0 9 5,25 * *"
}
]
}
]
}
41 changes: 24 additions & 17 deletions scripts/covid19_india/cases_count_states_data/manifest.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
{
"import_specifications": [{
"import_name":
"covid19IndiaCasesCountStatesData",
"curator_emails": ["i@thejeshgn.com"],
"provenance_url":
"https://github.com/datameet/covid19",
"provenance_description":
"Community collected, cleaned and organized COVID-19/India datasets, sourced from various government websites.",
"scripts": ["preprocess.py"],
"import_inputs": [{
"template_mcf": "COVID19_cases_indian_states.tmcf",
"cleaned_csv": "COVID19_cases_indian_states.csv"
}],
"cron_schedule":
"0 5 * * *"
}]
}
"import_specifications": [
{
"import_name": "covid19IndiaCasesCountStatesData",
"curator_emails": [
"support@datacommons.org"
],
"provenance_url": "https://github.com/datameet/covid19",
"provenance_description": "Community collected, cleaned and organized COVID-19/India datasets, sourced from various government websites.",
"scripts": [
"preprocess.py"
],
"import_inputs": [
{
"template_mcf": "COVID19_cases_indian_states.tmcf",
"cleaned_csv": "COVID19_cases_indian_states.csv"
}
],
"source_files": [
"COVID19_cases_indian_states.csv"
],
"cron_schedule": "0 5 * * *"
}
]
}
41 changes: 24 additions & 17 deletions scripts/covid19_india/medical_tests_in_data/manifest.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
{
"import_specifications": [{
"import_name":
"covid19IndiaMedicalTestsInData",
"curator_emails": ["i@thejeshgn.com"],
"provenance_url":
"https://github.com/datameet/covid19",
"provenance_description":
"Community collected, cleaned and organized COVID-19/India datasets, sourced from various government websites.",
"scripts": ["preprocess.py"],
"import_inputs": [{
"template_mcf": "COVID19_tests_india.tmcf",
"cleaned_csv": "COVID19_tests_india.csv"
}],
"cron_schedule":
"0 6 * * *"
}]
}
"import_specifications": [
{
"import_name": "covid19IndiaMedicalTestsInData",
"curator_emails": [
"support@datacommons.org"
],
"provenance_url": "https://github.com/datameet/covid19",
"provenance_description": "Community collected, cleaned and organized COVID-19/India datasets, sourced from various government websites.",
"scripts": [
"preprocess.py"
],
"import_inputs": [
{
"template_mcf": "COVID19_tests_india.tmcf",
"cleaned_csv": "COVID19_tests_india.csv"
}
],
"source_files": [
"COVID19_cases_indian_states.csv"
],
"cron_schedule": "0 6 * * *"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
"import_name": "COVIDTracking_HistoricStateData",
"curator_emails": [
"hhhhhwu@google.com"
"support@datacommons.org"
],
"provenance_url": "https://www.covidtracking.com",
"provenance_description": "The COVID Tracking Project is a volunteer organization launched from The Atlantic and dedicated to collecting and publishing the data required to understand the COVID-19 outbreak in the United States.",
Expand All @@ -17,7 +17,10 @@
"node_mcf": "COVIDTracking_States_StatisticalVariables.mcf"
}
],
"source_files": [
"COVIDTracking_States.csv"
],
"cron_schedule": "45 3 * * *"
}
]
}
}
7 changes: 5 additions & 2 deletions scripts/covid_tracking_project/historic_us_data/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
"import_name": "COVIDTracking_HistoricUSData",
"curator_emails": [
"hhhhhwu@google.com"
"support@datacommons.org"
],
"provenance_url": "https://www.covidtracking.com",
"provenance_description": "The COVID Tracking Project is a volunteer organization launched from The Atlantic and dedicated to collecting and publishing the data required to understand the COVID-19 outbreak in the United States.",
Expand All @@ -16,7 +16,10 @@
"cleaned_csv": "COVIDTracking_US.csv"
}
],
"source_files": [
"COVIDTracking_US.csv"
],
"cron_schedule": "30 3 * * *"
}
]
}
}
Loading