Skip to content

Commit 6fe332e

Browse files
cleanup task along with validation flag (#1602)
* config changes along with validation flag * fixed formatting issue of imports * remove ignore_validation_status override from all the imports * lint_fixes
1 parent ec453f4 commit 6fe332e

111 files changed

Lines changed: 2992 additions & 2483 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

scripts/biomedical/NCBI_Taxonomy/manifest.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,14 @@
1111
"download.sh",
1212
"scripts/format_ncbi_taxonomy.py"
1313
],
14-
"source_files": ["input/*.dmp"],
14+
"source_files": [
15+
"input/*.dmp"
16+
],
1517
"import_inputs": [
1618
{
1719
"cleaned_csv": "output/ncbi_taxonomy.csv",
1820
"node_mcf": "output/ncbi_taxonomy_schema_enum.mcf",
19-
"template_mcf" : "tMCFs/ncbi_taxonomy.tmcf"
20-
21+
"template_mcf": "tMCFs/ncbi_taxonomy.tmcf"
2122
}
2223
],
2324
"cron_schedule": "0 0 1 * *"
Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
11
{
2-
"import_specifications": [
3-
{
4-
"import_name": "CensusCountyBusinessPatterns",
5-
"curator_emails": ["support@datacommons.org"],
6-
"provenance_url": "https://www2.census.gov/programs-surveys/cbp/datasets/{year}/cbp{last_two_digit_of_year}co.zip",
7-
"provenance_description": "County Business Patterns (CBP) is an annual series that provides subnational economic data for establishments with paid employees by industry and employment size.",
8-
"scripts": ["main.py",
9-
"shard_input_csv.sh"
10-
],
11-
"source_files": ["gcs_output/input_files/*"],
12-
"import_inputs": [
2+
"import_specifications": [
133
{
14-
"template_mcf": "CensusCountyBusinessPatterns.tmcf",
15-
"cleaned_csv": "gcs_output/output/*.csv"
4+
"import_name": "CensusCountyBusinessPatterns",
5+
"curator_emails": [
6+
"support@datacommons.org"
7+
],
8+
"provenance_url": "https://www2.census.gov/programs-surveys/cbp/datasets/{year}/cbp{last_two_digit_of_year}co.zip",
9+
"provenance_description": "County Business Patterns (CBP) is an annual series that provides subnational economic data for establishments with paid employees by industry and employment size.",
10+
"scripts": [
11+
"main.py",
12+
"shard_input_csv.sh"
13+
],
14+
"source_files": [
15+
"gcs_output/input_files/*"
16+
],
17+
"import_inputs": [
18+
{
19+
"template_mcf": "CensusCountyBusinessPatterns.tmcf",
20+
"cleaned_csv": "gcs_output/output/*.csv"
21+
}
22+
],
23+
"cron_schedule": "0 9 5,25 * *"
1624
}
17-
],
18-
19-
"cron_schedule": "0 9 5,25 * *"
20-
}
21-
]
22-
}
25+
]
26+
}
Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
{
2-
"import_specifications": [{
3-
"import_name":
4-
"covid19IndiaCasesCountStatesData",
5-
"curator_emails": ["i@thejeshgn.com"],
6-
"provenance_url":
7-
"https://github.com/datameet/covid19",
8-
"provenance_description":
9-
"Community collected, cleaned and organized COVID-19/India datasets, sourced from various government websites.",
10-
"scripts": ["preprocess.py"],
11-
"import_inputs": [{
12-
"template_mcf": "COVID19_cases_indian_states.tmcf",
13-
"cleaned_csv": "COVID19_cases_indian_states.csv"
14-
}],
15-
"cron_schedule":
16-
"0 5 * * *"
17-
}]
18-
}
2+
"import_specifications": [
3+
{
4+
"import_name": "covid19IndiaCasesCountStatesData",
5+
"curator_emails": [
6+
"support@datacommons.org"
7+
],
8+
"provenance_url": "https://github.com/datameet/covid19",
9+
"provenance_description": "Community collected, cleaned and organized COVID-19/India datasets, sourced from various government websites.",
10+
"scripts": [
11+
"preprocess.py"
12+
],
13+
"import_inputs": [
14+
{
15+
"template_mcf": "COVID19_cases_indian_states.tmcf",
16+
"cleaned_csv": "COVID19_cases_indian_states.csv"
17+
}
18+
],
19+
"source_files": [
20+
"COVID19_cases_indian_states.csv"
21+
],
22+
"cron_schedule": "0 5 * * *"
23+
}
24+
]
25+
}
Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
{
2-
"import_specifications": [{
3-
"import_name":
4-
"covid19IndiaMedicalTestsInData",
5-
"curator_emails": ["i@thejeshgn.com"],
6-
"provenance_url":
7-
"https://github.com/datameet/covid19",
8-
"provenance_description":
9-
"Community collected, cleaned and organized COVID-19/India datasets, sourced from various government websites.",
10-
"scripts": ["preprocess.py"],
11-
"import_inputs": [{
12-
"template_mcf": "COVID19_tests_india.tmcf",
13-
"cleaned_csv": "COVID19_tests_india.csv"
14-
}],
15-
"cron_schedule":
16-
"0 6 * * *"
17-
}]
18-
}
2+
"import_specifications": [
3+
{
4+
"import_name": "covid19IndiaMedicalTestsInData",
5+
"curator_emails": [
6+
"support@datacommons.org"
7+
],
8+
"provenance_url": "https://github.com/datameet/covid19",
9+
"provenance_description": "Community collected, cleaned and organized COVID-19/India datasets, sourced from various government websites.",
10+
"scripts": [
11+
"preprocess.py"
12+
],
13+
"import_inputs": [
14+
{
15+
"template_mcf": "COVID19_tests_india.tmcf",
16+
"cleaned_csv": "COVID19_tests_india.csv"
17+
}
18+
],
19+
"source_files": [
20+
"COVID19_tests_india.csv"
21+
],
22+
"cron_schedule": "0 6 * * *"
23+
}
24+
]
25+
}

scripts/covid_tracking_project/historic_state_data/manifest.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
{
44
"import_name": "COVIDTracking_HistoricStateData",
55
"curator_emails": [
6-
"hhhhhwu@google.com"
6+
"support@datacommons.org"
77
],
88
"provenance_url": "https://www.covidtracking.com",
99
"provenance_description": "The COVID Tracking Project is a volunteer organization launched from The Atlantic and dedicated to collecting and publishing the data required to understand the COVID-19 outbreak in the United States.",
@@ -17,7 +17,10 @@
1717
"node_mcf": "COVIDTracking_States_StatisticalVariables.mcf"
1818
}
1919
],
20+
"source_files": [
21+
"COVIDTracking_States.csv"
22+
],
2023
"cron_schedule": "45 3 * * *"
2124
}
2225
]
23-
}
26+
}

scripts/covid_tracking_project/historic_us_data/manifest.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
{
44
"import_name": "COVIDTracking_HistoricUSData",
55
"curator_emails": [
6-
"hhhhhwu@google.com"
6+
"support@datacommons.org"
77
],
88
"provenance_url": "https://www.covidtracking.com",
99
"provenance_description": "The COVID Tracking Project is a volunteer organization launched from The Atlantic and dedicated to collecting and publishing the data required to understand the COVID-19 outbreak in the United States.",
@@ -16,7 +16,10 @@
1616
"cleaned_csv": "COVIDTracking_US.csv"
1717
}
1818
],
19+
"source_files": [
20+
"COVIDTracking_US.csv"
21+
],
1922
"cron_schedule": "30 3 * * *"
2023
}
2124
]
22-
}
25+
}

0 commit comments

Comments
 (0)