diff --git a/db/Makefile b/db/Makefile index 0e8b83a..a82a6fe 100644 --- a/db/Makefile +++ b/db/Makefile @@ -1,2 +1,2 @@ build-pg-docker: - docker build -f Dockerfile-pg -t valorumdata/cmdc-tools/pg:latest + docker build -t valorumdata/cmdc-tools-pg:latest . diff --git a/db/schemas/018_api_covid.sql b/db/schemas/018_api_covid.sql index 188c7c3..802ffc4 100644 --- a/db/schemas/018_api_covid.sql +++ b/db/schemas/018_api_covid.sql @@ -178,11 +178,11 @@ FROM last_vintage lv LEFT JOIN data.us_covid uc USING (dt, fips, variable_id, vintage) LEFT JOIN meta.covid_variables cv ON cv.id = uc.variable_id; - + COMMENT ON VIEW api.covid_us IS E'This table contains relevant information on COVID-19 -This table only includes the most recent observation for each date, location, and variable. - +This table only includes the most recent observation for each date, location, and variable. + For a full history of all data we have collected see `covid_historical` Currently, the following variables are collected in the database @@ -221,12 +221,13 @@ These variables are only collected from official US federal/state/county governm COMMENT ON COLUMN api.covid_us.dt IS E'The date that corresponds to the observed variable'; -COMMENT ON COLUMN api.covid_us.fips IS E'The fips code'; +COMMENT ON COLUMN api.covid_us.location IS E'The fips code'; COMMENT ON COLUMN api.covid_us.variable IS E'One of the variables described in the table description'; COMMENT ON COLUMN api.covid_us.value IS E'The value of the variable'; + /* Historical view with vintages */ CREATE OR REPLACE VIEW api.covid_historical AS SELECT diff --git a/schemas/api_covid.sql b/schemas/api_covid.sql deleted file mode 100644 index 6e47b93..0000000 --- a/schemas/api_covid.sql +++ /dev/null @@ -1,248 +0,0 @@ -/* Covid Tracking Project */ -CREATE OR REPLACE VIEW api.covidtrackingproject AS - WITH last_vintage as ( - SELECT dt, fips, variable_id, MAX(vintage) AS vintage - FROM data.ctp_covid - GROUP BY (dt, fips, variable_id) - ) - SELECT 
ctp.dt, ctp.fips, cv.name as variable, ctp.value - FROM last_vintage lv - LEFT JOIN data.ctp_covid ctp using (dt, fips, variable_id, vintage) - LEFT JOIN meta.covid_variables cv ON cv.id = ctp.variable_id; - -COMMENT ON VIEW api.covidtrackingproject IS E'This table contains the data from the COVID Tracking Project COVID data - -This table only includes the most recent observation for each date, location, and variable. If you are interested in historical revisions of this data, please reach out -- We have previous "vintages" of the CTP data but, in order to simplify our list of tables, we have chosen not to expose (but are happy to if it would be useful). - -The COVID Tracking Project collects data on the number of cases, test results, and hospitaliztions at the state level. As always, if you intend to use this data, we recommend that you read the corresponding documentation on their [data page](https://covidtracking.com/data) as it provides insights into how data collection changed at various points in time and highlights other data caveats. - -The data can also be found at on the [COVID Tracking Project page](https://covidtracking.com/data). - -The COVID Tracking Project data is released under the following license: - -You are welcome to copy, distribute, and develop data and website content from The COVID Tracking Project at The Atlantic for all healthcare, medical, journalistic and non-commercial uses, including any personal, editorial, academic, or research purposes. - -The COVID Tracking Project at The Atlantic data and website content is published under a Creative Commons CC BY-NC-4.0 license, which requires users to attribute the source and license type (CC BY-NC-4.0) when sharing our data or website content. 
The COVID Tracking Project at The Atlantic also grants permission for any derivative use of this data and website content that supports healthcare or medical research (including institutional use by public health and for-profit organizations), or journalistic usage (by nonprofit or for-profit organizations). All other commercial uses are not permitted under the Creative Commons license, and will require permission from The COVID Tracking Project at The Atlantic. -'; - -COMMENT ON COLUMN api.covidtrackingproject.dt is E'The date of the observation'; -COMMENT ON COLUMN api.covidtrackingproject.fips is E'The fips code corresponding to the observation'; -COMMENT ON COLUMN api.covidtrackingproject.variable is E'Denotes whether observation is total cases or total deaths'; -COMMENT ON COLUMN api.covidtrackingproject.value is E'The value of the observation'; - - - -/* NYTimes */ -CREATE OR REPLACE VIEW api.nytimes_covid AS - WITH last_vintage as ( - SELECT dt, fips, variable_id, MAX(vintage) AS vintage - FROM data.nyt_covid - GROUP BY (dt, fips, variable_id) - ) - SELECT nyt.dt, nyt.fips, cv.name as variable, nyt.value - FROM last_vintage lv - LEFT JOIN data.nyt_covid nyt using (dt, fips, variable_id, vintage) - LEFT JOIN meta.covid_variables cv ON cv.id = nyt.variable_id; - -COMMENT ON VIEW api.nytimes_covid IS E'This table contains the data from the NY Times COVID data - -This table only includes the most recent observation for each date, location, and variable. If you are interested in historical revisions of this data, please reach out -- We have previous "vintages" of the NYT data but, in order to simplify our list of tables, we have chosen not to expose (but are happy to if it would be useful). - -The data only includes total number of cases and total number of COVID related deaths. 
If you use this data, we recommend that you read the corresponding documentation on their github site as it provides useful insights to when certain variables changed how they were collected etc... - -The NYTimes COVID data can be found online at https://github.com/nytimes/covid-19-data and is released under the following license: - -Copyright 2020 by The New York Times Company - -In light of the current public health emergency, The New York Times Company is -providing this database under the following free-of-cost, perpetual, -non-exclusive license. Anyone may copy, distribute, and display the database, or -any part thereof, and make derivative works based on it, provided (a) any such -use is for non-commercial purposes only and (b) credit is given to The New York -Times in any public display of the database, in any publication derived in part -or in full from the database, and in any other public use of the data contained -in or derived from the database. - -By accessing or copying any part of the database, the user accepts the terms of -this license. Anyone seeking to use the database for other purposes is required -to contact The New York Times Company at covid-data@nytimes.com to obtain -permission. - -The New York Times has made every effort to ensure the accuracy of the -information. However, the database may contain typographic errors or -inaccuracies and may not be complete or current at any given time. Licensees -further agree to assume all liability for any claims that may arise from or -relate in any way to their use of the database and to hold The New York Times -Company harmless from any such claims. 
-'; - -COMMENT ON COLUMN api.nytimes_covid.dt is E'The date of the observation'; -COMMENT ON COLUMN api.nytimes_covid.fips is E'The fips code corresponding to the observation'; -COMMENT ON COLUMN api.nytimes_covid.variable is E'Denotes whether observation is total cases or total deaths'; -COMMENT ON COLUMN api.nytimes_covid.value is E'The value of the observation'; - -/* USAFacts */ -CREATE OR REPLACE VIEW api.usafacts_covid AS - WITH last_vintage as ( - SELECT dt, fips, variable_id, MAX(vintage) AS vintage - FROM data.usafacts_covid - GROUP BY (dt, fips, variable_id) - ) - SELECT ufc.dt, ufc.fips, cv.name as variable, ufc.value - FROM last_vintage lv - LEFT JOIN data.usafacts_covid ufc using (dt, fips, variable_id, vintage) - LEFT JOIN meta.covid_variables cv ON cv.id = ufc.variable_id; - -COMMENT ON VIEW api.usafacts_covid IS E'This table the USAFacts COVID data - -This table only includes the most recent observation for each date, location, and variable. If you are interested in historical revisions of this data, please reach out -- We have previous "vintages" of the USAFacts data but, in order to simplify our list of tables, we have chosen not to expose (but are happy to if it would be useful). - -The data only includes total number of cases and total number of COVID related deaths. If you use this data, we recommend that you read the corresponding documentation on their webpage as it provides useful insights to how the data were collected and how they should be used etc... 
- -The USAFacts COVID data can be found online at https://usafacts.org/visualizations/coronavirus-covid-19-spread-map/ and is released under the Creative Commons Share Alike 4.0 license: - -To see the terms of this license refer to https://creativecommons.org/licenses/by-sa/4.0/ -'; - -COMMENT ON COLUMN api.usafacts_covid.dt is E'The date of the observation'; -COMMENT ON COLUMN api.usafacts_covid.fips is E'The fips code corresponding to the observation'; -COMMENT ON COLUMN api.usafacts_covid.variable is E'Denotes whether observation is total cases or total deaths'; -COMMENT ON COLUMN api.usafacts_covid.value is E'The value of the observation'; - - -/* The covid view */ -CREATE OR REPLACE VIEW api.covid AS -WITH last_vintage as ( - SELECT dt, fips, variable_id, max(vintage) as vintage - from data.us_covid uc - group by (dt, fips, variable_id) -) - SELECT lv.vintage, - uc.dt, - uc.fips, - cv.name AS variable, - uc.value - FROM last_vintage lv - LEFT JOIN data.us_covid uc using (dt, fips, variable_id, vintage) - LEFT JOIN meta.covid_variables cv ON cv.id = uc.variable_id; - -/* The covid view */ -CREATE OR REPLACE VIEW api.covid_us AS -WITH last_vintage as ( - SELECT dt, fips, variable_id, max(vintage) as vintage - from data.us_covid uc - group by (dt, fips, variable_id) -) - SELECT uc.dt, - uc.fips as location, - cv.name AS variable, - uc.value - FROM last_vintage lv - LEFT JOIN data.us_covid uc using (dt, fips, variable_id, vintage) - LEFT JOIN meta.covid_variables cv ON cv.id = uc.variable_id; - - -COMMENT ON VIEW api.covid IS E'This table contains relevant information on COVID-19 - -This table only includes the most recent observation for each date, location, and variable. 
For a full history of all data we have collected see `covid_historical` - -Currently, the following variables are collected in the database - -* `cases_suspected`: Total number of suspected cases -* `cases_confirmed`: Total number of confirmed cases -* `cases_total`: The number of suspected or confirmed cases -* `deaths_suspected`: The number of deaths that are suspected to have been caused by COVID-19 -* `deaths_confirmed`: The number of deaths that are confirmed to have been caused by COVID-19 -* `deaths_total`: The number of deaths that are either suspected or confirmed to have been caused by COVID-19 -* `positive_tests_total`: The total number of tests that have been positive -* `negative_tests_total`: The total number of tests that have been negative -* `icu_beds_capacity_count`: The number of ICU beds available in the geography -* `icu_beds_in_use_any`: The number of ICU beds currently in use -* `icu_beds_in_use_covid_suspected`: The number of ICU beds currently in use by a patient suspected of COVID-19 -* `icu_beds_in_use_covid_confirmed`: The number of ICU beds currently in use by a patient confirmed to have COVID-19 -* `icu_beds_in_use_covid_total`: The number of ICU beds currently in use by a patient who is suspected of having or confirmed to have COVID-19 -* `icu_beds_in_use_covid_new`: The number of ICU beds occupied by an individual suspected or confirmed of having COVID-19 that have been admitted today -* `hospital_beds_capacity_count`: The number of hospital beds available in the geography -* `hospital_beds_in_use_any`: The number of hospital beds currently in use -* `hospital_beds_in_use_covid_suspected`: The number of hospital beds currently in use by a patient suspected of COVID-19 -* `hospital_beds_in_use_covid_confirmed`: The number of hospital beds currently in use by a patient confirmed to have COVID-19 -* `hospital_beds_in_use_covid_total`: The number of hospital beds currently in use by a patient who is suspected of having or confirmed to 
have COVID-19 -* `hospital_beds_in_use_covid_new`: The number of hospital beds occupied by an individual suspected or confirmed of having COVID-19 that have been admitted today -* `ventilators_capacity_count`: The number of individuals who can be supported by a ventilator -* `ventilators_in_use_any`: The number of individuals who are currently on a ventilator -* `ventilators_in_use_covid_suspected`: The number of individuals who are suspected of having COVID-19 that are currently on a ventilator -* `ventilators_in_use_covid_confirmed`: The number of individuals who are confirmed to have COVID-19 that are currently on a ventilator -* `ventilators_in_use_covid_total`: The number of individuals who are either suspected of having or confirmed to have COVID-19 that are on a ventilator -* `ventilators_in_use_covid_new`: The number of ventilators that are currently on a ventilator that are suspected of having or confirmed to have COVID-19 that started the ventilator today -* `recovered_total`: The number of individuals who tested positive for COVID-19 and no longer test positive -* `active_total`: The number of currently active COVID-19 cases - -These variables are only collected from official US federal/state/county government sources -'; - -COMMENT ON COLUMN api.covid.vintage is E'The date/time the data was collected and stored in our database. Only the most recent vintage is returned. 
See `covid_historical` for data with all'; -COMMENT ON COLUMN api.covid.dt is E'The date that corresponds to the observed variable'; -COMMENT ON COLUMN api.covid.fips is E'The fips code'; -COMMENT ON COLUMN api.covid.variable is E'One of the variables described in the table description'; -COMMENT ON COLUMN api.covid.value is E'The value of the variable'; - - -/* Historical view with vintages */ - -CREATE OR REPLACE VIEW api.covid_historical - AS - SELECT uc.vintage, - uc.dt, - uc.fips, - cv.name AS variable, - uc.value - FROM data.us_covid uc - LEFT JOIN meta.covid_variables cv ON cv.id = uc.variable_id; - - -COMMENT ON VIEW api.covid_historical IS E'This table contains relevant information on COVID-19 - -This table returns all vintages (data from different collection dates) of data in our system. - -For only the most recent data, please use the `covid` endpoint. - -Currently, the following variables are collected in the database - -* `cases_suspected`: Total number of suspected cases -* `cases_confirmed`: Total number of confirmed cases -* `cases_total`: The number of suspected or confirmed cases -* `deaths_suspected`: The number of deaths that are suspected to have been caused by COVID-19 -* `deaths_confirmed`: The number of deaths that are confirmed to have been caused by COVID-19 -* `deaths_total`: The number of deaths that are either suspected or confirmed to have been caused by COVID-19 -* `positive_tests_total`: The total number of tests that have been positive -* `negative_tests_total`: The total number of tests that have been negative -* `icu_beds_capacity_count`: The number of ICU beds available in the geography -* `icu_beds_in_use_any`: The number of ICU beds currently in use -* `icu_beds_in_use_covid_suspected`: The number of ICU beds currently in use by a patient suspected of COVID-19 -* `icu_beds_in_use_covid_confirmed`: The number of ICU beds currently in use by a patient confirmed to have COVID-19 -* `icu_beds_in_use_covid_total`: The number of ICU 
beds currently in use by a patient who is suspected of having or confirmed to have COVID-19 -* `icu_beds_in_use_covid_new`: The number of ICU beds occupied by an individual suspected or confirmed of having COVID-19 that have been admitted today -* `hospital_beds_capacity_count`: The number of hospital beds available in the geography -* `hospital_beds_in_use_any`: The number of hospital beds currently in use -* `hospital_beds_in_use_covid_suspected`: The number of hospital beds currently in use by a patient suspected of COVID-19 -* `hospital_beds_in_use_covid_confirmed`: The number of hospital beds currently in use by a patient confirmed to have COVID-19 -* `hospital_beds_in_use_covid_total`: The number of hospital beds currently in use by a patient who is suspected of having or confirmed to have COVID-19 -* `hospital_beds_in_use_covid_new`: The number of hospital beds occupied by an individual suspected or confirmed of having COVID-19 that have been admitted today -* `ventilators_capacity_count`: The number of individuals who can be supported by a ventilator -* `ventilators_in_use_any`: The number of individuals who are currently on a ventilator -* `ventilators_in_use_covid_suspected`: The number of individuals who are suspected of having COVID-19 that are currently on a ventilator -* `ventilators_in_use_covid_confirmed`: The number of individuals who are confirmed to have COVID-19 that are currently on a ventilator -* `ventilators_in_use_covid_total`: The number of individuals who are either suspected of having or confirmed to have COVID-19 that are on a ventilator -* `ventilators_in_use_covid_new`: The number of ventilators that are currently on a ventilator that are suspected of having or confirmed to have COVID-19 that started the ventilator today -* `recovered_total`: The number of individuals who tested positive for COVID-19 and no longer test positive -* `active_total`: The number of currently active COVID-19 cases - -These variables are only collected from 
official US federal/state/county government sources -'; - - -COMMENT ON COLUMN api.covid_historical.vintage is E'The date/time the data was collected and stored in our database'; -COMMENT ON COLUMN api.covid_historical.dt is E'The date that corresponds to the observed variable'; -COMMENT ON COLUMN api.covid_historical.fips is E'The fips code'; -COMMENT ON COLUMN api.covid_historical.variable is E'One of the variables described in the table description'; -COMMENT ON COLUMN api.covid_historical.value is E'The value of the variable'; diff --git a/schemas/ctp_tables.sql b/schemas/ctp_tables.sql deleted file mode 100644 index 38e63ee..0000000 --- a/schemas/ctp_tables.sql +++ /dev/null @@ -1,14 +0,0 @@ -DROP TABLE IF EXISTS data.ctp_covid; - -CREATE TABLE data.ctp_covid ( - vintage DATE, - dt DATE, - fips INT references meta.us_fips(fips), - variable_id SMALLINT REFERENCES meta.covid_variables(id), - value INT, - PRIMARY KEY (fips, dt, vintage, variable_id) -); - -COMMENT ON TABLE data.ctp_covid IS E'This table contains the data collected by COVID Tracking Project'; - -CREATE INDEX ctp_dt_idx on data.ctp_covid (dt); diff --git a/src/cmdc_tools/datasets/__init__.py b/src/cmdc_tools/datasets/__init__.py index 4ebf40c..fa2da7a 100644 --- a/src/cmdc_tools/datasets/__init__.py +++ b/src/cmdc_tools/datasets/__init__.py @@ -49,6 +49,8 @@ Montana, MOStLouis, Nebraska, + NevadaCounty, + NevadaFips, NewJersey, NewMexico, NewYork, diff --git a/src/cmdc_tools/datasets/official/NV/__init__.py b/src/cmdc_tools/datasets/official/NV/__init__.py new file mode 100644 index 0000000..0471238 --- /dev/null +++ b/src/cmdc_tools/datasets/official/NV/__init__.py @@ -0,0 +1 @@ +from .data import NevadaCounty, NevadaFips diff --git a/src/cmdc_tools/datasets/official/NV/data.py b/src/cmdc_tools/datasets/official/NV/data.py new file mode 100644 index 0000000..3e3b7e0 --- /dev/null +++ b/src/cmdc_tools/datasets/official/NV/data.py @@ -0,0 +1,287 @@ +import asyncio + +import numpy as np +import 
pandas as pd
+import us
+from pyppeteer.element_handle import ElementHandle
+
+from ...base import DatasetBaseNoDate
+from ...puppet import with_page
+
+# XPaths the scraper waits for on each successive dashboard page, in the order
+# the pages are reached by clicking "Next Page". Entry 0 is awaited right after
+# the initial page load.
+_PAGE_LANDMARKS = (
+    "//span[text()='COVID-19 ']",
+    "//div[text()='COVID-19 Statistics by County']",
+    "//div[text()='Results Filter for Demographics']",
+    "//div[text()='Cumulative Tests Reported']",
+    "//div[text()='Cumulative Cases']",
+    "//div[text()='Cumulative Deaths']",
+    "//div[text()='Daily Growth Rate, Cases (April 1st to Current)']",
+)
+
+
+class NevadaFips(DatasetBaseNoDate):
+    """Scrape statewide test, case and hospital-bed series from the dashboard at `source`.
+
+    Every row produced by this class is tagged with the state fips code.
+    """
+
+    state_fips = int(us.states.lookup("Nevada").fips)
+    has_fips = True
+    source = "https://app.powerbigov.us/view?r=eyJrIjoiMjA2ZThiOWUtM2FlNS00MGY5LWFmYjUtNmQwNTQ3Nzg5N2I2IiwidCI6ImU0YTM0MGU2LWI4OWUtNGU2OC04ZWFhLTE1NDRkMjcwMzk4MCJ9"
+
+    def get(self):
+        """Return the tests, cases and hospitalization rows stacked in one DataFrame."""
+        cases = self._get_cases()
+        tests = self._get_tests()
+        hosp = self._get_hosp()
+        return pd.concat([tests, cases, hosp], sort=False, ignore_index=True)
+
+    def _get_tests(self):
+        """Synchronous wrapper around `_get_tests_async`."""
+        return asyncio.run(self._get_tests_async())
+
+    def _get_cases(self):
+        """Synchronous wrapper around `_get_cases_async`."""
+        return asyncio.run(self._get_cases_async())
+
+    def _get_hosp(self):
+        """Synchronous wrapper around `_get_hosp_async`."""
+        return asyncio.run(self._get_hosp_async())
+
+    async def _get_tests_async(self):
+        """Scrape the chart reached after three "Next Page" clicks as `tests_total`."""
+        async with with_page() as page:
+            await _advance_pages(page, self.source, clicks=3)
+            labels = await self._get_labels_via_table(page)
+
+        return self._cumulative_frame(labels, "Tests", "Cumulative", "tests_total")
+
+    async def _get_labels_from_graph(self, page):
+        """Return the `aria-label` attribute of every column in the chart's series."""
+        # wait for graph to load
+        visual_modern = await page.waitForXPath("//*[@class='cartesianChart']")
+        # get all graph points
+        elems = await visual_modern.Jx(
+            "//*[@class='series']//*[@class='column setFocusRing']"
+        )
+        labels = [
+            (await page.evaluate("(el) => el.getAttribute('aria-label')", e))
+            for e in elems
+        ]
+
+        return labels
+
+    async def _get_labels_via_table(self, page):
+        """Right-click the chart, choose "Show as a table", then read the labels."""
+        graph = await page.waitForXPath("//*[@class='cartesianChart']")
+        await graph.click(button="right")
+        table_button = await page.waitForXPath("//h6[text()='Show as a table']")
+        await table_button.click()
+        return await self._get_labels_from_graph(page)
+
+    def _cumulative_frame(self, labels, sep, series, variable):
+        """Turn chart labels into long-form rows for one cumulative variable.
+
+        Parameters
+        ----------
+        labels : list of str
+            Labels as returned by `_get_labels_from_graph`.
+        sep : str
+            Separator between the series name and the count inside a label.
+        series : str
+            Series name found in the labels; renamed to `variable`.
+        variable : str
+            Name stored in the `variable_name` column of the result.
+
+        Returns
+        -------
+        pd.DataFrame
+            Columns `dt`, `variable_name`, `value`, `vintage` and `fips`.
+        """
+        rows = []
+        for label in labels:
+            date, fields = _split_label(label, sep)
+            kind = fields[0].strip()
+            # Stop at the first "New" (daily) point: everything from there on
+            # is ignored. NOTE(review): this assumes all cumulative points are
+            # listed before the first "New" one -- confirm against the dashboard.
+            if kind == "New":
+                break
+            # NOTE(review): labels carry no year, so 2020 is hard-coded here.
+            rows.append({"Date": date + "/2020", kind: _parse_count(fields[1])})
+
+        df = pd.DataFrame(rows).rename(columns={"Date": "dt", series: variable})
+        df.dt = pd.to_datetime(df.dt)
+        return df.melt(id_vars=["dt"], var_name="variable_name").assign(
+            vintage=pd.Timestamp.utcnow(), fips=self.state_fips
+        )
+
+    async def _get_cases_async(self):
+        """Scrape the chart reached after four "Next Page" clicks as `cases_total`."""
+        async with with_page() as page:
+            await _advance_pages(page, self.source, clicks=4)
+            labels = await self._get_labels_via_table(page)
+
+        return self._cumulative_frame(labels, " ", "Cases", "cases_total")
+
+    async def _get_hosp_async(self):
+        """Scrape suspected/confirmed hospital-bed counts (seven "Next Page" clicks)."""
+        async with with_page() as page:
+            await _advance_pages(page, self.source, clicks=7)
+            labels = await self._get_labels_via_table(page)
+
+        # Any series name other than these two raises KeyError below.
+        data = {"Suspected": [], "Confirmed": []}
+        for label in labels:
+            # NOTE(review): unlike tests/cases no year is appended to the date
+            # here -- presumably these labels already carry one; confirm.
+            date, fields = _split_label(label, " ")
+            kind = fields[0].strip()
+            count = _parse_count(fields[1])
+            data[kind].append({"dt": date, "value": count})
+
+        suspected = pd.DataFrame(data["Suspected"]).assign(
+            variable_name="hospital_beds_in_use_covid_suspected"
+        )
+        confirmed = pd.DataFrame(data["Confirmed"]).assign(
+            variable_name="hospital_beds_in_use_covid_confirmed"
+        )
+
+        df = pd.concat([suspected, confirmed], sort=False, ignore_index=True)
+
+        df.dt = pd.to_datetime(df.dt)
+        return df.assign(vintage=pd.Timestamp.utcnow(), fips=self.state_fips)
+
+    def _parse_hospitalizations(self, labels):
+        """Placeholder; currently does nothing and returns None."""
+        pass
+
+
+class NevadaCounty(DatasetBaseNoDate):
+    """Scrape the per-county totals table from the dashboard at `source`.
+
+    Rows are keyed by county name rather than fips (`has_fips = False`).
+    """
+
+    state_fips = int(us.states.lookup("Nevada").fips)
+    has_fips = False
+    source = "https://app.powerbigov.us/view?r=eyJrIjoiMjA2ZThiOWUtM2FlNS00MGY5LWFmYjUtNmQwNTQ3Nzg5N2I2IiwidCI6ImU0YTM0MGU2LWI4OWUtNGU2OC04ZWFhLTE1NDRkMjcwMzk4MCJ9"
+
+    def get(self):
+        """Synchronous wrapper around `_get_county_overview_async`."""
+        return asyncio.run(self._get_county_overview_async())
+
+    async def _get_county_overview_async(self):
+        """Return tests, cases and deaths per county in long form.
+
+        Returns
+        -------
+        pd.DataFrame
+            Columns `county`, `variable_name`, `value`, `vintage` and `dt`.
+            NOTE(review): `value` holds the cell text exactly as scraped; no
+            numeric conversion happens here.
+        """
+        async with with_page() as page:
+            await _advance_pages(page, self.source, clicks=1)
+            await page.waitForXPath(_PAGE_LANDMARKS[1])
+
+            table = await page.waitForXPath("//div[@class='tableExContainer']")
+            col_headers = await _get_table_vals(page, table, "columnHeaders")
+            body_cells = np.array(await _get_table_vals(page, table, "bodyCells"))
+
+        # Cells are read column by column, hence the Fortran-order reshape.
+        out = pd.DataFrame(
+            body_cells.reshape((-1, len(col_headers)), order="F"),
+            columns=col_headers,
+        )
+
+        renamed = out.rename(
+            columns={
+                "County": "county",
+                "Tests": "tests_total",
+                "Total Cases": "cases_total",
+                "Deaths": "deaths_total",
+            }
+        )
+
+        now = pd.Timestamp.utcnow()
+        return (
+            renamed[["county", "tests_total", "cases_total", "deaths_total"]]
+            .melt(id_vars=["county"], var_name="variable_name")
+            .assign(
+                vintage=now,
+                # Nevada keeps Pacific time; using Mountain time rolled the
+                # date forward one hour too early every evening.
+                dt=now.tz_convert("US/Pacific").normalize().tz_localize(None),
+            )
+        )
+
+
+async def _advance_pages(page, source, clicks):
+    """Load `source` and click "Next Page" `clicks` times (`clicks` >= 1).
+
+    Before each click the landmark of the page currently shown is awaited so
+    the click is not issued while the dashboard is still rendering.
+    """
+    await page.goto(source)
+    await page.waitForXPath(_PAGE_LANDMARKS[0])
+    button = await _get_next_page_button(page)
+    await button.click()
+    for landmark in _PAGE_LANDMARKS[1:clicks]:
+        await page.waitForXPath(landmark)
+        await button.click()
+
+
+def _split_label(label, sep):
+    """Split a chart label into its date text and its `sep`-separated value fields."""
+    parts = label.split(". ")
+    return parts[0].split("Date")[1].strip(), parts[1].split(sep)
+
+
+def _parse_count(text):
+    """Convert a count to int after dropping its last character and any commas."""
+    # The last character is presumably a trailing period -- TODO confirm.
+    return int(text[:-1].strip().replace(",", ""))
+
+
+async def _get_next_page_button(page):
+    """Wait for and return the element titled "Next Page"."""
+    button = await page.waitForXPath("//i[@title='Next Page']")
+    return button
+
+
+async def _get_table_vals(page, table: ElementHandle, parentClass: str):
+    """Return the stripped text of every cell wrapper under `parentClass` in `table`."""
+    xp = f"//div[@class='{parentClass}']//div[{_class_check('pivotTableCellWrap')}]"
+    elements = await table.Jx(xp)
+    func = "(el) => el.textContent"
+    return [(await page.evaluate(func, e)).strip() for e in elements]
+
+
+def _class_check(cls):
+    """Build an XPath predicate matching `cls` as a whole word in `@class`."""
+    return f"contains(concat(' ',normalize-space(@class),' '),' {cls} ')"
diff --git a/src/cmdc_tools/datasets/official/__init__.py b/src/cmdc_tools/datasets/official/__init__.py
index 6cf382d..d51b2fe 100644
--- a/src/cmdc_tools/datasets/official/__init__.py
+++ b/src/cmdc_tools/datasets/official/__init__.py
@@ -24,6 +24,7 @@
 from .NE import Nebraska
 from .NJ import NewJersey
 from .NM import NewMexico
+from .NV import NevadaCounty, NevadaFips
 from .NY import NewYork
 from .OK import OKTulsa
 from .PA import Pennsylvania