Skip to content

Latest commit

 

History

History
1067 lines (915 loc) · 17 KB

README.md

File metadata and controls

1067 lines (915 loc) · 17 KB
# Libraries

import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from dateutil.parser import parse
from pandas import Series

Data from contralacorrupcion.mx

# Load the contralacorrupcion.mx dataset (death certificates, going by the
# file name), parsing FECHA as dates and using it as the row index.
covid19 = pd.read_csv(
    "actas-defuncion-covid-19-cdmx1.csv",
    index_col="FECHA",
    parse_dates=["FECHA"],
)
covid19.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
EDAD SEMANA MES RAZON ACTA
FECHA
2020-03-18 41 AÑOS 12 3 CHOQUE SEPTICO, NEUMONIA POR COVID 19 POR SARS... 7129
2020-03-23 61 AÑOS 13 3 INSUFICIENCIA RESPIRATORIA AGUDA, NEUMONIA VIR... 4459
2020-03-26 60 AÑOS 13 3 SINDROME DE INSUFICIENCIA RESPIRATORIA AGUDA, ... 4591
2020-03-26 37 AÑOS 13 3 NEUMONIA POR CORONAVIRUS 7879
2020-03-26 63 AÑOS 13 3 CERVICOVAGINITIS PURULENTA, CARCINOMA EPIDERMO... 7829
# Discard every data column so that only the FECHA index remains; each row
# then simply marks one record on that date.
number_by_date = covid19.drop(
    columns=['EDAD', 'SEMANA', 'MES', 'RAZON', 'ACTA'],
)
number_by_date
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
FECHA
2020-03-18
2020-03-23
2020-03-26
2020-03-26
2020-03-26
...
2020-05-12
2020-05-12
2020-05-12
2020-05-12
2020-05-12

4579 rows × 0 columns

# Add a placeholder column so the later groupby(...).count() has something to
# count; the stored value (0) is irrelevant, only the number of rows matters.
number_by_date = number_by_date.assign(MUERTES=0)
number_by_date
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
MUERTES
FECHA
2020-03-18 0
2020-03-23 0
2020-03-26 0
2020-03-26 0
2020-03-26 0
... ...
2020-05-12 0
2020-05-12 0
2020-05-12 0
2020-05-12 0
2020-05-12 0

4579 rows × 1 columns

# Start the series on 2020-03-26, the same first day used for the gob.mx
# window, by filtering on the date itself. Dropping the labels of the first
# two rows relied on those rows happening to be the only records before that
# day, i.e. on the row order of the CSV.
number_by_date = number_by_date.loc[
    number_by_date.index >= pd.Timestamp('2020-03-26')
]
# One row per record, so counting rows per date gives the daily totals.
number_by_date = number_by_date.groupby('FECHA').count()
number_by_date.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
MUERTES
FECHA
2020-03-26 3
2020-03-27 2
2020-03-28 1
2020-03-29 2
2020-03-30 3

Data from gob.mx

# Load the gob.mx case file (Latin-1 encoded).
# NOTE(review): the output further down shows 9999-99-99 placeholders in
# FECHA_DEF, so parse_dates presumably leaves that column as text here;
# confirm before relying on it being datetime.
covid19_oficial = pd.read_csv(
    "200521COVID19MEXICO.csv",
    sep=",",
    encoding='latin1',
    parse_dates=["FECHA_DEF"],
)
covid19_oficial.head(10)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
FECHA_ACTUALIZACION ID_REGISTRO ORIGEN SECTOR ENTIDAD_UM SEXO ENTIDAD_NAC ENTIDAD_RES MUNICIPIO_RES TIPO_PACIENTE ... CARDIOVASCULAR OBESIDAD RENAL_CRONICA TABAQUISMO OTRO_CASO RESULTADO MIGRANTE PAIS_NACIONALIDAD PAIS_ORIGEN UCI
0 2020-05-21 11e989 2 3 27 2 27 27 4 2 ... 2 2 2 2 1 1 99 México 99 2
1 2020-05-21 1aad65 2 4 19 2 5 5 18 2 ... 2 2 2 2 99 1 99 México 99 1
2 2020-05-21 04f631 2 4 14 1 14 14 67 1 ... 2 2 2 2 99 1 99 México 99 97
3 2020-05-21 02556b 2 4 15 1 15 15 110 2 ... 2 2 2 2 99 1 99 México 99 2
4 2020-05-21 0356d5 2 4 9 1 9 9 5 2 ... 2 2 2 1 99 1 99 México 99 2
5 2020-05-21 1d2dfb 2 4 25 2 14 25 1 1 ... 2 2 2 2 99 1 99 México 99 97
6 2020-05-21 1b3e2b 2 4 9 2 7 9 2 1 ... 2 2 2 2 99 1 99 México 99 97
7 2020-05-21 0c0eef 2 4 21 1 21 21 114 2 ... 2 1 2 2 99 1 99 México 99 2
8 2020-05-21 043ea2 2 4 27 2 27 27 4 1 ... 2 2 2 2 99 1 99 México 99 97
9 2020-05-21 0bd39a 2 4 8 1 8 8 17 1 ... 2 2 2 2 99 1 99 México 99 97

10 rows × 35 columns

# Keep just ENTIDAD_RES, indexed by FECHA_DEF.
temp = covid19_oficial.loc[:, ['FECHA_DEF', 'ENTIDAD_RES']].set_index('FECHA_DEF')
temp
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
ENTIDAD_RES
FECHA_DEF
2020-04-27 27
2020-04-03 5
9999-99-99 14
2020-04-20 15
9999-99-99 9
... ...
2020-05-14 15
9999-99-99 26
9999-99-99 26
9999-99-99 15
9999-99-99 9

201838 rows × 1 columns

# ENTIDAD_RES == 9 is CDMX according to the data dictionary. Take an explicit
# copy so the column changes made to from_cdmx afterwards act on an
# independent frame rather than on a slice of temp, which is what makes
# pandas raise SettingWithCopyWarning.
from_cdmx = temp.loc[temp['ENTIDAD_RES'] == 9].copy()
from_cdmx
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
ENTIDAD_RES
FECHA_DEF
9999-99-99 9
9999-99-99 9
9999-99-99 9
9999-99-99 9
2020-03-22 9
... ...
2020-05-05 9
9999-99-99 9
9999-99-99 9
9999-99-99 9
9999-99-99 9

46594 rows × 1 columns

# Reassign rather than drop in place, so this never mutates a frame that
# pandas may still track as a slice of temp (the source of
# SettingWithCopyWarning). errors='ignore' keeps the step harmless if the
# column is already gone.
from_cdmx = from_cdmx.drop(columns=['ENTIDAD_RES'], errors='ignore')
C:\Users\User\Anaconda3\lib\site-packages\pandas\core\frame.py:3997: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
# Display the frame: only the FECHA_DEF index is left, with no data columns.
from_cdmx
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
FECHA_DEF
9999-99-99
9999-99-99
9999-99-99
9999-99-99
2020-03-22
...
2020-05-05
9999-99-99
9999-99-99
9999-99-99
9999-99-99

46594 rows × 0 columns

# Placeholder column for the groupby(...).count() that follows; only the row
# count matters, not the value. assign() returns a new frame, so nothing is
# written into a possible slice of temp (no SettingWithCopyWarning).
from_cdmx = from_cdmx.assign(MUERTES=0)
C:\Users\User\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
# Collapse to one row per FECHA_DEF value; MUERTES becomes the number of
# records carrying that value (the 9999-99-99 placeholder forms its own
# group and is removed afterwards).
from_cdmx = from_cdmx.groupby(level='FECHA_DEF').count()
from_cdmx.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
MUERTES
FECHA_DEF
2020-03-16 1
2020-03-22 1
2020-03-23 1
2020-03-25 1
2020-03-26 3
# Keep only 2020-03-26 .. 2020-05-12 by filtering on the index directly.
# The 9999-99-99 placeholder lies above the upper bound, so the same mask
# removes it; no positional tail(1) drop (which assumed the placeholder
# sorted last) and no reset_index/set_index round trip is needed.
from_cdmx = from_cdmx.loc[
    (from_cdmx.index >= '2020-03-26') & (from_cdmx.index <= '2020-05-12')
]
print(from_cdmx.head(1))
print(from_cdmx.tail(1))
            MUERTES
FECHA_DEF          
2020-03-26        3
            MUERTES
FECHA_DEF          
2020-05-12       60
# Turn the FECHA_DEF index into real datetimes so it can be sliced by date
# and plotted on a time axis; the index name is kept.
from_cdmx.index = pd.to_datetime(from_cdmx.index)

Visualization

# Plot both daily death counts on one axis for March-May 2020.
sns.set(rc={'figure.figsize': (14, 7)})
start, end = '2020-03', '2020-05'
fig, ax = plt.subplots()
# Official series first, then the death-certificate series, so the colour
# order stays the same.
for frame, source in (
    (from_cdmx, 'gob.mx'),
    (number_by_date, 'contralacorrupcion.mx'),
):
    ax.plot(
        frame.loc[start:end],
        marker='o',
        markersize=8,
        linestyle='-',
        label=source,
    )
ax.set(
    xlabel='Date',
    ylabel='Number of Deaths',
    title='COVID-19 death comparison in CDMX',
)
ax.legend()
<matplotlib.legend.Legend at 0x1946b7e5a58>

death comparison