# Libraries
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib .pyplot as plt
from dateutil .parser import parse
from pandas import Series
# Data source: contralacorrupcion.mx (file "actas-defuncion-covid-19-cdmx1.csv")
# Load the contralacorrupcion.mx records; the FECHA column is parsed as dates
# and used as the row index.
covid19 = pd.read_csv(
    "actas-defuncion-covid-19-cdmx1.csv",
    index_col="FECHA",
    parse_dates=["FECHA"],
)
covid19.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
EDAD
SEMANA
MES
RAZON
ACTA
FECHA
2020-03-18
41 AÑOS
12
3
CHOQUE SEPTICO, NEUMONIA POR COVID 19 POR SARS...
7129
2020-03-23
61 AÑOS
13
3
INSUFICIENCIA RESPIRATORIA AGUDA, NEUMONIA VIR...
4459
2020-03-26
60 AÑOS
13
3
SINDROME DE INSUFICIENCIA RESPIRATORIA AGUDA, ...
4591
2020-03-26
37 AÑOS
13
3
NEUMONIA POR CORONAVIRUS
7879
2020-03-26
63 AÑOS
13
3
CERVICOVAGINITIS PURULENTA, CARCINOMA EPIDERMO...
7829
# Discard the listed data columns so that only the FECHA index is left
# (one row per record in the source file).
number_by_date = covid19.drop(columns=['EDAD', 'SEMANA', 'MES', 'RAZON', 'ACTA'])
number_by_date
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
FECHA
2020-03-18
2020-03-23
2020-03-26
2020-03-26
2020-03-26
...
2020-05-12
2020-05-12
2020-05-12
2020-05-12
2020-05-12
4579 rows × 0 columns
# Add a constant, non-null MUERTES column; its value is irrelevant, it only
# gives the later per-date count() something to count.
number_by_date = number_by_date.assign(MUERTES=0)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
MUERTES
FECHA
2020-03-18
0
2020-03-23
0
2020-03-26
0
2020-03-26
0
2020-03-26
0
...
...
2020-05-12
0
2020-05-12
0
2020-05-12
0
2020-05-12
0
2020-05-12
0
4579 rows × 1 columns
# Remove every row whose date matches one of the first two index labels, so
# the series starts at the third distinct leading date.
leading_dates = number_by_date.index[:2]
number_by_date = number_by_date.drop(leading_dates)

# Collapse to one row per date; MUERTES becomes the number of records that day.
number_by_date = number_by_date.groupby(level='FECHA').count()
number_by_date.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
MUERTES
FECHA
2020-03-26
3
2020-03-27
2
2020-03-28
1
2020-03-29
2
2020-03-30
3
# Load the official dataset (latin1-encoded CSV).
# NOTE(review): FECHA_DEF contains '9999-99-99' placeholders (visible in the
# frames displayed further down), so parse_dates presumably leaves that column
# as plain strings; it is converted explicitly later on.
covid19_oficial = pd.read_csv(
    "200521COVID19MEXICO.csv",
    sep=",",
    encoding='latin1',
    parse_dates=["FECHA_DEF"],
)
covid19_oficial.head(10)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
FECHA_ACTUALIZACION
ID_REGISTRO
ORIGEN
SECTOR
ENTIDAD_UM
SEXO
ENTIDAD_NAC
ENTIDAD_RES
MUNICIPIO_RES
TIPO_PACIENTE
...
CARDIOVASCULAR
OBESIDAD
RENAL_CRONICA
TABAQUISMO
OTRO_CASO
RESULTADO
MIGRANTE
PAIS_NACIONALIDAD
PAIS_ORIGEN
UCI
0
2020-05-21
11e989
2
3
27
2
27
27
4
2
...
2
2
2
2
1
1
99
México
99
2
1
2020-05-21
1aad65
2
4
19
2
5
5
18
2
...
2
2
2
2
99
1
99
México
99
1
2
2020-05-21
04f631
2
4
14
1
14
14
67
1
...
2
2
2
2
99
1
99
México
99
97
3
2020-05-21
02556b
2
4
15
1
15
15
110
2
...
2
2
2
2
99
1
99
México
99
2
4
2020-05-21
0356d5
2
4
9
1
9
9
5
2
...
2
2
2
1
99
1
99
México
99
2
5
2020-05-21
1d2dfb
2
4
25
2
14
25
1
1
...
2
2
2
2
99
1
99
México
99
97
6
2020-05-21
1b3e2b
2
4
9
2
7
9
2
1
...
2
2
2
2
99
1
99
México
99
97
7
2020-05-21
0c0eef
2
4
21
1
21
21
114
2
...
2
1
2
2
99
1
99
México
99
2
8
2020-05-21
043ea2
2
4
27
2
27
27
4
1
...
2
2
2
2
99
1
99
México
99
97
9
2020-05-21
0bd39a
2
4
8
1
8
8
17
1
...
2
2
2
2
99
1
99
México
99
97
10 rows × 35 columns
# Index the official records by date of death and keep only the state of
# residence column.
temp = covid19_oficial.set_index('FECHA_DEF')[['ENTIDAD_RES']]
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
ENTIDAD_RES
FECHA_DEF
2020-04-27
27
2020-04-03
5
9999-99-99
14
2020-04-20
15
9999-99-99
9
...
...
2020-05-14
15
9999-99-99
26
9999-99-99
26
9999-99-99
15
9999-99-99
9
201838 rows × 1 columns
# Keep only residents of CDMX: number nine corresponds to CDMX as stated in
# the data dictionary.
# .copy() makes from_cdmx an independent frame, so the in-place edits applied
# to it afterwards no longer raise pandas' SettingWithCopyWarning.
from_cdmx = temp.loc[temp['ENTIDAD_RES'] == 9].copy()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
ENTIDAD_RES
FECHA_DEF
9999-99-99
9
9999-99-99
9
9999-99-99
9
9999-99-99
9
2020-03-22
9
...
...
2020-05-05
9
9999-99-99
9
9999-99-99
9
9999-99-99
9
9999-99-99
9
46594 rows × 1 columns
# Drop the (now constant) ENTIDAD_RES column, leaving only the FECHA_DEF index.
# The result is reassigned instead of using inplace=True: from_cdmx was
# obtained by slicing another frame, and mutating such a slice in place is
# what triggers pandas' SettingWithCopyWarning.
# errors='ignore' keeps the statement safe to re-run once the column is gone.
from_cdmx = from_cdmx.drop(columns=['ENTIDAD_RES'], errors='ignore')
C:\Users\User\Anaconda3\lib\site-packages\pandas\core\frame.py:3997: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
errors=errors,
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
FECHA_DEF
9999-99-99
9999-99-99
9999-99-99
9999-99-99
2020-03-22
...
2020-05-05
9999-99-99
9999-99-99
9999-99-99
9999-99-99
46594 rows × 0 columns
C:\Users\User\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
"""Entry point for launching an IPython kernel.
# Count the records per FECHA_DEF value and expose the result as MUERTES.
# size() counts rows per group regardless of which columns exist, so this
# works even when from_cdmx has no data columns left (count() would then
# return an empty, column-less frame). When a non-null MUERTES column is
# already present, the numbers are identical to count().
from_cdmx = from_cdmx.groupby('FECHA_DEF').size().to_frame('MUERTES')
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
MUERTES
FECHA_DEF
2020-03-16
1
2020-03-22
1
2020-03-23
1
2020-03-25
1
2020-03-26
3
# Remove the '9999-99-99' placeholder date by value rather than by position:
# dropping "the last row" would silently delete a real date whenever the
# placeholder is missing or does not sort last.
# NOTE(review): '9999-99-99' presumably marks records without a date of
# death -- confirm against the data dictionary.
from_cdmx = from_cdmx[from_cdmx.index != '9999-99-99']

# Restrict to the date range covered by the other dataset (inclusive bounds).
in_window = (from_cdmx.index >= '2020-03-26') & (from_cdmx.index <= '2020-05-12')
from_cdmx = from_cdmx.loc[in_window]

# Sanity check: show the first and last date that were kept.
print(from_cdmx.head(1))
print(from_cdmx.tail(1))
MUERTES
FECHA_DEF
2020-03-26 3
MUERTES
FECHA_DEF
2020-05-12 60
# Turn the FECHA_DEF index into real datetimes so that it can be sliced by
# date strings and plotted on a time axis.
from_cdmx.index = pd.to_datetime(from_cdmx.index)
# Plot both daily death series on a single 14x7 figure for comparison.
sns.set(rc={'figure.figsize': (14, 7)})
start, end = '2020-03', '2020-05'
fig, ax = plt.subplots()

# Both lines share the same marker/line styling; only data and label differ.
line_style = dict(marker='o', markersize=8, linestyle='-')
for frame, source in ((from_cdmx, 'gob.mx'),
                      (number_by_date, 'contralacorrupcion.mx')):
    ax.plot(frame.loc[start:end], label=source, **line_style)

ax.set(
    xlabel='Date',
    ylabel='Number of Deaths',
    title='COVID-19 death comparison in CDMX',
)
ax.legend()
<matplotlib.legend.Legend at 0x1946b7e5a58>