-
Notifications
You must be signed in to change notification settings - Fork 68
/
mimic4_preprocess_util.py
75 lines (49 loc) · 2.59 KB
/
mimic4_preprocess_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import csv
import numpy as np
import os
import pandas as pd
import sys
def dataframe_from_csv(path, compression='gzip', header=0, index_col=0):
    """Read a CSV file into a DataFrame with ``pandas.read_csv``.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
        compression: Compression scheme of the file (gzip by default).
        header: Row number used for the column names (first row by default).
        index_col: Column used as the index (first column by default).

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    read_options = {
        'compression': compression,
        'header': header,
        'index_col': index_col,
    }
    return pd.read_csv(path, **read_options)
def read_patients_table(mimic4_path):
    """Load the patients table and add a derived year-of-birth column.

    Reads ``core/patients.csv.gz`` below ``mimic4_path``, keeps the columns
    ``subject_id``, ``gender``, ``dod``, ``anchor_age`` and ``anchor_year``,
    adds ``yob`` computed as ``anchor_year - anchor_age`` and converts
    ``dod`` with ``pd.to_datetime``.

    Args:
        mimic4_path: Directory that contains the ``core`` folder.

    Returns:
        A DataFrame holding the five selected columns plus ``yob``.
    """
    csv_path = os.path.join(mimic4_path, 'core/patients.csv.gz')
    wanted = ['subject_id', 'gender', 'dod', 'anchor_age', 'anchor_year']
    # dataframe_from_csv turns the first CSV column into the index by
    # default; reset_index() moves it back into a regular column so that it
    # can be selected by name.
    patients = dataframe_from_csv(csv_path).reset_index()[wanted]
    patients['yob'] = patients['anchor_year'] - patients['anchor_age']
    patients['dod'] = pd.to_datetime(patients['dod'])
    return patients
def read_admissions_table(mimic4_path):
    """Load the admissions table with its timestamp columns parsed.

    Reads ``core/admissions.csv.gz`` below ``mimic4_path``, keeps the
    columns ``subject_id``, ``hadm_id``, ``admittime``, ``dischtime``,
    ``deathtime`` and ``ethnicity``, and converts the three time columns
    with ``pd.to_datetime``.

    Args:
        mimic4_path: Directory that contains the ``core`` folder.

    Returns:
        A DataFrame holding the six selected columns.
    """
    csv_path = os.path.join(mimic4_path, 'core/admissions.csv.gz')
    kept = ['subject_id', 'hadm_id', 'admittime', 'dischtime', 'deathtime', 'ethnicity']
    # The first CSV column is read as the index by dataframe_from_csv's
    # default; reset_index() restores it as a column before selecting.
    admissions = dataframe_from_csv(csv_path).reset_index()[kept]
    for stamp in ('admittime', 'dischtime', 'deathtime'):
        admissions[stamp] = pd.to_datetime(admissions[stamp])
    return admissions
def read_icustays_table(mimic4_path):
    """Load the ICU stays table with ``intime``/``outtime`` parsed.

    Reads ``icu/icustays.csv.gz`` below ``mimic4_path`` and converts the
    ``intime`` and ``outtime`` columns with ``pd.to_datetime``.

    Args:
        mimic4_path: Directory that contains the ``icu`` folder.

    Returns:
        The loaded DataFrame.
    """
    # NOTE(review): unlike the patients/admissions readers, no reset_index()
    # is applied here, so the first CSV column stays as the index
    # (dataframe_from_csv defaults to index_col=0) -- confirm callers expect
    # that.
    csv_path = os.path.join(mimic4_path, 'icu/icustays.csv.gz')
    stays = dataframe_from_csv(csv_path)
    unit_entry = pd.to_datetime(stays.intime)
    unit_exit = pd.to_datetime(stays.outtime)
    stays.intime = unit_entry
    stays.outtime = unit_exit
    return stays
def clean_stays(stays):
    """Restrict an ICU stays frame to a fixed set of columns.

    Args:
        stays: DataFrame containing at least the columns listed below.

    Returns:
        A DataFrame with the columns ``subject_id``, ``hadm_id``,
        ``stay_id``, ``first_careunit``, ``last_careunit``, ``intime``,
        ``outtime`` and ``los``, in that order.
    """
    retained = [
        'subject_id',
        'hadm_id',
        'stay_id',
        'first_careunit',
        'last_careunit',
        'intime',
        'outtime',
        'los',
    ]
    return stays[retained]
def merge_on_subject(table1, table2):
    """Inner-join two tables on ``subject_id``.

    Args:
        table1: Left DataFrame.
        table2: Right DataFrame.

    Returns:
        The result of ``table1.merge(table2, how='inner', ...)`` keyed on
        ``subject_id`` on both sides.
    """
    join_keys = ['subject_id']
    return table1.merge(
        table2,
        how='inner',
        left_on=join_keys,
        right_on=join_keys,
    )
def merge_on_subject_admission(table1, table2):
    """Inner-join two tables on the pair ``subject_id`` and ``hadm_id``.

    Args:
        table1: Left DataFrame.
        table2: Right DataFrame.

    Returns:
        The result of ``table1.merge(table2, how='inner', ...)`` keyed on
        ``subject_id`` and ``hadm_id`` on both sides.
    """
    join_keys = ['subject_id', 'hadm_id']
    return table1.merge(
        table2,
        how='inner',
        left_on=join_keys,
        right_on=join_keys,
    )
def add_age_to_icustays(stays):
    """Add an ``age`` column to ``stays`` in place.

    ``age`` is the calendar year of ``intime`` minus ``yob``. Any negative
    result is overwritten with 90.

    Args:
        stays: DataFrame with a datetime ``intime`` column and a ``yob``
            column.

    Returns:
        The same ``stays`` object, now carrying the ``age`` column.
    """
    year_of_entry = stays['intime'].dt.year
    stays['age'] = (year_of_entry - stays['yob'])
    is_negative = stays.age < 0
    stays.loc[is_negative, 'age'] = 90
    return stays
def add_inhospital_mortality_to_icustays(stays):
    """Flag rows whose death falls inside the hospital admission window.

    A row is flagged when either ``dod`` or ``deathtime`` is non-null and
    lies within ``[admittime, dischtime]`` (both ends inclusive). The 0/1
    flag is written in place to ``mortality`` and copied to
    ``mortality_inhospital``.

    Args:
        stays: DataFrame with ``dod``, ``deathtime``, ``admittime`` and
            ``dischtime`` columns.

    Returns:
        The same ``stays`` object with the two flag columns set.
    """
    dod = stays.dod
    admitted = stays.admittime
    discharged = stays.dischtime
    died_per_dod = dod.notnull() & ((admitted <= dod) & (discharged >= dod))

    deathtime = stays.deathtime
    died_per_deathtime = deathtime.notnull() & (
        (admitted <= deathtime) & (discharged >= deathtime)
    )

    died_in_hospital = died_per_dod | died_per_deathtime
    stays['mortality'] = died_in_hospital.astype(int)
    stays['mortality_inhospital'] = stays['mortality']
    return stays
def add_inunit_mortality_to_icustays(stays):
    """Flag rows whose death falls inside the ICU stay window.

    A row is flagged when either ``dod`` or ``deathtime`` is non-null and
    lies within ``[intime, outtime]`` (both ends inclusive). The 0/1 flag is
    written in place to ``mortality_inunit``.

    Args:
        stays: DataFrame with ``dod``, ``deathtime``, ``intime`` and
            ``outtime`` columns.

    Returns:
        The same ``stays`` object with ``mortality_inunit`` set.
    """
    dod = stays.dod
    entered = stays.intime
    left = stays.outtime
    died_per_dod = dod.notnull() & ((entered <= dod) & (left >= dod))

    deathtime = stays.deathtime
    died_per_deathtime = deathtime.notnull() & (
        (entered <= deathtime) & (left >= deathtime)
    )

    died_in_unit = died_per_dod | died_per_deathtime
    stays['mortality_inunit'] = died_in_unit.astype(int)
    return stays