-
Notifications
You must be signed in to change notification settings - Fork 0
/
cleaningMissingAndNullValues.sql
54 lines (42 loc) · 1.44 KB
/
cleaningMissingAndNullValues.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
-- Data cleaning based on the generated EDA report.
-- 1) Column 'agent': the EDA report shows about 13.1% missing values, so the
--    column is dropped instead of being imputed.
-- First check how many rows have no agent and what share of the table that is.
-- nullif() guards against division by zero when the table is empty.
select
    count(*) filter (where agent is null) as missing_rows,
    round(100.0 * count(*) filter (where agent is null) / nullif(count(*), 0), 1) as missing_pct
from bookings;
-- The share of missing values is high enough that the column is dropped.
alter table bookings
    drop column agent;
-- 2) Column 'company': the EDA report shows about 94% missing values, so the
--    column carries almost no information and is dropped.
-- Check how many rows have no company and what share of the table that is.
-- nullif() guards against division by zero when the table is empty.
select
    count(*) filter (where company is null) as missing_rows,
    round(100.0 * count(*) filter (where company is null) / nullif(count(*), 0), 1) as missing_pct
from bookings;
-- Remove the column.
alter table bookings
    drop column company;
-- 3) Column 'children' contains both real NULLs and the literal string 'NA'.
-- Check how many rows fall under each value. count(*) is used rather than
-- count(children) because count(children) skips NULLs and would therefore
-- report 0 for the NULL group.
select
    children,
    count(*) as row_count
from bookings
group by children
order by children;
-- Only 4 values are missing, so rather than dropping the column we delete the
-- affected rows; removing 4 of 119390 rows will not materially affect the data.
delete from bookings
where children is null
    or children = 'NA';
-- Recheck the counts: no NULL or 'NA' group should remain.
select
    children,
    count(*) as row_count
from bookings
group by children
order by children;
-- Only numeric values are left, so the column can be converted to integer.
alter table bookings
    alter column children type int using children::integer;
-- 4) Column 'country' has a few rows with NULL values; those rows are removed.
-- First check the number of rows with a NULL country.
select count(*) as missing_rows
from bookings
where country is null;
-- Remove the rows with a NULL country.
delete from bookings
where country is null;