-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSQL_DATA_ANALYSIS.sql
114 lines (98 loc) · 2.24 KB
/
SQL_DATA_ANALYSIS.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
CREATE DATABASE sample_log_db;
use sample_log_db;
-- 1. Log Data Aggregation and Summarization: Aggregate log data by date and log level
SELECT
date,
log_level,
COUNT(*) AS log_count,
GROUP_CONCAT(message) AS messages
FROM
logs
GROUP BY
date, log_level
LIMIT 0, 1000;
-- 2. Error Analysis and Identification: Filter and aggregate error entries to identify common issues or patterns
SELECT
message AS error_message,
COUNT(*) AS occurrence_count
FROM
logs
WHERE
log_level = 'error'
GROUP BY
message
ORDER BY
occurrence_count DESC;
-- 3. Log Frequency Analysis: Count occurrences of log entries by log level
SELECT
log_level,
COUNT(*) AS occurrence_count
FROM
logs
GROUP BY
log_level;
-- 4. Database Performance Monitoring: Extract performance-related metrics from logs for error and notice logs
SELECT
date,
log_level,
COUNT(*) AS log_count
FROM
logs
where
log_level = 'error'
GROUP BY
date;
SELECT
date,
log_level,
COUNT(*) AS log_count
FROM
logs
where
log_level = 'notice'
GROUP BY
date;
-- 5. User Behavior Analysis: Filter and analyze logs related to user activities or interactions
SELECT
message AS activity,
COUNT(*) AS activity_count
FROM
logs
WHERE
log_level = 'notice'
GROUP BY
message;
-- 6. Data Quality and Validation - Check for inconsistencies or missing values in the log data
SELECT
id,
CASE
WHEN message IS NULL OR message = '' THEN 'Missing message'
WHEN date IS NULL THEN 'Missing date'
WHEN time IS NULL THEN 'Missing time'
WHEN log_level IS NULL OR log_level NOT IN ('notice', 'error') THEN 'Invalid log_level'
ELSE 'Valid'
END AS data_quality_issue
FROM
logs
WHERE
message IS NULL
OR message = ''
OR date IS NULL
OR time IS NULL
OR log_level IS NULL
OR log_level NOT IN ('notice', 'error')
UNION ALL
SELECT
NULL AS id,
'No inconsistencies found' AS data_quality_issue
WHERE NOT EXISTS (
SELECT 1
FROM logs
WHERE
message IS NULL
OR message = ''
OR date IS NULL
OR time IS NULL
OR log_level IS NULL
OR log_level NOT IN ('notice', 'error')
);