-
Notifications
You must be signed in to change notification settings - Fork 3
/
histogram
executable file
·106 lines (89 loc) · 2.64 KB
/
histogram
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/bin/bash
print_help()
{
cat <<EOF
Usage:
$ExecName <size_query>
Generates a histogram of the results of the SQL query, <size_query>. The query
should return a single column of sizes in bytes. The generated histogram will be
base-2 logarithmically binned.
EOF
}
set -e
readonly ExecName=$(basename "$0")
main()
{
readonly SizeQuery="$*"
readonly HIST_WID=60
if [ -z "$SizeQuery" ]
then
print_help >&2
exit 1
fi
export PGHOST
export PGPORT=5432
psql --quiet ICAT icat_reader <<EOF
\\pset footer off
BEGIN;
CREATE TEMPORARY TABLE data(val) AS ($SizeQuery);
CREATE INDEX idx_data ON data(val);
WITH
units(unit, lb, ub) AS (
SELECT ' B', 0, 2 ^ 10
UNION SELECT 'kiB', 2 ^ 10, 2 ^ 20
UNION SELECT 'MiB', 2 ^ 20, 2 ^ 30
UNION SELECT 'GiB', 2 ^ 30, 2 ^ 40
UNION SELECT 'TiB', 2 ^ 40, 2 ^ 50
UNION SELECT 'PiB', 2 ^ 50, 2 ^ 60
UNION SELECT 'EiB', 2 ^ 60, NULL ), -- BIGINT DOESN'T SUPPORT LARGER than 2 ^ 62.
log_bounds(lb, ub) AS (
SELECT
CASE
WHEN COUNT(*) = 0 THEN NULL
ELSE CAST(LEAST(FLOOR(LOG(2, GREATEST(MIN(val), 1))), 62) AS INT)
END,
CASE
WHEN COUNT(*) = 0 THEN NULL
ELSE CAST(LEAST(CEIL(LOG(2, MAX(val) + 1)), 62) AS INT)
END
FROM data),
log_seq(el) AS (SELECT GENERATE_SERIES(lb, ub) FROM log_bounds),
bins(lb, ub) AS (
SELECT 0, 2 ^ MIN(el) FROM log_seq
UNION SELECT
2 ^ el,
CASE WHEN el < (SELECT MAX(el) FROM log_seq) THEN 2 ^ (el + 1) ELSE NULL END
FROM log_seq),
binned_data(lb, ub, cnt) AS (
SELECT b.lb, b.ub, COUNT(d.*)
FROM bins AS b
LEFT JOIN data AS d ON d.val BETWEEN b.lb AND b.ub - 1 OR (d.val >= b.lb AND b.ub IS NULL)
GROUP BY b.lb, b.ub)
SELECT
CASE
WHEN b.lb = 0 AND b.ub IS NULL THEN u.unit || ' [0, ∞)'
WHEN b.lb = 0
THEN (
SELECT unit || ' [0, ' || b.ub / GREATEST(lb, 1) || ')'
FROM units
WHERE b.ub BETWEEN lb AND ub - 1)
WHEN b.lb = (SELECT MAX(lb) FROM bins)
THEN u.unit || ' [' || b.lb / GREATEST(u.lb, 1) || ', ∞)'
ELSE u.unit || ' [' || b.lb / GREATEST(u.lb, 1) || ', ' || b.ub / GREATEST(u.lb, 1) || ')'
END AS "Range",
b.cnt AS "Count",
CASE
WHEN b.cnt = 0 THEN ''
ELSE
REPEAT('*',
CAST(CAST($HIST_WID AS REAL) * b.cnt
/ (SELECT GREATEST(MAX(cnt), $HIST_WID) FROM binned_data)
AS INT))
END AS "Histogram"
FROM binned_data AS b
JOIN units AS u ON b.lb BETWEEN u.lb AND u.ub - 1 OR (b.lb >= u.lb AND u.ub IS NULL)
ORDER BY b.lb;
ROLLBACK;
EOF
}
main "$@"