forked from pjungwir/aggs_for_vecs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvec_without_outliers.c
162 lines (144 loc) · 5.62 KB
/
vec_without_outliers.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/*
 * Forward declaration of the SQL-callable entry point, followed by the
 * macro that registers it under PostgreSQL's version-1 calling convention.
 */
Datum vec_without_outliers(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(vec_without_outliers);
/**
* Removes outliers from an array.
*
* This function takes an array of n elements
* plus two n-element filter arrays,
* and it returns an n-element array,
* where each element is either the value from the input array,
* or NULL if the input value exceeds the filters.
* The first filter array contains minimum values (or NULL for unbounded),
* and the second filter array contains maximum values (or NULL for unbounded).
* If the input value is within the min and max (inclusive),
* it is included in the return value.
*
* by Paul A. Jungwirth
*/
Datum
vec_without_outliers(PG_FUNCTION_ARGS)
{
Oid elemTypeId;
int16 elemTypeWidth;
bool elemTypeByValue;
char elemTypeAlignmentCode;
int valsLength;
ArrayType *valsArray, *minsArray, *maxesArray, *retArray;
Datum *valsContent, *minsContent, *maxesContent, *retContent;
bool *valsNulls, *minsNulls, *maxesNulls, *retNulls;
int i;
int dims[1];
int lbs[1];
if (PG_ARGISNULL(0)) {
PG_RETURN_NULL();
}
valsArray = PG_GETARG_ARRAYTYPE_P(0);
if (PG_ARGISNULL(1)) {
minsArray = NULL;
} else {
minsArray = PG_GETARG_ARRAYTYPE_P(1);
}
if (PG_ARGISNULL(2)) {
maxesArray = NULL;
} else {
maxesArray = PG_GETARG_ARRAYTYPE_P(2);
}
if (ARR_NDIM(valsArray) == 0 || (minsArray && ARR_NDIM(minsArray) == 0) || (maxesArray && ARR_NDIM(maxesArray) == 0)) {
PG_RETURN_NULL();
}
if (ARR_NDIM(valsArray) > 1 || (minsArray && ARR_NDIM(minsArray) > 1) || (maxesArray && ARR_NDIM(maxesArray) > 1)) {
ereport(ERROR, (errmsg("vec_without_outliers: one-dimensional arrays are required")));
}
elemTypeId = ARR_ELEMTYPE(valsArray);
if (elemTypeId != INT2OID &&
elemTypeId != INT4OID &&
elemTypeId != INT8OID &&
elemTypeId != FLOAT4OID &&
elemTypeId != FLOAT8OID) {
ereport(ERROR, (errmsg("vec_without_outliers input must be array of SMALLINT, INTEGER, BIGINT, REAL, or DOUBLE PRECISION")));
}
if (minsArray && elemTypeId != ARR_ELEMTYPE(minsArray)) {
ereport(ERROR, (errmsg("vec_without_outliers mins array must be the same type as input array")));
}
if (maxesArray && elemTypeId != ARR_ELEMTYPE(maxesArray)) {
ereport(ERROR, (errmsg("vec_without_outliers maxes array must be the same type as input array")));
}
valsLength = (ARR_DIMS(valsArray))[0];
if (minsArray && valsLength != (ARR_DIMS(minsArray))[0]) {
ereport(ERROR, (errmsg("vec_without_outliers mins array must be the same length as input array")));
}
if (maxesArray && valsLength != (ARR_DIMS(maxesArray))[0]) {
ereport(ERROR, (errmsg("vec_without_outliers maxes array must be the same length as input array")));
}
get_typlenbyvalalign(elemTypeId, &elemTypeWidth, &elemTypeByValue, &elemTypeAlignmentCode);
deconstruct_array(valsArray, elemTypeId, elemTypeWidth, elemTypeByValue, elemTypeAlignmentCode,
&valsContent, &valsNulls, &valsLength);
if (minsArray) {
deconstruct_array(minsArray, elemTypeId, elemTypeWidth, elemTypeByValue, elemTypeAlignmentCode,
&minsContent, &minsNulls, &valsLength);
}
if (maxesArray) {
deconstruct_array(maxesArray, elemTypeId, elemTypeWidth, elemTypeByValue, elemTypeAlignmentCode,
&maxesContent, &maxesNulls, &valsLength);
}
retContent = palloc0(sizeof(Datum) * valsLength);
retNulls = palloc0(sizeof(bool) * valsLength);
for (i = 0; i < valsLength; i++) {
if (valsNulls[i]) {
retNulls[i] = true;
continue;
}
switch(elemTypeId) {
case INT2OID:
if ((minsArray && !minsNulls[i] && DatumGetInt16(valsContent[i]) < DatumGetInt16(minsContent[i])) ||
(maxesArray && !maxesNulls[i] && DatumGetInt16(valsContent[i]) > DatumGetInt16(maxesContent[i]))) {
retNulls[i] = true;
} else {
retNulls[i] = false;
retContent[i] = valsContent[i];
}
break;
case INT4OID:
if ((minsArray && !minsNulls[i] && DatumGetInt32(valsContent[i]) < DatumGetInt32(minsContent[i])) ||
(maxesArray && !maxesNulls[i] && DatumGetInt32(valsContent[i]) > DatumGetInt32(maxesContent[i]))) {
retNulls[i] = true;
} else {
retNulls[i] = false;
retContent[i] = valsContent[i];
}
break;
case INT8OID:
if ((minsArray && !minsNulls[i] && DatumGetInt32(valsContent[i]) < DatumGetInt32(minsContent[i])) ||
(maxesArray && !maxesNulls[i] && DatumGetInt32(valsContent[i]) > DatumGetInt32(maxesContent[i]))) {
retNulls[i] = true;
} else {
retNulls[i] = false;
retContent[i] = valsContent[i];
}
break;
case FLOAT4OID:
if ((minsArray && !minsNulls[i] && DatumGetFloat4(valsContent[i]) < DatumGetFloat4(minsContent[i])) ||
(maxesArray && !maxesNulls[i] && DatumGetFloat4(valsContent[i]) > DatumGetFloat4(maxesContent[i]))) {
retNulls[i] = true;
} else {
retNulls[i] = false;
retContent[i] = valsContent[i];
}
break;
case FLOAT8OID:
if ((minsArray && !minsNulls[i] && DatumGetFloat8(valsContent[i]) < DatumGetFloat8(minsContent[i])) ||
(maxesArray && !maxesNulls[i] && DatumGetFloat8(valsContent[i]) > DatumGetFloat8(maxesContent[i]))) {
retNulls[i] = true;
} else {
retNulls[i] = false;
retContent[i] = valsContent[i];
}
break;
}
}
dims[0] = valsLength;
lbs[0] = 1;
retArray = construct_md_array(retContent, retNulls, 1, dims, lbs,
elemTypeId, elemTypeWidth, elemTypeByValue, elemTypeAlignmentCode);
PG_RETURN_ARRAYTYPE_P(retArray);
}