Skip to content

Commit 72e9146

Browse files
committed
wip
1 parent 3a00241 commit 72e9146

File tree

7 files changed

+96
-149
lines changed

7 files changed

+96
-149
lines changed

x-pack/platform/packages/shared/kbn-slo-schema/src/rest_specs/routes/find_definition.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*/
77
import { toBooleanRt } from '@kbn/io-ts-utils/src/to_boolean_rt';
88
import * as t from 'io-ts';
9-
import { healthStatusSchema, sloDefinitionSchema, transformHealthSchema } from '../../schema';
9+
import { sloDefinitionSchema, transformHealthSchema } from '../../schema';
1010

1111
const findSloDefinitionsParamsSchema = t.partial({
1212
query: t.partial({
@@ -21,7 +21,7 @@ const findSloDefinitionsParamsSchema = t.partial({
2121

2222
const healthMetadataSchema = t.partial({
2323
health: t.type({
24-
overall: healthStatusSchema,
24+
isProblematic: t.boolean,
2525
rollup: transformHealthSchema,
2626
summary: transformHealthSchema,
2727
}),

x-pack/platform/packages/shared/kbn-slo-schema/src/rest_specs/routes/get_slo_health.ts

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77
import * as t from 'io-ts';
8-
import { healthStatusSchema, sloIdSchema, stateSchema, transformHealthSchema } from '../../schema';
8+
import { sloIdSchema, transformHealthSchema } from '../../schema';
99
import { allOrAnyString } from '../../schema/common';
1010

1111
const fetchSLOHealthResponseSchema = t.array(
@@ -14,11 +14,10 @@ const fetchSLOHealthResponseSchema = t.array(
1414
sloInstanceId: allOrAnyString,
1515
sloRevision: t.number,
1616
sloName: t.string,
17-
state: stateSchema,
1817
health: t.type({
19-
overall: transformHealthSchema,
20-
rollup: healthStatusSchema,
21-
summary: healthStatusSchema,
18+
isProblematic: t.boolean,
19+
rollup: transformHealthSchema,
20+
summary: transformHealthSchema,
2221
}),
2322
})
2423
);
@@ -33,4 +32,4 @@ type FetchSLOHealthResponse = t.OutputOf<typeof fetchSLOHealthResponseSchema>;
3332
type FetchSLOHealthParams = t.TypeOf<typeof fetchSLOHealthParamsSchema.props.body>;
3433

3534
export { fetchSLOHealthParamsSchema, fetchSLOHealthResponseSchema };
36-
export type { FetchSLOHealthResponse, FetchSLOHealthParams };
35+
export type { FetchSLOHealthParams, FetchSLOHealthResponse };

x-pack/platform/packages/shared/kbn-slo-schema/src/schema/health.ts

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,24 @@ import * as t from 'io-ts';
1414
* If types need to diverge, they should be split into separate files.
1515
*/
1616

17-
const healthStatusSchema = t.union([t.literal('healthy'), t.literal('unhealthy')]);
18-
19-
const transformHealthSchema = t.type({
20-
status: t.union([t.literal('healthy'), t.literal('unhealthy'), t.literal('missing')]),
21-
state: t.union([t.literal('stopped'), t.literal('started'), t.literal('unavailable')]),
22-
});
23-
24-
const stateSchema = t.union([
25-
t.literal('no_data'),
26-
t.literal('indexing'),
27-
t.literal('running'),
28-
t.literal('stale'),
17+
const transformHealthSchema = t.intersection([
18+
t.type({
19+
isProblematic: t.boolean,
20+
missing: t.boolean,
21+
status: t.union([t.literal('healthy'), t.literal('unhealthy'), t.literal('unavailable')]),
22+
state: t.union([
23+
t.literal('stopped'),
24+
t.literal('started'),
25+
t.literal('stopping'),
26+
t.literal('aborting'),
27+
t.literal('failed'),
28+
t.literal('indexing'),
29+
t.literal('unavailable'),
30+
]),
31+
}),
32+
t.partial({
33+
stateMatches: t.boolean,
34+
}),
2935
]);
3036

31-
export { transformHealthSchema, healthStatusSchema, stateSchema };
37+
export { transformHealthSchema };

x-pack/solutions/observability/plugins/slo/public/pages/slo_details/components/slo_health_callout.tsx

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ export function SloHealthCallout({ slo }: { slo: SLOWithSummaryResponse }) {
5757
}
5858

5959
const health = data[0].health;
60-
if (health.overall === 'healthy') {
60+
if (!health.isProblematic) {
6161
return null;
6262
}
6363

@@ -67,20 +67,7 @@ export function SloHealthCallout({ slo }: { slo: SLOWithSummaryResponse }) {
6767
const rollupUrl = getUrl(rollupTransformId);
6868
const summaryUrl = getUrl(summaryTransformId);
6969

70-
const rollup = {
71-
unhealthy: health.rollup.status === 'unhealthy',
72-
missing: health.rollup.status === 'missing',
73-
stopped: health.rollup.transformState === 'stopped',
74-
};
75-
const summary = {
76-
unhealthy: health.summary.status === 'unhealthy',
77-
missing: health.summary.status === 'missing',
78-
stopped: health.summary.transformState === 'stopped',
79-
};
80-
81-
const rollupHasIssue = values(rollup).some(Boolean);
82-
const summaryHasIssue = values(summary).some(Boolean);
83-
const count = [rollupHasIssue, summaryHasIssue].filter(Boolean).length;
70+
const count = [health.rollup.isProblematic, health.summary.isProblematic].filter(Boolean).length;
8471

8572
return (
8673
<EuiCallOut

x-pack/solutions/observability/plugins/slo/public/pages/slos/components/health_callout/health_callout.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export function HealthCallout({ sloList = [] }: { sloList: SLOWithSummaryRespons
3131
return null;
3232
}
3333

34-
const problematicSloList = results.filter((result) => result.health.overall !== 'healthy');
34+
const problematicSloList = results.filter((result) => result.health.isProblematic);
3535
if (problematicSloList.length === 0) {
3636
return null;
3737
}

x-pack/solutions/observability/plugins/slo/server/domain/models/health.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,9 @@
55
* 2.0.
66
*/
77

8-
import type { healthStatusSchema, stateSchema, transformHealthSchema } from '@kbn/slo-schema';
8+
import type { transformHealthSchema } from '@kbn/slo-schema';
99
import type * as t from 'io-ts';
1010

1111
type TransformHealth = t.OutputOf<typeof transformHealthSchema>;
12-
type HealthStatus = t.OutputOf<typeof healthStatusSchema>;
13-
type State = t.OutputOf<typeof stateSchema>;
1412

15-
export type { TransformHealth, HealthStatus, State };
13+
export type { TransformHealth };

x-pack/solutions/observability/plugins/slo/server/domain/services/compute_health.ts

Lines changed: 65 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,13 @@
88
import type { IScopedClusterClient } from '@kbn/core/server';
99
import { ALL_VALUE } from '@kbn/slo-schema';
1010
import type { TransformGetTransformStatsTransformStats } from 'elasticsearch-8.x/lib/api/types';
11-
import { groupBy, keyBy, type Dictionary } from 'lodash';
12-
import moment from 'moment';
11+
import { keyBy, type Dictionary } from 'lodash';
1312
import {
14-
SUMMARY_DESTINATION_INDEX_PATTERN,
1513
getSLOSummaryTransformId,
1614
getSLOTransformId,
1715
getWildcardTransformId,
1816
} from '../../../common/constants';
19-
import type { EsSummaryDocument } from '../../services/summary_transform_generator/helpers/create_temp_summary';
20-
import type { HealthStatus, State, TransformHealth } from '../models/health';
17+
import type { TransformHealth } from '../models/health';
2118

2219
interface Item {
2320
id: string;
@@ -36,63 +33,28 @@ export interface SLOHealth {
3633
sloInstanceId: string;
3734
sloRevision: number;
3835
sloName: string;
39-
state: State;
4036
health: {
41-
overall: HealthStatus;
37+
isProblematic: boolean;
4238
rollup: TransformHealth;
4339
summary: TransformHealth;
4440
};
4541
}
4642

47-
const LAG_THRESHOLD_MINUTES = 10;
48-
const STALE_THRESHOLD_MINUTES = 2 * 24 * 60;
49-
5043
export async function computeHealth(list: Item[], deps: Dependencies): Promise<SLOHealth[]> {
51-
const [summaryDocsById, transformStatsById] = await Promise.all([
52-
getSummaryDocsById(list, deps),
53-
getTransformStatsById(list, deps),
54-
]);
55-
44+
const transformStatsById = await getTransformStatsById(list, deps);
5645
return list.map((item) => {
57-
const health = computeTransformsHealth(transformStatsById, item);
58-
const state = computeTransformState(summaryDocsById, item);
46+
const health = computeItemHealth(transformStatsById, item);
5947

6048
return {
6149
sloId: item.id,
6250
sloName: item.name,
6351
sloInstanceId: item.instanceId ?? ALL_VALUE,
6452
sloRevision: item.revision,
65-
state,
6653
health,
6754
};
6855
});
6956
}
7057

71-
async function getSummaryDocsById(list: Item[], deps: Dependencies) {
72-
const summaryDocs = await deps.scopedClusterClient.asCurrentUser.search<EsSummaryDocument>({
73-
index: SUMMARY_DESTINATION_INDEX_PATTERN,
74-
query: {
75-
bool: {
76-
should: list.map((item) => ({
77-
bool: {
78-
must: [
79-
{ term: { 'slo.id': item.id } },
80-
{ term: { 'slo.instanceId': item.instanceId ?? ALL_VALUE } },
81-
{ term: { 'slo.revision': item.revision } },
82-
],
83-
},
84-
})),
85-
},
86-
},
87-
});
88-
89-
const summaryDocsById = groupBy(
90-
summaryDocs.hits.hits.map((hit) => hit._source!),
91-
(doc: EsSummaryDocument) => buildSummaryKey(doc.slo.id, doc.slo.instanceId)
92-
);
93-
return summaryDocsById;
94-
}
95-
9658
async function getTransformStatsById(
9759
list: Item[],
9860
deps: Dependencies
@@ -109,89 +71,84 @@ async function getTransformStatsById(
10971
return keyBy(stats.transforms, (transform) => transform.id);
11072
}
11173

112-
function computeTransformState(
113-
summaryDocsById: Dictionary<EsSummaryDocument[]>,
74+
function computeItemHealth(
75+
transformStatsById: Dictionary<TransformGetTransformStatsTransformStats>,
11476
item: Item
115-
): State {
116-
const sloSummaryDocs = summaryDocsById[buildSummaryKey(item.id, item.instanceId)];
77+
): { rollup: TransformHealth; summary: TransformHealth; isProblematic: boolean } {
78+
const rollup = getTransformHealth(
79+
item,
80+
transformStatsById[getSLOTransformId(item.id, item.revision)]
81+
);
82+
const summary = getTransformHealth(
83+
item,
84+
transformStatsById[getSLOSummaryTransformId(item.id, item.revision)]
85+
);
11786

118-
let state: State = 'no_data';
119-
if (!sloSummaryDocs) {
120-
return state;
121-
}
122-
const hasOnlyTempSummaryDoc = sloSummaryDocs.every((doc) => doc.isTempDoc); // only temporary documents mean the summary transform did not run yet
123-
const sloSummarydoc = sloSummaryDocs.find((doc) => !doc.isTempDoc);
124-
const latestSliTimestamp = sloSummarydoc?.latestSliTimestamp;
125-
const summaryUpdatedAt = sloSummarydoc?.summaryUpdatedAt;
126-
127-
if (hasOnlyTempSummaryDoc) {
128-
state = 'no_data';
129-
} else if (summaryUpdatedAt && latestSliTimestamp) {
130-
const summaryLag = moment().diff(new Date(summaryUpdatedAt), 'minute');
131-
const indexingLag = moment(summaryUpdatedAt).diff(new Date(latestSliTimestamp), 'minute');
132-
133-
// When summaryUpdatedAt is more than STALE_THRESHOLD_MINUTES minutes old, the SLO is considered stale, since no new data has triggered a summary document update.
134-
// When the difference between the summaryUpdatedAt and the latestSliTimestamp is
135-
// - Below LAG_THRESHOLD_MINUTES minutes, the SLO has caught up with the SLI data and is running correctly
136-
// - Above LAG_THRESHOLD_MINUTES minutes, the SLO is indexing
137-
if (summaryLag > STALE_THRESHOLD_MINUTES) {
138-
state = 'stale';
139-
} else {
140-
state = indexingLag >= LAG_THRESHOLD_MINUTES ? 'indexing' : 'running';
141-
}
142-
}
143-
return state;
87+
const isProblematic = rollup.isProblematic || summary.isProblematic;
88+
89+
return { isProblematic, rollup, summary };
14490
}
14591

14692
function getTransformHealth(
93+
item: Item,
14794
transformStat?: TransformGetTransformStatsTransformStats
14895
): TransformHealth {
14996
if (!transformStat) {
15097
return {
151-
status: 'missing',
98+
isProblematic: true,
99+
missing: true,
100+
status: 'unavailable',
152101
state: 'unavailable',
153102
};
154103
}
155104

156-
const transformStatus = transformStat.health?.status?.toLowerCase();
157-
const transformState = transformStat.state?.toLowerCase();
105+
const state = toTransformState(transformStat.state?.toLowerCase());
106+
107+
const status = toTransformStatus(transformStat.health?.status?.toLowerCase());
108+
const stateMatches =
109+
(!item.enabled && ['stopped', 'stopping', 'aborting'].includes(state)) ||
110+
(item.enabled && ['started', 'indexing'].includes(state));
111+
112+
const isProblematic = status === 'unhealthy' || state === 'failed' || !stateMatches;
158113

159114
return {
160-
status: transformStatus === 'green' ? 'healthy' : 'unhealthy',
161-
state:
162-
transformState === 'started'
163-
? 'started'
164-
: transformState === 'stopped'
165-
? 'stopped'
166-
: 'unavailable',
115+
isProblematic,
116+
missing: false,
117+
status,
118+
state,
119+
stateMatches,
167120
};
168121
}
169122

170-
function computeTransformsHealth(
171-
transformStatsById: Dictionary<TransformGetTransformStatsTransformStats>,
172-
item: Item
173-
): { overall: HealthStatus; rollup: TransformHealth; summary: TransformHealth } {
174-
const rollup = getTransformHealth(transformStatsById[getSLOTransformId(item.id, item.revision)]);
175-
const summary = getTransformHealth(
176-
transformStatsById[getSLOSummaryTransformId(item.id, item.revision)]
177-
);
178-
179-
const rollupStateMatchesSloEnabled =
180-
(item.enabled && rollup.state === 'started') || (!item.enabled && rollup.state === 'stopped');
181-
const summaryStateMatchesSloEnabled =
182-
(item.enabled && rollup.state === 'started') || (!item.enabled && rollup.state === 'stopped');
183-
184-
const overall: HealthStatus =
185-
rollup.status === 'healthy' &&
186-
rollupStateMatchesSloEnabled &&
187-
summary.status === 'healthy' &&
188-
summaryStateMatchesSloEnabled
189-
? 'healthy'
190-
: 'unhealthy';
191-
192-
return { overall, rollup, summary };
123+
function toTransformState(
124+
state: string
125+
): 'started' | 'indexing' | 'stopped' | 'stopping' | 'failed' | 'aborting' | 'unavailable' {
126+
switch (state) {
127+
case 'started':
128+
return 'started';
129+
case 'indexing':
130+
return 'indexing';
131+
case 'stopped':
132+
return 'stopped';
133+
case 'stopping':
134+
return 'stopping';
135+
case 'failed':
136+
return 'failed';
137+
case 'aborting':
138+
return 'aborting';
139+
default:
140+
return 'unavailable';
141+
}
193142
}
194143

195-
function buildSummaryKey(id: string, instanceId: string = ALL_VALUE) {
196-
return id + '|' + instanceId;
144+
function toTransformStatus(status?: string): 'healthy' | 'unhealthy' | 'unavailable' {
145+
switch (status) {
146+
case 'green':
147+
return 'healthy';
148+
case 'red':
149+
case 'yellow':
150+
return 'unhealthy';
151+
default:
152+
return 'unavailable';
153+
}
197154
}

0 commit comments

Comments
 (0)