Skip to content

Commit

Permalink
Merge pull request #1703 from christianvogt/ss-errors
Browse files Browse the repository at this point in the history
display StatefulSet errors when notebook pod fails to create
  • Loading branch information
openshift-merge-robot authored Sep 6, 2023
2 parents 2824f2d + ae0f2a9 commit 6ee8280
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 115 deletions.
7 changes: 5 additions & 2 deletions backend/src/routes/api/nb-events/eventUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@ import { KubeFastifyInstance } from '../../../types';
export const getNotebookEvents = async (
fastify: KubeFastifyInstance,
namespace: string,
podUID: string,
notebookName: string,
podUID: string | undefined,
): Promise<V1Event[]> => {
return fastify.kube.coreV1Api
.listNamespacedEvent(
namespace,
undefined,
undefined,
undefined,
`involvedObject.kind=Pod,involvedObject.uid=${podUID}`,
podUID
? `involvedObject.kind=Pod,involvedObject.uid=${podUID}`
: `involvedObject.kind=StatefulSet,involvedObject.name=${notebookName}`,
)
.then((res) => {
const body = res.body as V1EventList;
Expand Down
35 changes: 16 additions & 19 deletions backend/src/routes/api/nb-events/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,21 @@ import { getNotebookEvents } from './eventUtils';
import { secureRoute } from '../../../utils/route-security';

export default async (fastify: FastifyInstance): Promise<void> => {
fastify.get(
'/:namespace/:podUID',
secureRoute(fastify)(
async (
request: FastifyRequest<{
Params: {
namespace: string;
podUID: string;
};
Querystring: {
// TODO: Support server side filtering
from?: string;
};
}>,
) => {
const { namespace, podUID } = request.params;
return getNotebookEvents(fastify, namespace, podUID);
},
),
const routeHandler = secureRoute(fastify)(
async (
request: FastifyRequest<{
Params: {
namespace: string;
notebookName: string;
podUID: string | undefined;
};
}>,
) => {
const { namespace, notebookName, podUID } = request.params;
return getNotebookEvents(fastify, namespace, notebookName, podUID);
},
);

fastify.get('/:namespace/:notebookName', routeHandler);
fastify.get('/:namespace/:notebookName/:podUID', routeHandler);
};
19 changes: 11 additions & 8 deletions frontend/src/api/k8s/events.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import { k8sListResource } from '@openshift/dynamic-plugin-sdk-utils';
import { k8sListResourceItems } from '@openshift/dynamic-plugin-sdk-utils';
import { EventKind } from '~/k8sTypes';
import { EventModel } from '~/api/models';

export const getNotebookEvents = async (namespace: string, podUid: string): Promise<EventKind[]> =>
k8sListResource<EventKind>({
export const getNotebookEvents = async (
namespace: string,
notebookName: string,
podUid: string | undefined,
): Promise<EventKind[]> =>
k8sListResourceItems<EventKind>({
model: EventModel,
queryOptions: {
ns: namespace,
queryParams: {
fieldSelector: `involvedObject.kind=Pod,involvedObject.uid=${podUid}`,
fieldSelector: podUid
? `involvedObject.kind=Pod,involvedObject.uid=${podUid}`
: `involvedObject.kind=StatefulSet,involvedObject.name=${notebookName}`,
},
},
}).then(
// Filter the events by pods that have the same name as the notebook
(r) => r.items,
);
});
7 changes: 7 additions & 0 deletions frontend/src/api/models/k8s.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ export const PodModel: K8sModelCommon = {
plural: 'pods',
};

export const StatefulSetModel: K8sModelCommon = {
apiVersion: 'v1',
apiGroup: 'apps',
kind: 'StatefulSet',
plural: 'statefulsets',
};

export const PVCModel: K8sModelCommon = {
apiVersion: 'v1',
kind: 'PersistentVolumeClaim',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,14 @@ const StartServerModal: React.FC<StartServerModalProps> = ({ open, spawnInProgre
isIndented
>
<List isPlain isBordered tabIndex={0}>
{events.reverse().map((event, index) => (
<ListItem key={`notebook-event-${event.metadata.uid ?? index}`}>
{`${getEventTimestamp(event)} [${event.type}] ${event.message}`}
</ListItem>
))}
{events
.slice()
.reverse()
.map((event, index) => (
<ListItem key={`notebook-event-${event.metadata.uid ?? index}`}>
{`${getEventTimestamp(event)} [${event.type}] ${event.message}`}
</ListItem>
))}
<ListItem>Server requested</ListItem>
</List>
</ExpandableSection>
Expand Down
13 changes: 8 additions & 5 deletions frontend/src/pages/projects/notebook/StartNotebookModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,14 @@ const StartNotebookModal: React.FC<StartNotebookModalProps> = ({
<Panel isScrollable>
<PanelMain maxHeight="250px">
<List isPlain isBordered data-id="event-logs">
{events.reverse().map((event, index) => (
<ListItem key={`notebook-event-${event.metadata.uid ?? index}`}>
{getEventFullMessage(event)}
</ListItem>
))}
{events
.slice()
.reverse()
.map((event, index) => (
<ListItem key={`notebook-event-${event.metadata.uid ?? index}`}>
{getEventFullMessage(event)}
</ListItem>
))}
<ListItem>Server requested</ListItem>
</List>
</PanelMain>
Expand Down
70 changes: 42 additions & 28 deletions frontend/src/pages/projects/notebook/useWatchNotebookEvents.ts
Original file line number Diff line number Diff line change
@@ -1,47 +1,61 @@
import * as React from 'react';
import { EventKind } from '~/k8sTypes';
import { EventKind, NotebookKind } from '~/k8sTypes';
import { getNotebookEvents } from '~/api';
import { FAST_POLL_INTERVAL } from '~/utilities/const';

export const useWatchNotebookEvents = (
projectName: string,
notebook: NotebookKind,
podUid: string,
activeFetch: boolean,
): EventKind[] => {
const [notebookEvents, setNoteBookEvents] = React.useState<EventKind[]>([]);
const notebookName = notebook.metadata.name;
const namespace = notebook.metadata.namespace;
const [notebookEvents, setNotebookEvents] = React.useState<EventKind[]>([]);

// Cached events are returned when activeFetch is false.
// This allows us to reset notebookEvents state when activeFetch becomes
// false to prevent UI blips when activeFetch goes true again.
const notebookEventsCache = React.useRef<EventKind[]>(notebookEvents);

// when activeFetch switches to false, reset events state
React.useEffect(() => {
if (!activeFetch) {
setNotebookEvents([]);
}
}, [activeFetch]);

React.useEffect(() => {
let watchHandle: ReturnType<typeof setTimeout>;
let cancelled = false;

const clear = () => {
cancelled = true;
clearTimeout(watchHandle);
};

if (activeFetch) {
if (activeFetch && namespace && notebookName) {
const watchNotebookEvents = () => {
if (projectName && podUid) {
getNotebookEvents(projectName, podUid)
.then((data: EventKind[]) => {
if (cancelled) {
return;
}
setNoteBookEvents(data);
})
.catch((e) => {
/* eslint-disable-next-line no-console */
console.error('Error fetching notebook events', e);
clear();
});
watchHandle = setTimeout(watchNotebookEvents, FAST_POLL_INTERVAL);
}
getNotebookEvents(namespace, notebookName, podUid)
.then((data: EventKind[]) => {
if (!cancelled) {
notebookEventsCache.current = data;
setNotebookEvents(data);
}
})
.catch((e) => {
/* eslint-disable-next-line no-console */
console.error('Error fetching notebook events', e);
});
watchHandle = setTimeout(watchNotebookEvents, FAST_POLL_INTERVAL);
};

watchNotebookEvents();
if (!podUid) {
// delay the initial fetch to avoid older StatefulSet event errors from blipping on screen during notebook startup
watchHandle = setTimeout(watchNotebookEvents, Math.max(FAST_POLL_INTERVAL, 3000));
} else {
watchNotebookEvents();
}
}
return clear;
}, [projectName, podUid, activeFetch]);
return () => {
cancelled = true;
clearTimeout(watchHandle);
};
}, [namespace, notebookName, podUid, activeFetch]);

return notebookEvents;
return activeFetch ? notebookEvents : notebookEventsCache.current;
};
13 changes: 9 additions & 4 deletions frontend/src/pages/projects/notebook/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ const filterEvents = (
allEvents: EventKind[],
lastActivity: Date,
): [filterEvents: EventKind[], thisInstanceEvents: EventKind[], gracePeriod: boolean] => {
const thisInstanceEvents = allEvents.filter(
(event) => new Date(getEventTimestamp(event)) >= lastActivity,
);
const thisInstanceEvents = allEvents
.filter((event) => new Date(getEventTimestamp(event)) >= lastActivity)
.sort((a, b) => getEventTimestamp(a).localeCompare(getEventTimestamp(b)));
if (thisInstanceEvents.length === 0) {
// Filtered out all of the events, exit early
return [[], [], false];
Expand Down Expand Up @@ -121,7 +121,7 @@ export const useNotebookStatus = (
podUid: string,
spawnInProgress: boolean,
): [status: NotebookStatus | null, events: EventKind[]] => {
const events = useWatchNotebookEvents(notebook.metadata.namespace, podUid, spawnInProgress);
const events = useWatchNotebookEvents(notebook, podUid, spawnInProgress);

const annotationTime = notebook?.metadata.annotations?.['notebooks.kubeflow.org/last-activity'];
const lastActivity = annotationTime
Expand Down Expand Up @@ -230,6 +230,11 @@ export const useNotebookStatus = (
status = EventStatus.INFO;
break;
}
case 'FailedCreate': {
currentEvent = 'Failed to create pod';
status = EventStatus.ERROR;
break;
}
default: {
if (!gracePeriod && lastItem.reason === 'FailedScheduling') {
currentEvent = 'Insufficient resources to start';
Expand Down
8 changes: 6 additions & 2 deletions frontend/src/services/notebookEventsService.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import axios from 'axios';
import { K8sEvent } from '~/types';

export const getNotebookEvents = (projectName: string, podUID: string): Promise<K8sEvent[]> => {
const url = `/api/nb-events/${projectName}/${podUID}`;
export const getNotebookEvents = (
projectName: string,
notebookName: string,
podUID?: string,
): Promise<K8sEvent[]> => {
const url = `/api/nb-events/${projectName}/${notebookName}${podUID ? `/${podUID}` : ''}`;
return axios.get(url).then((response) => response.data);
};
22 changes: 10 additions & 12 deletions frontend/src/utilities/notebookControllerUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,9 @@ const filterEvents = (
allEvents: K8sEvent[],
lastActivity: Date,
): [filterEvents: K8sEvent[], thisInstanceEvents: K8sEvent[], gracePeroid: boolean] => {
const thisInstanceEvents = allEvents.filter(
(event) => new Date(getEventTimestamp(event)) >= lastActivity,
);
const thisInstanceEvents = allEvents
.filter((event) => new Date(getEventTimestamp(event)) >= lastActivity)
.sort((a, b) => getEventTimestamp(a).localeCompare(getEventTimestamp(b)));
if (thisInstanceEvents.length === 0) {
// Filtered out all of the events, exit early
return [[], [], false];
Expand Down Expand Up @@ -311,23 +311,16 @@ export const useNotebookStatus = (
spawnInProgress: boolean,
open: boolean,
): [status: NotebookStatus | null, events: K8sEvent[]] => {
const { notebookNamespace } = useNamespaces();
const {
currentUserNotebook: notebook,
currentUserNotebookIsRunning: isNotebookRunning,
currentUserNotebookPodUID,
} = React.useContext(NotebookControllerContext);

const events = useWatchNotebookEvents(
notebookNamespace,
notebook,
currentUserNotebookPodUID,
spawnInProgress && !isNotebookRunning,
).filter(
(evt) =>
// Note: This looks redundant but is useful -- our state is stale when a new pod starts; this cleans that up
// This is not ideal, but the alternative is expose a reset function & thread it up to the stop call
// This is old code and needs to be removed in time, this will do for now.
evt.involvedObject.uid === currentUserNotebookPodUID,
);

const lastActivity =
Expand All @@ -339,7 +332,7 @@ export const useNotebookStatus = (
? new Date(notebook.metadata.creationTimestamp ?? 0)
: null);

if (!lastActivity) {
if (!notebook || !lastActivity) {
// Notebook not started, we don't have a filter time, ignore
return [null, []];
}
Expand Down Expand Up @@ -439,6 +432,11 @@ export const useNotebookStatus = (
status = EventStatus.INFO;
break;
}
case 'FailedCreate': {
currentEvent = 'Failed to create pod';
status = EventStatus.ERROR;
break;
}
default: {
if (!gracePeriod && lastItem.reason === 'FailedScheduling') {
currentEvent = 'Insufficient resources to start';
Expand Down
Loading

0 comments on commit 6ee8280

Please sign in to comment.