Skip to content

Commit 14b0a4f

Browse files
authored
koordlet: support heterogeneous GPU device reporting (koordinator-sh#2501)
Signed-off-by: ZhuZhezz <[email protected]>
1 parent f4b91fb commit 14b0a4f

File tree

15 files changed

+1304
-21
lines changed

15 files changed

+1304
-21
lines changed

apis/scheduling/v1alpha1/device_types.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,8 @@ type DeviceInfo struct {
5353
Topology *DeviceTopology `json:"topology,omitempty"`
5454
// VFGroups represents the virtual function devices
5555
VFGroups []VirtualFunctionGroup `json:"vfGroups,omitempty"`
56+
// Conditions represents current conditions of device
57+
Conditions []metav1.Condition `json:"conditions,omitempty"`
5658
}
5759

5860
type DeviceTopology struct {
@@ -80,6 +82,12 @@ type VirtualFunction struct {
8082
BusID string `json:"busID,omitempty"`
8183
}
8284

85+
// DeviceConditionType is a string-typed identifier for a kind of device condition.
// NOTE(review): presumably matched against the Type of entries in
// DeviceInfo.Conditions; confirm against the code that sets the conditions.
type DeviceConditionType string
86+
87+
// Known DeviceConditionType values.
const (
88+
// DeviceConditionHealthy is the condition type with the value "Healthy".
DeviceConditionHealthy DeviceConditionType = "Healthy"
89+
)
90+
8391
// DeviceStatus holds the list of device allocations.
type DeviceStatus struct {
8492
// Allocations is serialized as "allocations" and omitted from JSON when empty.
Allocations []DeviceAllocation `json:"allocations,omitempty"`
8593
}

apis/scheduling/v1alpha1/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/scheduling.koordinator.sh_devices.yaml

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,79 @@ spec:
4040
devices:
4141
items:
4242
properties:
43+
conditions:
44+
description: Conditions represents current conditions of device
45+
items:
46+
description: "Condition contains details for one aspect of
47+
the current state of this API Resource.\n---\nThis struct
48+
is intended for direct use as an array at the field path
49+
.status.conditions. For example,\n\n\n\ttype FooStatus
50+
struct{\n\t // Represents the observations of a foo's
51+
current state.\n\t // Known .status.conditions.type are:
52+
\"Available\", \"Progressing\", and \"Degraded\"\n\t //
53+
+patchMergeKey=type\n\t // +patchStrategy=merge\n\t //
54+
+listType=map\n\t // +listMapKey=type\n\t Conditions
55+
[]metav1.Condition `json:\"conditions,omitempty\" patchStrategy:\"merge\"
56+
patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
57+
\ // other fields\n\t}"
58+
properties:
59+
lastTransitionTime:
60+
description: |-
61+
lastTransitionTime is the last time the condition transitioned from one status to another.
62+
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
63+
format: date-time
64+
type: string
65+
message:
66+
description: |-
67+
message is a human readable message indicating details about the transition.
68+
This may be an empty string.
69+
maxLength: 32768
70+
type: string
71+
observedGeneration:
72+
description: |-
73+
observedGeneration represents the .metadata.generation that the condition was set based upon.
74+
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
75+
with respect to the current state of the instance.
76+
format: int64
77+
minimum: 0
78+
type: integer
79+
reason:
80+
description: |-
81+
reason contains a programmatic identifier indicating the reason for the condition's last transition.
82+
Producers of specific condition types may define expected values and meanings for this field,
83+
and whether the values are considered a guaranteed API.
84+
The value should be a CamelCase string.
85+
This field may not be empty.
86+
maxLength: 1024
87+
minLength: 1
88+
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
89+
type: string
90+
status:
91+
description: status of the condition, one of True, False,
92+
Unknown.
93+
enum:
94+
- "True"
95+
- "False"
96+
- Unknown
97+
type: string
98+
type:
99+
description: |-
100+
type of condition in CamelCase or in foo.example.com/CamelCase.
101+
---
102+
Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
103+
useful (see .node.status.conditions), the ability to deconflict is important.
104+
The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
105+
maxLength: 316
106+
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
107+
type: string
108+
required:
109+
- lastTransitionTime
110+
- message
111+
- reason
112+
- status
113+
- type
114+
type: object
115+
type: array
43116
health:
44117
default: false
45118
description: Health indicates whether the device is normal

0 commit comments

Comments
 (0)