Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 27 additions & 27 deletions cmd/nvidia-mig-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
"sync"

log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
"github.com/urfave/cli/v3"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
Expand Down Expand Up @@ -114,7 +114,7 @@ func (m *SyncableMigConfig) Get() string {
}

func main() {
c := cli.NewApp()
c := cli.Command{}
c.Before = validateFlags
c.Action = start
c.Version = info.GetVersionString()
Expand All @@ -125,162 +125,162 @@ func main() {
Value: "",
Usage: "absolute path to the kubeconfig file",
Destination: &kubeconfigFlag,
EnvVars: []string{"KUBECONFIG"},
Sources: cli.EnvVars("KUBECONFIG"),
},
&cli.StringFlag{
Name: "node-name",
Aliases: []string{"n"},
Value: "",
Usage: "the name of the node to watch for label changes on",
Destination: &nodeNameFlag,
EnvVars: []string{"NODE_NAME"},
Sources: cli.EnvVars("NODE_NAME"),
},
&cli.StringFlag{
Name: "config-file",
Aliases: []string{"f"},
Value: "",
Usage: "the path to the MIG parted configuration file",
Destination: &configFileFlag,
EnvVars: []string{"CONFIG_FILE"},
Sources: cli.EnvVars("CONFIG_FILE"),
},
&cli.StringFlag{
Name: "reconfigure-script",
Aliases: []string{"s"},
Value: DefaultReconfigureScript,
Usage: "script to run to do the actual MIG reconfiguration",
Destination: &reconfigureScriptFlag,
EnvVars: []string{"RECONFIGURE_SCRIPT"},
Sources: cli.EnvVars("RECONFIGURE_SCRIPT"),
},
&cli.StringFlag{
Name: "host-root-mount",
Aliases: []string{"m"},
Value: DefaultHostRootMount,
Usage: "container path where host root directory is mounted",
Destination: &hostRootMountFlag,
EnvVars: []string{"HOST_ROOT_MOUNT"},
Sources: cli.EnvVars("HOST_ROOT_MOUNT"),
},
&cli.StringFlag{
Name: "host-nvidia-dir",
Aliases: []string{"i"},
Value: DefaultHostNvidiaDir,
Usage: "host path of the directory where NVIDIA managed software directory is typically located",
Destination: &hostNvidiaDirFlag,
EnvVars: []string{"HOST_NVIDIA_DIR"},
Sources: cli.EnvVars("HOST_NVIDIA_DIR"),
},
&cli.StringFlag{
Name: "host-mig-manager-state-file",
Aliases: []string{"o"},
Value: DefaultHostMigManagerStateFile,
Usage: "host path where the host's systemd mig-manager state file is located",
Destination: &hostMigManagerStateFileFlag,
EnvVars: []string{"HOST_MIG_MANAGER_STATE_FILE"},
Sources: cli.EnvVars("HOST_MIG_MANAGER_STATE_FILE"),
},
&cli.StringFlag{
Name: "host-kubelet-systemd-service",
Aliases: []string{"k"},
Value: DefaultHostKubeletSystemdService,
Usage: "name of the host's 'kubelet' systemd service which may need to be shutdown/restarted across a MIG mode reconfiguration",
Destination: &hostKubeletSystemdServiceFlag,
EnvVars: []string{"HOST_KUBELET_SYSTEMD_SERVICE"},
Sources: cli.EnvVars("HOST_KUBELET_SYSTEMD_SERVICE"),
},
&cli.StringFlag{
Name: "gpu-clients-file",
Aliases: []string{"g"},
Value: "",
Usage: "the path to the file listing the GPU clients that need to be shutdown across a MIG configuration",
Destination: &gpuClientsFileFlag,
EnvVars: []string{"GPU_CLIENTS_FILE"},
Sources: cli.EnvVars("GPU_CLIENTS_FILE"),
},
&cli.BoolFlag{
Name: "with-reboot",
Aliases: []string{"r"},
Value: false,
Usage: "reboot the node if changing the MIG mode fails for any reason",
Destination: &withRebootFlag,
EnvVars: []string{"WITH_REBOOT"},
Sources: cli.EnvVars("WITH_REBOOT"),
},
&cli.BoolFlag{
Name: "with-shutdown-host-gpu-clients",
Aliases: []string{"d"},
Value: false,
Usage: "shutdown/restart any required host GPU clients across a MIG configuration",
Destination: &withShutdownHostGPUClientsFlag,
EnvVars: []string{"WITH_SHUTDOWN_HOST_GPU_CLIENTS"},
Sources: cli.EnvVars("WITH_SHUTDOWN_HOST_GPU_CLIENTS"),
},
&cli.StringFlag{
Name: "default-gpu-clients-namespace",
Aliases: []string{"p"},
Value: DefaultGPUClientsNamespace,
Usage: "Default name of the Kubernetes namespace in which the GPU client Pods are installed in",
Destination: &defaultGPUClientsNamespaceFlag,
EnvVars: []string{"DEFAULT_GPU_CLIENTS_NAMESPACE"},
Sources: cli.EnvVars("DEFAULT_GPU_CLIENTS_NAMESPACE"),
},
&cli.StringFlag{
Name: "nvidia-driver-root",
Aliases: []string{"driver-root", "t"},
Value: DefaultNvidiaDriverRoot,
Usage: "Root path to the NVIDIA driver installation. Only used if --cdi-enabled is set.",
Destination: &driverRoot,
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
Sources: cli.EnvVars("NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"),
},
&cli.StringFlag{
Name: "driver-root-ctr-path",
Aliases: []string{"a"},
Value: DefaultDriverRootCtrPath,
Usage: "Root path to the NVIDIA driver installation mounted in the container. Only used if --cdi-enabled is set.",
Destination: &driverRootCtrPath,
EnvVars: []string{"DRIVER_ROOT_CTR_PATH"},
Sources: cli.EnvVars("DRIVER_ROOT_CTR_PATH"),
},
&cli.BoolFlag{
Name: "cdi-enabled",
Usage: "Enable CDI support",
Destination: &cdiEnabledFlag,
EnvVars: []string{"CDI_ENABLED"},
Sources: cli.EnvVars("CDI_ENABLED"),
},
&cli.StringFlag{
Name: "dev-root",
Aliases: []string{"b"},
Value: "",
Usage: "Root path to the NVIDIA device nodes. Only used if --cdi-enabled is set.",
Destination: &devRoot,
EnvVars: []string{"NVIDIA_DEV_ROOT"},
Sources: cli.EnvVars("NVIDIA_DEV_ROOT"),
},
&cli.StringFlag{
Name: "dev-root-ctr-path",
Aliases: []string{"j"},
Value: "",
Usage: "Root path to the NVIDIA device nodes mounted in the container. Only used if --cdi-enabled is set.",
Destination: &devRootCtrPath,
EnvVars: []string{"DEV_ROOT_CTR_PATH"},
Sources: cli.EnvVars("DEV_ROOT_CTR_PATH"),
},
&cli.StringFlag{
Name: "nvidia-cdi-hook-path",
Value: DefaultNvidiaCDIHookPath,
Usage: "Path to nvidia-cdi-hook binary on the host.",
Destination: &nvidiaCDIHookPath,
EnvVars: []string{"NVIDIA_CDI_HOOK_PATH"},
Sources: cli.EnvVars("NVIDIA_CDI_HOOK_PATH"),
},
}

err := c.Run(os.Args)
err := c.Run(context.Background(), os.Args)
if err != nil {
log.SetOutput(os.Stderr)
log.Printf("Error: %v", err)
os.Exit(1)
}
}

func validateFlags(c *cli.Context) error {
func validateFlags(ctx context.Context, c *cli.Command) (context.Context, error) {
if nodeNameFlag == "" {
return fmt.Errorf("invalid -n <node-name> flag: must not be empty string")
return ctx, fmt.Errorf("invalid -n <node-name> flag: must not be empty string")
}
if configFileFlag == "" {
return fmt.Errorf("invalid -f <config-file> flag: must not be empty string")
return ctx, fmt.Errorf("invalid -f <config-file> flag: must not be empty string")
}
return nil
return ctx, nil
}

func start(c *cli.Context) error {
func start(ctx context.Context, c *cli.Command) error {
config, err := clientcmd.BuildConfigFromFlags("", kubeconfigFlag)
if err != nil {
return fmt.Errorf("error building kubernetes clientcmd config: %s", err)
Expand All @@ -305,7 +305,7 @@ func start(c *cli.Context) error {
log.Infof("Waiting for change to '%s' label", MigConfigLabel)
value := migConfig.Get()
log.Infof("Updating to MIG config: %s", value)
err := migReconfigure(c.Context, value, clientset, driverLibraryPath, nvidiaSMIPath)
err := migReconfigure(ctx, value, clientset, driverLibraryPath, nvidiaSMIPath)
if err != nil {
log.Errorf("Error: %s", err)
continue
Expand Down
25 changes: 13 additions & 12 deletions cmd/nvidia-mig-parted/apply/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
package apply

import (
"context"
"fmt"
"os"
"reflect"

"github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
"github.com/urfave/cli/v3"

"github.com/NVIDIA/go-nvml/pkg/nvml"

Expand Down Expand Up @@ -68,7 +69,7 @@ func BuildCommand() *cli.Command {
apply := cli.Command{}
apply.Name = "apply"
apply.Usage = "Apply changes (if necessary) for a specific MIG configuration from a configuration file"
apply.Action = func(c *cli.Context) error {
apply.Action = func(_ context.Context, c *cli.Command) error {
return applyWrapper(c, &applyFlags)
}

Expand All @@ -79,35 +80,35 @@ func BuildCommand() *cli.Command {
Aliases: []string{"f"},
Usage: "Path to the configuration file",
Destination: &applyFlags.ConfigFile,
EnvVars: []string{"MIG_PARTED_CONFIG_FILE"},
Sources: cli.EnvVars("MIG_PARTED_CONFIG_FILE"),
},
&cli.StringFlag{
Name: "selected-config",
Aliases: []string{"c"},
Usage: "The label of the mig-config from the config file to apply to the node",
Destination: &applyFlags.SelectedConfig,
EnvVars: []string{"MIG_PARTED_SELECTED_CONFIG"},
Sources: cli.EnvVars("MIG_PARTED_SELECTED_CONFIG"),
},
&cli.StringFlag{
Name: "hooks-file",
Aliases: []string{"k"},
Usage: "Path to the hooks file",
Destination: &applyFlags.HooksFile,
EnvVars: []string{"MIG_PARTED_HOOKS_FILE"},
Sources: cli.EnvVars("MIG_PARTED_HOOKS_FILE"),
},
&cli.BoolFlag{
Name: "skip-reset",
Aliases: []string{"s"},
Usage: "Skip the GPU reset operation after applying the desired MIG mode to all GPUs",
Destination: &applyFlags.SkipReset,
EnvVars: []string{"MIG_PARTED_SKIP_RESET"},
Sources: cli.EnvVars("MIG_PARTED_SKIP_RESET"),
},
&cli.BoolFlag{
Name: "mode-only",
Aliases: []string{"m"},
Usage: "Only change the MIG enabled setting from the config, not configure any MIG devices",
Destination: &applyFlags.ModeOnly,
EnvVars: []string{"MIG_PARTED_MODE_CHANGE_ONLY"},
Sources: cli.EnvVars("MIG_PARTED_MODE_CHANGE_ONLY"),
},
}

Expand Down Expand Up @@ -140,9 +141,9 @@ func ParseHooksFile(hooksFile string) (*hooks.Spec, error) {

// GetHooksEnvsMap builds a 'hooks.EnvsMap' from the set of environment variables set when the CLI was invoked by the user.
// These environment variables are then made available to all hooks when they are executed later on.
func GetHooksEnvsMap(c *cli.Context) hooks.EnvsMap {
func GetHooksEnvsMap(c *cli.Command) hooks.EnvsMap {
envs := make(hooks.EnvsMap)
for _, flag := range c.Command.Flags {
for _, flag := range c.Flags {
fv := reflect.ValueOf(flag)
for fv.Kind() == reflect.Ptr {
fv = reflect.Indirect(fv)
Expand Down Expand Up @@ -182,7 +183,7 @@ func (c *Context) ApplyMigConfig() error {
return ApplyMigConfig(c)
}

func applyWrapper(c *cli.Context, f *Flags) error {
func applyWrapper(c *cli.Command, f *Flags) error {
err := CheckFlags(f)
if err != nil {
_ = cli.ShowSubcommandHelp(c)
Expand Down Expand Up @@ -215,7 +216,7 @@ func applyWrapper(c *cli.Context, f *Flags) error {
context := Context{
Flags: f,
Context: assert.Context{
Context: c,
Command: c,
Flags: &f.Flags,
MigConfig: migConfig,
Nvml: nvml.New(),
Expand All @@ -233,7 +234,7 @@ func applyWrapper(c *cli.Context, f *Flags) error {

// ApplyMigConfigWithHooks orchestrates the calls of a 'MigConfigApplier' between a set of 'ApplyHooks' to set the MIG configuration of a node.
// If 'modeOnly' is 'true', then only the MIG mode settings embedded in the 'Context' are applied.
func ApplyMigConfigWithHooks(logger *logrus.Logger, context *cli.Context, modeOnly bool, hooks ApplyHooks, applier MigConfigApplier) (rerr error) {
func ApplyMigConfigWithHooks(logger *logrus.Logger, context *cli.Command, modeOnly bool, hooks ApplyHooks, applier MigConfigApplier) (rerr error) {
logger.Debugf("Running apply-start hook")
err := hooks.ApplyStart(GetHooksEnvsMap(context), context.Bool("debug"))
if err != nil {
Expand Down
Loading
Loading