From 0dfaa48041a5de0b7d93eb933ed15fecefc254fa Mon Sep 17 00:00:00 2001 From: "Ruben S. Montero" Date: Sat, 11 Jan 2025 16:46:26 +0100 Subject: [PATCH] F #6841: NVIDIA vendor mediated devices framework This commit adds support for the NVIDIA mdev framework introduced with the Ubuntu 24.04 release. The change includes: - Type of mdev is added by a new monitor attribute MDEV_MODE (<'legacy'|'nvidia'>). An empty MDEV_MODE in the PCI device defaults to 'legacy'. - Profile monitoring has also been adapted to the new framework. - vgpu has been extended to prepare the vGPU prior to VM boot according to the new framework. - KVM domain generates the PCI device using the type='pci' and managed='no' to accommodate the new mdev interface. Older OS/driver versions will use the legacy interface. (cherry picked from commit 216c329b650a64034220f9fed5e5c5425ea8eabd) --- src/host/HostSharePCI.cc | 5 +- src/im_mad/remotes/node-probes.d/pci.rb | 84 +++++++++++++++++++---- src/vmm/LibVirtDriverKVM.cc | 16 ++++- src/vmm_mad/remotes/kvm/vgpu | 91 ++++++++++++++++++------- 4 files changed, 151 insertions(+), 45 deletions(-) diff --git a/src/host/HostSharePCI.cc b/src/host/HostSharePCI.cc index 1badaf83b84..f4e74de1fee 100644 --- a/src/host/HostSharePCI.cc +++ b/src/host/HostSharePCI.cc @@ -200,7 +200,7 @@ void HostSharePCI::pci_attribute(VectorAttribute *device, PCIDevice *pci, "ADDRESS", "SHORT_ADDRESS" }; - static vector cp_check_attr = {"NUMA_NODE", "UUID"}; + static vector cp_check_attr = {"NUMA_NODE", "UUID", "MDEV_MODE"}; //Save previous address for migrations, clear on revert - failed migration if (set_prev) @@ -523,7 +523,8 @@ int HostSharePCI::set_pci_address(VectorAttribute * pci_device, // ------------------- Remove well-known attributes ----------------------- static vector rm_attr = {"DOMAIN", "BUS", "SLOT", "FUNCTION", - "ADDRESS", "PREV_ADDRESS", "NUMA_NODE", "UUID" + "ADDRESS", "PREV_ADDRESS", "NUMA_NODE", + "UUID", "MDEV_MODE" }; if (clean) diff --git 
a/src/im_mad/remotes/node-probes.d/pci.rb b/src/im_mad/remotes/node-probes.d/pci.rb index 258ca301523..df28a50b283 100755 --- a/src/im_mad/remotes/node-probes.d/pci.rb +++ b/src/im_mad/remotes/node-probes.d/pci.rb @@ -91,9 +91,61 @@ def get_devices(filter = nil) end.flatten end +def pci_bus_path(device) + "/sys/bus/pci/devices/0000:#{device[:bus]}:#{device[:slot]}.#{device[:function]}" +end + +def mdev_bus_path(device) + "/sys/class/mdev_bus/0000:#{device[:bus]}:#{device[:slot]}.#{device[:function]}" +end + def device_attr?(device, attribute) - addr = "0000:#{device[:bus]}:#{device[:slot]}.#{device[:function]}" - !`ls -l /sys/bus/pci/devices/#{addr}/ | grep #{attribute}`.empty? + File.exist? File.join(pci_bus_path(device), attribute) +end + +def virtfn?(device) + device_attr?(device, 'physfn') +end + +# rubocop:disable Naming/PredicateName +def has_virtfn?(device) + device_attr?(device, 'virtfn') +end +# rubocop:enable Naming/PredicateName + +def legacy?(device) + File.exist? File.join mdev_bus_path(device) +end + +def legacy_profiles(device) + path = File.join(mdev_bus_path(device), 'mdev_supported_types') + + return [] unless File.exist? path + + `ls #{path}`.split('\n') +rescue StandardError + [] +end + +def nvidia_profiles(device) + path = File.join(pci_bus_path(device), 'nvidia/creatable_vgpu_types') + + profiles = [] + + File.read(path).each_line do |line| + id, name = line.split(':') + + id.strip! + name.strip! + + next if id.empty? || id.casecmp?('ID') || name.empty? 
+ + profiles << "#{id} (#{name})" + end + + profiles +rescue StandardError + [] end filter = CONF[:filter] @@ -117,8 +169,7 @@ def pval(name, value) end # Skip NVIDIA cards with virtual functions - next if CONF[:nvidia_vendors].include?(dev[:vendor]) && - device_attr?(dev, 'virtfn') + next if CONF[:nvidia_vendors].include?(dev[:vendor]) && has_virtfn?(dev) puts 'PCI = [' values = [ @@ -139,26 +190,29 @@ def pval(name, value) # NVIDIA GPU device if CONF[:nvidia_vendors].include?(dev[:vendor]) - # When having NVIDIA GPU the name is always Device, so we merge - # it with vendor name, in this way Sunstone shows a better name - values << pval('DEVICE_NAME', - "#{dev[:vendor_name]} #{dev[:device_name]}") + # Better name for NVIDIA GPUs + values << pval('DEVICE_NAME', "#{dev[:vendor_name]} #{dev[:device_name]}") - # For vGPU, the uuid is based on the address to get always the same - if device_attr?(dev, 'physfn') + if virtfn?(dev) + # For vGPU, the uuid is based on the address to get always the same values << pval( 'UUID', `uuidgen --name '#{dev[:address]}' \ --namespace '@x500' --sha1`.strip ) - # Get profiles - addr = "0000:#{dev[:bus]}:#{dev[:slot]}.#{dev[:function]}" - profiles = `ls /sys/class/mdev_bus/#{addr}/mdev_supported_types` - profiles = profiles.split("\n") - # Comma separated value with different profiles + profiles = legacy_profiles(dev) + profiles = nvidia_profiles(dev) if profiles.empty? 
+ values << pval('PROFILES', profiles.join(',')) + + mdev_mode = if legacy?(dev) + 'legacy' + else + 'nvidia' + end + values << pval('MDEV_MODE', mdev_mode) end else values << pval('DEVICE_NAME', dev[:device_name]) diff --git a/src/vmm/LibVirtDriverKVM.cc b/src/vmm/LibVirtDriverKVM.cc index 91153fd8560..fbb67455275 100644 --- a/src/vmm/LibVirtDriverKVM.cc +++ b/src/vmm/LibVirtDriverKVM.cc @@ -2082,6 +2082,9 @@ int LibVirtDriver::deployment_description_kvm( vm_func = pci[i]->vector_value("VM_FUNCTION"); string uuid = pci[i]->vector_value("UUID"); + string mdev = pci[i]->vector_value("MDEV_MODE"); + + one_util::tolower(mdev); if ( domain.empty() || bus.empty() || slot.empty() || func.empty() ) { @@ -2091,7 +2094,7 @@ int LibVirtDriver::deployment_description_kvm( continue; } - if ( !uuid.empty() ) + if ( !uuid.empty() && (mdev == "legacy" || mdev.empty()) ) { file << "\t\t\n"; file << "\t\t\t\n"; @@ -2102,7 +2105,16 @@ int LibVirtDriver::deployment_description_kvm( } else { - file << "\t\t\n"; + file << "\t\t\n"; + } + else + { + file << "managed='yes'>\n"; + } file << "\t\t\t\n"; file << "\t\t\t\t
/dev/null)" # Get specific information about the PCI @@ -41,22 +41,71 @@ function get_mdev_path() { slot=$(get_xpath_val "$pci" "/PCI/SLOT") func=$(get_xpath_val "$pci" "/PCI/FUNCTION") profile=$(get_xpath_val "$pci" "/PCI/PROFILE") + mode=$(get_xpath_val "$pci" "/PCI/MDEV_MODE") - # Generate mdev path - mdev="/sys/class/mdev_bus/$domain:$bus:$slot.$func" + if [[ "$mode" == "legacy" || -z "$mode" ]]; then + # Generate mdev path + mdev="/sys/class/mdev_bus/$domain:$bus:$slot.$func" - if [[ ! -d $mdev ]] - then - error_message "Directory '$mdev' does not exist" - exit 1 - fi + if [[ ! -d $mdev ]]; then + error_message "Directory '$mdev' does not exist" + exit 1 + fi - if [ -z "$profile" ] - then - profile="$(ls "$mdev/mdev_supported_types" | head -n1)" - fi + if [ -z "$profile" ]; then + profile="$(ls "$mdev/mdev_supported_types" | head -n1)" + fi + + mdev="$mdev/mdev_supported_types/$profile/" + + case "$3" in + "create") + if ! echo "$1" > "$mdev/create"; then + error_message "Error creating mediated device" + exit 1 + fi + ;; + "remove") + if ! echo "1" > "$mdev/devices/$1/remove"; then + error_message "Error removing mediated device" + fi + ;; + esac + else + pci="/sys/bus/pci/devices/$domain:$bus:$slot.$func" + + if [[ ! -d "${pci}" ]]; then + error_message "Directory '$pci' does not exist" + exit 1 + fi + + ppath="${pci}/nvidia/creatable_vgpu_types" + + if [ -z "$profile" ]; then + profile=$(sed -n '2p' ${ppath} | cut -f1 -d':' | tr -d '[:blank:]') + else + profile=${profile%% *} + + if [[ "$3" == "create" ]] && ! grep -q "${profile}" ${ppath}; then + error_message "Profile '$profile' not supported by vGPU" + exit 1 + fi + fi - echo "$mdev/mdev_supported_types/$profile/" + case "$3" in + "create") + if ! echo "${profile}" > "${pci}/nvidia/current_vgpu_type"; then + error_message "Error activating vgpu with profile ${profile}" + exit 1 + fi + ;; + "remove") + if ! 
echo "0" > "${pci}/nvidia/current_vgpu_type"; then + error_message "Error deactivating vgpu" + fi + ;; + esac + fi } # ------------------------------------------------------------------------------ @@ -80,27 +129,17 @@ case "$ACTION" in if [ -n "$uuids" ]; then for uuid in $uuids; do - mdev="$(get_mdev_path "$uuid" "$VM")" - - if ! echo "$uuid" > "$mdev/create"; then - error_message "Error creating mediated device" - exit 1 - fi + vgpuctl "$uuid" "$VM" "create" done fi ;; "delete") + # Not exit with error, just log uuids="$(get_uuids "$VM")" if [ -n "$uuids" ]; then for uuid in $uuids; do - mdev="$(get_mdev_path "$uuid" "$VM")" - - if ! echo "1" > "$mdev/devices/$uuid/remove"; then - error_message "Error removing mediated device" - # Not exit with error, just log the error - # exit -1 - fi + vgpuctl "$uuid" "$VM" "remove" done fi ;;