Skip to content

Commit

Permalink
F #6841: NVIDIA vendor mediated devices framework
Browse files Browse the repository at this point in the history
This commit adds support for the NVIDIA mdev framework introduced with the
Ubuntu 24.04 release. The change includes:

- Type of mdev is added by a new monitor attribute MDEV_MODE
  (<'legacy'|'nvidia'>). An empty MDEV_MODE in the PCI device defaults
  to 'legacy'
- Profile monitoring has also been adapted to the new framework.
- vgpu has been extended to prepare the vGPU prior to VM boot according
  to the new framework.
- KVM domain generates the PCI device using type='pci' and managed='no' to
  accommodate the new mdev interface.

Older OS/driver versions will use the legacy interface.

(cherry picked from commit 216c329b650a64034220f9fed5e5c5425ea8eabd)
  • Loading branch information
rsmontero committed Jan 11, 2025
1 parent ef932b6 commit 0dfaa48
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 45 deletions.
5 changes: 3 additions & 2 deletions src/host/HostSharePCI.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ void HostSharePCI::pci_attribute(VectorAttribute *device, PCIDevice *pci,
"ADDRESS", "SHORT_ADDRESS"
};

static vector<string> cp_check_attr = {"NUMA_NODE", "UUID"};
static vector<string> cp_check_attr = {"NUMA_NODE", "UUID", "MDEV_MODE"};

//Save previous address for migrations, clear on revert - failed migration
if (set_prev)
Expand Down Expand Up @@ -523,7 +523,8 @@ int HostSharePCI::set_pci_address(VectorAttribute * pci_device,

// ------------------- Remove well-known attributes -----------------------
static vector<string> rm_attr = {"DOMAIN", "BUS", "SLOT", "FUNCTION",
"ADDRESS", "PREV_ADDRESS", "NUMA_NODE", "UUID"
"ADDRESS", "PREV_ADDRESS", "NUMA_NODE",
"UUID", "MDEV_MODE"
};

if (clean)
Expand Down
84 changes: 69 additions & 15 deletions src/im_mad/remotes/node-probes.d/pci.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,61 @@ def get_devices(filter = nil)
end.flatten
end

# Returns the sysfs directory of a PCI device below /sys/bus/pci/devices.
#
# The PCI domain is always rendered as 0000; bus, slot and function are taken
# from the +:bus+, +:slot+ and +:function+ entries of +device+.
def pci_bus_path(device)
  address = format('0000:%s:%s.%s', device[:bus], device[:slot], device[:function])

  "/sys/bus/pci/devices/#{address}"
end

# Returns the sysfs directory of a device below /sys/class/mdev_bus.
#
# The PCI domain is always rendered as 0000; bus, slot and function are taken
# from the +:bus+, +:slot+ and +:function+ entries of +device+.
def mdev_bus_path(device)
  bus      = device[:bus]
  slot     = device[:slot]
  function = device[:function]

  "/sys/class/mdev_bus/0000:#{bus}:#{slot}.#{function}"
end

# Tells whether a PCI device exposes a given entry in its sysfs directory.
#
# @param device [Hash] device description with :bus, :slot and :function
# @param attribute [String] name of the entry to look for (e.g. 'physfn')
# @return [Boolean] true when the entry exists under the device directory
def device_attr?(device, attribute)
  # A direct filesystem check is enough: no shell is spawned and neither the
  # address nor the attribute name is ever interpolated into a command line.
  File.exist?(File.join(pci_bus_path(device), attribute))
end

# True when the device has a 'physfn' entry in its PCI sysfs directory,
# as reported by device_attr?.
def virtfn?(device)
  attribute = 'physfn'

  device_attr?(device, attribute)
end

# rubocop:disable Naming/PredicateName
# True when the device has a 'virtfn' entry in its PCI sysfs directory,
# as reported by device_attr?.
def has_virtfn?(device)
  attribute = 'virtfn'

  device_attr?(device, attribute)
end
# rubocop:enable Naming/PredicateName

# True when the device has an entry under /sys/class/mdev_bus (the path
# built by mdev_bus_path); the probe reports such devices as 'legacy'.
def legacy?(device)
  bus_dir = mdev_bus_path(device)

  File.exist?(bus_dir)
end

# Lists the profiles found in the device's mdev_supported_types directory
# (legacy mdev interface).
#
# @param device [Hash] device description with :bus, :slot and :function
# @return [Array<String>] one entry per profile, sorted by name; empty when
#   the directory is missing or cannot be read
def legacy_profiles(device)
  path = File.join(mdev_bus_path(device), 'mdev_supported_types')

  return [] unless File.directory?(path)

  # Read the directory directly instead of parsing `ls` output. Hidden
  # entries (including . and ..) are skipped, as a plain `ls` would do.
  Dir.entries(path).reject {|entry| entry.start_with?('.') }.sort
rescue StandardError
  # Best effort: a probe failure must not abort the whole monitoring run
  []
end

# Lists the vGPU profiles read from the device's nvidia/creatable_vgpu_types
# file (NVIDIA vendor-specific interface).
#
# Each usable line is expected to look like "<id> : <name>" and is rendered
# as "<id> (<name>)". The header line (id "ID"), blank lines and lines
# without both an id and a name are skipped.
#
# @param device [Hash] device description with :bus, :slot and :function
# @return [Array<String>] profiles in file order; empty when the file is
#   missing or cannot be read
def nvidia_profiles(device)
  path = File.join(pci_bus_path(device), 'nvidia/creatable_vgpu_types')

  profiles = []

  File.read(path).each_line do |line|
    # Split on the first ':' only so a name containing ':' is kept whole.
    # to_s guards lines without a separator, which would otherwise raise
    # and make the rescue below discard every profile already collected.
    id, name = line.split(':', 2)

    id   = id.to_s.strip
    name = name.to_s.strip

    next if id.empty? || id.casecmp?('ID') || name.empty?

    profiles << "#{id} (#{name})"
  end

  profiles
rescue StandardError
  # Best effort: a probe failure must not abort the whole monitoring run
  []
end

filter = CONF[:filter]
Expand All @@ -117,8 +169,7 @@ def pval(name, value)
end

# Skip NVIDIA cards with virtual functions
next if CONF[:nvidia_vendors].include?(dev[:vendor]) &&
device_attr?(dev, 'virtfn')
next if CONF[:nvidia_vendors].include?(dev[:vendor]) && has_virtfn?(dev)

puts 'PCI = ['
values = [
Expand All @@ -139,26 +190,29 @@ def pval(name, value)

# NVIDIA GPU device
if CONF[:nvidia_vendors].include?(dev[:vendor])
# When having NVIDIA GPU the name is always Device, so we merge
# it with vendor name, in this way Sunstone shows a better name
values << pval('DEVICE_NAME',
"#{dev[:vendor_name]} #{dev[:device_name]}")
# Better name for NVIDIA GPUs
values << pval('DEVICE_NAME', "#{dev[:vendor_name]} #{dev[:device_name]}")

# For vGPU, the uuid is based on the address to get always the same
if device_attr?(dev, 'physfn')
if virtfn?(dev)
# For vGPU, the uuid is based on the address to get always the same
values << pval(
'UUID',
`uuidgen --name '#{dev[:address]}' \
--namespace '@x500' --sha1`.strip
)

# Get profiles
addr = "0000:#{dev[:bus]}:#{dev[:slot]}.#{dev[:function]}"
profiles = `ls /sys/class/mdev_bus/#{addr}/mdev_supported_types`
profiles = profiles.split("\n")

# Comma separated value with different profiles
profiles = legacy_profiles(dev)
profiles = nvidia_profiles(dev) if profiles.empty?

values << pval('PROFILES', profiles.join(','))

mdev_mode = if legacy?(dev)
'legacy'
else
'nvidia'
end
values << pval('MDEV_MODE', mdev_mode)
end
else
values << pval('DEVICE_NAME', dev[:device_name])
Expand Down
16 changes: 14 additions & 2 deletions src/vmm/LibVirtDriverKVM.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2082,6 +2082,9 @@ int LibVirtDriver::deployment_description_kvm(
vm_func = pci[i]->vector_value("VM_FUNCTION");

string uuid = pci[i]->vector_value("UUID");
string mdev = pci[i]->vector_value("MDEV_MODE");

one_util::tolower(mdev);

if ( domain.empty() || bus.empty() || slot.empty() || func.empty() )
{
Expand All @@ -2091,7 +2094,7 @@ int LibVirtDriver::deployment_description_kvm(
continue;
}

if ( !uuid.empty() )
if ( !uuid.empty() && (mdev == "legacy" || mdev.empty()) )
{
file << "\t\t<hostdev mode='subsystem' type='mdev' model='vfio-pci'>\n";
file << "\t\t\t<source>\n";
Expand All @@ -2102,7 +2105,16 @@ int LibVirtDriver::deployment_description_kvm(
}
else
{
file << "\t\t<hostdev mode='subsystem' type='pci' managed='yes'>\n";
file << "\t\t<hostdev mode='subsystem' type='pci' ";

if ( mdev == "nvidia" )
{
file << "managed='no'>\n";
}
else
{
file << "managed='yes'>\n";
}

file << "\t\t\t<source>\n";
file << "\t\t\t\t<address "
Expand Down
91 changes: 65 additions & 26 deletions src/vmm_mad/remotes/kvm/vgpu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ function get_xpath_val() {
}

# Get mdev path used to (de)activate mediated device
function get_mdev_path() {
function vgpuctl() {
pci="$(xmllint --format --xpath "/VM/TEMPLATE/PCI[UUID='$1']" "$2" 2>/dev/null)"

# Get specific information about the PCI
Expand All @@ -41,22 +41,71 @@ function get_mdev_path() {
slot=$(get_xpath_val "$pci" "/PCI/SLOT")
func=$(get_xpath_val "$pci" "/PCI/FUNCTION")
profile=$(get_xpath_val "$pci" "/PCI/PROFILE")
mode=$(get_xpath_val "$pci" "/PCI/MDEV_MODE")

# Generate mdev path
mdev="/sys/class/mdev_bus/$domain:$bus:$slot.$func"
if [[ "$mode" == "legacy" || -z "$mode" ]]; then
# Generate mdev path
mdev="/sys/class/mdev_bus/$domain:$bus:$slot.$func"

if [[ ! -d $mdev ]]
then
error_message "Directory '$mdev' does not exist"
exit 1
fi
if [[ ! -d $mdev ]]; then
error_message "Directory '$mdev' does not exist"
exit 1
fi

if [ -z "$profile" ]
then
profile="$(ls "$mdev/mdev_supported_types" | head -n1)"
fi
if [ -z "$profile" ]; then
profile="$(ls "$mdev/mdev_supported_types" | head -n1)"
fi

mdev="$mdev/mdev_supported_types/$profile/"

case "$3" in
"create")
if ! echo "$1" > "$mdev/create"; then
error_message "Error creating mediated device"
exit 1
fi
;;
"remove")
if ! echo "1" > "$mdev/devices/$1/remove"; then
error_message "Error removing mediated device"
fi
;;
esac
else
pci="/sys/bus/pci/devices/$domain:$bus:$slot.$func"

if [[ ! -d "${pci}" ]]; then
error_message "Directory '$pci' does not exist"
exit 1
fi

ppath="${pci}/nvidia/creatable_vgpu_types"

if [ -z "$profile" ]; then
profile=$(sed -n '2p' ${ppath} | cut -f1 -d':' | tr -d '[:blank:]')
else
profile=${profile%% *}

if [[ "$3" == "create" ]] && ! grep -q "${profile}" ${ppath}; then
error_message "Profile '$profile' not supported by vGPU"
exit 1
fi
fi

echo "$mdev/mdev_supported_types/$profile/"
case "$3" in
"create")
if ! echo "${profile}" > "${pci}/nvidia/current_vgpu_type"; then
error_message "Error activating vgpu with profile ${profile}"
exit 1
fi
;;
"remove")
if ! echo "0" > "${pci}/nvidia/current_vgpu_type"; then
error_message "Error deactivating vgpu"
fi
;;
esac
fi
}

# ------------------------------------------------------------------------------
Expand All @@ -80,27 +129,17 @@ case "$ACTION" in

if [ -n "$uuids" ]; then
for uuid in $uuids; do
mdev="$(get_mdev_path "$uuid" "$VM")"

if ! echo "$uuid" > "$mdev/create"; then
error_message "Error creating mediated device"
exit 1
fi
vgpuctl "$uuid" "$VM" "create"
done
fi
;;
"delete")
# Not exit with error, just log
uuids="$(get_uuids "$VM")"

if [ -n "$uuids" ]; then
for uuid in $uuids; do
mdev="$(get_mdev_path "$uuid" "$VM")"

if ! echo "1" > "$mdev/devices/$uuid/remove"; then
error_message "Error removing mediated device"
# Not exit with error, just log the error
# exit -1
fi
vgpuctl "$uuid" "$VM" "remove"
done
fi
;;
Expand Down

0 comments on commit 0dfaa48

Please sign in to comment.