565.57.01

NVIDIA · Oct 22, 2024 · d5a0858 · d5a0858
1 parent ed4be64
commit d5a0858
Show file tree

Hide file tree

Showing 1,049 changed files with 209,289 additions and 167,306 deletions.
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source
 
 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 560.35.03.
+version 565.57.01.
 
 
 ## How to Build
@@ -17,7 +17,7 @@ as root:
 
 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-560.35.03 driver release.  This can be achieved by installing
+565.57.01 driver release.  This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option.  E.g.,
 
@@ -185,7 +185,7 @@ table below).
 For details on feature support and limitations, see the NVIDIA GPU driver
 end user README here:
 
-https://us.download.nvidia.com/XFree86/Linux-x86_64/560.35.03/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/565.57.01/README/kernel_open.html
 
 For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
 Package for more details.
@@ -199,6 +199,7 @@ Subsystem Device ID.
 | NVIDIA TITAN RTX                                | 1E02           |
 | NVIDIA GeForce RTX 2080 Ti                      | 1E04           |
 | NVIDIA GeForce RTX 2080 Ti                      | 1E07           |
+| NVIDIA CMP 50HX                                 | 1E09           |
 | Quadro RTX 6000                                 | 1E30           |
 | Quadro RTX 8000                                 | 1E30 1028 129E |
 | Quadro RTX 8000                                 | 1E30 103C 129E |
@@ -391,6 +392,7 @@ Subsystem Device ID.
 | NVIDIA GeForce RTX 2070                         | 1F07           |
 | NVIDIA GeForce RTX 2060                         | 1F08           |
 | NVIDIA GeForce GTX 1650                         | 1F0A           |
+| NVIDIA CMP 40HX                                 | 1F0B           |
 | NVIDIA GeForce RTX 2070                         | 1F10           |
 | NVIDIA GeForce RTX 2070 with Max-Q Design       | 1F10 1025 132D |
 | NVIDIA GeForce RTX 2070 with Max-Q Design       | 1F10 1025 1342 |
@@ -691,6 +693,7 @@ Subsystem Device ID.
 | NVIDIA GeForce GTX 1660                         | 2184           |
 | NVIDIA GeForce GTX 1650 SUPER                   | 2187           |
 | NVIDIA GeForce GTX 1650                         | 2188           |
+| NVIDIA CMP 30HX                                 | 2189           |
 | NVIDIA GeForce GTX 1660 Ti                      | 2191           |
 | NVIDIA GeForce GTX 1660 Ti with Max-Q Design    | 2191 1028 0949 |
 | NVIDIA GeForce GTX 1660 Ti with Max-Q Design    | 2191 103C 85FB |
@@ -758,9 +761,11 @@ Subsystem Device ID.
 | NVIDIA H200                                     | 2335 10DE 18BF |
 | NVIDIA H100                                     | 2339 10DE 17FC |
 | NVIDIA H800 NVL                                 | 233A 10DE 183A |
+| NVIDIA H200 NVL                                 | 233B 10DE 1996 |
 | NVIDIA GH200 120GB                              | 2342 10DE 16EB |
 | NVIDIA GH200 120GB                              | 2342 10DE 1805 |
 | NVIDIA GH200 480GB                              | 2342 10DE 1809 |
+| NVIDIA GH200 144G HBM3e                         | 2348 10DE 18D2 |
 | NVIDIA GeForce RTX 3060 Ti                      | 2414           |
 | NVIDIA GeForce RTX 3080 Ti Laptop GPU           | 2420           |
 | NVIDIA RTX A5500 Laptop GPU                     | 2438           |
@@ -831,12 +836,10 @@ Subsystem Device ID.
 | NVIDIA GeForce RTX 2050                         | 25AD           |
 | NVIDIA RTX A1000                                | 25B0 1028 1878 |
 | NVIDIA RTX A1000                                | 25B0 103C 1878 |
-| NVIDIA RTX A1000                                | 25B0 103C 8D96 |
 | NVIDIA RTX A1000                                | 25B0 10DE 1878 |
 | NVIDIA RTX A1000                                | 25B0 17AA 1878 |
 | NVIDIA RTX A400                                 | 25B2 1028 1879 |
 | NVIDIA RTX A400                                 | 25B2 103C 1879 |
-| NVIDIA RTX A400                                 | 25B2 103C 8D95 |
 | NVIDIA RTX A400                                 | 25B2 10DE 1879 |
 | NVIDIA RTX A400                                 | 25B2 17AA 1879 |
 | NVIDIA A16                                      | 25B6 10DE 14A9 |

diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"560.35.03\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"565.57.01\"
 
 ifneq ($(SYSSRCHOST1X),)
  EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

diff --git a/kernel-open/common/inc/nv-kthread-q-os.h b/kernel-open/common/inc/nv-kthread-q-os.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -43,6 +43,8 @@ struct nv_kthread_q
     atomic_t main_loop_should_exit;
 
     struct task_struct *q_kthread;
+
+    bool is_unload_flush_ongoing;
 };
 
 struct nv_kthread_q_item

diff --git a/kernel-open/common/inc/nv-linux.h b/kernel-open/common/inc/nv-linux.h
@@ -724,6 +724,7 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
 #endif
 }
 
+#define NV_GET_OFFSET_IN_PAGE(phys_page) offset_in_page(phys_page)
 #define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
 #define NV_VMA_PGOFF(vma)             ((vma)->vm_pgoff)
 #define NV_VMA_SIZE(vma)              ((vma)->vm_end - (vma)->vm_start)
@@ -951,14 +952,14 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
 }
 
 static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
-    NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
+    NvU64 phys_addr, NvU64 size, NvU32 extra_prot, NvU64 start)
 {
     int ret = -1;
 #if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
-    ret = nv_remap_page_range(vma, vma->vm_start, phys_addr, size,
+    ret = nv_remap_page_range(vma, start, phys_addr, size,
         nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
 #else
-    ret = io_remap_pfn_range(vma, vma->vm_start, (phys_addr >> PAGE_SHIFT),
+    ret = io_remap_pfn_range(vma, start, (phys_addr >> PAGE_SHIFT),
         size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
 #endif
     return ret;
@@ -1207,6 +1208,7 @@ typedef struct nv_alloc_s {
         NvBool physical    : 1;
         NvBool unencrypted : 1;
         NvBool coherent    : 1;
+        NvBool carveout    : 1;
     } flags;
     unsigned int   cache_type;
     unsigned int   num_pages;
@@ -1840,20 +1842,6 @@ static inline int nv_is_control_device(struct inode *inode)
 #endif
 #endif
 
-static inline NvU64 nv_pci_bus_address(struct pci_dev *dev, NvU8 bar_index)
-{
-    NvU64 bus_addr = 0;
-#if defined(NV_PCI_BUS_ADDRESS_PRESENT)
-    bus_addr = pci_bus_address(dev, bar_index);
-#elif defined(CONFIG_PCI)
-    struct pci_bus_region region;
-
-    pcibios_resource_to_bus(dev, &region, &dev->resource[bar_index]);
-    bus_addr = region.start;
-#endif
-    return bus_addr;
-}
-
 /*
  * Decrements the usage count of the allocation, and moves the allocation to
  * the given nvlfp's free list if the usage count drops to zero.

diff --git a/kernel-open/common/inc/nv-proto.h b/kernel-open/common/inc/nv-proto.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -59,6 +59,8 @@ NV_STATUS   nv_uvm_resume               (void);
 void        nv_uvm_notify_start_device  (const NvU8 *uuid);
 void        nv_uvm_notify_stop_device   (const NvU8 *uuid);
 NV_STATUS   nv_uvm_event_interrupt      (const NvU8 *uuid);
+NV_STATUS   nv_uvm_drain_P2P            (const NvU8 *uuid);
+NV_STATUS   nv_uvm_resume_P2P           (const NvU8 *uuid);
 
 /* Move these to nv.h once implemented by other UNIX platforms */
 NvBool      nvidia_get_gpuid_list       (NvU32 *gpu_ids, NvU32 *gpu_count);

diff --git a/kernel-open/common/inc/nv.h b/kernel-open/common/inc/nv.h
@@ -44,6 +44,7 @@
 #include <nv-ioctl.h>
 #include <nv-ioctl-numa.h>
 #include <nvmisc.h>
+#include <os/nv_memory_area.h>
 
 extern nv_cap_t *nvidia_caps_root;
 
@@ -279,8 +280,7 @@ typedef struct nv_usermap_access_params_s
     NvU64    offset;
     NvU64   *page_array;
     NvU64    num_pages;
-    NvU64    mmap_start;
-    NvU64    mmap_size;
+    MemoryArea memArea;
     NvU64    access_start;
     NvU64    access_size;
     NvU64    remap_prot_extra;
@@ -296,8 +296,7 @@ typedef struct nv_alloc_mapping_context_s {
     NvU64  page_index;
     NvU64 *page_array;
     NvU64  num_pages;
-    NvU64  mmap_start;
-    NvU64  mmap_size;
+    MemoryArea memArea;
     NvU64  access_start;
     NvU64  access_size;
     NvU64  remap_prot_extra;
@@ -330,7 +329,7 @@ typedef struct nv_soc_irq_info_s {
     NvS32 ref_count;
 } nv_soc_irq_info_t;
 
-#define NV_MAX_SOC_IRQS              6
+#define NV_MAX_SOC_IRQS              10
 #define NV_MAX_DPAUX_NUM_DEVICES     4
 
 #define NV_MAX_SOC_DPAUX_NUM_DEVICES 2
@@ -535,6 +534,7 @@ typedef struct UvmGpuAddressSpaceInfo_tag           *nvgpuAddressSpaceInfo_t;
 typedef struct UvmGpuAllocInfo_tag                  *nvgpuAllocInfo_t;
 typedef struct UvmGpuP2PCapsParams_tag              *nvgpuP2PCapsParams_t;
 typedef struct UvmGpuFbInfo_tag                     *nvgpuFbInfo_t;
+typedef struct UvmGpuNvlinkInfo_tag                 *nvgpuNvlinkInfo_t;
 typedef struct UvmGpuEccInfo_tag                    *nvgpuEccInfo_t;
 typedef struct UvmGpuFaultInfo_tag                  *nvgpuFaultInfo_t;
 typedef struct UvmGpuAccessCntrInfo_tag             *nvgpuAccessCntrInfo_t;
@@ -545,6 +545,7 @@ typedef struct UvmPmaAllocationOptions_tag          *nvgpuPmaAllocationOptions_t
 typedef struct UvmPmaStatistics_tag                 *nvgpuPmaStatistics_t;
 typedef struct UvmGpuMemoryInfo_tag                 *nvgpuMemoryInfo_t;
 typedef struct UvmGpuExternalMappingInfo_tag        *nvgpuExternalMappingInfo_t;
+typedef struct UvmGpuExternalPhysAddrInfo_tag       *nvgpuExternalPhysAddrInfo_t;
 typedef struct UvmGpuChannelResourceInfo_tag        *nvgpuChannelResourceInfo_t;
 typedef struct UvmGpuChannelInstanceInfo_tag        *nvgpuChannelInstanceInfo_t;
 typedef struct UvmGpuChannelResourceBindParams_tag  *nvgpuChannelResourceBindParams_t;
@@ -783,7 +784,7 @@ nv_state_t*  NV_API_CALL  nv_get_ctl_state       (void);
 
 void   NV_API_CALL  nv_set_dma_address_size      (nv_state_t *, NvU32 );
 
-NV_STATUS  NV_API_CALL  nv_alias_pages           (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
+NV_STATUS  NV_API_CALL  nv_alias_pages           (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, NvBool, void **);
 NV_STATUS  NV_API_CALL  nv_alloc_pages           (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
 NV_STATUS  NV_API_CALL  nv_free_pages            (nv_state_t *, NvU32, NvBool, NvU32, void *);
 
@@ -904,6 +905,9 @@ void      NV_API_CALL nv_dma_release_dma_buf     (nv_dma_buf_t *);
 
 void      NV_API_CALL nv_schedule_uvm_isr        (nv_state_t *);
 
+NV_STATUS NV_API_CALL nv_schedule_uvm_drain_p2p  (NvU8 *);
+void      NV_API_CALL nv_schedule_uvm_resume_p2p (NvU8 *);
+
 NvBool    NV_API_CALL nv_platform_supports_s0ix  (void);
 NvBool    NV_API_CALL nv_s2idle_pm_configured    (void);
 
@@ -1001,8 +1005,8 @@ NV_STATUS  NV_API_CALL  rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, vo
 NV_STATUS  NV_API_CALL  rm_p2p_dma_map_pages      (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
 NV_STATUS  NV_API_CALL  rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
 void       NV_API_CALL  rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
-NV_STATUS  NV_API_CALL  rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, void *, nv_phys_addr_range_t **, NvU32 *);
-void       NV_API_CALL  rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, nv_phys_addr_range_t **, NvU32);
+NV_STATUS  NV_API_CALL  rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, MemoryRange, void *, NvBool, MemoryArea *);
+void       NV_API_CALL  rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, void *, NvBool, MemoryArea);
 NV_STATUS  NV_API_CALL  rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
 void       NV_API_CALL  rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
 NV_STATUS  NV_API_CALL  rm_log_gpu_crash          (nv_stack_t *, nv_state_t *);

diff --git a/kernel-open/common/inc/nv_uvm_interface.h b/kernel-open/common/inc/nv_uvm_interface.h
@@ -1085,6 +1085,22 @@ NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvm
 //
 void nvUvmInterfaceDeRegisterUvmOps(void);
 
+/*******************************************************************************
+    nvUvmInterfaceGetNvlinkInfo
+
+    Gets NVLINK information from RM.
+
+    Arguments:
+        device[IN]        - GPU device handle
+        nvlinkInfo[OUT]   - Pointer to UvmGpuNvlinkInfo structure
+
+    Error codes:
+      NV_ERROR
+      NV_ERR_INVALID_ARGUMENT
+*/
+NV_STATUS nvUvmInterfaceGetNvlinkInfo(uvmGpuDeviceHandle device,
+                                      UvmGpuNvlinkInfo *nvlinkInfo);
+
 /*******************************************************************************
     nvUvmInterfaceP2pObjectCreate
 
@@ -1161,6 +1177,48 @@ NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
                                              NvU64 size,
                                              UvmGpuExternalMappingInfo *gpuExternalMappingInfo);
 
+/*******************************************************************************
+    nvUvmInterfaceGetExternalAllocPhysAddrs
+
+    The interface builds the RM physical addrs using the provided input parameters.
+
+    Arguments:
+        vaSpace[IN]                     -  vaSpace handle.
+        hMemory[IN]                     -  Memory handle.
+        offset [IN]                     -  Offset from the beginning of the allocation
+                                           where the physical addresses should begin.
+                                           Should be aligned with mappingPagesize
+                                           in gpuExternalMappingInfo associated
+                                           with the allocation.
+        size [IN]                       -  Length of the allocation for which PhysAddrs
+                                           should be built.
+                                           Should be aligned with mappingPagesize
+                                           in gpuExternalMappingInfo associated
+                                           with the allocation.
+                                           size = 0 will be interpreted as the total size
+                                           of the allocation.
+        gpuExternalPhysAddrsInfo[IN/OUT] - See nv_uvm_types.h for more information.
+
+   Error codes:
+        NV_ERR_INVALID_ARGUMENT         - Invalid parameter(s) passed.
+        NV_ERR_INVALID_OBJECT_HANDLE    - Invalid memory handle is passed.
+        NV_ERR_NOT_SUPPORTED            - Functionality is not supported (see comments in nv_gpu_ops.c)
+        NV_ERR_INVALID_BASE             - offset is beyond the allocation size
+        NV_ERR_INVALID_LIMIT            - (offset + size) is beyond the allocation size.
+        NV_ERR_BUFFER_TOO_SMALL         - gpuExternalPhysAddrsInfo.physAddrBufferSize is insufficient to
+                                          store a single physAddr.
+        NV_ERR_NOT_READY                - Returned when querying the physAddrs requires a deferred setup
+                                          which has not yet completed. It is expected that the caller
+                                          will reattempt the call until a different code is returned.
+                                          As an example, multi-node systems which require querying
+                                          physAddrs from the Fabric Manager may return this code.
+*/
+NV_STATUS nvUvmInterfaceGetExternalAllocPhysAddrs(uvmGpuAddressSpaceHandle vaSpace,
+                                                  NvHandle hMemory,
+                                                  NvU64 offset,
+                                                  NvU64 size,
+                                                  UvmGpuExternalPhysAddrInfo *gpuExternalPhysAddrsInfo);
+
 /*******************************************************************************
     nvUvmInterfaceRetainChannel
 
@@ -1462,6 +1520,16 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
                                                 char *methodStream,
                                                 NvU32 methodStreamSize);
 
+/*******************************************************************************
+    nvUvmInterfaceReportFatalError
+
+    Reports a global fatal error so RM can inform the clients that a node reboot
+    is necessary to recover from this error. This function can be called from
+    any lock environment, bottom half or non-interrupt context.
+
+*/
+void nvUvmInterfaceReportFatalError(NV_STATUS error);
+
 /*******************************************************************************
     Cryptography Services Library (CSL) Interface
 */