diff --git a/cmake/XglVersions.cmake b/cmake/XglVersions.cmake index 4f65ae21..2f92310a 100644 --- a/cmake/XglVersions.cmake +++ b/cmake/XglVersions.cmake @@ -28,7 +28,7 @@ include_guard() # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. # It must be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -set(ICD_PAL_CLIENT_MAJOR_VERSION "878") +set(ICD_PAL_CLIENT_MAJOR_VERSION "880") # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. # It describes the interface version of the gpuopen shared module (part of PAL) that the ICD supports. diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index f7817f69..6eb99348 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.285" + "api_version": "1.3.287" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.285", + "api_version": "1.3.287", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_shader_optimizer.cpp b/icd/api/app_shader_optimizer.cpp index 94fbe86b..6595e97a 100644 --- a/icd/api/app_shader_optimizer.cpp +++ b/icd/api/app_shader_optimizer.cpp @@ -1166,6 +1166,7 @@ void ShaderOptimizer::BuildAppProfileGeneric() { const AppProfile appProfile = m_pDevice->GetAppProfile(); const Pal::GpuType gpuType = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gpuType; + const Pal::AsicRevision asicRevision = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().revision; uint32 i = 0; } diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index 8f886284..462c7b0c 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -459,6 +459,7 @@ VkResult CompilerSolutionLlpc::CreateGraphicsShaderBinary( int64_t startTime = Util::GetPerfCpuTime(); hasher.Update(pCreateInfo->libraryHash[gplType]); + hasher.Update(PipelineCompilerTypeLlpc); hasher.Update(m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash()); hasher.Finalize(cacheId.bytes); @@ -1203,7 +1204,8 @@ void CompilerSolutionLlpc::BuildPipelineInternalBufferData( if (needUberFetchShaderBuffer) { uint32_t uberFetchShaderInternalDataSize = pCompiler->BuildUberFetchShaderInternalData( - pVertexInput, pCreateInfo->pipelineInfo.dynamicVertexStride, pInternalBufferInfo->pData); + pVertexInput, pCreateInfo->pipelineInfo.dynamicVertexStride, + pCreateInfo->pipelineInfo.useSoftwareVertexBufferDescriptors, pInternalBufferInfo->pData); auto pBufferEntry = &pInternalBufferInfo->internalBufferEntries[0]; pBufferEntry->userDataOffset = uberFetchConstBufRegBase; diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index 7a036e53..4b5ae8f4 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -119,6 +119,12 @@ constexpr uint64_t PrsDynamicStatesMask = 0 // - VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE // - VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE // - VK_DYNAMIC_STATE_STENCIL_OP +// - 
VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT +// - VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT +// - VK_DYNAMIC_STATE_SAMPLE_MASK_EXT +// - VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT +// - VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT +// - VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT constexpr uint64_t FgsDynamicStatesMask = 0 | (1ULL << static_cast(DynamicStatesInternal::DepthBounds)) | (1ULL << static_cast(DynamicStatesInternal::StencilCompareMask)) @@ -130,7 +136,13 @@ constexpr uint64_t FgsDynamicStatesMask = 0 | (1ULL << static_cast(DynamicStatesInternal::DepthCompareOp)) | (1ULL << static_cast(DynamicStatesInternal::DepthBoundsTestEnable)) | (1ULL << static_cast(DynamicStatesInternal::StencilTestEnable)) - | (1ULL << static_cast(DynamicStatesInternal::StencilOp)); + | (1ULL << static_cast(DynamicStatesInternal::StencilOp)) + | (1ULL << static_cast(DynamicStatesInternal::SampleLocations)) + | (1ULL << static_cast(DynamicStatesInternal::SampleLocationsEnable)) + | (1ULL << static_cast(DynamicStatesInternal::SampleMask)) + | (1ULL << static_cast(DynamicStatesInternal::AlphaToCoverageEnable)) + | (1ULL << static_cast(DynamicStatesInternal::AlphaToOneEnable)) + | (1ULL << static_cast(DynamicStatesInternal::RasterizationSamples)); // ===================================================================================================================== // The dynamic states of Fragment Output Interface section @@ -148,6 +160,7 @@ constexpr uint64_t FgsDynamicStatesMask = 0 // - VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT (not available) // - VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT (not available) // - VK_DYNAMIC_STATE_COLOR_BLEND_ADVANCED_EXT (not available) +// - VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR constexpr uint64_t FoiDynamicStatesMask = 0 | (1ULL << static_cast(DynamicStatesInternal::BlendConstants)) | (1ULL << static_cast(DynamicStatesInternal::SampleLocations)) @@ -161,7 +174,8 @@ constexpr uint64_t FoiDynamicStatesMask = 0 | (1ULL << static_cast(DynamicStatesInternal::ColorBlendEnable)) | (1ULL << static_cast(DynamicStatesInternal::ColorBlendEquation)) | (1ULL << static_cast(DynamicStatesInternal::ColorWriteMask)) - | (1ULL << static_cast(DynamicStatesInternal::SampleLocationsEnable)); + | (1ULL << static_cast(DynamicStatesInternal::SampleLocationsEnable)) + | (1ULL << static_cast(DynamicStatesInternal::FragmentShadingRateStateKhr)); // ===================================================================================================================== // Helper function used to check whether a specific dynamic state is set @@ -625,7 +639,7 @@ uint64_t GraphicsPipelineCommon::GetDynamicStateFlags( dynamicState |= fgsMask & (1ULL << static_cast(DynamicStatesInternal::StencilReference)); break; case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR: - dynamicState |= fgsMask & + dynamicState |= (fgsMask | foiMask) & (1ULL << static_cast(DynamicStatesInternal::FragmentShadingRateStateKhr)); break; case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: @@ -651,24 +665,27 @@ uint64_t GraphicsPipelineCommon::GetDynamicStateFlags( dynamicState |= foiMask & (1ULL << static_cast(DynamicStatesInternal::BlendConstants)); break; case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: - dynamicState |= foiMask & (1ULL << static_cast(DynamicStatesInternal::SampleLocations)); + dynamicState |= (fgsMask | foiMask) & + (1ULL << static_cast(DynamicStatesInternal::SampleLocations)); break; case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: dynamicState |= foiMask & (1ULL << static_cast(DynamicStatesInternal::ColorWriteEnable)); break; case 
VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT: - dynamicState |= foiMask & + dynamicState |= (fgsMask | foiMask) & (1ULL << static_cast(DynamicStatesInternal::RasterizationSamples)); break; case VK_DYNAMIC_STATE_SAMPLE_MASK_EXT: - dynamicState |= foiMask & (1ULL << static_cast(DynamicStatesInternal::SampleMask)); + dynamicState |= (fgsMask | foiMask) & + (1ULL << static_cast(DynamicStatesInternal::SampleMask)); break; case VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT: - dynamicState |= foiMask & + dynamicState |= (fgsMask | foiMask) & (1ULL << static_cast(DynamicStatesInternal::AlphaToCoverageEnable)); break; case VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT: - dynamicState |= foiMask & (1ULL << static_cast(DynamicStatesInternal::AlphaToOneEnable)); + dynamicState |= (fgsMask | foiMask) & + (1ULL << static_cast(DynamicStatesInternal::AlphaToOneEnable)); break; case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT: dynamicState |= foiMask & (1ULL << static_cast(DynamicStatesInternal::LogicOpEnable)); @@ -683,7 +700,7 @@ uint64_t GraphicsPipelineCommon::GetDynamicStateFlags( dynamicState |= foiMask & (1ULL << static_cast(DynamicStatesInternal::ColorWriteMask)); break; case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT: - dynamicState |= foiMask & + dynamicState |= (fgsMask | foiMask) & (1ULL << static_cast(DynamicStatesInternal::SampleLocationsEnable)); break; case VK_DYNAMIC_STATE_LOGIC_OP_EXT: @@ -701,6 +718,7 @@ uint64_t GraphicsPipelineCommon::GetDynamicStateFlags( // ===================================================================================================================== void GraphicsPipelineCommon::ExtractLibraryInfo( + const Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, VkPipelineCreateFlags2KHR flags, @@ -762,6 +780,43 @@ void GraphicsPipelineCommon::ExtractLibraryInfo( } } } + + if ((pLibInfo->flags.isLibrary == false) && + (pLibInfo->pPreRasterizationShaderLib != nullptr) || + (pLibInfo->pFragmentShaderLib != nullptr)) + { + uint64_t preRasterHash = 0; + uint64_t fragmentHash = 0; + if (pLibInfo->pPreRasterizationShaderLib != nullptr) + { + preRasterHash = pLibInfo->pPreRasterizationShaderLib-> + GetPipelineBinaryCreateInfo().libraryHash[GraphicsLibraryPreRaster]; + } + if (pLibInfo->pFragmentShaderLib != nullptr) + { + fragmentHash = pLibInfo->pFragmentShaderLib-> + GetPipelineBinaryCreateInfo().libraryHash[GraphicsLibraryFragment]; + } + + PipelineCompilerType compilerType = pDevice->GetCompiler(DefaultDeviceIndex)-> + CheckCompilerType(nullptr, preRasterHash, fragmentHash); + + if (pLibInfo->pPreRasterizationShaderLib != nullptr) + { + if (compilerType != pLibInfo->pPreRasterizationShaderLib->GetPipelineBinaryCreateInfo().compilerType) + { + pLibInfo->pPreRasterizationShaderLib = pLibInfo->pPreRasterizationShaderLib->GetAltLibrary(); + } + } + + if (pLibInfo->pFragmentShaderLib != nullptr) + { + if (compilerType != pLibInfo->pFragmentShaderLib->GetPipelineBinaryCreateInfo().compilerType) + { + pLibInfo->pFragmentShaderLib = pLibInfo->pFragmentShaderLib->GetAltLibrary(); + } + } + } } } @@ -829,8 +884,11 @@ VkResult GraphicsPipelineCommon::Create( if ((flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) != 0) { - result = GraphicsPipelineLibrary::Create( - pDevice, pPipelineCache, pCreateInfo, extStructs, flags, pAllocator, pPipeline); + uint32_t compilerMask = pDevice->GetCompiler(DefaultDeviceIndex)->GetCompilerCollectionMask(); + { + result = GraphicsPipelineLibrary::Create( + pDevice, pPipelineCache, pCreateInfo, extStructs, 
flags, 0, pAllocator, pPipeline); + } } else { @@ -994,21 +1052,6 @@ static void BuildRasterizationState( pInfo->pipeline.rsState.shadeMode = Pal::ShadeMode::Flat; pInfo->pipeline.rsState.rasterizeLastLinePixel = 0; - // Pipeline Binning Override - switch (pDevice->GetPipelineBinningMode()) - { - case PipelineBinningModeEnable: - pInfo->pipeline.rsState.binningOverride = Pal::BinningOverride::Enable; - break; - case PipelineBinningModeDisable: - pInfo->pipeline.rsState.binningOverride = Pal::BinningOverride::Disable; - break; - case PipelineBinningModeDefault: - default: - pInfo->pipeline.rsState.binningOverride = Pal::BinningOverride::Default; - break; - } - if (pRs != nullptr) { VK_ASSERT(pRs->sType == VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO); @@ -2153,18 +2196,23 @@ static void BuildExecutablePipelineState( // ===================================================================================================================== void GraphicsPipelineCommon::BuildPipelineObjectCreateInfo( - const Device* pDevice, - const VkGraphicsPipelineCreateInfo* pIn, - const GraphicsPipelineExtStructs& extStructs, - VkPipelineCreateFlags2KHR flags, - const PipelineOptimizerKey* pOptimizerKey, - const PipelineMetadata* pBinMeta, - GraphicsPipelineObjectCreateInfo* pInfo) + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pIn, + const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, + VkPipelineCreateFlags2KHR flags, + const PipelineOptimizerKey* pOptimizerKey, + const PipelineMetadata* pBinMeta, + GraphicsPipelineObjectCreateInfo* pInfo, + const GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo) { VK_ASSERT(pBinMeta != nullptr); - GraphicsPipelineLibraryInfo libInfo; - ExtractLibraryInfo(pIn, extStructs, flags, &libInfo); + pInfo->numTargets = 0; + for (uint32_t i = 0; i < MaxColorTargets; ++i) + { + pInfo->numTargets += pBinaryCreateInfo->pipelineInfo.cbState.target[i].channelWriteMask ? 
1 : 0; + } bool hasMesh = false; #if VKI_RAY_TRACING @@ -2242,6 +2290,34 @@ void GraphicsPipelineCommon::BuildPipelineObjectCreateInfo( { CopyFragmentOutputInterfaceState(libInfo.pFragmentOutputInterfaceLib, pInfo); } + + // Pipeline Binning Override + switch (pDevice->GetPipelineBinningMode()) + { + case PipelineBinningModeEnable: + pInfo->pipeline.rsState.binningOverride = Pal::BinningOverride::Enable; + break; + case PipelineBinningModeDisable: + pInfo->pipeline.rsState.binningOverride = Pal::BinningOverride::Disable; + break; + case PipelineBinningModeDefault: + default: + pInfo->pipeline.rsState.binningOverride = Pal::BinningOverride::Default; + break; + } + + // Override binning setting only when the shader has MRT >= 2 + if (pInfo->numTargets >= 2) + { + if (pDevice->GetRuntimeSettings().binningOverridePbbForMrt == BinningOverridePbbForMrtEnable) + { + pInfo->pipeline.rsState.binningOverride = Pal::BinningOverride::Enable; + } + else if (pDevice->GetRuntimeSettings().binningOverridePbbForMrt == BinningOverridePbbForMrtDisable) + { + pInfo->pipeline.rsState.binningOverride = Pal::BinningOverride::Disable; + } + } } if (libInfo.flags.isLibrary == false) @@ -2265,14 +2341,12 @@ void GraphicsPipelineCommon::GeneratePipelineOptimizerKey( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, VkPipelineCreateFlags2KHR flags, const GraphicsPipelineShaderStageInfo* pShaderStageInfo, ShaderOptimizerKey* pShaderKeys, PipelineOptimizerKey* pPipelineKey) { - GraphicsPipelineLibraryInfo libInfo; - GraphicsPipelineCommon::ExtractLibraryInfo(pCreateInfo, extStructs, flags, &libInfo); - pPipelineKey->shaderCount = VK_ARRAY_SIZE(pShaderStageInfo->stages); pPipelineKey->pShaders = pShaderKeys; @@ -2940,6 +3014,7 @@ void GraphicsPipelineCommon::BuildApiHash( const VkGraphicsPipelineCreateInfo* pCreateInfo, VkPipelineCreateFlags2KHR flags, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, const GraphicsPipelineBinaryCreateInfo& binaryCreateInfo, uint64_t* pApiHash, Util::MetroHash::Hash* pElfHash) @@ -2947,9 +3022,6 @@ void GraphicsPipelineCommon::BuildApiHash( Util::MetroHash128 elfHasher; Util::MetroHash128 apiHasher; - GraphicsPipelineLibraryInfo libInfo; - GraphicsPipelineCommon::ExtractLibraryInfo(pCreateInfo, extStructs, flags, &libInfo); - uint64_t dynamicStateFlags = GetDynamicStateFlags(pCreateInfo->pDynamicState, &libInfo); elfHasher.Update(dynamicStateFlags); diff --git a/icd/api/include/graphics_pipeline_common.h b/icd/api/include/graphics_pipeline_common.h index 0de05e0c..2ea9e2ad 100644 --- a/icd/api/include/graphics_pipeline_common.h +++ b/icd/api/include/graphics_pipeline_common.h @@ -145,6 +145,7 @@ struct GraphicsPipelineObjectCreateInfo VkShaderStageFlagBits activeStages; VkFormat dbFormat; uint64_t dynamicStates; + uint32_t numTargets; #if VKI_RAY_TRACING uint32_t dispatchRaysUserDataOffset; #endif @@ -200,6 +201,9 @@ struct GraphicsPipelineExtStructs : PipelineExtStructs const VkPipelineLibraryCreateInfoKHR* pPipelineLibraryCreateInfoKHR; }; +// Internal flags for graphics pipeline library +constexpr uint32_t VK_GRAPHICS_PIPELINE_LIBRARY_FORCE_LLPC = 1; + // ===================================================================================================================== // The common part used by both executable graphics pipelines and graphics pipeline libraries class GraphicsPipelineCommon : public Pipeline @@ -245,6 +249,7 @@ 
class GraphicsPipelineCommon : public Pipeline // Extract graphics pipeline library related info from VkGraphicsPipelineCreateInfo. static void ExtractLibraryInfo( + const Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, VkPipelineCreateFlags2KHR flags, @@ -264,19 +269,22 @@ class GraphicsPipelineCommon : public Pipeline protected: // Convert API information into internal create info used to create internal pipeline object static void BuildPipelineObjectCreateInfo( - const Device* pDevice, - const VkGraphicsPipelineCreateInfo* pIn, - const GraphicsPipelineExtStructs& extStructs, - VkPipelineCreateFlags2KHR flags, - const PipelineOptimizerKey* pOptimizerKey, - const PipelineMetadata* pBinMeta, - GraphicsPipelineObjectCreateInfo* pObjInfo); + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pIn, + const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, + VkPipelineCreateFlags2KHR flags, + const PipelineOptimizerKey* pOptimizerKey, + const PipelineMetadata* pBinMeta, + GraphicsPipelineObjectCreateInfo* pObjInfo, + const GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo); // Populates the profile key for tuning graphics pipelines static void GeneratePipelineOptimizerKey( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, VkPipelineCreateFlags2KHR flags, const GraphicsPipelineShaderStageInfo* pShaderStageInfo, ShaderOptimizerKey* pShaderKeys, @@ -287,6 +295,7 @@ class GraphicsPipelineCommon : public Pipeline const VkGraphicsPipelineCreateInfo* pCreateInfo, VkPipelineCreateFlags2KHR flags, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, const GraphicsPipelineBinaryCreateInfo& pBinaryCreateInfo, uint64_t* pApiHash, Util::MetroHash::Hash* elfHash); diff --git a/icd/api/include/khronos/devext/vk_amd_gpa_interface.h b/icd/api/include/khronos/devext/vk_amd_gpa_interface.h index 636a74fa..fd5a24b8 100644 --- a/icd/api/include/khronos/devext/vk_amd_gpa_interface.h +++ b/icd/api/include/khronos/devext/vk_amd_gpa_interface.h @@ -111,6 +111,7 @@ typedef enum VkGpaPerfBlockAMD VK_GPA_PERF_BLOCK_DF_MALL_AMD = 48, #if VKI_BUILD_GFX11 VK_GPA_PERF_BLOCK_SQ_WGP_AMD = 49, + VK_GPA_PERF_BLOCK_PC_AMD = 50, #endif VK_GPA_PERF_BLOCK_MAX_ENUM_AMD = 0x7FFFFFFF } VkGpaPerfBlockAMD; diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h index 38468f26..663697a9 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h @@ -69,7 +69,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 285 +#define VK_HEADER_VERSION 287 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) @@ -1113,6 +1113,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_POOL_OVERALLOCATION_FEATURES_NV = 1000546000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAW_ACCESS_CHAINS_FEATURES_NV = 1000555000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT16_VECTOR_FEATURES_NV = 1000563000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_REPLICATED_COMPOSITES_FEATURES_EXT = 1000564000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_VALIDATION_FEATURES_NV = 1000568000, 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ALIGNMENT_CONTROL_FEATURES_MESA = 1000575000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ALIGNMENT_CONTROL_PROPERTIES_MESA = 1000575001, @@ -1409,6 +1410,7 @@ typedef enum VkObjectType { } VkObjectType; typedef enum VkVendorId { + VK_VENDOR_ID_KHRONOS = 0x10000, VK_VENDOR_ID_VIV = 0x10001, VK_VENDOR_ID_VSI = 0x10002, VK_VENDOR_ID_KAZAN = 0x10003, @@ -5790,7 +5792,8 @@ typedef enum VkDriverId { VK_DRIVER_ID_MESA_DOZEN = 23, VK_DRIVER_ID_MESA_NVK = 24, VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA = 25, - VK_DRIVER_ID_MESA_AGXV = 26, + VK_DRIVER_ID_MESA_HONEYKRISP = 26, + VK_DRIVER_ID_RESERVED_27 = 27, VK_DRIVER_ID_AMD_PROPRIETARY_KHR = VK_DRIVER_ID_AMD_PROPRIETARY, VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR = VK_DRIVER_ID_AMD_OPEN_SOURCE, VK_DRIVER_ID_MESA_RADV_KHR = VK_DRIVER_ID_MESA_RADV, @@ -10265,7 +10268,7 @@ typedef struct VkRenderingInputAttachmentIndexInfoKHR { } VkRenderingInputAttachmentIndexInfoKHR; typedef void (VKAPI_PTR *PFN_vkCmdSetRenderingAttachmentLocationsKHR)(VkCommandBuffer commandBuffer, const VkRenderingAttachmentLocationInfoKHR* pLocationInfo); -typedef void (VKAPI_PTR *PFN_vkCmdSetRenderingInputAttachmentIndicesKHR)(VkCommandBuffer commandBuffer, const VkRenderingInputAttachmentIndexInfoKHR* pLocationInfo); +typedef void (VKAPI_PTR *PFN_vkCmdSetRenderingInputAttachmentIndicesKHR)(VkCommandBuffer commandBuffer, const VkRenderingInputAttachmentIndexInfoKHR* pInputAttachmentIndexInfo); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkCmdSetRenderingAttachmentLocationsKHR( @@ -10274,7 +10277,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetRenderingAttachmentLocationsKHR( VKAPI_ATTR void VKAPI_CALL vkCmdSetRenderingInputAttachmentIndicesKHR( VkCommandBuffer commandBuffer, - const VkRenderingInputAttachmentIndexInfoKHR* pLocationInfo); + const VkRenderingInputAttachmentIndexInfoKHR* pInputAttachmentIndexInfo); #endif @@ -19160,6 +19163,18 @@ typedef struct VkPhysicalDeviceShaderAtomicFloat16VectorFeaturesNV { +// VK_EXT_shader_replicated_composites is a preprocessor guard. Do not pass it to API calls. +#define VK_EXT_shader_replicated_composites 1 +#define VK_EXT_SHADER_REPLICATED_COMPOSITES_SPEC_VERSION 1 +#define VK_EXT_SHADER_REPLICATED_COMPOSITES_EXTENSION_NAME "VK_EXT_shader_replicated_composites" +typedef struct VkPhysicalDeviceShaderReplicatedCompositesFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 shaderReplicatedComposites; +} VkPhysicalDeviceShaderReplicatedCompositesFeaturesEXT; + + + // VK_NV_ray_tracing_validation is a preprocessor guard. Do not pass it to API calls. 
#define VK_NV_ray_tracing_validation 1 #define VK_NV_RAY_TRACING_VALIDATION_SPEC_VERSION 1 diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index e10e5e34..696092a3 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -229,6 +229,7 @@ class PipelineCompiler Device* pDevice, const VkGraphicsPipelineCreateInfo* pIn, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, VkPipelineCreateFlags2KHR flags, const GraphicsPipelineShaderStageInfo* pShaderInfo, const PipelineLayout* pPipelineLayout, @@ -321,7 +322,10 @@ class PipelineCompiler #endif template - PipelineCompilerType CheckCompilerType(const PipelineBuildInfo* pPipelineBuildInfo); + PipelineCompilerType CheckCompilerType( + const PipelineBuildInfo* pPipelineBuildInfo, + uint64_t preRasterHash, + uint64_t fragmentHash); uint32_t GetCompilerCollectionMask(); @@ -425,11 +429,13 @@ class PipelineCompiler const VkVertexInputBindingDescription2EXT* pVertexBindingDescriptions, uint32_t vertexAttributeDescriptionCount, const VkVertexInputAttributeDescription2EXT* pVertexAttributeDescriptions, - void* pUberFetchShaderInternalData); + void* pUberFetchShaderInternalData, + bool isOffsetMode); uint32_t BuildUberFetchShaderInternalData( const VkPipelineVertexInputStateCreateInfo* pVertexInput, bool dynamicStride, + bool isOffsetMode, void* pUberFetchShaderInternalData) const; static void ReadBinaryMetadata( @@ -475,6 +481,12 @@ class PipelineCompiler uint32_t binaryCount, const Vkgc::BinaryData* pElfBinary, VkResult result); + + static void InitPipelineDumpOption( + Vkgc::PipelineDumpOptions* pDumpOptions, + const RuntimeSettings& settings, + char* pBuffer, + PipelineCompilerType type); private: PAL_DISALLOW_COPY_AND_ASSIGN(PipelineCompiler); @@ -539,6 +551,7 @@ class PipelineCompiler uint32_t vertexDivisorDescriptionCount, const VertexInputDivisor* pVertexDivisorDescriptions, bool isDynamicStride, + bool isOffsetMode, void* pUberFetchShaderInternalData) const; // ----------------------------------------------------------------------------------------------------------------- diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index b74a3f69..7ff18c47 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -732,7 +732,7 @@ class CmdBuffer const VkRenderingAttachmentLocationInfoKHR* pLocationInfo); void SetRenderingInputAttachmentIndices( - const VkRenderingInputAttachmentIndexInfoKHR* pLocationInfo); + const VkRenderingInputAttachmentIndexInfoKHR* pInputAttachmentIndexInfo); void SetColorBlendEnable( uint32_t firstAttachment, @@ -1580,7 +1580,11 @@ class CmdBuffer typedef uint32_t RebindUserDataFlags; - RebindUserDataFlags SwitchUserDataLayouts( + RebindUserDataFlags SwitchCompactSchemeUserDataLayouts( + PipelineBindPoint apiBindPoint, + const UserDataLayout* pUserDataLayout); + + RebindUserDataFlags SwitchCommonUserDataLayouts( PipelineBindPoint apiBindPoint, const UserDataLayout* pUserDataLayout); @@ -2947,7 +2951,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetRenderingAttachmentLocationsKHR( VKAPI_ATTR void VKAPI_CALL vkCmdSetRenderingInputAttachmentIndicesKHR( VkCommandBuffer commandBuffer, - const VkRenderingInputAttachmentIndexInfoKHR* pLocationInfo); + const VkRenderingInputAttachmentIndexInfoKHR* pInputAttachmentIndexInfo); } // namespace entry diff --git a/icd/api/include/vk_conv.h b/icd/api/include/vk_conv.h index 5a66cba6..3dd25481 100755 --- a/icd/api/include/vk_conv.h +++ 
b/icd/api/include/vk_conv.h @@ -3209,7 +3209,8 @@ inline Pal::GpuBlock VkToPalGpuBlock( (static_cast(VK_GPA_PERF_BLOCK_GE_SE_AMD) == static_cast(Pal::GpuBlock::GeSe)) && (static_cast(VK_GPA_PERF_BLOCK_DF_MALL_AMD) == static_cast(Pal::GpuBlock::DfMall)) #if VKI_BUILD_GFX11 - && (static_cast(VK_GPA_PERF_BLOCK_SQ_WGP_AMD) == static_cast(Pal::GpuBlock::SqWgp)) + && (static_cast(VK_GPA_PERF_BLOCK_SQ_WGP_AMD) == static_cast(Pal::GpuBlock::SqWgp)) && + (static_cast(VK_GPA_PERF_BLOCK_PC_AMD) == static_cast(Pal::GpuBlock::Pc)) #endif , "Need to update function convert::GpuBlock"); @@ -4065,7 +4066,8 @@ VkResult InitializeUberFetchShaderFormatTable( UberFetchShaderFormatInfo GetUberFetchShaderFormatInfo( const UberFetchShaderFormatInfoMap* pFormatInfoMap, const VkFormat vkFormat, - const bool isZeroStride); + const bool isZeroStride, + const bool isOffsetMode); // ===================================================================================================================== VkFormat GetLowPrecisionDepthFormat( diff --git a/icd/api/include/vk_defines.h b/icd/api/include/vk_defines.h index 33977836..849d5e60 100644 --- a/icd/api/include/vk_defines.h +++ b/icd/api/include/vk_defines.h @@ -189,7 +189,7 @@ namespace vk static const uint32_t MaxDescriptorSets = 32; // The maximum size of push constants in bytes - static const uint32_t MaxPushConstants = 128; + static const uint32_t MaxPushConstants = 256; // The maximum number of push descriptors that can appear in a descriptor set static const uint32_t MaxPushDescriptors = 32; @@ -207,7 +207,8 @@ namespace vk // Enumerates the compiler types enum PipelineCompilerType : uint32_t { - PipelineCompilerTypeLlpc, // Use shader compiler provided by LLPC + PipelineCompilerTypeInvalid, // shader compiler is unknown + PipelineCompilerTypeLlpc, // Use shader compiler provided by LLPC }; // Point size must be set via gl_PointSize, otherwise it must be 1.0f diff --git a/icd/api/include/vk_graphics_pipeline.h b/icd/api/include/vk_graphics_pipeline.h index 16b5b8c1..21e69dce 100644 --- a/icd/api/include/vk_graphics_pipeline.h +++ b/icd/api/include/vk_graphics_pipeline.h @@ -162,6 +162,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, VkPipelineCreateFlags2KHR flags, GraphicsPipelineShaderStageInfo* pShaderStageInfo, GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, @@ -253,6 +254,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, VkPipelineCreateFlags2KHR flags, const GraphicsPipelineShaderStageInfo* pShaderInfo, const PipelineLayout* pPipelineLayout, diff --git a/icd/api/include/vk_graphics_pipeline_library.h b/icd/api/include/vk_graphics_pipeline_library.h index accfcd9c..94a70ba9 100644 --- a/icd/api/include/vk_graphics_pipeline_library.h +++ b/icd/api/include/vk_graphics_pipeline_library.h @@ -43,6 +43,7 @@ class GraphicsPipelineLibrary final : public GraphicsPipelineCommon, public NonD const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, VkPipelineCreateFlags2KHR flags, + uint32_t internalFlags, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline); @@ -66,6 +67,10 @@ class GraphicsPipelineLibrary final : public 
GraphicsPipelineCommon, public NonD { return &m_elfHash; } void GetOwnedPalShaderLibraries(const Pal::IShaderLibrary* pLibraries[GraphicsLibraryCount]) const; + + void SetAltLibrary(GraphicsPipelineLibrary* pLibrary) { m_altLibrary = pLibrary; } + + GraphicsPipelineLibrary* GetAltLibrary() const { return m_altLibrary; } private: PAL_DISALLOW_COPY_AND_ASSIGN(GraphicsPipelineLibrary); @@ -94,6 +99,7 @@ class GraphicsPipelineLibrary final : public GraphicsPipelineCommon, public NonD const GraphicsPipelineLibraryInfo m_libInfo; GplModuleState m_gplModuleStates[ShaderStage::ShaderStageGfxCount]; const Util::MetroHash::Hash m_elfHash; + GraphicsPipelineLibrary* m_altLibrary; }; } diff --git a/icd/api/include/vk_image.h b/icd/api/include/vk_image.h index 85100d90..a36509ab 100644 --- a/icd/api/include/vk_image.h +++ b/icd/api/include/vk_image.h @@ -183,15 +183,6 @@ class Image final : public NonDispatchable uint32_t GetArraySize() const { return m_arraySize; } - bool IsPresentable() const - { return m_pSwapChain != nullptr; } - - VK_FORCEINLINE SwapChain* GetSwapChain() const - { - VK_ASSERT(m_pSwapChain != nullptr); - return m_pSwapChain; - } - // We have to treat the image sparse if any of these flags are set static const VkImageCreateFlags SparseEnablingFlags = VK_IMAGE_CREATE_SPARSE_BINDING_BIT | @@ -415,6 +406,11 @@ class Image final : public NonDispatchable const VkImageCreateInfo* pCreateInfo, VkMemoryRequirements2* pMemoryRequirements); + static void GetVideoEncodeContextBufferMemoryRequirements( + const Device* pDevice, + const VkImageCreateInfo* pCreateInfo, + VkMemoryRequirements* pMemoryRequirements); + #if defined(__unix__) static VkResult CreateImageWithModifierList( Device* pDevice, @@ -474,9 +470,6 @@ class Image final : public NonDispatchable ImageBarrierPolicy m_barrierPolicy; // Barrier policy to use for this image - SwapChain* m_pSwapChain; // If this image is a presentable image this tells - // which swap chain the image belongs to - ResourceOptimizerKey m_ResourceKey; VkMemoryRequirements m_memoryRequirements; // Image's memory requirements, including strict size if used diff --git a/icd/api/include/vk_memory.h b/icd/api/include/vk_memory.h index b4f7d4bc..8c661d4f 100644 --- a/icd/api/include/vk_memory.h +++ b/icd/api/include/vk_memory.h @@ -107,9 +107,10 @@ class Memory final : public NonDispatchable VkDeviceMemory* pMemory); static VkResult OpenExternalMemory( - Device* pDevice, - const ImportMemoryInfo& importInfo, - Memory** ppMemory); + Device* pDevice, + const Pal::GpuMemoryCreateInfo& localCreateInfo, + const ImportMemoryInfo& importInfo, + Memory** ppMemory); Pal::OsExternalHandle GetShareHandle(VkExternalMemoryHandleTypeFlagBits handleType); diff --git a/icd/api/include/vk_pipeline_layout.h b/icd/api/include/vk_pipeline_layout.h index 9ef67f1c..f792d87d 100644 --- a/icd/api/include/vk_pipeline_layout.h +++ b/icd/api/include/vk_pipeline_layout.h @@ -59,6 +59,15 @@ struct UserDataLayout { PipelineLayoutScheme scheme; + struct + { + // Base user data register index to use for push constants + uint32_t pushConstRegBase; + // Number of user data registers used for push constants + uint32_t pushConstRegCount; + + } common; + union { struct @@ -69,11 +78,6 @@ struct UserDataLayout // Number of user data registers used for the set binding points uint32_t setBindingRegCount; - // Base user data register index to use for push constants - uint32_t pushConstRegBase; - // Number of user data registers used for push constants - uint32_t pushConstRegCount; - // Base user data 
register index to use for transform feedback. uint32_t transformFeedbackRegBase; // Number of user data registers used for transform feedback @@ -111,12 +115,6 @@ struct UserDataLayout // The total number of user data registers used is always MaxDescriptorSets * 2 * SetPtrRegCount uint32_t setBindingPtrRegBase; - // Base user data register index to use for buffer storing push constant data - // The number of user data register used is always 1 - uint32_t pushConstPtrRegBase; - - // The size of buffer required to store push constants - uint32_t pushConstSizeInDword; // Base use data register for debug printf uint32_t debugPrintfRegBase; @@ -327,6 +325,7 @@ class PipelineLayout final : public NonDispatchable(this); } +// ===================================================================================================================== inline const QueryPoolWithStorageView* QueryPool::AsQueryPoolWithStorageView() const { if ((m_queryType != VK_QUERY_TYPE_TIMESTAMP) diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 07c5e1a4..30c6fa53 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -128,6 +128,20 @@ static uint32_t GpuRtShaderLibraryFlags( } #endif +// ===================================================================================================================== +void PipelineCompiler::InitPipelineDumpOption( + Vkgc::PipelineDumpOptions* pDumpOptions, + const RuntimeSettings& settings, + char* pBuffer, + PipelineCompilerType type) +{ + pDumpOptions->filterPipelineDumpByType = settings.filterPipelineDumpByType; + pDumpOptions->filterPipelineDumpByHash = settings.filterPipelineDumpByHash; + pDumpOptions->dumpDuplicatePipelines = settings.dumpDuplicatePipelines; + pDumpOptions->pDumpDir = settings.pipelineDumpDir; +} + +// ===================================================================================================================== #if VKI_RAY_TRACING #endif @@ -1113,10 +1127,8 @@ VkResult PipelineCompiler::CreateGraphicsPipelineBinary( if (settings.enablePipelineDump && (result == VK_SUCCESS)) { Vkgc::PipelineDumpOptions dumpOptions = {}; - dumpOptions.pDumpDir = settings.pipelineDumpDir; - dumpOptions.filterPipelineDumpByType = settings.filterPipelineDumpByType; - dumpOptions.filterPipelineDumpByHash = settings.filterPipelineDumpByHash; - dumpOptions.dumpDuplicatePipelines = settings.dumpDuplicatePipelines; + char tempBuff[Util::MaxPathStrLen]; + InitPipelineDumpOption(&dumpOptions, settings, tempBuff, pCreateInfo->compilerType); Vkgc::PipelineBuildInfo pipelineInfo = {}; pipelineInfo.pGraphicsInfo = &pCreateInfo->pipelineInfo; @@ -1216,10 +1228,8 @@ VkResult PipelineCompiler::CreateGraphicsShaderBinary( uint64_t dumpHash = settings.dumpPipelineWithApiHash ? 
pCreateInfo->apiPsoHash : libraryHash; Vkgc::PipelineDumpOptions dumpOptions = {}; - dumpOptions.pDumpDir = settings.pipelineDumpDir; - dumpOptions.filterPipelineDumpByType = settings.filterPipelineDumpByType; - dumpOptions.filterPipelineDumpByHash = settings.filterPipelineDumpByHash; - dumpOptions.dumpDuplicatePipelines = settings.dumpDuplicatePipelines; + char tempBuff[Util::MaxPathStrLen]; + InitPipelineDumpOption(&dumpOptions, settings, tempBuff, pCreateInfo->compilerType); Vkgc::PipelineBuildInfo pipelineInfo = {}; pipelineInfo.pGraphicsInfo = &pCreateInfo->pipelineInfo; @@ -1288,10 +1298,8 @@ VkResult PipelineCompiler::CreateColorExportShaderLibrary( MetroHash::Compact64(&cacheId); Vkgc::PipelineDumpOptions dumpOptions = {}; - dumpOptions.pDumpDir = settings.pipelineDumpDir; - dumpOptions.filterPipelineDumpByType = settings.filterPipelineDumpByType; - dumpOptions.filterPipelineDumpByHash = settings.filterPipelineDumpByHash; - dumpOptions.dumpDuplicatePipelines = settings.dumpDuplicatePipelines; + char tempBuff[Util::MaxPathStrLen]; + InitPipelineDumpOption(&dumpOptions, settings, tempBuff, pCreateInfo->compilerType); Vkgc::PipelineBuildInfo pipelineInfo = {}; GraphicsPipelineBuildInfo graphicsInfo = pCreateInfo->pipelineInfo; @@ -1488,10 +1496,8 @@ VkResult PipelineCompiler::CreateComputePipelineBinary( if (settings.enablePipelineDump && (result == VK_SUCCESS)) { Vkgc::PipelineDumpOptions dumpOptions = {}; - dumpOptions.pDumpDir = settings.pipelineDumpDir; - dumpOptions.filterPipelineDumpByType = settings.filterPipelineDumpByType; - dumpOptions.filterPipelineDumpByHash = settings.filterPipelineDumpByHash; - dumpOptions.dumpDuplicatePipelines = settings.dumpDuplicatePipelines; + char tempBuff[Util::MaxPathStrLen]; + InitPipelineDumpOption(&dumpOptions, settings, tempBuff, pCreateInfo->compilerType); Vkgc::PipelineBuildInfo pipelineInfo = {}; pipelineInfo.pComputeInfo = &pCreateInfo->pipelineInfo; @@ -1865,6 +1871,8 @@ static void CopyPreRasterizationShaderState( pCreateInfo->pipelineInfo.rsState.rasterStream = libInfo.pipelineInfo.rsState.rasterStream; pCreateInfo->pipelineInfo.nggState = libInfo.pipelineInfo.nggState; pCreateInfo->pipelineInfo.enableUberFetchShader = libInfo.pipelineInfo.enableUberFetchShader; + pCreateInfo->pipelineInfo.useSoftwareVertexBufferDescriptors = + libInfo.pipelineInfo.useSoftwareVertexBufferDescriptors; MergePipelineOptions(libInfo.pipelineInfo.options, &pCreateInfo->pipelineInfo.options); @@ -2309,7 +2317,11 @@ static void BuildCompilerInfo( &pCreateInfo->pipelineInfo.fs, }; - pCreateInfo->compilerType = pDevice->GetCompiler(DefaultDeviceIndex)->CheckCompilerType(&pCreateInfo->pipelineInfo); + if (pCreateInfo->compilerType == PipelineCompilerTypeInvalid) + { + pCreateInfo->compilerType = + pDevice->GetCompiler(DefaultDeviceIndex)->CheckCompilerType(&pCreateInfo->pipelineInfo, 0, 0); + } for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) { @@ -2668,7 +2680,10 @@ static void BuildPreRasterizationShaderState( BuildPipelineShadersInfo(pDevice, pIn, dynamicStateFlags, pShaderInfo, pCreateInfo); - BuildCompilerInfo(pDevice, pShaderInfo, PrsShaderMask, pCreateInfo); + if (libInfo.flags.isLibrary) + { + BuildCompilerInfo(pDevice, pShaderInfo, PrsShaderMask, pCreateInfo); + } if (pCreateInfo->pipelineInfo.options.enableRelocatableShaderElf) { @@ -2680,6 +2695,7 @@ static void BuildPreRasterizationShaderState( static void BuildFragmentShaderState( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pIn, + const 
GraphicsPipelineLibraryInfo& libInfo, const GraphicsPipelineShaderStageInfo* pShaderInfo, GraphicsPipelineBinaryCreateInfo* pCreateInfo, const uint64_t dynamicStateFlags) @@ -2693,7 +2709,10 @@ static void BuildFragmentShaderState( BuildPipelineShadersInfo(pDevice, pIn, 0, pShaderInfo, pCreateInfo); - BuildCompilerInfo(pDevice, pShaderInfo, FgsShaderMask, pCreateInfo); + if (libInfo.flags.isLibrary) + { + BuildCompilerInfo(pDevice, pShaderInfo, FgsShaderMask, pCreateInfo); + } } // ===================================================================================================================== @@ -2924,6 +2943,7 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( Device* pDevice, const VkGraphicsPipelineCreateInfo* pIn, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, VkPipelineCreateFlags2KHR flags, const GraphicsPipelineShaderStageInfo* pShaderInfo, const PipelineLayout* pPipelineLayout, @@ -2935,15 +2955,11 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( VkResult result = VK_SUCCESS; - GraphicsPipelineLibraryInfo libInfo; - if (result == VK_SUCCESS) { pCreateInfo->pBinaryMetadata = pBinaryMetadata; pCreateInfo->pPipelineProfileKey = pPipelineProfileKey; - GraphicsPipelineCommon::ExtractLibraryInfo(pIn, extStructs, flags, &libInfo); - pCreateInfo->libFlags = libInfo.libFlags; pCreateInfo->libFlags |= (libInfo.pVertexInputInterfaceLib == nullptr) ? @@ -3002,9 +3018,10 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( { if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { - BuildFragmentShaderState(pDevice, pIn, pShaderInfo, pCreateInfo, dynamicStateFlags); + BuildFragmentShaderState(pDevice, pIn, libInfo, pShaderInfo, pCreateInfo, dynamicStateFlags); pCreateInfo->pipelineInfo.enableColorExportShader = - (pDevice->GetRuntimeSettings().useShaderLibraryForPipelineLibraryFastLink && + (libInfo.flags.isLibrary && + pDevice->GetRuntimeSettings().useShaderLibraryForPipelineLibraryFastLink && ((pShaderInfo->stages[ShaderStageFragment].pModuleHandle != nullptr) || (pShaderInfo->stages[ShaderStageFragment].codeHash.lower != 0) || (pShaderInfo->stages[ShaderStageFragment].codeHash.upper != 0))); @@ -3113,6 +3130,7 @@ VkResult PipelineCompiler::BuildGplFastLinkCreateInfo( PipelineMetadata* pBinaryMetadata, GraphicsPipelineBinaryCreateInfo* pCreateInfo) { + VK_ASSERT(pIn != nullptr); VK_ASSERT(libInfo.pPreRasterizationShaderLib != nullptr); VK_ASSERT(libInfo.pFragmentShaderLib != nullptr); @@ -3210,7 +3228,9 @@ VkResult PipelineCompiler::BuildGplFastLinkCreateInfo( // Checks which compiler is used template PipelineCompilerType PipelineCompiler::CheckCompilerType( - const PipelineBuildInfo* pPipelineBuildInfo) + const PipelineBuildInfo* pPipelineBuildInfo, + uint64_t preRasterHash, + uint64_t fragmentHash) { uint32_t availCompilerMask = 0; uint32_t compilerMask = 0; @@ -3432,7 +3452,7 @@ VkResult PipelineCompiler::ConvertComputePipelineInfo( if (result == VK_SUCCESS) { - pCreateInfo->compilerType = CheckCompilerType(&pCreateInfo->pipelineInfo); + pCreateInfo->compilerType = CheckCompilerType(&pCreateInfo->pipelineInfo, 0, 0); if (pShaderInfo->stage.pModuleHandle != nullptr) { @@ -3986,7 +4006,7 @@ VkResult PipelineCompiler::ConvertRayTracingPipelineInfo( } { - pCreateInfo->compilerType = CheckCompilerType(&pCreateInfo->pipelineInfo); + pCreateInfo->compilerType = CheckCompilerType(&pCreateInfo->pipelineInfo, 0, 0); } for (uint32_t i = 0; i < pShaderInfo->stageCount; ++i) @@ -4099,10 +4119,8 @@ VkResult 
PipelineCompiler::CreateRayTracingPipelineBinary( { Vkgc::PipelineDumpOptions dumpOptions = {}; - dumpOptions.pDumpDir = settings.pipelineDumpDir; - dumpOptions.filterPipelineDumpByType = settings.filterPipelineDumpByType; - dumpOptions.filterPipelineDumpByHash = settings.filterPipelineDumpByHash; - dumpOptions.dumpDuplicatePipelines = settings.dumpDuplicatePipelines; + char tempBuff[Util::MaxPathStrLen]; + InitPipelineDumpOption(&dumpOptions, settings, tempBuff, pCreateInfo->compilerType); Vkgc::PipelineBuildInfo pipelineInfo = {}; @@ -5260,6 +5278,7 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalDataImp( uint32_t vertexDivisorDescriptionCount, const VertexInputDivisor* pVertexDivisorDescriptions, bool isDynamicStride, + bool isOffsetMode, void* pUberFetchShaderInternalData) const { const auto& settings = m_pPhysicalDevice->GetRuntimeSettings(); @@ -5300,10 +5319,37 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalDataImp( auto pBinding = GetVertexInputBinding(pAttrib->binding, vertexBindingDescriptionCount, pVertexBindingDescriptions); - auto attribFormatInfo = GetUberFetchShaderFormatInfo( - &m_uberFetchShaderInfoFormatMap, - pAttrib->format, - (isDynamicStride == false) && (pBinding->stride == 0)); + + uint32_t stride = pBinding->stride; + if (isDynamicStride) + { + stride = settings.forceAlignedForDynamicStride ? 0 : 1; + } + + if (settings.forcePerComponentFetchForUnalignedVbFormat == 1) + { + // Force stride to 1, to handle unaligned offsets + switch (pAttrib->format) + { + case VK_FORMAT_R8G8_SSCALED: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8_SNORM: + case VK_FORMAT_R8G8_USCALED: + case VK_FORMAT_R8G8_SINT: + case VK_FORMAT_R8G8B8A8_UINT: + case VK_FORMAT_R8G8B8A8_SNORM: + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_R16G16B16A16_USCALED: + stride = 1; + break; + default: + break; + } + } + + auto attribFormatInfo = + GetUberFetchShaderFormatInfo(&m_uberFetchShaderInfoFormatMap, pAttrib->format, (stride == 0), isOffsetMode); + void* pAttribInternalData = Util::VoidPtrInc(pAttribInternalBase, sizeof(Vkgc::UberFetchShaderAttribInfo) * pAttrib->location); @@ -5326,12 +5372,6 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalDataImp( } else { - uint32_t stride = pBinding->stride; - if (isDynamicStride) - { - stride = settings.forceAlignedForDynamicStride ? 
0 : 1; - } - if (((stride % attribFormatInfo.alignment) == 0) && ((pAttrib->offset % attribFormatInfo.alignment) == 0)) { @@ -5449,7 +5489,8 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalData( const VkVertexInputBindingDescription2EXT* pVertexBindingDescriptions, uint32_t vertexAttributeDescriptionCount, const VkVertexInputAttributeDescription2EXT* pVertexAttributeDescriptions, - void* pUberFetchShaderInternalData) + void* pUberFetchShaderInternalData, + bool isOffsetMode) { uint32_t dataSize = BuildUberFetchShaderInternalDataImp(vertexBindingDescriptionCount, @@ -5459,6 +5500,7 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalData( vertexBindingDescriptionCount, pVertexBindingDescriptions, false, + isOffsetMode, pUberFetchShaderInternalData); return dataSize; @@ -5469,6 +5511,7 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalData( uint32_t PipelineCompiler::BuildUberFetchShaderInternalData( const VkPipelineVertexInputStateCreateInfo* pVertexInput, bool dynamicStride, + bool isOffsetMode, void* pUberFetchShaderInternalData) const { const VkPipelineVertexInputDivisorStateCreateInfoEXT* pVertexDivisor = nullptr; @@ -5494,6 +5537,7 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalData( pVertexDivisor != nullptr ? pVertexDivisor->vertexBindingDivisorCount : 0, pVertexDivisor != nullptr ? pVertexDivisor->pVertexBindingDivisors : nullptr, dynamicStride, + isOffsetMode, pUberFetchShaderInternalData); } @@ -5624,10 +5668,8 @@ void PipelineCompiler::DumpPipeline( VkResult result) { Vkgc::PipelineDumpOptions dumpOptions = {}; - dumpOptions.pDumpDir = settings.pipelineDumpDir; - dumpOptions.filterPipelineDumpByType = settings.filterPipelineDumpByType; - dumpOptions.filterPipelineDumpByHash = settings.filterPipelineDumpByHash; - dumpOptions.dumpDuplicatePipelines = settings.dumpDuplicatePipelines; + char tempBuff[Util::MaxPathStrLen]; + InitPipelineDumpOption(&dumpOptions, settings, tempBuff, PipelineCompilerTypeInvalid); void* pPipelineDumpHandle = nullptr; if (settings.dumpPipelineWithApiHash) { @@ -5661,16 +5703,22 @@ void PipelineCompiler::DumpPipeline( template PipelineCompilerType PipelineCompiler::CheckCompilerType( - const Vkgc::ComputePipelineBuildInfo* pPipelineBuildInfo); + const Vkgc::ComputePipelineBuildInfo* pPipelineBuildInfo, + uint64_t preRasterHash, + uint64_t fragmentHash); template PipelineCompilerType PipelineCompiler::CheckCompilerType( - const Vkgc::GraphicsPipelineBuildInfo* pPipelineBuildInfo); + const Vkgc::GraphicsPipelineBuildInfo* pPipelineBuildInfo, + uint64_t preRasterHash, + uint64_t fragmentHash); #if VKI_RAY_TRACING template PipelineCompilerType PipelineCompiler::CheckCompilerType( - const Vkgc::RayTracingPipelineBuildInfo* pPipelineBuildInfo); + const Vkgc::RayTracingPipelineBuildInfo* pPipelineBuildInfo, + uint64_t preRasterHash, + uint64_t fragmentHash); #endif } diff --git a/icd/api/vk_buffer.cpp b/icd/api/vk_buffer.cpp index 07cf6489..0d35fa27 100644 --- a/icd/api/vk_buffer.cpp +++ b/icd/api/vk_buffer.cpp @@ -272,6 +272,17 @@ void Buffer::LogBufferCreate( desc.createFlags = pCreateInfo->flags; desc.usageFlags = pCreateInfo->usage; +#if VKI_RAY_TRACING + const VkBufferUsageFlagBits2KHR rtBufferUsageFlags = + VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | + VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | + VK_BUFFER_USAGE_2_SHADER_BINDING_TABLE_BIT_KHR; + const bool isBufferUsedForRt = (Device::GetBufferUsageFlagBits(pCreateInfo) & rtBufferUsageFlags) != 0; + if (isBufferUsedForRt) + { + 
desc.usageFlags |= static_cast(PalUsageFlag::RayTracing); + } +#endif Buffer* pBufferObj = Buffer::ObjectFromHandle(buffer); Pal::ResourceCreateEventData data = {}; @@ -285,6 +296,22 @@ &data, sizeof(Pal::ResourceCreateEventData)); +#if VKI_RAY_TRACING + if (isBufferUsedForRt) + { + Pal::ResourceUpdateEventData updateData = {}; + updateData.pObj = pBufferObj; + updateData.type = Pal::ResourceType::Buffer; + updateData.subresourceId = 0; + updateData.beforeUsageFlags = 0; + updateData.afterUsageFlags = static_cast(PalUsageFlag::RayTracing); + + pDevice->VkInstance()->PalPlatform()->LogEvent( + Pal::PalEvent::ResourceInfoUpdate, + &updateData, + sizeof(updateData)); + } +#endif // If there is already memory bound, log it now. // @NOTE - This only handles the single GPU case currently. MGPU is not supported by RMV v1 Pal::IGpuMemory* pPalMemory = pBufferObj->PalMemory(DefaultDeviceIndex); diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index d3885915..7e95bd61 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -85,7 +85,7 @@ namespace 0, // range; 0, // stride; Pal::UndefinedSwizzledFormat, - 0, // flags + {{0}}, // flags }; // ===================================================================================================================== @@ -2054,10 +2054,12 @@ void CmdBuffer::RebindPipeline() VK_NEVER_CALLED(); } + // Push constant user data layouts are scheme-agnostic, so they are always checked and rebound if + // needed. // In compact scheme, the top-level user data layout of two compatible pipeline layouts may be different. - // Thus, pipeline layout needs to be checked and rebind the user data if needed. + // Thus, the pipeline layout needs to be checked and the user data rebound if needed. // In indirect scheme, the top-level user data layout is always the same for all the pipeline layouts built - // in this scheme. So user data doesn't require to be rebind in this case. + // in this scheme, so the user data does not need to be rebound in this case. // Pipeline layouts in different schemes can never be compatible. In this case, calling vkCmdBindDescriptorSets() // to rebind descriptor sets is mandatory for the user. if ((pNewUserDataLayout->scheme == m_allGpuState.pipelineState[bindPoint].userDataLayout.scheme) && @@ -2084,10 +2086,15 @@ void CmdBuffer::RebindPipeline() // A user data layout switch may also require some user data to be reloaded (for both gfx and compute). if (pNewUserDataLayout != nullptr) { - rebindFlags |= SwitchUserDataLayouts(bindPoint, pNewUserDataLayout); + rebindFlags |= SwitchCompactSchemeUserDataLayouts(bindPoint, pNewUserDataLayout); } } + rebindFlags |= SwitchCommonUserDataLayouts(bindPoint, pNewUserDataLayout); + + // Cache the new user data layout information + m_allGpuState.pipelineState[bindPoint].userDataLayout = *pNewUserDataLayout; + // Reprogram the user data if necessary if (rebindFlags != 0) { @@ -2167,10 +2174,10 @@ void CmdBuffer::BindPipeline( // ===================================================================================================================== // Called during vkCmdBindPipeline when the new pipeline's layout might be different from the previously bound layout. -// This function will compare the compatibility of those layouts and reprogram any user data to maintain previously- -// written pipeline resources to make them available in the correct locations of the new pipeline layout. -// compatible with the new layout remain correctly bound. 
-CmdBuffer::RebindUserDataFlags CmdBuffer::SwitchUserDataLayouts( +// This function will compare the compatibility of those layouts in compact scheme and reprogram any user data to +// maintain previously-written pipeline resources to make them available in the correct locations of the new pipeline +// layout. Those that are compatible with the new layout remain correctly bound. +CmdBuffer::RebindUserDataFlags CmdBuffer::SwitchCompactSchemeUserDataLayouts( PipelineBindPoint apiBindPoint, const UserDataLayout* pNewUserDataLayout) { @@ -2192,6 +2199,27 @@ CmdBuffer::RebindUserDataFlags CmdBuffer::SwitchUserDataLayouts( flags |= RebindUserDataDescriptorSets; } + return flags; +} + +// ===================================================================================================================== +// Called during vkCmdBindPipeline when the new pipeline's layout might be different from the previously bound layout. +// This function will compare the compatibility of those scheme-agnostic layouts and reprogram any user data to maintain +// previously-written pipeline resources to make them available in the correct locations of the new pipeline layout. +// Those that are compatible with the new layout remain correctly bound. +CmdBuffer::RebindUserDataFlags CmdBuffer::SwitchCommonUserDataLayouts( + PipelineBindPoint apiBindPoint, + const UserDataLayout* pNewUserDataLayout) +{ + VK_ASSERT(pNewUserDataLayout != nullptr); + + PipelineBindState* pBindState = &m_allGpuState.pipelineState[apiBindPoint]; + + RebindUserDataFlags flags = 0; + + const auto& newUserDataLayout = pNewUserDataLayout->common; + const auto& curUserDataLayout = pBindState->userDataLayout.common; + // Rebind push constants if necessary if ((newUserDataLayout.pushConstRegBase != curUserDataLayout.pushConstRegBase) | (newUserDataLayout.pushConstRegCount != curUserDataLayout.pushConstRegCount)) @@ -2199,12 +2227,8 @@ CmdBuffer::RebindUserDataFlags CmdBuffer::SwitchUserDataLayouts( flags |= RebindUserDataPushConstants; } - // Cache the new user data layout information - pBindState->userDataLayout = *pNewUserDataLayout; - return flags; } - // ===================================================================================================================== // Called during vkCmdBindPipeline when something requires rebinding API-provided top-level user data (descriptor // sets, push constants, etc.) 
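The split above separates the compact-scheme descriptor-set bookkeeping from the scheme-agnostic push-constant check. As a minimal standalone sketch of that scheme-agnostic comparison (an illustration only, not the driver's code: CommonUserDataLayout and SwitchCommonLayoutSketch are reduced stand-ins for UserDataLayout::common and SwitchCommonUserDataLayouts), push constants only need to be rewritten when their user-data register window moves or changes size:

#include <cstdint>

// Reduced stand-in for UserDataLayout::common (assumption: only the push-constant fields matter here).
struct CommonUserDataLayout
{
    uint32_t pushConstRegBase;   // first user data register holding push constants
    uint32_t pushConstRegCount;  // number of user data registers holding push constants
};

typedef uint32_t RebindUserDataFlags;
constexpr RebindUserDataFlags RebindUserDataPushConstants = 0x1;

// Mirrors the comparison made by SwitchCommonUserDataLayouts above: request a push-constant
// rebind only if the register window moved or changed size between the two layouts.
RebindUserDataFlags SwitchCommonLayoutSketch(
    const CommonUserDataLayout& cur,
    const CommonUserDataLayout& next)
{
    RebindUserDataFlags flags = 0;

    if ((next.pushConstRegBase  != cur.pushConstRegBase) ||
        (next.pushConstRegCount != cur.pushConstRegCount))
    {
        flags |= RebindUserDataPushConstants;
    }

    return flags;
}

int main()
{
    const CommonUserDataLayout cur  = { 10, 8 };
    const CommonUserDataLayout next = { 10, 12 };
    return (SwitchCommonLayoutSketch(cur, next) == RebindUserDataPushConstants) ? 0 : 1;
}

Note that caching of the new layout no longer happens inside the switch helpers; RebindPipeline stores it once after both helpers have run, which keeps the compact-scheme and scheme-agnostic paths independent.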
@@ -2214,14 +2238,17 @@ void CmdBuffer::RebindUserData( RebindUserDataFlags flags) { VK_ASSERT(flags != 0); - VK_ASSERT(m_allGpuState.pipelineState[apiBindPoint].userDataLayout.scheme == PipelineLayoutScheme::Compact); const PipelineBindState& bindState = m_allGpuState.pipelineState[apiBindPoint]; - const auto& userDataLayout = bindState.userDataLayout.compact; + + const auto& compactuserDataLayout = bindState.userDataLayout.compact; + const auto& commonUserDataLayout = bindState.userDataLayout.common; if ((flags & RebindUserDataDescriptorSets) != 0) { - const uint32_t count = Util::Min(userDataLayout.setBindingRegCount, bindState.boundSetCount); + VK_ASSERT(bindState.userDataLayout.scheme == PipelineLayoutScheme::Compact); + + const uint32_t count = Util::Min(compactuserDataLayout.setBindingRegCount, bindState.boundSetCount); if (count > 0) { @@ -2232,7 +2259,7 @@ void CmdBuffer::RebindUserData( PalCmdBuffer(deviceIdx)->CmdSetUserData( palBindPoint, - userDataLayout.setBindingRegBase, + compactuserDataLayout.setBindingRegBase, count, PerGpuState(deviceIdx)->setBindingData[apiBindPoint]); } @@ -2242,7 +2269,7 @@ void CmdBuffer::RebindUserData( if ((flags & RebindUserDataPushConstants) != 0) { - const uint32_t count = Util::Min(userDataLayout.pushConstRegCount, bindState.pushedConstCount); + const uint32_t count = Util::Min(commonUserDataLayout.pushConstRegCount, bindState.pushedConstCount); if (count > 0) { @@ -2252,7 +2279,7 @@ void CmdBuffer::RebindUserData( PalCmdBufferSetUserData( palBindPoint, - userDataLayout.pushConstRegBase, + commonUserDataLayout.pushConstRegBase, count, perDeviceStride, bindState.pushConstData); @@ -2261,6 +2288,8 @@ void CmdBuffer::RebindUserData( if (((flags & RebindUberFetchInternalMem) != 0) && (bindState.pVertexInputInternalData != nullptr)) { + VK_ASSERT(bindState.userDataLayout.scheme == PipelineLayoutScheme::Compact); + utils::IterateMask deviceGroup(m_curDeviceMask); do { @@ -2268,7 +2297,7 @@ void CmdBuffer::RebindUserData( PalCmdBuffer(deviceIdx)->CmdSetUserData( palBindPoint, - userDataLayout.uberFetchConstBufRegBase, + compactuserDataLayout.uberFetchConstBufRegBase, 2, reinterpret_cast(&bindState.pVertexInputInternalData->gpuAddress[deviceIdx])); } @@ -3067,30 +3096,36 @@ void CmdBuffer::BindVertexBuffers( pSizes, pStrides); - Pal::VertexBufferViews bufferViews = - { - .firstBuffer = firstBinding, - .bufferCount = lowBindingCount, - .offsetMode = (m_flags.offsetMode == 1) ? 
true : false - }; - Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; - if (m_flags.offsetMode) { + Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; for (uint32_t idx = 0; idx < lowBindingCount; idx++) { vertexViews[idx].gpuva = pBinding[idx].gpuAddr; vertexViews[idx].sizeInBytes = pBinding[idx].range; vertexViews[idx].strideInBytes = pBinding[idx].stride; } - bufferViews.pVertexBufferViews = vertexViews; + + const Pal::VertexBufferViews bufferViews = + { + .firstBuffer = firstBinding, + .bufferCount = lowBindingCount, + .offsetMode = true, + .pVertexBufferViews = vertexViews + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } else { - bufferViews.pBufferViewInfos = pBinding; + const Pal::VertexBufferViews bufferViews = + { + .firstBuffer = firstBinding, + .bufferCount = lowBindingCount, + .offsetMode = false, + .pBufferViewInfos = pBinding + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } - - PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } } @@ -3153,31 +3188,37 @@ void CmdBuffer::UpdateVertexBufferStrides( if (firstChanged <= lastChanged) { - Pal::VertexBufferViews bufferViews = - { - .firstBuffer = firstChanged, - .bufferCount = (lastChanged - firstChanged) + 1, - .offsetMode = (m_flags.offsetMode == 1) ? true : false - }; - Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; auto pBinding = &PerGpuState(deviceIdx)->vbBindings[firstChanged]; - if (m_flags.offsetMode) { + Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; for (uint32_t idx = 0; idx < (lastChanged - firstChanged + 1); idx++) { vertexViews[idx].gpuva = pBinding[idx].gpuAddr; vertexViews[idx].sizeInBytes = pBinding[idx].range; vertexViews[idx].strideInBytes = pBinding[idx].stride; } - bufferViews.pVertexBufferViews = vertexViews; + + const Pal::VertexBufferViews bufferViews = + { + .firstBuffer = firstChanged, + .bufferCount = (lastChanged - firstChanged) + 1, + .offsetMode = true, + .pVertexBufferViews = vertexViews + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } else { - bufferViews.pBufferViewInfos = pBinding; + const Pal::VertexBufferViews bufferViews = + { + .firstBuffer = firstChanged, + .bufferCount = (lastChanged - firstChanged) + 1, + .offsetMode = false, + .pBufferViewInfos = pBinding + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } - - PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } } while (deviceGroup.IterateNext()); @@ -5906,14 +5947,6 @@ void CmdBuffer::ExecuteAcquireRelease( VkToPalPipelineStageFlags(bufferMemoryBarrier.dstStageMask, false); pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].flags.u32All = 0; - // We set the address to 0 by default here. But, this will be computed correctly later for each - // device including DefaultDeviceIndex based on the deviceId. - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.address = - 0; - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.offset = - bufferMemoryBarrier.offset; - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.size = - bufferMemoryBarrier.size; pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].srcAccessMask = tempTransition.srcCacheMask; pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].dstAccessMask = @@ -6274,14 +6307,6 @@ void CmdBuffer::ExecuteReleaseThenAcquire( pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].flags.u32All = 0; - // We set the address to 0 by default here. 
But, this will be computed correctly later for each - // device including DefaultDeviceIndex based on the deviceId - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.address = - 0; - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.offset = - pBufferMemoryBarriers[bufferMemoryBarrierIdx].offset; - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.size = - pBufferMemoryBarriers[bufferMemoryBarrierIdx].size; pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].srcStageMask = palSrcStageMask; pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].dstStageMask = @@ -6664,66 +6689,69 @@ void CmdBuffer::BeginQueryIndexed( DbgBarrierPreCmd(DbgBarrierQueryBeginEnd); const QueryPool* pBasePool = QueryPool::ObjectFromHandle(queryPool); - const auto palQueryControlFlags = VkToPalQueryControlFlags(pBasePool->GetQueryType(), flags); - // NOTE: This function is illegal to call for TimestampQueryPools and AccelerationStructureQueryPools - const PalQueryPool* pQueryPool = pBasePool->AsPalQueryPool(); - Pal::QueryType queryType = pQueryPool->PalQueryType(); - if (queryType == Pal::QueryType::StreamoutStats) { - queryType = static_cast(static_cast(queryType) + index); - } + const auto palQueryControlFlags = VkToPalQueryControlFlags(pBasePool->GetQueryType(), flags); - utils::IterateMask deviceGroup(m_curDeviceMask); - do - { - const uint32_t deviceIdx = deviceGroup.Index(); - - PalCmdBuffer(deviceIdx)->CmdBeginQuery(*pQueryPool->PalPool(deviceIdx), - queryType, - query, - palQueryControlFlags); - } - while (deviceGroup.IterateNext()); + // NOTE: This function is illegal to call for TimestampQueryPools and AccelerationStructureQueryPools + const PalQueryPool* pQueryPool = pBasePool->AsPalQueryPool(); + Pal::QueryType queryType = pQueryPool->PalQueryType(); + if (queryType == Pal::QueryType::StreamoutStats) + { + queryType = static_cast(static_cast(queryType) + index); + } - const auto* const pRenderPass = m_allGpuState.pRenderPass; + utils::IterateMask deviceGroup(m_curDeviceMask); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); - // If queries are used while executing a render pass instance that has multiview enabled, - // the query uses N consecutive query indices in the query pool (starting at query) where - // N is the number of bits set in the view mask in the subpass the query is used in. - // - // Implementations may write the total result to the first query and - // write zero to the other queries. - if (((UsingDynamicRendering() == false) && pRenderPass->IsMultiviewEnabled()) || - (m_allGpuState.dynamicRenderingInstance.viewMask != 0)) - { - const auto viewMask = (pRenderPass != nullptr) ? pRenderPass->GetViewMask(m_renderPassInstance.subpass) : - m_allGpuState.dynamicRenderingInstance.viewMask; + PalCmdBuffer(deviceIdx)->CmdBeginQuery(*pQueryPool->PalPool(deviceIdx), + queryType, + query, + palQueryControlFlags); + } + while (deviceGroup.IterateNext()); - const auto viewCount = Util::CountSetBits(viewMask); + const auto* const pRenderPass = m_allGpuState.pRenderPass; - // Call Begin() and immediately call End() for all remaining queries, - // to set value of each remaining query to 0 and to make them avaliable. 
- for (uint32_t remainingQuery = 1; remainingQuery < viewCount; ++remainingQuery) + // If queries are used while executing a render pass instance that has multiview enabled, + // the query uses N consecutive query indices in the query pool (starting at query) where + // N is the number of bits set in the view mask in the subpass the query is used in. + // + // Implementations may write the total result to the first query and + // write zero to the other queries. + if (((UsingDynamicRendering() == false) && pRenderPass->IsMultiviewEnabled()) || + (m_allGpuState.dynamicRenderingInstance.viewMask != 0)) { - const auto remainingQueryIndex = query + remainingQuery; + const auto viewMask = (pRenderPass != nullptr) ? pRenderPass->GetViewMask(m_renderPassInstance.subpass) : + m_allGpuState.dynamicRenderingInstance.viewMask; - utils::IterateMask multiviewDeviceGroup(m_curDeviceMask); - do + const auto viewCount = Util::CountSetBits(viewMask); + + // Call Begin() and immediately call End() for all remaining queries, + // to set value of each remaining query to 0 and to make them avaliable. + for (uint32_t remainingQuery = 1; remainingQuery < viewCount; ++remainingQuery) { - const uint32_t deviceIdx = multiviewDeviceGroup.Index(); + const auto remainingQueryIndex = query + remainingQuery; - PalCmdBuffer(deviceIdx)->CmdBeginQuery( - *pQueryPool->PalPool(deviceIdx), - pQueryPool->PalQueryType(), - remainingQueryIndex, palQueryControlFlags); + utils::IterateMask multiviewDeviceGroup(m_curDeviceMask); + do + { + const uint32_t deviceIdx = multiviewDeviceGroup.Index(); - PalCmdBuffer(deviceIdx)->CmdEndQuery( - *pQueryPool->PalPool(deviceIdx), - pQueryPool->PalQueryType(), - remainingQueryIndex); + PalCmdBuffer(deviceIdx)->CmdBeginQuery( + *pQueryPool->PalPool(deviceIdx), + pQueryPool->PalQueryType(), + remainingQueryIndex, palQueryControlFlags); + + PalCmdBuffer(deviceIdx)->CmdEndQuery( + *pQueryPool->PalPool(deviceIdx), + pQueryPool->PalQueryType(), + remainingQueryIndex); + } + while (multiviewDeviceGroup.IterateNext()); } - while (multiviewDeviceGroup.IterateNext()); } } @@ -6738,24 +6766,28 @@ void CmdBuffer::EndQueryIndexed( { DbgBarrierPreCmd(DbgBarrierQueryBeginEnd); - // NOTE: This function is illegal to call for TimestampQueryPools and AccelerationStructureQueryPools - const PalQueryPool* pQueryPool = QueryPool::ObjectFromHandle(queryPool)->AsPalQueryPool(); - Pal::QueryType queryType = pQueryPool->PalQueryType(); - if (queryType == Pal::QueryType::StreamoutStats) - { - queryType = static_cast(static_cast(queryType) + index); - } + const QueryPool* pBasePool = QueryPool::ObjectFromHandle(queryPool); - utils::IterateMask deviceGroup(m_curDeviceMask); - do { - const uint32_t deviceIdx = deviceGroup.Index(); + // NOTE: This function is illegal to call for TimestampQueryPools and AccelerationStructureQueryPools + const PalQueryPool* pQueryPool = pBasePool->AsPalQueryPool(); + Pal::QueryType queryType = pQueryPool->PalQueryType(); + if (queryType == Pal::QueryType::StreamoutStats) + { + queryType = static_cast(static_cast(queryType) + index); + } - PalCmdBuffer(deviceIdx)->CmdEndQuery(*pQueryPool->PalPool(deviceIdx), - queryType, - query); + utils::IterateMask deviceGroup(m_curDeviceMask); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + PalCmdBuffer(deviceIdx)->CmdEndQuery(*pQueryPool->PalPool(deviceIdx), + queryType, + query); + } + while (deviceGroup.IterateNext()); } - while (deviceGroup.IterateNext()); DbgBarrierPostCmd(DbgBarrierQueryBeginEnd); } @@ -7221,13 +7253,6 @@ void 
CmdBuffer::PalCmdReleaseThenAcquire( } pAcquireReleaseInfo->pImageBarriers = pImageBarriers; - for (uint32_t i = 0; i < pAcquireReleaseInfo->memoryBarrierCount; i++) - { - if (ppBuffers != nullptr) - { - pBufferBarriers[i].memory.address = ppBuffers[i]->GpuVirtAddr(deviceIdx); - } - } pAcquireReleaseInfo->pMemoryBarriers = pBufferBarriers; PalCmdBuffer(deviceIdx)->CmdReleaseThenAcquire(*pAcquireReleaseInfo); @@ -7267,13 +7292,6 @@ void CmdBuffer::PalCmdAcquire( } pAcquireReleaseInfo->pImageBarriers = pImageBarriers; - for (uint32_t i = 0; i < pAcquireReleaseInfo->memoryBarrierCount; i++) - { - if (ppBuffers != nullptr) - { - pBufferBarriers[i].memory.address = ppBuffers[i]->GpuVirtAddr(deviceIdx); - } - } pAcquireReleaseInfo->pMemoryBarriers = pBufferBarriers; if (pEvent->IsUseToken()) @@ -7357,13 +7375,6 @@ void CmdBuffer::PalCmdRelease( } pAcquireReleaseInfo->pImageBarriers = pImageBarriers; - for (uint32_t i = 0; i < pAcquireReleaseInfo->memoryBarrierCount; i++) - { - if (ppBuffers != nullptr) - { - pBufferBarriers[i].memory.address = ppBuffers[i]->GpuVirtAddr(deviceIdx); - } - } pAcquireReleaseInfo->pMemoryBarriers = pBufferBarriers; if (pEvent->IsUseToken()) @@ -9075,61 +9086,27 @@ void CmdBuffer::WritePushConstants( const UserDataLayout& userDataLayout = pLayout->GetInfo().userDataLayout; - if (userDataLayout.scheme == PipelineLayoutScheme::Compact) - { - // Program the user data register only if the current user data layout base matches that of the given - // layout. Otherwise, what's happening is that the application is pushing constants for a future - // pipeline layout (e.g. at the top of the command buffer) and this register write will be redundant because - // a future vkCmdBindPipeline will reprogram the user data registers during the rebase. - if (PalPipelineBindingOwnedBy(palBindPoint, apiBindPoint) && - (pBindState->userDataLayout.compact.pushConstRegBase == userDataLayout.compact.pushConstRegBase) && - (pBindState->userDataLayout.compact.pushConstRegCount >= (startInDwords + lengthInDwords))) - { - utils::IterateMask deviceGroup(m_curDeviceMask); - do - { - const uint32_t deviceIdx = deviceGroup.Index(); - - PalCmdBuffer(deviceIdx)->CmdSetUserData( - palBindPoint, - pBindState->userDataLayout.compact.pushConstRegBase + startInDwords, - lengthInDwords, - pUserDataPtr); - } - while (deviceGroup.IterateNext()); - } - } - else if (userDataLayout.scheme == PipelineLayoutScheme::Indirect) + // Program the user data register only if the current user data layout base matches that of the given + // layout. Otherwise, what's happening is that the application is pushing constants for a future + // pipeline layout (e.g. at the top of the command buffer) and this register write will be redundant because + // a future vkCmdBindPipeline will reprogram the user data registers during the rebase. 
+ if (PalPipelineBindingOwnedBy(palBindPoint, apiBindPoint) && + (pBindState->userDataLayout.common.pushConstRegBase == userDataLayout.common.pushConstRegBase) && + (pBindState->userDataLayout.common.pushConstRegCount >= (startInDwords + lengthInDwords))) { utils::IterateMask deviceGroup(m_curDeviceMask); - do { const uint32_t deviceIdx = deviceGroup.Index(); - Pal::gpusize gpuAddr; - - void* pCpuAddr = PalCmdBuffer(deviceIdx)->CmdAllocateEmbeddedData( - userDataLayout.indirect.pushConstSizeInDword, - m_pDevice->GetProperties().descriptorSizes.alignmentInDwords, - &gpuAddr); - - memcpy(pCpuAddr, pUserData, userDataLayout.indirect.pushConstSizeInDword * sizeof(uint32_t)); - - const uint32_t gpuAddrLow = static_cast(gpuAddr); - PalCmdBuffer(deviceIdx)->CmdSetUserData( palBindPoint, - userDataLayout.indirect.pushConstPtrRegBase, - PipelineLayout::SetPtrRegCount, - &gpuAddrLow); + pBindState->userDataLayout.common.pushConstRegBase + startInDwords, + lengthInDwords, + pUserDataPtr); } while (deviceGroup.IterateNext()); } - else - { - VK_NEVER_CALLED(); - } } // ===================================================================================================================== @@ -9931,7 +9908,8 @@ DynamicVertexInputInternalData* CmdBuffer::BuildUberFetchShaderInternalData( pVertexBindingDescriptions, vertexAttributeDescriptionCount, pVertexAttributeDescriptions, - pUberFetchShaderInternalData); + pUberFetchShaderInternalData, + m_flags.offsetMode); Pal::gpusize gpuAddress = {}; if (uberFetchShaderInternalDataSize > 0) @@ -10037,31 +10015,37 @@ void CmdBuffer::SetVertexInput( if (firstChanged <= lastChanged) { - Pal::VertexBufferViews bufferViews = - { - .firstBuffer = firstChanged, - .bufferCount = (lastChanged - firstChanged) + 1, - .offsetMode = (m_flags.offsetMode == 1) ? 
true : false - };  - Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; auto pBinding = &PerGpuState(deviceIdx)->vbBindings[firstChanged]; - if (m_flags.offsetMode) { + Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; for (uint32_t idx = 0; idx < (lastChanged - firstChanged + 1); idx++) { vertexViews[idx].gpuva = pBinding[idx].gpuAddr; vertexViews[idx].sizeInBytes = pBinding[idx].range; vertexViews[idx].strideInBytes = pBinding[idx].stride; } - bufferViews.pVertexBufferViews = vertexViews; + + const Pal::VertexBufferViews bufferViews = + { + .firstBuffer = firstChanged, + .bufferCount = (lastChanged - firstChanged) + 1, + .offsetMode = true, + .pVertexBufferViews = vertexViews + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } else { - bufferViews.pBufferViewInfos = pBinding; + const Pal::VertexBufferViews bufferViews = + { + .firstBuffer = firstChanged, + .bufferCount = (lastChanged - firstChanged) + 1, + .offsetMode = false, + .pBufferViewInfos = pBinding + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } - - PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } if (vertexBufferCount != pBindState->dynamicBindInfo.gfxDynState.vertexBufferCount) @@ -10094,7 +10078,7 @@ void CmdBuffer::SetRenderingAttachmentLocations( // ===================================================================================================================== void CmdBuffer::SetRenderingInputAttachmentIndices( - const VkRenderingInputAttachmentIndexInfoKHR* pLocationInfo) + const VkRenderingInputAttachmentIndexInfoKHR* pInputAttachmentIndexInfo) { } diff --git a/icd/api/vk_compute_pipeline.cpp b/icd/api/vk_compute_pipeline.cpp index 34384780..fbda6e8b 100644 --- a/icd/api/vk_compute_pipeline.cpp +++ b/icd/api/vk_compute_pipeline.cpp @@ -408,7 +408,7 @@ VkResult ComputePipeline::Create( &pipelineOptimizerKey, &apiPsoHash, &tempModule, - &cacheId[0]); + cacheId); binaryCreateInfo.apiPsoHash = apiPsoHash; diff --git a/icd/api/vk_conv.cpp b/icd/api/vk_conv.cpp index 73869c24..24445a72 100644 --- a/icd/api/vk_conv.cpp +++ b/icd/api/vk_conv.cpp @@ -1383,8 +1383,14 @@ VkResult InitializeUberFetchShaderFormatTable( bufferInfo.stride = 0; pPhysicalDevice->PalDevice()->CreateUntypedBufferViewSrds(1, &bufferInfo, zeroStrideSrd); + uint32_t oobMask = defaultSrd[3] ^ zeroStrideSrd[3]; + + // The OOB mask is expected to be 0x20000000. If PAL changes the relevant logic, UberFetchShaderFormatInfo::bufferFormat/unpackedBufferFormat + // might need to be fixed.
+ VK_ASSERT(oobMask == 0x20000000); + // Save the modified bits in buffer SRD - pFormatInfoMap->SetBufferFormatMask(defaultSrd[3] ^ zeroStrideSrd[3]); + pFormatInfoMap->SetBufferFormatMask(oobMask); return VK_SUCCESS; } #undef INIT_UBER_FORMATINFO @@ -1393,14 +1399,20 @@ VkResult InitializeUberFetchShaderFormatTable( UberFetchShaderFormatInfo GetUberFetchShaderFormatInfo( const UberFetchShaderFormatInfoMap* pFormatInfoMap, const VkFormat vkFormat, - const bool isZeroStride) + const bool isZeroStride, + const bool isOffsetMode) { UberFetchShaderFormatInfo formatInfo = {}; auto pFormatInfo = pFormatInfoMap->FindKey(vkFormat); if (pFormatInfo != nullptr) { formatInfo = *pFormatInfo; - if (isZeroStride) + if (isOffsetMode) + { + formatInfo.bufferFormat = formatInfo.bufferFormat | pFormatInfoMap->GetBufferFormatMask(); + formatInfo.unpackedBufferFormat = formatInfo.unpackedBufferFormat | pFormatInfoMap->GetBufferFormatMask(); + } + else if (isZeroStride) { // Apply zero stride modified bits, which are caclulated in UberFetchShaderFormatInfoMap initialization. formatInfo.bufferFormat = formatInfo.bufferFormat ^ pFormatInfoMap->GetBufferFormatMask(); diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index 2e6cbc20..74ee8655 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -552,7 +552,7 @@ VkResult Device::Create( { deviceFeatures.robustBufferAccessExtended = true; { - deviceFeatures.robustVertexBufferExtend = true; + deviceFeatures.robustVertexBufferExtend = false; } } @@ -1885,7 +1885,7 @@ VkResult Device::CreateInternalComputePipeline( auto pShaderInfo = &pipelineBuildInfo.pipelineInfo.cs; pShaderInfo->pModuleData = ShaderModule::GetFirstValidShaderData(&shaderModule); - pipelineBuildInfo.compilerType = pCompiler->CheckCompilerType(&pipelineBuildInfo.pipelineInfo); + pipelineBuildInfo.compilerType = pCompiler->CheckCompilerType(&pipelineBuildInfo.pipelineInfo, 0, 0); pShaderInfo->pModuleData = ShaderModule::GetShaderData(pipelineBuildInfo.compilerType, &shaderModule); pShaderInfo->pSpecializationInfo = pSpecializationInfo; diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 82811670..3e87bfa4 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -63,6 +63,7 @@ VkResult GraphicsPipeline::CreatePipelineBinaries( Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, VkPipelineCreateFlags2KHR flags, const GraphicsPipelineShaderStageInfo* pShaderInfo, const PipelineLayout* pPipelineLayout, @@ -128,6 +129,7 @@ VkResult GraphicsPipeline::CreatePipelineBinaries( pDevice, pCreateInfo, extStructs, + libInfo, flags, pShaderInfo, pPipelineLayout, @@ -201,6 +203,7 @@ VkResult GraphicsPipeline::CreatePipelineBinaries( pDevice, pCreateInfo, extStructs, + libInfo, flags, pShaderInfo, pPipelineLayout, @@ -568,9 +571,13 @@ VkResult GraphicsPipeline::CreatePipelineObjects( } // ===================================================================================================================== static bool IsGplFastLinkPossible( - const GraphicsPipelineLibraryInfo& libInfo) + const Device* pDevice, + const GraphicsPipelineLibraryInfo& libInfo, + const PipelineLayout* pPipelineLayout) { bool result = false; + const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); + if ((libInfo.flags.isLibrary == false) && (libInfo.flags.optimize == false) && (libInfo.pFragmentShaderLib != nullptr) && @@ -581,8 +588,19 @@ 
static bool IsGplFastLinkPossible( const GraphicsPipelineBinaryCreateInfo& fragmentCreateInfo = libInfo.pFragmentShaderLib->GetPipelineBinaryCreateInfo(); + bool isPushConstCompatible = true; + + if (settings.pipelineLayoutPushConstantCompatibilityCheck) + { + isPushConstCompatible = (pPipelineLayout->GetInfo().userDataLayout.common.pushConstRegCount == + libInfo.pFragmentShaderLib->GetUserDataLayout()->common.pushConstRegCount) && + (pPipelineLayout->GetInfo().userDataLayout.common.pushConstRegCount == + libInfo.pPreRasterizationShaderLib->GetUserDataLayout()->common.pushConstRegCount); + } + if ((preRasterCreateInfo.pShaderLibraries[GraphicsLibraryPreRaster] != nullptr) && - (fragmentCreateInfo.pShaderLibraries[GraphicsLibraryFragment] != nullptr)) + (fragmentCreateInfo.pShaderLibraries[GraphicsLibraryFragment] != nullptr) && + isPushConstCompatible) { result = true; } @@ -606,10 +624,8 @@ void DumpGplFastLinkInfo( uint64_t dumpHash = settings.dumpPipelineWithApiHash ? createInfo.apiPsoHash : info.internalPipelineHash.stable; Vkgc::PipelineDumpOptions dumpOptions = {}; - dumpOptions.pDumpDir = settings.pipelineDumpDir; - dumpOptions.filterPipelineDumpByType = settings.filterPipelineDumpByType; - dumpOptions.filterPipelineDumpByHash = settings.filterPipelineDumpByHash; - dumpOptions.dumpDuplicatePipelines = settings.dumpDuplicatePipelines; + char tempBuff[Util::MaxPathStrLen]; + PipelineCompiler::InitPipelineDumpOption(&dumpOptions, settings, tempBuff, createInfo.compilerType); Vkgc::PipelineBuildInfo pipelineInfo = {}; pipelineInfo.pGraphicsInfo = &createInfo.pipelineInfo; @@ -733,7 +749,7 @@ VkResult GraphicsPipeline::Create( pPipelineLayout = PipelineLayout::ObjectFromHandle(pCreateInfo->layout); GraphicsPipelineLibraryInfo libInfo = {}; - GraphicsPipelineCommon::ExtractLibraryInfo(pCreateInfo, extStructs, flags, &libInfo); + GraphicsPipelineCommon::ExtractLibraryInfo(pDevice, pCreateInfo, extStructs, flags, &libInfo); // 1. 
Check whether GPL fast link is possible if (pDevice->GetRuntimeSettings().useShaderLibraryForPipelineLibraryFastLink) @@ -759,7 +775,7 @@ VkResult GraphicsPipeline::Create( } } - if (IsGplFastLinkPossible(libInfo)) + if (IsGplFastLinkPossible(pDevice, libInfo, pPipelineLayout)) { result = pDevice->GetCompiler(DefaultDeviceIndex)->BuildGplFastLinkCreateInfo( pDevice, pCreateInfo, extStructs, flags, libInfo, pPipelineLayout, &binaryMetadata, &binaryCreateInfo); @@ -806,6 +822,7 @@ VkResult GraphicsPipeline::Create( BuildApiHash(pCreateInfo, flags, extStructs, + libInfo, binaryCreateInfo, &apiPsoHash, &elfHash); @@ -823,6 +840,7 @@ VkResult GraphicsPipeline::Create( pDevice, pCreateInfo, extStructs, + libInfo, flags, &shaderStageInfo, &binaryCreateInfo, @@ -841,6 +859,7 @@ VkResult GraphicsPipeline::Create( pDevice, pCreateInfo, extStructs, + libInfo, flags, &shaderStageInfo, pPipelineLayout, @@ -861,10 +880,12 @@ VkResult GraphicsPipeline::Create( pDevice, pCreateInfo, extStructs, + libInfo, flags, &pipelineOptimizerKey, &binaryMetadata, - &objectCreateInfo); + &objectCreateInfo, + &binaryCreateInfo); if (result == VK_SUCCESS) { @@ -1040,6 +1061,7 @@ VkResult GraphicsPipeline::CreateCacheId( Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, VkPipelineCreateFlags2KHR flags, GraphicsPipelineShaderStageInfo* pShaderStageInfo, GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, @@ -1070,6 +1092,7 @@ VkResult GraphicsPipeline::CreateCacheId( pDevice, pCreateInfo, extStructs, + libInfo, flags, pShaderStageInfo, pShaderOptimizerKeys, @@ -1080,6 +1103,7 @@ VkResult GraphicsPipeline::CreateCacheId( BuildApiHash(pCreateInfo, flags, extStructs, + libInfo, *pBinaryCreateInfo, pApiPsoHash, &elfHash); @@ -1349,9 +1373,12 @@ VkResult GraphicsPipeline::DeferCreateOptimizedPipeline( if (result == VK_SUCCESS) { + GraphicsPipelineLibraryInfo libInfo = {}; + ExtractLibraryInfo(nullptr, nullptr, extStructs, 0, &libInfo); result = CreatePipelineBinaries(pDevice, nullptr, extStructs, + libInfo, 0, pShaderStageInfo, nullptr, diff --git a/icd/api/vk_graphics_pipeline_library.cpp b/icd/api/vk_graphics_pipeline_library.cpp index 058a2530..a166963f 100644 --- a/icd/api/vk_graphics_pipeline_library.cpp +++ b/icd/api/vk_graphics_pipeline_library.cpp @@ -452,6 +452,7 @@ VkResult GraphicsPipelineLibrary::Create( const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, VkPipelineCreateFlags2KHR flags, + uint32_t internalFlags, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) { @@ -462,7 +463,7 @@ VkResult GraphicsPipelineLibrary::Create( void* pSysMem = nullptr; GraphicsPipelineLibraryInfo libInfo; - ExtractLibraryInfo(pCreateInfo, extStructs, flags, &libInfo); + ExtractLibraryInfo(pDevice, pCreateInfo, extStructs, flags, &libInfo); GraphicsPipelineBinaryCreateInfo binaryCreateInfo = {}; GraphicsPipelineShaderStageInfo shaderStageInfo = {}; @@ -470,6 +471,11 @@ VkResult GraphicsPipelineLibrary::Create( binaryCreateInfo.pipelineInfo.iaState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + if ((internalFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FORCE_LLPC) != 0) + { + binaryCreateInfo.compilerType = PipelineCompilerTypeLlpc; + } + // 1. 
Build shader stage infos if (result == VK_SUCCESS) { @@ -517,6 +523,7 @@ VkResult GraphicsPipelineLibrary::Create( pDevice, pCreateInfo, extStructs, + libInfo, flags, &shaderStageInfo, shaderOptimizerKeys, @@ -529,6 +536,7 @@ VkResult GraphicsPipelineLibrary::Create( BuildApiHash(pCreateInfo, flags, extStructs, + libInfo, binaryCreateInfo, &apiPsoHash, &elfHash); @@ -550,6 +558,7 @@ VkResult GraphicsPipelineLibrary::Create( pDevice, pCreateInfo, extStructs, + libInfo, flags, &shaderStageInfo, pPipelineLayout, @@ -583,10 +592,12 @@ VkResult GraphicsPipelineLibrary::Create( pDevice, pCreateInfo, extStructs, + libInfo, flags, &pipelineOptimizerKey, &binaryMetadata, - &objectCreateInfo); + &objectCreateInfo, + &binaryCreateInfo); // Calculate object size apiSize = sizeof(GraphicsPipelineLibrary); @@ -661,6 +672,12 @@ VkResult GraphicsPipelineLibrary::Destroy( Device* pDevice, const VkAllocationCallbacks* pAllocator) { + if (m_altLibrary != nullptr) + { + m_altLibrary->Destroy(pDevice, pAllocator); + m_altLibrary = nullptr; + } + PipelineCompiler* pCompiler = pDevice->GetCompiler(DefaultDeviceIndex); uint32_t libraryMask = 0; @@ -712,7 +729,8 @@ GraphicsPipelineLibrary::GraphicsPipelineLibrary( m_objectCreateInfo(objectInfo), m_pBinaryCreateInfo(pBinaryInfo), m_libInfo(libInfo), - m_elfHash(elfHash) + m_elfHash(elfHash), + m_altLibrary(nullptr) { Util::MetroHash::Hash dummyCacheHash = {}; Pipeline::Init( diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index db539869..4198c0c3 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -196,7 +196,6 @@ Image::Image( pCreateInfo->samples > VK_SAMPLE_COUNT_1_BIT, imageFormat, extraLayoutUsages), - m_pSwapChain(nullptr), m_ResourceKey(resourceKey), m_memoryRequirements{} { @@ -1528,8 +1527,6 @@ VkResult Image::BindSwapchainMemory( const SwapChain::Properties& properties = pSwapchain->GetProperties(); - m_pSwapChain = pSwapchain; - Memory* pMemory = Memory::ObjectFromHandle(properties.imageMemory[swapChainImageIndex]); Image* pSwapchainImage = Image::ObjectFromHandle(properties.images[swapChainImageIndex]); @@ -1977,6 +1974,7 @@ void Image::CalculateMemoryRequirementsInternal( { pMemoryRequirements->size = Util::RoundUpToMultiple(palReqs.size, pMemoryRequirements->alignment); } + } // ===================================================================================================================== @@ -2394,10 +2392,6 @@ void Image::CalculateSparseMemoryRequirements( // ===================================================================================================================== void Image::RegisterPresentableImageWithSwapChain(SwapChain* pSwapChain) { - // Registration is only allowed to happen once - VK_ASSERT(m_pSwapChain == nullptr); - m_pSwapChain = pSwapChain; - // If swapchain requires this image to be treated as SRGB. 
m_internalFlags.treatAsSrgb = pSwapChain->GetProperties().flags.treatAsSrgb; diff --git a/icd/api/vk_indirect_commands_layout.cpp b/icd/api/vk_indirect_commands_layout.cpp index 8fdc7582..a3d7c8c2 100644 --- a/icd/api/vk_indirect_commands_layout.cpp +++ b/icd/api/vk_indirect_commands_layout.cpp @@ -254,18 +254,13 @@ void IndirectCommandsLayout::BuildPalCreateInfo( const PipelineLayout* pPipelineLayout = PipelineLayout::ObjectFromHandle(token.pushconstantPipelineLayout); const UserDataLayout& userDataLayout = pPipelineLayout->GetInfo().userDataLayout; - if (userDataLayout.scheme == PipelineLayoutScheme::Indirect) - { - VK_NOT_IMPLEMENTED; - } - uint32_t startInDwords = token.pushconstantOffset / sizeof(uint32_t); uint32_t lengthInDwords = PipelineLayout::GetPushConstantSizeInDword(token.pushconstantSize); pIndirectParams[i].type = Pal::IndirectParamType::SetUserData; pIndirectParams[i].userData.entryCount = lengthInDwords; pIndirectParams[i].sizeInBytes = sizeof(uint32_t) * lengthInDwords; - pIndirectParams[i].userData.firstEntry = userDataLayout.compact.pushConstRegBase + startInDwords; + pIndirectParams[i].userData.firstEntry = userDataLayout.common.pushConstRegBase + startInDwords; pIndirectParams[i].userDataShaderUsage = VkToPalShaderStageMask(token.pushconstantShaderStageFlags); break; } diff --git a/icd/api/vk_memory.cpp b/icd/api/vk_memory.cpp index f2fcf552..21fa1db5 100644 --- a/icd/api/vk_memory.cpp +++ b/icd/api/vk_memory.cpp @@ -336,7 +336,7 @@ VkResult Memory::Create( } else { - vkResult = OpenExternalMemory(pDevice, importInfo, &pMemory); + vkResult = OpenExternalMemory(pDevice, createInfo, importInfo, &pMemory); } } else @@ -1057,9 +1057,10 @@ void Memory::Free( // Opens a POSIX external shared handle and creates a memory object corresponding to it. // Open external memory should not be multi-instance allocation. VkResult Memory::OpenExternalMemory( - Device* pDevice, - const ImportMemoryInfo& importInfo, - Memory** ppMemory) + Device* pDevice, + const Pal::GpuMemoryCreateInfo& localCreateInfo, + const ImportMemoryInfo& importInfo, + Memory** ppMemory) { Pal::ExternalGpuMemoryOpenInfo openInfo = {}; Pal::GpuMemoryCreateInfo createInfo = {}; @@ -1071,6 +1072,11 @@ VkResult Memory::OpenExternalMemory( createInfo.flags.globalGpuVa = pDevice->IsGlobalGpuVaEnabled(); createInfo.heapAccess = Pal::GpuHeapAccess::GpuHeapAccessExplicit; + openInfo.flags.gl2Uncached = localCreateInfo.flags.gl2Uncached; + openInfo.flags.mallRangeActive = localCreateInfo.flags.mallRangeActive; + openInfo.mallPolicy = localCreateInfo.mallPolicy; + openInfo.mallRange = localCreateInfo.mallRange; + VK_ASSERT(pDevice != nullptr); VK_ASSERT(ppMemory != nullptr); diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index e6d2e5f1..eb313c7e 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -488,12 +488,14 @@ VkResult PhysicalDevice::Create( // ===================================================================================================================== // Converts from PAL format feature properties to Vulkan equivalents. 
static void GetFormatFeatureFlags( + const PhysicalDevice* pPhysicalDevice, const Pal::MergedFormatPropertiesTable& formatProperties, VkFormat format, VkImageTiling imageTiling, VkFormatFeatureFlags* pOutFormatFeatureFlags, const RuntimeSettings& settings) { + const Pal::DeviceProperties& palProps = pPhysicalDevice->PalProperties(); const Pal::SwizzledFormat swizzledFormat = VkToPalFormat(format, settings); const size_t formatIdx = static_cast(swizzledFormat.format); @@ -549,7 +551,12 @@ static void GetFormatFeatureFlags( retFlags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; retFlags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - retFlags &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; +#if VKI_BUILD_GFX11 + if (palProps.gfxLevel < Pal::GfxIpLevel::GfxIp11_0) +#endif + { + retFlags &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + } } else { @@ -793,6 +800,20 @@ VkResult PhysicalDevice::Initialize() const RuntimeSettings& settings = GetRuntimeSettings(); + if (settings.clampMaxImageSize > 0) + { + const Pal::Extent3d& maxDimensions = m_properties.imageProperties.maxDimensions; + + m_properties.imageProperties.maxDimensions.width = + Util::Min(maxDimensions.width, settings.clampMaxImageSize); + + m_properties.imageProperties.maxDimensions.height = + Util::Min(maxDimensions.height, settings.clampMaxImageSize); + + m_properties.imageProperties.maxDimensions.depth = + Util::Min(maxDimensions.depth, settings.clampMaxImageSize); + } + if (result == Pal::Result::Success) { // Finalize the PAL device @@ -1256,8 +1277,8 @@ void PhysicalDevice::PopulateFormatProperties() VkFormatFeatureFlags optimalFlags = 0; VkFormatFeatureFlags bufferFlags = 0; - GetFormatFeatureFlags(fmtProperties, format, VK_IMAGE_TILING_LINEAR, &linearFlags, settings); - GetFormatFeatureFlags(fmtProperties, format, VK_IMAGE_TILING_OPTIMAL, &optimalFlags, settings); + GetFormatFeatureFlags(this, fmtProperties, format, VK_IMAGE_TILING_LINEAR, &linearFlags, settings); + GetFormatFeatureFlags(this, fmtProperties, format, VK_IMAGE_TILING_OPTIMAL, &optimalFlags, settings); bufferFlags = linearFlags; @@ -4106,13 +4127,8 @@ bool PhysicalDevice::RayTracingSupported() const static bool IsKhrCooperativeMatrixSupported( const PhysicalDevice* pPhysicalDevice) { - const bool hasHardwareSupport = - ((pPhysicalDevice == nullptr) || - (pPhysicalDevice->PalProperties().gfxipProperties.flags.supportCooperativeMatrix)); - - bool emulateSupport = false; - - return hasHardwareSupport || emulateSupport; + return ((pPhysicalDevice == nullptr) || + (pPhysicalDevice->PalProperties().gfxipProperties.flags.supportCooperativeMatrix)); } // ===================================================================================================================== diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index d11fe4f1..29d73be6 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -183,6 +183,7 @@ VkResult PipelineLayout::ConvertCreateInfo( pDevice, pIn, pushConstantsSizeInBytes, + pushConstantsUserDataNodeCount, pInfo, pPipelineInfo, pSetUserDataLayouts); @@ -296,8 +297,10 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Finally, the vertex buffer table pointer is in the last user data register when applicable. // This allocation allows the descriptor set bindings to easily persist across pipeline switches. 
- VkResult result = VK_SUCCESS; - auto* pUserDataLayout = &pInfo->userDataLayout.compact; + VkResult result = VK_SUCCESS; + + auto* pUserDataLayout = &pInfo->userDataLayout.compact; + auto* pCommonUserDataLayout = &pInfo->userDataLayout.common; const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); @@ -426,8 +429,8 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Allocate user data for push constants pPipelineInfo->numUserDataNodes += pushConstantsUserDataNodeCount; - pUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; - pUserDataLayout->pushConstRegCount = pushConstRegCount; + pCommonUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; + pCommonUserDataLayout->pushConstRegCount = pushConstRegCount; pInfo->userDataRegCount += pushConstRegCount; // Populate user data layouts for each descriptor set that is active @@ -581,6 +584,7 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( const Device* pDevice, const VkPipelineLayoutCreateInfo* pIn, const uint32_t pushConstantsSizeInBytes, + const uint32_t pushConstantsUserDataNodeCount, Info* pInfo, PipelineInfo* pPipelineInfo, SetUserDataLayout* pSetUserDataLayouts) @@ -608,8 +612,11 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( VK_ASSERT(settings.pipelineLayoutMode != PipelineLayoutAngle); VK_ASSERT(settings.enableEarlyCompile == false); - VkResult result = VK_SUCCESS; - auto* pUserDataLayout = &pInfo->userDataLayout.indirect; + VkResult result = VK_SUCCESS; + + auto* pUserDataLayout = &pInfo->userDataLayout.indirect; + auto* pCommonUserDataLayout = &pInfo->userDataLayout.common; + uint32_t totalDynDescCount = 0; memset(pPipelineInfo, 0, sizeof(PipelineInfo)); @@ -632,12 +639,12 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( pInfo->userDataRegCount += 1; } - // Allocate user data for push constant buffer pointer - pUserDataLayout->pushConstPtrRegBase = pInfo->userDataRegCount; - pUserDataLayout->pushConstSizeInDword = GetPushConstantSizeInDword(pushConstantsSizeInBytes); - pPipelineInfo->numUserDataNodes += 1; - pPipelineInfo->numRsrcMapNodes += 1; - pInfo->userDataRegCount += 1; + // Allocate user data for push constants + uint32_t pushConstRegCount = GetPushConstantSizeInDword(pushConstantsSizeInBytes); + pPipelineInfo->numUserDataNodes += pushConstantsUserDataNodeCount; + pCommonUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; + pCommonUserDataLayout->pushConstRegCount = pushConstRegCount; + pInfo->userDataRegCount += pushConstRegCount; // Allocate user data for transform feedback buffer if (ReserveXfbNode(pDevice)) @@ -1259,8 +1266,11 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( { VK_ASSERT(m_info.userDataLayout.scheme == PipelineLayoutScheme::Compact); - VkResult result = VK_SUCCESS; - const auto& userDataLayout = m_info.userDataLayout.compact; + VkResult result = VK_SUCCESS; + + const auto& userDataLayout = m_info.userDataLayout.compact; + const auto& commonUserDataLayout = m_info.userDataLayout.common; + const bool enableEarlyCompile = m_pDevice->GetRuntimeSettings().enableEarlyCompile; Vkgc::ResourceMappingRootNode* pUserDataNodes = static_cast(pBuffer); @@ -1407,12 +1417,12 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( } // TODO: Build the internal push constant resource mapping - if (userDataLayout.pushConstRegCount > 0) + if (commonUserDataLayout.pushConstRegCount > 0) { auto pPushConstNode = &pUserDataNodes[userDataNodeCount]; pPushConstNode->node.type = Vkgc::ResourceMappingNodeType::PushConst; - pPushConstNode->node.offsetInDwords = 
userDataLayout.pushConstRegBase; - pPushConstNode->node.sizeInDwords = userDataLayout.pushConstRegCount; + pPushConstNode->node.offsetInDwords = commonUserDataLayout.pushConstRegBase; + pPushConstNode->node.sizeInDwords = commonUserDataLayout.pushConstRegCount; pPushConstNode->node.srdRange.set = Vkgc::InternalDescriptorSetId; pPushConstNode->visibility = stageMask; @@ -1530,7 +1540,6 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( VK_ASSERT(m_info.userDataLayout.scheme == PipelineLayoutScheme::Indirect); constexpr uint32_t VbTablePtrRegCount = 1; // PAL requires all indirect user data tables to be 1DW - constexpr uint32_t PushConstPtrRegCount = 1; constexpr uint32_t TransformFeedbackRegCount = 1; constexpr uint32_t ReverseThreadGroupRegCount = 1; constexpr uint32_t DebugPrintfRegCount = 1; @@ -1539,7 +1548,8 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( #endif constexpr uint32_t DescSetsPtrRegCount = 2 * SetPtrRegCount * MaxDescriptorSets; - const auto& userDataLayout = m_info.userDataLayout.indirect; + const auto& userDataLayout = m_info.userDataLayout.indirect; + const auto& commonUserDataLayout = m_info.userDataLayout.common; const bool uberFetchShaderEnabled = IsUberFetchShaderEnabled(m_pDevice); const bool transformFeedbackEnabled = ReserveXfbNode(m_pDevice); @@ -1560,8 +1570,7 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( regBaseOffset += SetPtrRegCount; } - const uint32_t pushConstPtrRegBase = regBaseOffset; - regBaseOffset += PushConstPtrRegCount; + regBaseOffset += commonUserDataLayout.pushConstRegCount; const uint32_t transformFeedbackRegBase = transformFeedbackEnabled ? regBaseOffset : InvalidReg; if (transformFeedbackRegBase != InvalidReg) @@ -1628,29 +1637,14 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( &mappingNodeCount); } - // Build push constants mapping - if (userDataLayout.pushConstSizeInDword > 0) + if (commonUserDataLayout.pushConstRegCount > 0) { - // Build mapping for push constant resource - Vkgc::ResourceMappingNode* pPushConstNode = &pResourceNodes[mappingNodeCount]; - - pPushConstNode->type = Vkgc::ResourceMappingNodeType::PushConst; - pPushConstNode->offsetInDwords = 0; - pPushConstNode->sizeInDwords = userDataLayout.pushConstSizeInDword; - pPushConstNode->srdRange.set = Vkgc::InternalDescriptorSetId; - pPushConstNode->srdRange.strideInDwords = 0; - - ++mappingNodeCount; - - // Build mapping for the pointer pointing to push constants buffer - Vkgc::ResourceMappingRootNode* pPushConstPtrNode = &pUserDataNodes[userDataNodeCount]; - - pPushConstPtrNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr; - pPushConstPtrNode->node.offsetInDwords = pushConstPtrRegBase; - pPushConstPtrNode->node.sizeInDwords = PushConstPtrRegCount; - pPushConstPtrNode->node.tablePtr.nodeCount = 1; - pPushConstPtrNode->node.tablePtr.pNext = pPushConstNode; - pPushConstPtrNode->visibility = stageMask; + auto pPushConstNode = &pUserDataNodes[userDataNodeCount]; + pPushConstNode->node.type = Vkgc::ResourceMappingNodeType::PushConst; + pPushConstNode->node.offsetInDwords = commonUserDataLayout.pushConstRegBase; + pPushConstNode->node.sizeInDwords = commonUserDataLayout.pushConstRegCount; + pPushConstNode->node.srdRange.set = Vkgc::InternalDescriptorSetId; + pPushConstNode->visibility = stageMask; userDataNodeCount += 1; } diff --git a/icd/api/vk_query.cpp b/icd/api/vk_query.cpp index 9822879d..a187ad82 100644 --- a/icd/api/vk_query.cpp +++ b/icd/api/vk_query.cpp @@ -63,7 +63,9 @@ VkResult 
QueryPool::Create( else #endif { - result = PalQueryPool::Create(pDevice, pCreateInfo, pAllocator, &pObject); + { + result = PalQueryPool::Create(pDevice, pCreateInfo, pAllocator, &pObject); + } } } else diff --git a/icd/api/vk_swapchain.cpp b/icd/api/vk_swapchain.cpp index fce1dad2..a78b8e67 100644 --- a/icd/api/vk_swapchain.cpp +++ b/icd/api/vk_swapchain.cpp @@ -784,7 +784,6 @@ VkResult SwapChain::AcquireNextImage( { result = PalToVkResult(m_pPalSwapChain->AcquireNextImage(acquireInfo, pImageIndex)); - } } @@ -911,8 +910,7 @@ Pal::IGpuMemory* SwapChain::UpdatePresentInfo( // Let the fullscreen manager perform any fullscreen ownership transitions and override some of this present // information in case it has enabled fullscreen. - if ((m_pFullscreenMgr != nullptr) - ) + if (m_pFullscreenMgr != nullptr) { m_pFullscreenMgr->TryEnterExclusive(this); diff --git a/icd/res/ver.h b/icd/res/ver.h index 6cfc716e..86be3368 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 308 +#define VULKAN_ICD_BUILD_VERSION 310 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION @@ -45,7 +45,7 @@ // These values specify the driver ID and driver info string #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN" #define VULKAN_DRIVER_NAME_STR "AMD open-source driver" -#define VULKAN_DRIVER_INFO_STR "2024.Q2.2" +#define VULKAN_DRIVER_INFO_STR "2024.Q2.3" #define VULKAN_DRIVER_INFO_STR_LLPC "(LLPC)" // These values tell which version of the conformance test the driver is compliant against diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index c5e0cd7a..0a9d8a6c 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -176,11 +176,13 @@ void VulkanSettingsLoader::OverrideSettingsBySystemInfo() char executableName[PATH_MAX]; char executablePath[PATH_MAX]; utils::GetExecutableNameAndPath(executableName, executablePath); - Util::Snprintf(m_settings.pipelineDumpDir, - sizeof(m_settings.pipelineDumpDir), + char tmpDirStr[DD_SETTINGS_MAX_PATH_SIZE] = {0}; + Util::Snprintf(tmpDirStr, + sizeof(tmpDirStr), "%s/%s", m_settings.pipelineDumpDir, executableName); + Util::Strncpy(m_settings.pipelineDumpDir, tmpDirStr, sizeof(m_settings.pipelineDumpDir)); } MakeAbsolutePath(m_settings.pipelineDumpDir, sizeof(m_settings.pipelineDumpDir), @@ -834,6 +836,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.optImgMaskToApplyShaderReadUsageForTransferSrc = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; m_settings.forceDepthClampBasedOnZExport = true; + + m_settings.clampMaxImageSize = 16384u; } if (appProfile == AppProfile::SeriousSamFusion) @@ -841,6 +845,76 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.preciseAnisoMode = DisablePreciseAnisoAll; m_settings.useAnisoThreshold = true; m_settings.anisoThreshold = 1.0f; + + m_settings.clampMaxImageSize = 16384u; + } + + if ((appProfile == AppProfile::TalosVR) || + (appProfile == AppProfile::SeriousSamVrTheLastHope) || + (appProfile == AppProfile::SedpEngine)) + { + m_settings.clampMaxImageSize = 16384u; + } + + if (appProfile == AppProfile::SeriousSam4) + { + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.forceEnableDcc = ForceDccDefault; + } + + m_settings.enableWgpMode = 
Vkgc::ShaderStageBit::ShaderStageComputeBit; + + m_settings.clampMaxImageSize = 16384u; + } + + if (appProfile == AppProfile::KnockoutCity) + { + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage| + ForceDccFor32BppShaderStorage); + } + + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor32BppShaderStorage | + ForceDccFor64BppShaderStorage); + + if (pInfo->revision == Pal::AsicRevision::Navi22) + { + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + } + } + } + if (appProfile == AppProfile::EvilGenius2) + { + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.csWaveSize = 64; + m_settings.fsWaveSize = 64; + + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + if (pInfo->revision == Pal::AsicRevision::Navi21) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; + } + } + + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; + } } if (appProfile == AppProfile::QuakeEnhanced) @@ -1366,6 +1440,10 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( // It looks incorrect pipeline layout is used. Force indirect can make optimized pipeline layout compatible // with fast-linked pipeline. m_settings.pipelineLayoutSchemeSelectionStrategy = PipelineLayoutSchemeSelectionStrategy::ForceIndirect; + + // It results from incorrect behavior of DXVK. Incompatible push constant size leads to Gpu page fault + // during fast link in pipeline creation. 
+ m_settings.pipelineLayoutPushConstantCompatibilityCheck = true; } if (appProfile == AppProfile::AshesOfTheSingularity) @@ -1575,11 +1653,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( #endif } - if (appProfile == AppProfile::Zink) - { - m_settings.padVertexBuffers = true; - } - if (appProfile == AppProfile::SpidermanRemastered) { m_settings.supportMutableDescriptors = false; @@ -1897,8 +1970,31 @@ void VulkanSettingsLoader::UpdatePalSettings() pPalSettings->textureOptLevel = m_settings.vulkanTexFilterQuality; + switch (m_settings.disableBinningPsKill) + { + case DisableBinningPsKillEnable: + pPalSettings->disableBinningPsKill = Pal::OverrideMode::Enabled; + break; + case DisableBinningPsKillDisable: + pPalSettings->disableBinningPsKill = Pal::OverrideMode::Disabled; + break; + case DisableBinningPsKillDefault: + default: + pPalSettings->disableBinningPsKill = Pal::OverrideMode::Default; + break; + } + pPalSettings->hintDisableSmallSurfColorCompressionSize = m_settings.disableSmallSurfColorCompressionSize; + pPalSettings->binningContextStatesPerBin = m_settings.binningContextStatesPerBin; + pPalSettings->binningPersistentStatesPerBin = m_settings.binningPersistentStatesPerBin; + + // if 0 than we can skip it and let use pal's default value + if (m_settings.binningMaxPrimPerBatch > 0) + { + pPalSettings->binningMaxPrimPerBatch = m_settings.binningMaxPrimPerBatch; + } + // Setting disableSkipFceOptimization to false enables an optimization in PAL that disregards the FCE in a transition // if one of the built in clear colors are used (white/black) and the image is TCC compatible. pPalSettings->disableSkipFceOptimization = false; diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index 41ab5ff0..689d0128 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -901,6 +901,18 @@ "Scope": "Driver", "Type": "enum" }, + { + "Name": "PipelineLayoutPushConstantCompatibilityCheck", + "Description": "Decide whether to enable push constant compatibility check in fast link", + "Tags": [ + "Pipeline Options" + ], + "Defaults": { + "Default": false + }, + "Scope": "Driver", + "Type": "bool" + }, { "Name": "PipelineBinningMode", "Description": "Specifies whether to override binning setting for pipeline.", @@ -933,6 +945,108 @@ "Scope": "Driver", "Type": "enum" }, + { + "Name": "DisableBinningPsKill", + "Description": "Disable binning when the pixels can be rejected before the PS and the PS can kill the pixel.", + "Tags": [ + "Pipeline Options" + ], + "Defaults": { + "Default": "DisableBinningPsKillDefault" + }, + "ValidValues": { + "IsEnum": true, + "Values": [ + { + "Name": "DisableBinningPsKillDisable", + "Value": 0, + "Description": "Enable Binning." 
+ }, + { + "Name": "DisableBinningPsKillEnable", + "Value": 1, + "Description": "Disable Binning" + }, + { + "Name": "DisableBinningPsKillDefault", + "Value": 2, + "Description": "Default PAL values" + } + ], + "Name": "DisableBinningPsKill" + }, + "Scope": "Driver", + "Type": "enum" + }, + { + "Name": "BinningMaxPrimPerBatch", + "Description": "Max Prims per Batch", + "Tags": [ + "Pipeline Options" + ], + "Defaults": { + "Default": 0 + }, + "Scope": "Driver", + "Type": "uint32" + }, + { + "Name": "BinningContextStatesPerBin", + "Description": "Binning Context States Per Bin", + "Tags": [ + "Pipeline Options" + ], + "Defaults": { + "Default": 0 + }, + "Scope": "Driver", + "Type": "uint32" + }, + { + "Name": "BinningPersistentStatesPerBin", + "Description": "Binning Persistent State Per Bin", + "Tags": [ + "Pipeline Options" + ], + "Defaults": { + "Default": 0 + }, + "Scope": "Driver", + "Type": "uint32" + }, + { + "Name": "BinningOverridePbbForMrt", + "Description": "Override binning setting for MRT >= 2 targets.", + "Tags": [ + "Pipeline Options" + ], + "Defaults": { + "Default": "BinningOverridePbbForMrtDefault" + }, + "ValidValues": { + "IsEnum": true, + "Values": [ + { + "Name": "BinningOverridePbbForMrtDisable", + "Value": 0, + "Description": "Force PBB off for shader" + }, + { + "Name": "BinningOverridePbbForMrtEnable", + "Value": 1, + "Description": "Force PBB on for shader" + }, + { + "Name": "BinningOverridePbbForMrtDefault", + "Value": 2, + "Description": "Default PAL value" + } + ], + "Name": "BinningOverridePbbForMrt" + }, + "Scope": "Driver", + "Type": "enum" + }, { "Name": "PipelineUseProfileHashAsClientHash", "Description": "If true, the profile hash (calculated exclusively from SPIRV + entry point) is used as PAL client hash and will appear in e.g. GPUProfiler layer measurements. Useful when writing and updating pipeline profiles for applications. ", @@ -1314,6 +1428,18 @@ "Scope": "Driver", "Type": "bool" }, + { + "Name": "ForcePerComponentFetchForUnalignedVbFormat", + "Description": "Force stride for unaligned vertex buffer format", + "Tags": [ + "SPIRV Options" + ], + "Defaults": { + "Default": true + }, + "Scope": "Driver", + "Type": "bool" + }, { "Name": "EnableUberFetchShader", "Description": "Enable uber fetch shder.", @@ -9166,6 +9292,18 @@ "Type": "bool", "Scope": "Driver", "Name": "EnableColorClearAutoSync" + }, + { + "Name": "ClampMaxImageSize", + "Description": "Clamp the max image dimensions (width, height and depth) to the minimum of the specified value and HW reported value. Useful for compatibility issues with older games running on newer HW.", + "Tags": [ + "General" + ], + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "Scope": "Driver" } ] } \ No newline at end of file
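Editorial note on the new ClampMaxImageSize setting that closes this file: it pairs with the PhysicalDevice::Initialize() hunk earlier in the diff, which clamps the PAL-reported maximum image dimensions whenever the value is non-zero. A self-contained sketch of that behaviour, assuming a simplified extent type and std::min in place of the driver's Util::Min:

#include <algorithm>
#include <cstdint>

struct Extent3dSketch { uint32_t width; uint32_t height; uint32_t depth; };

// A clamp value of 0 means "no clamp", matching the JSON default above.
static Extent3dSketch ClampMaxImageDimensions(Extent3dSketch reported, uint32_t clampMaxImageSize)
{
    if (clampMaxImageSize > 0)
    {
        reported.width  = std::min(reported.width,  clampMaxImageSize);
        reported.height = std::min(reported.height, clampMaxImageSize);
        reported.depth  = std::min(reported.depth,  clampMaxImageSize);
    }
    return reported;
}

The app profiles touched in settings.cpp set the value to 16384, so a GPU reporting a larger limit in any dimension would be exposed to those titles as 16384 in that dimension.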