From 38bcef334cae0fa4906b71bfca093eb3f3052dff Mon Sep 17 00:00:00 2001 From: Satti Date: Tue, 19 Nov 2024 03:04:22 -0800 Subject: [PATCH 1/2] Update Intel Thread Counts --- .../windows/hardware_core_enumerator.cc | 37 ++++++++++++++++++- tools/ci_build/build.py | 4 -- winml/lib/Api/HardwareCoreEnumerator.cpp | 27 ++++++++++---- 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/onnxruntime/core/platform/windows/hardware_core_enumerator.cc b/onnxruntime/core/platform/windows/hardware_core_enumerator.cc index bf3b53afbd7d3..add20f55ce109 100644 --- a/onnxruntime/core/platform/windows/hardware_core_enumerator.cc +++ b/onnxruntime/core/platform/windows/hardware_core_enumerator.cc @@ -1,7 +1,8 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include "hardware_core_enumerator.h" +#include "core/platform/windows/env.h" #include #include #include @@ -83,6 +84,40 @@ uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() { // # of physical cores = # of P cores + # of E Cores + # of Soc Cores. // # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores. auto cores = GetCoreInfo(); + #if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__) + const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI" + bool isIntelSpecifiedPlatform = false; + const int kVendorID_IntelSpecifiedPlatformIDs[3] = {//ExtendedModel,ExtendedFamily,Family Code, and Model Number + 0xa06a, //MTL + 0xc065, //ARL-H + 0xb065 //ARL-U + }; + + int regs_leaf0[4]; + int regs_leaf1[4]; + __cpuid(regs_leaf0, 0); + __cpuid(regs_leaf1, 0x1); + + + auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) && + (kVendorID_Intel[2] == regs_leaf0[3]); + + for(int intelSpecifiedPlatform : kVendorID_IntelSpecifiedPlatformIDs){ + if ((regs_leaf1[0]>>4) == intelSpecifiedPlatform){ + isIntelSpecifiedPlatform = true; + } + } + + if (isIntel) { + if(isIntelSpecifiedPlatform){ + //We want to exclude cores without an LLC + return cores.LLCCores; + } + else{ + return cores.PhysicalCores; + } + } +#endif return cores.LLCCores; } diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 24dc6124d4a89..a2818e960224b 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1581,10 +1581,6 @@ def generate_build_tree( # The "/profile" flag implies "/DEBUG:FULL /DEBUGTYPE:cv,fixup /OPT:REF /OPT:NOICF /INCREMENTAL:NO /FIXED:NO". We set it for satisfying a Microsoft internal compliance requirement. External users # do not need to have it. ldflags = ["/profile", "/DYNAMICBASE"] - # Address Sanitizer libs do not have a Qspectre version. So they two cannot be both enabled. - if not args.enable_address_sanitizer: - # Also enable a special perf patch that was made for Intel Meteor Lake mobile CPUs - cflags += ["/Qspectre", "/DONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH"] if config == "Release": cflags += ["/O2", "/Ob2", "/DNDEBUG"] elif config == "RelWithDebInfo": diff --git a/winml/lib/Api/HardwareCoreEnumerator.cpp b/winml/lib/Api/HardwareCoreEnumerator.cpp index 1763290718a8f..d7d03feb6a290 100644 --- a/winml/lib/Api/HardwareCoreEnumerator.cpp +++ b/winml/lib/Api/HardwareCoreEnumerator.cpp @@ -1,8 +1,8 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. + +// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include "lib/Api/pch/pch.h" - #include "HardwareCoreEnumerator.h" namespace WINMLP { @@ -86,24 +86,35 @@ uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() { // # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores. auto cores = GetCoreInfo(); -#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__) + #if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__) const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI" + bool isIntelSpecifiedPlatform = false; + const int kVendorID_IntelSpecifiedPlatformIDs[3] = {//ExtendedModel,ExtendedFamily,Family Code, and Model Number + 0xa06a, //MTL + 0xc065, //ARL-H + 0xb065 //ARL-U + }; + int regs_leaf0[4]; - int regs_leaf7[4]; + int regs_leaf1[4]; __cpuid(regs_leaf0, 0); - __cpuid(regs_leaf7, 0x7); + __cpuid(regs_leaf1, 0x1); + auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) && (kVendorID_Intel[2] == regs_leaf0[3]); - auto isHybrid = (regs_leaf7[3] & (1 << 15)); + for(int intelSpecifiedPlatform : kVendorID_IntelSpecifiedPlatformIDs){ + if ((regs_leaf1[0]>>4) == intelSpecifiedPlatform){ + isIntelSpecifiedPlatform = true; + } + } - if (isIntel && isHybrid) { + if (isIntel && isIntelSpecifiedPlatform) { // We want to use the number of physical cores, but exclude cores without an LLC return cores.LLCCores; } #endif - return cores.PhysicalCores; } From 3a1dd1d7c226a4b2699df8daa81dc110509ccba3 Mon Sep 17 00:00:00 2001 From: Satti Date: Fri, 22 Nov 2024 15:16:22 -0800 Subject: [PATCH 2/2] Resolve formatting --- .../windows/hardware_core_enumerator.cc | 28 +++++++++---------- winml/lib/Api/HardwareCoreEnumerator.cpp | 18 ++++++------ 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/onnxruntime/core/platform/windows/hardware_core_enumerator.cc b/onnxruntime/core/platform/windows/hardware_core_enumerator.cc index add20f55ce109..7464ab4c57d01 100644 --- a/onnxruntime/core/platform/windows/hardware_core_enumerator.cc +++ b/onnxruntime/core/platform/windows/hardware_core_enumerator.cc @@ -84,36 +84,34 @@ uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() { // # of physical cores = # of P cores + # of E Cores + # of Soc Cores. // # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores. auto cores = GetCoreInfo(); - #if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__) +#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__) const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI" bool isIntelSpecifiedPlatform = false; - const int kVendorID_IntelSpecifiedPlatformIDs[3] = {//ExtendedModel,ExtendedFamily,Family Code, and Model Number - 0xa06a, //MTL - 0xc065, //ARL-H - 0xb065 //ARL-U - }; + const int kVendorID_IntelSpecifiedPlatformIDs[3] = { + // ExtendedModel, ExtendedFamily, Family Code, and Model Number + 0xa06a, // MTL + 0xc065, // ARL-H + 0xb065 // ARL-U + }; int regs_leaf0[4]; int regs_leaf1[4]; __cpuid(regs_leaf0, 0); __cpuid(regs_leaf1, 0x1); + auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) && (kVendorID_Intel[2] == regs_leaf0[3]); - auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) && - (kVendorID_Intel[2] == regs_leaf0[3]); - - for(int intelSpecifiedPlatform : kVendorID_IntelSpecifiedPlatformIDs){ - if ((regs_leaf1[0]>>4) == intelSpecifiedPlatform){ + for (int intelSpecifiedPlatform : kVendorID_IntelSpecifiedPlatformIDs) { + if ((regs_leaf1[0] >> 4) == intelSpecifiedPlatform) { isIntelSpecifiedPlatform = true; } } if (isIntel) { - if(isIntelSpecifiedPlatform){ - //We want to exclude cores without an LLC + if (isIntelSpecifiedPlatform) { + // We want to exclude cores without an LLC return cores.LLCCores; - } - else{ + } else { return cores.PhysicalCores; } } diff --git a/winml/lib/Api/HardwareCoreEnumerator.cpp b/winml/lib/Api/HardwareCoreEnumerator.cpp index d7d03feb6a290..f1272fc1b8626 100644 --- a/winml/lib/Api/HardwareCoreEnumerator.cpp +++ b/winml/lib/Api/HardwareCoreEnumerator.cpp @@ -86,26 +86,26 @@ uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() { // # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores. auto cores = GetCoreInfo(); - #if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__) +#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__) const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI" bool isIntelSpecifiedPlatform = false; - const int kVendorID_IntelSpecifiedPlatformIDs[3] = {//ExtendedModel,ExtendedFamily,Family Code, and Model Number - 0xa06a, //MTL - 0xc065, //ARL-H - 0xb065 //ARL-U - }; + const int kVendorID_IntelSpecifiedPlatformIDs[3] = { + // ExtendedModel,ExtendedFamily,Family Code, and Model Number + 0xa06a, // MTL + 0xc065, // ARL-H + 0xb065 // ARL-U + }; int regs_leaf0[4]; int regs_leaf1[4]; __cpuid(regs_leaf0, 0); __cpuid(regs_leaf1, 0x1); - auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) && (kVendorID_Intel[2] == regs_leaf0[3]); - for(int intelSpecifiedPlatform : kVendorID_IntelSpecifiedPlatformIDs){ - if ((regs_leaf1[0]>>4) == intelSpecifiedPlatform){ + for (int intelSpecifiedPlatform : kVendorID_IntelSpecifiedPlatformIDs) { + if ((regs_leaf1[0] >> 4) == intelSpecifiedPlatform) { isIntelSpecifiedPlatform = true; } }