diff --git a/CMakeLists.txt b/CMakeLists.txt
index ccda3d89fb5..abd17e8aebd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -561,6 +561,42 @@ endif()
 
 target_link_libraries(codegen_internal PUBLIC LLVM_JIT)
 
+# Precompiled Headers for Top nvFuser Headers
+# Post-M8, template instantiation is reduced by 81%, making header parsing
+# a significant fraction of build cost. This PCH targets the top 10 heaviest
+# nvFuser-controllable headers by exclusive parse time (from M9 Task 4
+# analysis). Enabled by default for Release builds (provides ~50% build time
+# improvement).
+if(CMAKE_BUILD_TYPE STREQUAL "Release")
+  option(NVFUSER_USE_POLYMORPHIC_PCH "Use PCH for top nvFuser headers to reduce parse time" ON)
+else()
+  option(NVFUSER_USE_POLYMORPHIC_PCH "Use PCH for top nvFuser headers to reduce parse time" OFF)
+endif()
+
+if(NVFUSER_USE_POLYMORPHIC_PCH)
+  message(STATUS "Enabling PCH for top 10 nvFuser headers")
+  target_precompile_headers(codegen_internal PRIVATE
+    # Top 10 nvFuser headers by exclusive parse time (M9 Task 4 analysis)
+    "${NVFUSER_SRCS_DIR}/polymorphic_value.h"                          # 1675s (27.9m)
+    "${NVFUSER_ROOT}/lib/dynamic_type/src/dynamic_type/type_traits.h"  # 473.6s (7.9m)
+    "${NVFUSER_SRCS_DIR}/ir/base_nodes.h"                              # 284.5s (4.7m)
+    "${NVFUSER_SRCS_DIR}/scheduler/tools/abstract_tensor.h"            # 162.1s (2.7m)
+    "${NVFUSER_SRCS_DIR}/type.h"                                       # 81.6s (1.4m)
+    "${NVFUSER_SRCS_DIR}/ir/container.h"                               # 51.6s (0.9m)
+    "${NVFUSER_SRCS_DIR}/serde/fusion_cache_generated.h"               # 44.1s (0.7m)
+    "${NVFUSER_SRCS_DIR}/iter_visitor.h"                               # 38.2s (0.6m)
+    "${NVFUSER_SRCS_DIR}/ir/internal_nodes.h"                          # 33.3s (0.6m)
+    "${NVFUSER_SRCS_DIR}/ir/interface_nodes.h"                         # 29.6s (0.5m)
+  )
+  # Skip PCH for polymorphic_value.cpp to allow visibility override
+  # (PCH caches the type with hidden visibility)
+  set_source_files_properties(
+    "${NVFUSER_SRCS_DIR}/polymorphic_value.cpp"
+    PROPERTIES
+      SKIP_PRECOMPILE_HEADERS ON
+      COMPILE_OPTIONS "-fvisibility=default"
+  )
+endif()
+
 add_library(nvfuser_codegen SHARED $<TARGET_OBJECTS:codegen_internal>)
 
 if (BUILD_CUTLASS AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
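A note on the visibility override above: a PCH built under the library's hidden-visibility default bakes that hiddenness into the cached template state, which is why polymorphic_value.cpp must opt out of the PCH before it can usefully be compiled with `-fvisibility=default`. A minimal sketch of the distinction, assuming a GCC/Clang toolchain (names are illustrative, not nvFuser code):

```cpp
// With -fvisibility=hidden as the build default, symbols are DSO-local
// unless annotated otherwise.
__attribute__((visibility("hidden"))) int dsoLocalHelper(int x) {
  return x + 1;  // never visible outside the shared library
}

__attribute__((visibility("default"))) int exportedEntry(int x) {
  return dsoLocalHelper(x);  // visible to the dynamic linker and other DSOs
}
```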
@@ -1109,6 +1145,35 @@ function(add_test_without_main TEST_NAME TEST_SRC ADDITIONAL_LINK)
   add_executable(${TEST_NAME} ${TEST_SRC})
   set_property(TARGET ${TEST_NAME} PROPERTY CXX_STANDARD ${NVFUSER_CPP_STANDARD})
   target_compile_definitions(${TEST_NAME} PRIVATE USE_GTEST)
+
+  # PCH for test targets: All test executables share a single PCH to avoid
+  # redundant compilation. The first test target (test_nvfuser) creates the
+  # PCH, and all subsequent tests reuse it via REUSE_FROM.
+  # Note: Can't reuse from codegen_internal due to -fPIC flag difference.
+  if(NVFUSER_USE_POLYMORPHIC_PCH)
+    get_property(NVFUSER_TEST_PCH_TARGET GLOBAL PROPERTY NVFUSER_TEST_PCH_TARGET)
+    if(NOT NVFUSER_TEST_PCH_TARGET)
+      # First test target: create the PCH with top 10 nvFuser headers
+      message(STATUS "Creating shared test PCH on target: ${TEST_NAME}")
+      target_precompile_headers(${TEST_NAME} PRIVATE
+        "${NVFUSER_SRCS_DIR}/polymorphic_value.h"
+        "${NVFUSER_ROOT}/lib/dynamic_type/src/dynamic_type/type_traits.h"
+        "${NVFUSER_SRCS_DIR}/ir/base_nodes.h"
+        "${NVFUSER_SRCS_DIR}/scheduler/tools/abstract_tensor.h"
+        "${NVFUSER_SRCS_DIR}/type.h"
+        "${NVFUSER_SRCS_DIR}/ir/container.h"
+        "${NVFUSER_SRCS_DIR}/serde/fusion_cache_generated.h"
+        "${NVFUSER_SRCS_DIR}/iter_visitor.h"
+        "${NVFUSER_SRCS_DIR}/ir/internal_nodes.h"
+        "${NVFUSER_SRCS_DIR}/ir/interface_nodes.h"
+      )
+      set_property(GLOBAL PROPERTY NVFUSER_TEST_PCH_TARGET ${TEST_NAME})
+    else()
+      # Subsequent test targets: reuse existing PCH
+      target_precompile_headers(${TEST_NAME} REUSE_FROM ${NVFUSER_TEST_PCH_TARGET})
+    endif()
+  endif()
+
   target_include_directories(${TEST_NAME} PRIVATE "${NVFUSER_ROOT}")
   target_include_directories(${TEST_NAME} SYSTEM PRIVATE
     ${NVFUSER_ROOT}/third_party/googletest/googletest/include
diff --git a/csrc/device_lower/utils.cpp b/csrc/device_lower/utils.cpp
index efb5933aee7..7fb50a1f8f2 100644
--- a/csrc/device_lower/utils.cpp
+++ b/csrc/device_lower/utils.cpp
@@ -1375,7 +1375,7 @@ std::string print(const std::monostate&) {
 }
 
 std::string print(const Projection& proj) {
-  return Projection::dispatch(
+  return Projection::dispatch<std::string>(
       [&](const auto& proj) { return print(proj); }, proj);
 }
 
@@ -1400,7 +1400,7 @@ bool related(const std::monostate&, const ValGroup& to) {
 }
 
 bool related(const Projection& proj, const ValGroup& to) {
-  return Projection::dispatch(
+  return Projection::dispatch<bool>(
       [&](const auto& proj) { return related(proj, to); }, proj);
 }
 
@@ -1430,7 +1430,7 @@ Val* extent(const std::monostate&) {
 }
 
 Val* extent(const Projection& proj) {
-  return Projection::dispatch(
+  return Projection::dispatch<Val*>(
       [&](const auto& proj) { return extent(proj); }, proj);
 }
 
@@ -1696,7 +1696,7 @@ Projection propagate(
     const ValGraph& id_graph,
     const ExprGroup& eg,
     Direction direction) {
-  return Projection::dispatch(
+  return Projection::dispatch<Projection>(
       [&](const auto& proj) {
         return propagate(proj, id_graph, eg, direction);
       },
@@ -1757,7 +1757,7 @@ Val* proveLinearAndGetStrideAfterPropagation(
 Val* proveLinearAndGetStrideAfterPropagation(
     const Projection& proj,
     const ValGroups& domain) {
-  return Projection::dispatch(
+  return Projection::dispatch<Val*>(
       [&](const auto& proj) {
         return proveLinearAndGetStrideAfterPropagation(proj, domain);
       },
@@ -2039,7 +2039,7 @@ Projection simplify(Projection projection) {
   auto simplified = projection;
   do {
     projection = simplified;
-    simplified = Projection::dispatch(
+    simplified = Projection::dispatch<Projection>(
         [&](const auto& projection) { return simplify(projection); },
         projection);
   } while (simplified.type() != projection.type() || simplified != projection);
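Every dispatch call site above gains an explicit result-type template argument (the types shown are reconstructed from the enclosing function signatures). A plausible reading: once DynamicType is only explicitly instantiated in one TU (see the polymorphic_value changes below), callers can no longer lean on machinery that deduces a common return type across all alternatives. A hedged sketch of the idiom, using std::variant rather than nvFuser's DynamicType:

```cpp
#include <string>
#include <variant>

using Node = std::variant<int, double>;

// Sketch only: passing the result type R explicitly spares the compiler from
// instantiating the callable against every alternative merely to deduce a
// common return type.
template <typename R, typename F, typename V>
R dispatch(F&& f, V&& v) {
  return std::visit(
      [&](auto&& alt) -> R { return f(alt); }, std::forward<V>(v));
}

std::string describe(const Node& n) {
  return dispatch<std::string>(
      [](const auto& x) { return std::to_string(x); }, n);
}
```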
diff --git a/csrc/multidevice/symmetric_tensor.h b/csrc/multidevice/symmetric_tensor.h
index 55860845f1b..13db3adf42c 100644
--- a/csrc/multidevice/symmetric_tensor.h
+++ b/csrc/multidevice/symmetric_tensor.h
@@ -85,11 +85,11 @@ class SymmetricTensor {
   size_t aligned_size_;
   bool are_remote_tensors_setup_ = false;
   bool is_multicast_setup_ = false;
-  CUmemGenericAllocationHandle mcast_handle_{};
-  CUdevice cu_dev_{};
+  [[maybe_unused]] CUmemGenericAllocationHandle mcast_handle_{};
+  [[maybe_unused]] CUdevice cu_dev_{};
   void* mc_ptr_{nullptr};
-  int exporter_rank_{-1};
-  int peer_fd_{-1};
+  [[maybe_unused]] int exporter_rank_{-1};
+  [[maybe_unused]] int peer_fd_{-1};
   bool is_contiguous_view_setup_ = false;
   at::Tensor contiguous_view_;
 };
diff --git a/csrc/polymorphic_value.cpp b/csrc/polymorphic_value.cpp
index 58c3c3344eb..7732518ae02 100644
--- a/csrc/polymorphic_value.cpp
+++ b/csrc/polymorphic_value.cpp
@@ -140,3 +140,18 @@ c10::IValue toIValue(const PolymorphicValue& x) {
 
 } // namespace PolymorphicValue_functions
 } // namespace nvfuser
+
+// Explicit instantiation of DynamicType for PolymorphicValue.
+// This is the single point where the template is fully instantiated.
+// Note: This file is compiled with -fvisibility=default (set in CMakeLists.txt)
+// to ensure all DynamicType symbols are exported from the shared library.
+template struct dynamic_type::DynamicType<
+    dynamic_type::Containers<std::vector>,
+    nvfuser::StructHandle,
+    nvfuser::Pointer,
+    nvfuser::Opaque,
+    at::Tensor,
+    std::complex<double>,
+    double,
+    int64_t,
+    bool>;
diff --git a/csrc/polymorphic_value.h b/csrc/polymorphic_value.h
index 49b42555d79..27df7bc21e7 100644
--- a/csrc/polymorphic_value.h
+++ b/csrc/polymorphic_value.h
@@ -544,4 +544,17 @@ c10::IValue toIValue(const PolymorphicValue& x);
 
 } // namespace nvfuser
 
+// Prevent implicit instantiation in other TUs - use explicit instantiation
+// from polymorphic_value.cpp
+extern template struct dynamic_type::DynamicType<
+    dynamic_type::Containers<std::vector>,
+    nvfuser::StructHandle,
+    nvfuser::Pointer,
+    nvfuser::Opaque,
+    at::Tensor,
+    std::complex<double>,
+    double,
+    int64_t,
+    bool>;
+
 #include
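The extern-template / explicit-instantiation pair above is the core of the change: every TU that includes polymorphic_value.h now merely parses the DynamicType declaration and defers instantiation to polymorphic_value.cpp. A minimal sketch of the pattern with a hypothetical Widget template:

```cpp
// widget.h -- hypothetical template, illustrating the pattern only.
template <typename T>
struct Widget {
  T value;
  T doubled() const { return value + value; }
};
// Every includer sees this promise and skips implicit instantiation.
extern template struct Widget<int>;

// widget.cpp -- the one TU that pays the instantiation cost (and, when built
// with -fvisibility=default, exports the resulting symbols).
template struct Widget<int>;
```

The trade-off is classic: parse time in N translation units is exchanged for one full instantiation plus a link-time dependency on the exporting TU.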
diff --git a/csrc/scheduler/matmul_ampere-.cpp b/csrc/scheduler/matmul_ampere-.cpp
index 04e9a220130..e594b506326 100644
--- a/csrc/scheduler/matmul_ampere-.cpp
+++ b/csrc/scheduler/matmul_ampere-.cpp
@@ -56,9 +56,9 @@ AbstractTensor swizzleSharedMemory(TensorView* shared_mem_tv) {
 
   // Extract the constant sizes of the swizzled tile
   const int64_t tile_size_x =
-      swizzle_domain[-2]->extent()->evaluate().as<int64_t>();
+      swizzle_domain[-2].as<IterDomain*>()->extent()->evaluate().as<int64_t>();
   const int64_t tile_size_y =
-      swizzle_domain[-1]->extent()->evaluate().as<int64_t>();
+      swizzle_domain[-1].as<IterDomain*>()->extent()->evaluate().as<int64_t>();
 
   // Only tested for (1) ldmatrix access with sizeof(T) == 16bit (i.e.
   // half/bfloat16) and (2) epilogue general access with sizeof(T) == 32bit
diff --git a/csrc/scheduler/mma_utils.cpp b/csrc/scheduler/mma_utils.cpp
index ab3c2b77049..481814f381f 100644
--- a/csrc/scheduler/mma_utils.cpp
+++ b/csrc/scheduler/mma_utils.cpp
@@ -1127,7 +1127,7 @@ AbstractTensor MmaSwizzler::scheduleMmaOutputAllocation(AbstractTensor t) {
   // Assume last 2 dims, for example [M64, N24] or [M64, N24, R]
   NVF_ERROR(t.size() >= 2);
 
-  bool has_reduction = t[-1]->isReduction();
+  bool has_reduction = t[-1].as<IterDomain*>()->isReduction();
   int64_t m_pos = has_reduction ? -3 : -2;
   int64_t n_pos = has_reduction ? -2 : -1;
 
@@ -2473,9 +2473,9 @@ std::pair analyzeSwizzleSharedMemory(
 
   // Extract the constant sizes of the swizzled tile
   const int64_t tile_size_x =
-      swizzle_domain[-2]->extent()->evaluate().as<int64_t>();
+      swizzle_domain[-2].as<IterDomain*>()->extent()->evaluate().as<int64_t>();
   const int64_t tile_size_y =
-      swizzle_domain[-1]->extent()->evaluate().as<int64_t>();
+      swizzle_domain[-1].as<IterDomain*>()->extent()->evaluate().as<int64_t>();
 
   // Only tested for (1) ldmatrix access with sizeof(T) == 16bit (i.e.
   // half/bfloat16) and (2) epilogue general access with sizeof(T) == 32bit
 
@@ -2717,7 +2717,7 @@ MmaInputSmemSwizzle tmaSwizzleSharedMemory(TensorView* shared_mem_tv) {
   AbstractTensor swizzle_domain(shared_mem_tv->getLoopDomain());
   // Extract the constant sizes of the swizzled tile
   const int64_t inner_dim_size =
-      swizzle_domain[-1]->extent()->evaluate().as<int64_t>();
+      swizzle_domain[-1].as<IterDomain*>()->extent()->evaluate().as<int64_t>();
 
   auto dtype = shared_mem_tv->getDataType().value();
   const int64_t B128_elements = 128 / dataTypeSizeByte(dtype);
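These scheduler call sites switch from `operator->` to an explicit `.as<IterDomain*>()` narrowing: the caller names the expected alternative instead of relying on the wrapper's `operator->` template, which would otherwise require the full DynamicType definition. A rough stand-in using std::variant (types here are illustrative, not nvFuser's):

```cpp
#include <variant>

struct IterDomainish {  // illustrative stand-in, not nvFuser's IterDomain
  bool isReduction() const { return false; }
};

using Id = std::variant<IterDomainish*, int>;

// Naming the alternative up front means no operator-> template on the
// wrapper needs to be instantiated at the call site.
template <typename T>
T as(const Id& id) {
  return std::get<T>(id);  // throws std::bad_variant_access on a mismatch
}

bool isReduction(const Id& id) {
  return as<IterDomainish*>(id)->isReduction();
}
```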
diff --git a/csrc/scheduler/tools/abstract_tensor.h b/csrc/scheduler/tools/abstract_tensor.h
index 52801de1ff6..ec8b937a0a8 100644
--- a/csrc/scheduler/tools/abstract_tensor.h
+++ b/csrc/scheduler/tools/abstract_tensor.h
@@ -67,7 +67,8 @@ struct DispatchSplit {
       inner_result.reserve(in.size());
       for (auto i : arange(in.size())) {
         auto [outer, inner] =
-            AbstractId::dispatch((*this), in[i], factor, inner_split);
+            AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+                (*this), in[i], factor, inner_split);
         outer_result.emplace_back(outer);
         inner_result.emplace_back(inner);
       }
@@ -119,7 +120,8 @@ struct DispatchMerge {
       std::vector<AbstractId> result;
       result.reserve(lhs.size());
       for (auto i : arange(lhs.size())) {
-        result.emplace_back(AbstractId::dispatch((*this), lhs[i], rhs[i]));
+        result.emplace_back(
+            AbstractId::dispatch<AbstractId>((*this), lhs[i], rhs[i]));
       }
       return result;
     } else if constexpr (std::is_same_v<L, std::vector<AbstractId>>) {
@@ -127,7 +129,8 @@ struct DispatchMerge {
       result.reserve(lhs.size());
       for (auto i : arange(lhs.size())) {
         result.emplace_back(
-            AbstractId::dispatch((*this), lhs[i], std::forward<RHS>(rhs)));
+            AbstractId::dispatch<AbstractId>(
+                (*this), lhs[i], std::forward<RHS>(rhs)));
       }
       return result;
     } else if constexpr (std::is_same_v<R, std::vector<AbstractId>>) {
@@ -135,7 +138,8 @@ struct DispatchMerge {
       result.reserve(rhs.size());
       for (auto i : arange(rhs.size())) {
         result.emplace_back(
-            AbstractId::dispatch((*this), std::forward<LHS>(lhs), rhs[i]));
+            AbstractId::dispatch<AbstractId>(
+                (*this), std::forward<LHS>(lhs), rhs[i]));
       }
       return result;
     } else {
@@ -198,7 +202,8 @@ struct DispatchSwizzle {
       result_y.reserve(lhs.size());
       for (auto i : arange(lhs.size())) {
         auto [out_x, out_y] =
-            AbstractId::dispatch((*this), swizzle_type, lhs[i], rhs[i]);
+            AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+                (*this), swizzle_type, lhs[i], rhs[i]);
         result_x.emplace_back(out_x);
         result_y.emplace_back(out_y);
       }
@@ -209,8 +214,9 @@ struct DispatchSwizzle {
       result_x.reserve(lhs.size());
       result_y.reserve(lhs.size());
       for (auto i : arange(lhs.size())) {
-        auto [out_x, out_y] = AbstractId::dispatch(
-            (*this), swizzle_type, lhs[i], std::forward<RHS>(rhs));
+        auto [out_x, out_y] =
+            AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+                (*this), swizzle_type, lhs[i], std::forward<RHS>(rhs));
         result_x.emplace_back(out_x);
         result_y.emplace_back(out_y);
       }
@@ -221,8 +227,9 @@ struct DispatchSwizzle {
       result_x.reserve(rhs.size());
      result_y.reserve(rhs.size());
       for (auto i : arange(rhs.size())) {
-        auto [out_x, out_y] = AbstractId::dispatch(
-            (*this), swizzle_type, std::forward<LHS>(lhs), rhs[i]);
+        auto [out_x, out_y] =
+            AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+                (*this), swizzle_type, std::forward<LHS>(lhs), rhs[i]);
         result_x.emplace_back(out_x);
         result_y.emplace_back(out_y);
       }
@@ -283,7 +290,8 @@ struct DispatchLegacySwizzle {
       result_y.reserve(lhs.size());
       for (auto i : arange(lhs.size())) {
         auto [out_x, out_y] =
-            AbstractId::dispatch((*this), swizzle_type, lhs[i], rhs[i]);
+            AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+                (*this), swizzle_type, lhs[i], rhs[i]);
         result_x.emplace_back(out_x);
         result_y.emplace_back(out_y);
       }
@@ -294,8 +302,9 @@ struct DispatchLegacySwizzle {
       result_x.reserve(lhs.size());
       result_y.reserve(lhs.size());
       for (auto i : arange(lhs.size())) {
-        auto [out_x, out_y] = AbstractId::dispatch(
-            (*this), swizzle_type, lhs[i], std::forward<RHS>(rhs));
+        auto [out_x, out_y] =
+            AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+                (*this), swizzle_type, lhs[i], std::forward<RHS>(rhs));
         result_x.emplace_back(out_x);
         result_y.emplace_back(out_y);
       }
@@ -306,8 +315,9 @@ struct DispatchLegacySwizzle {
       result_x.reserve(rhs.size());
       result_y.reserve(rhs.size());
       for (auto i : arange(rhs.size())) {
-        auto [out_x, out_y] = AbstractId::dispatch(
-            (*this), swizzle_type, std::forward<LHS>(lhs), rhs[i]);
+        auto [out_x, out_y] =
+            AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+                (*this), swizzle_type, std::forward<LHS>(lhs), rhs[i]);
         result_x.emplace_back(out_x);
         result_y.emplace_back(out_y);
       }
@@ -334,7 +344,7 @@ struct DispatchParallelize {
       }
     } else if constexpr (std::is_same_v<IN, std::vector<AbstractId>>) {
       for (auto& aid : in) {
-        AbstractId::dispatch((*this), parallel_type, aid);
+        AbstractId::dispatch<void>((*this), parallel_type, aid);
       }
     } else {
       NVF_CHECK(false, "Unsupported type in AbstractTensor::parallelize");
@@ -663,7 +673,8 @@ class AbstractTensorWithInfo {
       int64_t axis,
       ParallelType parallel_type) {
     axis = wrapDim(axis, (int64_t)domain_.size());
-    AbstractId::dispatch(DispatchParallelize{}, parallel_type, domain_[axis]);
+    AbstractId::dispatch<void>(
+        DispatchParallelize{}, parallel_type, domain_[axis]);
     return *this;
   }
 
@@ -674,8 +685,9 @@ class AbstractTensorWithInfo {
     NVF_ERROR(domain_.size() == info_.size());
     axis = wrapDim(axis, (int64_t)domain_.size());
 
-    auto [outer, inner] = AbstractId::dispatch(
-        DispatchSplit{}, domain_[axis], factor, inner_split);
+    auto [outer, inner] =
+        AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+            DispatchSplit{}, domain_[axis], factor, inner_split);
     std::swap(domain_[axis], inner);
     domain_.insert(domain_.begin() + axis, outer);
 
@@ -700,8 +712,8 @@ class AbstractTensorWithInfo {
     axis_o = wrapDim(axis_o, (int64_t)domain_.size());
     axis_i = wrapDim(axis_i, (int64_t)domain_.size());
 
-    auto output =
-        AbstractId::dispatch(DispatchMerge{}, domain_[axis_o], domain_[axis_i]);
+    auto output = AbstractId::dispatch<AbstractId>(
+        DispatchMerge{}, domain_[axis_o], domain_[axis_i]);
     // axis_o is the outer input of this merge but does not
     // automatically mean it's an outer domain in this AbstractTensorWithInfo.
     auto domain_outer_pos = axis_o < axis_i ? axis_o : axis_i;
@@ -792,8 +804,9 @@ class AbstractTensorWithInfo {
     x = wrapDim(x, (int64_t)domain_.size());
     y = wrapDim(y, (int64_t)domain_.size());
 
-    auto [out_x, out_y] = AbstractId::dispatch(
-        DispatchSwizzle{}, swizzle_type, domain_[x], domain_[y]);
+    auto [out_x, out_y] =
+        AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+            DispatchSwizzle{}, swizzle_type, domain_[x], domain_[y]);
     std::swap(domain_[x], out_x);
     std::swap(domain_[y], out_y);
 
@@ -817,8 +830,9 @@ class AbstractTensorWithInfo {
     x = wrapDim(x, (int64_t)domain_.size());
     y = wrapDim(y, (int64_t)domain_.size());
 
-    auto [out_x, out_y] = AbstractId::dispatch(
-        DispatchLegacySwizzle{}, swizzle_type, domain_[x], domain_[y]);
+    auto [out_x, out_y] =
+        AbstractId::dispatch<std::pair<AbstractId, AbstractId>>(
+            DispatchLegacySwizzle{}, swizzle_type, domain_[x], domain_[y]);
     std::swap(domain_[x], out_x);
     std::swap(domain_[y], out_y);
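abstract_tensor.h applies the same explicit-result-type idiom across every dispatch flavor: `AbstractId` for merge, `std::pair<AbstractId, AbstractId>` for split and swizzle, and `void` for parallelize. A hedged sketch covering the pair- and void-returning cases, again over std::variant rather than the real DynamicType:

```cpp
#include <utility>
#include <variant>

using Node = std::variant<int, double>;

// Same explicit-R idiom as the earlier sketch, extended to the two other
// result shapes used in abstract_tensor.h.
template <typename R, typename F>
R dispatch(F&& f, const Node& n) {
  return std::visit([&](const auto& alt) -> R { return f(alt); }, n);
}

std::pair<Node, Node> splitInHalf(const Node& n) {  // split/swizzle shape
  return dispatch<std::pair<Node, Node>>(
      [](const auto& x) { return std::make_pair(Node{x / 2}, Node{x / 2}); },
      n);
}

void touch(const Node& n) {  // parallelize shape: R = void is also legal
  dispatch<void>([](const auto&) {}, n);
}
```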
diff --git a/csrc/type.cpp b/csrc/type.cpp
index 1a55427975d..285d47a8ac2 100644
--- a/csrc/type.cpp
+++ b/csrc/type.cpp
@@ -18,6 +18,67 @@
 
 namespace nvfuser {
 
+// Implementation moved from type.h to reduce template instantiation costs.
+// Uses PolymorphicValue::for_all_types() which triggers ForAllTypes dispatch.
+DataType getDataType(const PolymorphicValue& value) {
+  std::optional<DataType> dtype = std::nullopt;
+  PolymorphicValue::for_all_types([&value, &dtype](auto _) {
+    using T = typename decltype(_)::type;
+    if constexpr (IsPrimitiveNativeType<T>::value) {
+      if (value.is<T>()) {
+        dtype = NativeTypeToDataType<T>::type;
+      }
+    } else if constexpr (std::is_same_v<T, std::vector<PolymorphicValue>>) {
+      if (value.is<T>()) {
+        const auto& vec = value.as<T>();
+        size_t size = vec.size();
+        NVF_CHECK(size > 0, "Empty array is not supported");
+        dtype =
+            ArrayType{std::make_shared<DataType>(getDataType(vec[0])), size};
+      }
+    } else if constexpr (std::is_same_v<T, Pointer>) {
+      // For pointers in polymorphic value, we only store the data size of the
+      // pointee, so it is impossible to infer the pointer type.
+      NVF_CHECK(!value.is<T>(), "Can not infer pointer type.");
+    } else if constexpr (std::is_same_v<T, StructHandle>) {
+      if (value.is<T>()) {
+        dtype = value.as<T>().type();
+      }
+    } else if constexpr (std::is_same_v<T, Opaque>) {
+      if (value.is<T>()) {
+        const auto& opaque = value.as<T>();
+        dtype = DataType(OpaqueType{
+            .type_info = opaque.any().type(), .size = opaque.size()});
+      }
+    }
+  });
+  NVF_CHECK(dtype.has_value(), "Unknown dtype for ", value.type().name());
+  return dtype.value();
+}
+
+// Implementation moved from type.h to reduce template instantiation costs.
+// Uses PolymorphicValue::for_all_types() which triggers ForAllTypes dispatch.
+PolymorphicValue castToDtype(PolymorphicValue value, const DataType& dtype) {
+  if (!value.hasValue()) {
+    return value;
+  }
+  // Cast the given value to the given data type. This enables interfaces
+  // like: IrBuilder::create<Val>(0, DataType::Double) where the value is
+  // an integer but the desired data type is double.
+  if (!hasCompatibleDataType(value, dtype)) {
+    PolymorphicValue::for_all_types([&](auto _) {
+      using T = typename decltype(_)::type;
+      if constexpr (IsPrimitiveNativeType<T>::value) {
+        if (isCompatibleDataType(NativeTypeToDataType<T>::type, dtype)) {
+          value = PolymorphicValue(static_cast<T>(value));
+        }
+      }
+      // TODO: support arrays and pointers
+    });
+  }
+  return value;
+}
+
 StructType NotImplementedStruct::type() const {
   NVF_THROW("Not implemented");
 }
diff --git a/csrc/type.h b/csrc/type.h
index 649297229b9..23723d4f2a5 100644
--- a/csrc/type.h
+++ b/csrc/type.h
@@ -414,41 +414,9 @@ DEFINE_DATATYPE_TO_NATIVE_TYPE(DataType::ComplexDouble, std::complex<double>);
 
 #undef DEFINE_DATATYPE_TO_NATIVE_TYPE
 
-inline DataType getDataType(const PolymorphicValue& value) {
-  std::optional<DataType> dtype = std::nullopt;
-  PolymorphicValue::for_all_types([&value, &dtype](auto _) {
-    using T = typename decltype(_)::type;
-    if constexpr (IsPrimitiveNativeType<T>::value) {
-      if (value.is<T>()) {
-        dtype = NativeTypeToDataType<T>::type;
-      }
-    } else if constexpr (std::is_same_v<T, std::vector<PolymorphicValue>>) {
-      if (value.is<T>()) {
-        const auto& vec = value.as<T>();
-        size_t size = vec.size();
-        NVF_CHECK(size > 0, "Empty array is not supported");
-        dtype =
-            ArrayType{std::make_shared<DataType>(getDataType(vec[0])), size};
-      }
-    } else if constexpr (std::is_same_v<T, Pointer>) {
-      // For pointers in polymorphic value, we only store the data size of the
-      // pointee, so it is impossible to infer the pointer type.
-      NVF_CHECK(!value.is<T>(), "Can not infer pointer type.");
-    } else if constexpr (std::is_same_v<T, StructHandle>) {
-      if (value.is<T>()) {
-        dtype = value.as<T>().type();
-      }
-    } else if constexpr (std::is_same_v<T, Opaque>) {
-      if (value.is<T>()) {
-        const auto& opaque = value.as<T>();
-        dtype = DataType(OpaqueType{
-            .type_info = opaque.any().type(), .size = opaque.size()});
-      }
-    }
-  });
-  NVF_CHECK(dtype.has_value(), "Unknown dtype for ", value.type().name());
-  return dtype.value();
-}
+// Get the DataType corresponding to the runtime type held in a
+// PolymorphicValue. Implementation moved to type.cpp to reduce template
+// instantiation costs.
+NVF_API DataType getDataType(const PolymorphicValue& value);
 
 inline bool isCompatibleDataType(DataType dtype, DataType dtype2) {
   if (dtype == dtype2) {
@@ -1128,28 +1096,11 @@ Pointer::Pointer(void* ptr, DataType dtype)
     : ptr_(reinterpret_cast<std::byte*>(ptr)), size_bit_(dataTypeSizeBit(dtype)) {}
 
-inline PolymorphicValue castToDtype(
+// Cast a PolymorphicValue to match the specified DataType.
+// Implementation moved to type.cpp to reduce template instantiation costs.
+NVF_API PolymorphicValue castToDtype(
     PolymorphicValue value,
-    const DataType& dtype) {
-  if (!value.hasValue()) {
-    return value;
-  }
-  // Cast the given value to the given data type. This enables interfaces
-  // like: IrBuilder::create<Val>(0, DataType::Double) where the value is
-  // an integer but the desired data type is double.
-  if (!hasCompatibleDataType(value, dtype)) {
-    PolymorphicValue::for_all_types([&](auto _) {
-      using T = typename decltype(_)::type;
-      if constexpr (IsPrimitiveNativeType<T>::value) {
-        if (isCompatibleDataType(NativeTypeToDataType<T>::type, dtype)) {
-          value = PolymorphicValue(static_cast<T>(value));
-        }
-      }
-      // TODO: support arrays and pointers
-    });
-  }
-  return value;
-}
+    const DataType& dtype);
 
 // Converts an enum to its underlying type.
 // It corresponds with std::to_underlying introduced in c++23
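Moving getDataType and castToDtype out of line trades inlining for a one-time instantiation of the for_all_types walk in type.cpp; the NVF_API annotation matters because the definitions now live behind the shared-library boundary. A before/after sketch (the macro expansion shown is an assumption for illustration, not nvFuser's exact NVF_API definition):

```cpp
// Assumed expansion; the real macro also handles Windows import/export.
#define API_EXPORT __attribute__((visibility("default")))

// Header, after the move: a plain declaration. Includers no longer
// instantiate the heavy template walk that used to live in the inline body.
API_EXPORT int heavyQuery(int key);  // stand-in for getDataType/castToDtype

// .cpp, after the move: compiled once, exported thanks to the annotation.
int heavyQuery(int key) {
  return key * 2;  // stand-in for the real for_all_types dispatch body
}
```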
diff --git a/lib/dynamic_type/benchmark/knn.cpp b/lib/dynamic_type/benchmark/knn.cpp
index 2f39fe29ee8..53c3dd96a80 100644
--- a/lib/dynamic_type/benchmark/knn.cpp
+++ b/lib/dynamic_type/benchmark/knn.cpp
@@ -143,7 +143,7 @@ static StructVecDouble kNN_Dictionary(
     sum += distances_and_values.top().second;
     distances_and_values.pop();
   }
-  return sum / k;
+  return sum / static_cast<double>(k);
 }
 
 static void kNN_Dictionary(benchmark::State& state) {
diff --git a/lib/dynamic_type/src/dynamic_type/decl.h b/lib/dynamic_type/src/dynamic_type/decl.h
new file mode 100644
index 00000000000..1d0d56067e8
--- /dev/null
+++ b/lib/dynamic_type/src/dynamic_type/decl.h
@@ -0,0 +1,1035 @@
+// clang-format off
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES.
+ * All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+// clang-format on
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <type_traits>
+#include <typeinfo>
+#include <variant>
+
+#include "error.h"
+#include "type_traits.h"
+
+// Visibility attribute for exported symbols.
+// Static member functions need default visibility to be exported from
+// shared libraries built with -fvisibility=hidden.
+#if defined _WIN32 || defined __CYGWIN__
+#define DT_API __declspec(dllexport)
+#else
+#define DT_API __attribute__((visibility("default")))
+#endif
+
+namespace dynamic_type {
+
+// We must disable a lot of compiler warnings to make this work. The reason
+// for the need to disable these warnings is not because the code quality in
+// this file is bad, but because these apparently "bad" practices are
+// necessary. For example, if you have a dynamic type that can be either a
+// bool or a class SomeType{}, then we should support the ~ operator on it,
+// because in the C++ standard bool supports it. Usually, when people write
+// code like ~bool, they are making a mistake, and the compiler will want you
+// to use !bool instead. However, in our case here we will allow everything
+// that the C++ standard allows. The compiler should yell at the user who
+// uses DynamicType with ~ but not at us for implementing it.
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-comparison"
+#pragma clang diagnostic ignored "-Wbitwise-instead-of-logical"
+#pragma clang diagnostic ignored "-Wliteral-conversion"
+#pragma clang diagnostic ignored "-Wunused-lambda-capture"
+#pragma clang diagnostic ignored "-Wunknown-warning-option"
+#pragma clang diagnostic ignored "-Wbool-operation"
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wbool-operation"
+// gcc, even the latest version (13.1.1), is complaining about the following
+// code:
+//   std::optional<bool> ret = std::nullopt;
+//   ...
+//   DYNAMIC_TYPE_CHECK(ret.has_value(), ...);
+//   return ret.value();
+// saying that ret.value() is used uninitialized. This complaint is totally
+// nonsense.
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
+template
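The excerpt ends mid-declaration, but the DT_API macro defined above follows the usual export-macro pattern. A small sketch of where such an attribute typically lands, assuming a library otherwise built with `-fvisibility=hidden` (the struct and its members are illustrative, not dynamic_type's API):

```cpp
// The annotated member keeps default visibility even when the TU is compiled
// with -fvisibility=hidden, so it stays callable across the DSO boundary.
struct Dispatcher {
  DT_API static int exportedEntry();  // exported from the shared library
  static int internalHelper();        // hidden: local to the shared library
};
```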