diff --git a/cmake/CCCLConfigureTarget.cmake b/cmake/CCCLConfigureTarget.cmake
index 1d85080ed89..c8665a21c38 100644
--- a/cmake/CCCLConfigureTarget.cmake
+++ b/cmake/CCCLConfigureTarget.cmake
@@ -22,12 +22,6 @@ function(cccl_configure_target target_name)
       PROPERTIES
         CXX_STANDARD ${CCT_DIALECT}
         CUDA_STANDARD ${CCT_DIALECT}
-        # Must manually request that the standards above are actually respected
-        # or else CMake will silently fail to configure the targets correctly...
-        # Note that this doesn't actually work as of CMake 3.16:
-        # https://gitlab.kitware.com/cmake/cmake/-/issues/20953
-        # We'll leave these properties enabled in hopes that they will someday
-        # work.
         CXX_STANDARD_REQUIRED ON
         CUDA_STANDARD_REQUIRED ON
     )
diff --git a/cmake/CCCLUtilities.cmake b/cmake/CCCLUtilities.cmake
index 7a405d3f3f2..e5e732d2295 100644
--- a/cmake/CCCLUtilities.cmake
+++ b/cmake/CCCLUtilities.cmake
@@ -65,3 +65,150 @@ function(cccl_add_compile_test full_test_name_var name_prefix subdir test_id)
   )
   set(${full_test_name_var} ${test_name} PARENT_SCOPE)
 endfunction()
+
+# cccl_add_xfail_compile_target_test(
+#   <target_name>
+#   [TEST_NAME <test_name>]
+#   [ERROR_REGEX <regex>]
+#   [SOURCE_FILE <source_file>]
+#   [ERROR_REGEX_LABEL <error_string>]
+#   [ERROR_NUMBER <error_number>]
+#   [ERROR_NUMBER_TARGET_NAME_REGEX <regex>]
+# )
+#
+# Given a configured build target that is expected to fail to compile:
+# - Mark the target as excluded from the `all` target.
+# - Create a CTest test that compiles the target. If TEST_NAME is provided, it is used.
+#   Otherwise, the target_name is used as the test name.
+# - When the test runs, it passes if exactly one of the following conditions is met:
+#   - A provided / detected error regex matches the compilation output, ignoring exit code.
+#   - No error regex is provided / detected, and the compilation fails.
+#
+# An error regex may be explicitly provided via ERROR_REGEX, or it may be
+# detected by scanning the SOURCE_FILE for a specially formatted comment.
+#
+# If ERROR_REGEX_LABEL is provided, the SOURCE_FILE will be read, looking for a comment of the form:
+#
+# // <ERROR_REGEX_LABEL> {{"error_regex"}}
+#
+# An error number may be appended to the ERROR_REGEX_LABEL in the comment:
+#
+# // <ERROR_REGEX_LABEL>-<error_number> {{"error_regex"}}
+#
+# If ERROR_NUMBER_TARGET_NAME_REGEX is specified, the regex is used to capture
+# the error_number from the target name. If target_name is
+# "cccl.test.my_test.err_5.foo_3" and ERROR_NUMBER_TARGET_NAME_REGEX is
+# "\\.err_([0-9]+)", the captured error number is "5" and the comment searched for is:
+#
+# // <ERROR_REGEX_LABEL>-<captured_error_number> {{"error_regex"}}
+#
+# If ERROR_NUMBER is provided, ERROR_NUMBER_TARGET_NAME_REGEX is ignored.
+# If no comment carries the requested error number, or ERROR_NUMBER_TARGET_NAME_REGEX does not
+# match the target name, the comment with the plain ERROR_REGEX_LABEL is used instead.
+#
+# When the SOURCE_FILE is scanned, it is also added to the current directory's
+# CMAKE_CONFIGURE_DEPENDS to ensure that changes to the file will re-trigger CMake.
+function(cccl_add_xfail_compile_target_test target_name)
+  set(oneValueArgs
+    TEST_NAME
+    ERROR_REGEX
+    SOURCE_FILE
+    ERROR_REGEX_LABEL
+    ERROR_NUMBER
+    ERROR_NUMBER_TARGET_NAME_REGEX
+  )
+  cmake_parse_arguments(cccl_xfail "" "${oneValueArgs}" "" ${ARGN})
+
+  if (cccl_xfail_UNPARSED_ARGUMENTS)
+    message(FATAL_ERROR "Unparsed arguments: ${cccl_xfail_UNPARSED_ARGUMENTS}")
+  endif()
+
+  set(test_name "${target_name}")
+  if (DEFINED cccl_xfail_TEST_NAME)
+    set(test_name "${cccl_xfail_TEST_NAME}")
+  endif()
+
+  set(regex)
+  if (DEFINED cccl_xfail_ERROR_REGEX)
+    set(regex "${cccl_xfail_ERROR_REGEX}")
+  elseif (DEFINED cccl_xfail_SOURCE_FILE AND DEFINED cccl_xfail_ERROR_REGEX_LABEL)
+    get_filename_component(src_absolute "${cccl_xfail_SOURCE_FILE}" ABSOLUTE)
+    set(error_label_regex "${cccl_xfail_ERROR_REGEX_LABEL}")
+
+    # Cache all error label matches (with and without error numbers) as global properties,
+    # keyed on both the source file and the label. This avoids re-reading and re-parsing
+    # the source file if multiple tests are added for it. Properties are used instead of
+    # cache variables to ensure that the source is not cached in between CMake executions.
+    string(MD5 error_cache_key "${src_absolute}|${error_label_regex}")
+    set(error_cache_property "_cccl_xfail_error_cache_${error_cache_key}")
+    get_property(error_cache_set GLOBAL PROPERTY "${error_cache_property}" SET)
+    if (error_cache_set)
+      get_property(error_cache GLOBAL PROPERTY "${error_cache_property}")
+    else()
+      file(READ "${src_absolute}" source_contents)
+      string(REGEX MATCHALL "//[ \t]*${error_label_regex}(-[0-9]+)?[ \t]*{{\"([^\"]+)\"}}" error_cache "${source_contents}")
+      set_property(GLOBAL PROPERTY "${error_cache_property}" "${error_cache}")
+    endif()
+
+    # Changes to the source file should re-run CMake to pick-up new error specs:
+    set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${src_absolute}")
+
+    set(error_number)
+    if (DEFINED cccl_xfail_ERROR_NUMBER)
+      set(error_number "${cccl_xfail_ERROR_NUMBER}")
+    elseif (DEFINED cccl_xfail_ERROR_NUMBER_TARGET_NAME_REGEX)
+      string(REGEX MATCH "${cccl_xfail_ERROR_NUMBER_TARGET_NAME_REGEX}" matched "${target_name}")
+      if (matched)
+        set(error_number "${CMAKE_MATCH_1}")
+      endif()
+    endif()
+
+    # Look for a labeled error with the specific error number.
+    if (NOT "${error_number}" STREQUAL "") # Check strings to allow "0"
+      string(REGEX MATCH "//[ \t]*${error_label_regex}-${error_number}[ \t]*{{\"([^\"]+)\"}}" matched "${error_cache}")
+      if (matched)
+        set(regex "${CMAKE_MATCH_1}")
+      endif()
+    endif()
+
+    if ("${regex}" STREQUAL "")
+      # Look for a labeled error without an error number.
+      string(REGEX MATCH "//[ \t]*${error_label_regex}[ \t]*{{\"([^\"]+)\"}}" matched "${error_cache}")
+      if (matched)
+        set(regex "${CMAKE_MATCH_1}")
+      endif()
+    endif()
+  endif()
+
+  message(VERBOSE "CCCL: Adding XFAIL test: ${test_name}")
+  if (NOT "${regex}" STREQUAL "") # Check strings to allow patterns such as "0"
+    message(VERBOSE "CCCL:   with expected regex: '${regex}'")
+  endif()
+
+  set_target_properties(${target_name} PROPERTIES EXCLUDE_FROM_ALL true)
+
+  # The same target may be reused for multiple tests, and the output file
+  # may exist if using a regex to check for warnings. Add a setup fixture to
+  # delete the output file before each test run.
+  if (NOT TEST ${target_name}.clean)
+    add_test(NAME ${target_name}.clean COMMAND "${CMAKE_COMMAND}" -E rm -f
+      "$<TARGET_FILE:${target_name}>"
+      "$<TARGET_OBJECTS:${target_name}>"
+    )
+    set_tests_properties(${target_name}.clean PROPERTIES FIXTURES_SETUP ${target_name}.clean)
+  endif()
+
+  add_test(NAME ${test_name}
+           COMMAND ${CMAKE_COMMAND} --build "${CMAKE_BINARY_DIR}"
+                                    --target ${target_name}
+                                    --config $<CONFIGURATION>
+  )
+  # Requiring the fixture makes CTest run the `.clean` setup test before this one.
+  set_tests_properties(${test_name} PROPERTIES FIXTURES_REQUIRED ${target_name}.clean)
+
+  if (NOT "${regex}" STREQUAL "")
+    set_tests_properties(${test_name} PROPERTIES PASS_REGULAR_EXPRESSION "${regex}")
+  else()
+    set_tests_properties(${test_name} PROPERTIES WILL_FAIL true)
+  endif()
+endfunction()
diff --git a/cub/cub/device/device_segmented_reduce.cuh b/cub/cub/device/device_segmented_reduce.cuh
index 7e4f4c997e7..71632e0b8b9 100644
--- a/cub/cub/device/device_segmented_reduce.cuh
+++ b/cub/cub/device/device_segmented_reduce.cuh
@@ -806,10 +806,13 @@ struct DeviceSegmentedReduce
 
     using InputValueT  = detail::it_value_t<InputIteratorT>;
     using OutputTupleT = detail::non_void_value_t<OutputIteratorT, KeyValuePair<OffsetT, InputValueT>>;
+    using OutputKeyT   = typename OutputTupleT::Key;
     using OutputValueT = typename OutputTupleT::Value;
     using AccumT       = OutputTupleT;
     using InitT        = detail::reduce::empty_problem_init_t<AccumT>;
 
+    static_assert(::cuda::std::is_same_v<int, OutputKeyT>, "Output key type must be int.");
+
     // Wrapped input iterator to produce index-value <OffsetT, InputT> tuples
     using ArgIndexInputIteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
     ArgIndexInputIteratorT d_indexed_in(d_in);
@@ -921,9 +924,11 @@ struct DeviceSegmentedReduce
 
     using init_t = detail::reduce::empty_problem_init_t<accum_t>;
 
-    // The output value type
+    using output_key_t   = typename output_tuple_t::first_type;
     using output_value_t = typename output_tuple_t::second_type;
 
+    static_assert(::cuda::std::is_same_v<int, output_key_t>, "Output key type must be int.");
+
     // Wrapped input iterator to produce index-value <offset_t, InputT> tuples
     auto d_indexed_in = THRUST_NS_QUALIFIER::make_transform_iterator(
       THRUST_NS_QUALIFIER::counting_iterator<::cuda::std::int64_t>{0},
@@ -1265,8 +1270,11 @@ struct DeviceSegmentedReduce
     using OutputTupleT = cub::detail::non_void_value_t<OutputIteratorT, KeyValuePair<OffsetT, InputValueT>>;
     using AccumT       = OutputTupleT;
     using InitT        = detail::reduce::empty_problem_init_t<AccumT>;
+    using OutputKeyT   = typename OutputTupleT::Key;
     using OutputValueT = typename OutputTupleT::Value;
 
+    static_assert(::cuda::std::is_same_v<int, OutputKeyT>, "Output key type must be int.");
+
     // Wrapped input iterator to produce index-value <OffsetT, InputT> tuples
     using ArgIndexInputIteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
     ArgIndexInputIteratorT d_indexed_in(d_in);
@@ -1375,8 +1383,11 @@ struct DeviceSegmentedReduce
     using output_tuple_t = detail::non_void_value_t<OutputIteratorT, ::cuda::std::pair<input_t, input_value_t>>;
     using accum_t        = output_tuple_t;
     using init_t         = detail::reduce::empty_problem_init_t<accum_t>;
+    using output_key_t   = typename output_tuple_t::first_type;
     using output_value_t = typename output_tuple_t::second_type;
 
+    static_assert(::cuda::std::is_same_v<int, output_key_t>, "Output key type must be int.");
+
     // Wrapped input iterator to produce index-value <input_t, InputT> tuples
     auto d_indexed_in = THRUST_NS_QUALIFIER::make_transform_iterator(
       THRUST_NS_QUALIFIER::counting_iterator<::cuda::std::int64_t>{0},
diff --git a/cub/test/CMakeLists.txt b/cub/test/CMakeLists.txt
index 3be20151a72..e2c60f7aa7b 100644
--- a/cub/test/CMakeLists.txt
+++ b/cub/test/CMakeLists.txt
@@ -175,32 +175,12 @@ function(cub_add_test target_name_var test_name test_src cub_target launcher_id)
 
     _cub_is_fail_test(is_fail_test "${test_src}")
     if (is_fail_test)
-      set_target_properties(${test_target} PROPERTIES EXCLUDE_FROM_ALL true
-                                           EXCLUDE_FROM_DEFAULT_BUILD true)
-      add_test(NAME ${test_target}
-               COMMAND ${CMAKE_COMMAND} --build "${CMAKE_BINARY_DIR}"
-                                        --target ${test_target}
-                                        --config $<CONFIGURATION>)
-      string(REGEX MATCH "err_([0-9]+)" MATCH_RESULT "${test_name}")
-      file(READ ${test_src} test_content)
-      if(MATCH_RESULT)
-        string(REGEX MATCH "// expected-error-${CMAKE_MATCH_1}+ {{\"([^\"]+)\"}}" expected_errors_matches ${test_content})
-
-        if (expected_errors_matches)
-          set_tests_properties(${test_target} PROPERTIES PASS_REGULAR_EXPRESSION "${CMAKE_MATCH_1}")
-        else()
-          set_tests_properties(${test_target} PROPERTIES WILL_FAIL true)
-        endif()
-      else()
-        string(REGEX MATCH "// expected-error {{\"([^\"]+)\"}}" expected_errors_matches ${test_content})
-
-        if (expected_errors_matches)
-          set_tests_properties(${test_target} PROPERTIES PASS_REGULAR_EXPRESSION "${CMAKE_MATCH_1}")
-        else()
-          set_tests_properties(${test_target} PROPERTIES WILL_FAIL true)
-        endif()
-      endif()
-    else() # Not fail test:
+      cccl_add_xfail_compile_target_test(${test_target}
+        SOURCE_FILE "${test_src}"
+        ERROR_REGEX_LABEL "expected-error"
+        ERROR_NUMBER_TARGET_NAME_REGEX "\\.err_([0-9]+)"
+      )
+    else()
       # Add to the active configuration's meta target
       add_dependencies(${config_meta_target} ${test_target})
 
diff --git a/cub/test/test_device_segmented_reduce_offset_type_fail.cu b/cub/test/test_device_segmented_reduce_offset_type_fail.cu
index b40549f4f21..f9924ccfa21 100644
--- a/cub/test/test_device_segmented_reduce_offset_type_fail.cu
+++ b/cub/test/test_device_segmented_reduce_offset_type_fail.cu
@@ -1,42 +1,55 @@
 // SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 // SPDX-License-Identifier: BSD-3
 
-// %PARAM% TEST_ERR err 0:1:2:3:4:5
+// %PARAM% TEST_ERR err 0:1:2:3:4:5:6:7
 
 #include <cub/device/device_segmented_reduce.cuh>
 
+template <typename T>
+void mark_as_used(T&&)
+{}
+
 int main()
 {
   using offset_t = float; // error
   // using offset_t = int; // ok
   float *d_in{}, *d_out{};
+  cub::KeyValuePair<float, float>* d_kv_out{};
+  ::cuda::std::pair<float, float>* d_pair_out{};
   offset_t* d_offsets{};
   std::size_t temp_storage_bytes{};
   std::uint8_t* d_temp_storage{};
 
+  // Only one of these is used per path, suppress undesired diagnostics:
+  mark_as_used(d_out);
+  mark_as_used(d_kv_out);
+  mark_as_used(d_pair_out);
+  mark_as_used(d_offsets);
+
 #if TEST_ERR == 0
-  // expected-error {{"Offset iterator type should be integral."}}
+  // expected-error-0 {{"Offset iterator value type should be integral."}}
   cub::DeviceSegmentedReduce::Reduce(
     d_temp_storage, temp_storage_bytes, d_in, d_out, 0, d_offsets, d_offsets + 1, ::cuda::minimum<>{}, 0);
-
 #elif TEST_ERR == 1
-  // expected-error {{"Offset iterator type should be integral."}}
+  // expected-error-1 {{"Offset iterator value type should be integral."}}
   cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, 0, d_offsets, d_offsets + 1);
-
 #elif TEST_ERR == 2
-  // expected-error {{"Offset iterator type should be integral."}}
+  // expected-error-2 {{"Offset iterator value type should be integral."}}
   cub::DeviceSegmentedReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, 0, d_offsets, d_offsets + 1);
-
 #elif TEST_ERR == 3
-  // expected-error {{"Offset iterator type should be integral."}}
-  cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, 0, d_offsets, d_offsets + 1);
-
-#elif TEST_ERR == 4
-  // expected-error {{"Offset iterator type should be integral."}}
+  // expected-error-3 {{"Offset iterator value type should be integral."}}
   cub::DeviceSegmentedReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_out, 0, d_offsets, d_offsets + 1);
-
+#elif TEST_ERR == 4
+  // expected-error-4 {{"Output key type must be int."}}
+  cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_kv_out, 0, d_offsets, d_offsets + 1);
 #elif TEST_ERR == 5
-  // expected-error {{"Offset iterator type should be integral."}}
-  cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, 0, d_offsets, d_offsets + 1);
+  // expected-error-5 {{"Output key type must be int."}}
+  cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_pair_out, 0, 1);
+#elif TEST_ERR == 6
+  // expected-error-6 {{"Output key type must be int."}}
+  cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_kv_out, 0, d_offsets, d_offsets + 1);
+#elif TEST_ERR == 7
+  // expected-error-7 {{"Output key type must be int."}}
+  cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_pair_out, 0, 1);
 #endif
 }
diff --git a/cub/test/test_param_general_fail.cu b/cub/test/test_param_general_fail.cu
new file mode 100644
index 00000000000..6980bd819ca
--- /dev/null
+++ b/cub/test/test_param_general_fail.cu
@@ -0,0 +1,21 @@
+// %PARAM% TEST_ERR err 0:1:2:3
+
+// This compilation sometimes passes, sometimes fails.
+// Its role is to ensure that the exit code is not checked for regex matches and binary objects are cleaned
+// before each test run.
+// This allows the failure machinery to test for non-fatal warnings.
+int main()
+{
+  // Fallback pattern for variants that have no numbered `expected-error-<N>` comment:
+  // expected-error {{"fail generic"}}
+
+#if TEST_ERR == 0
+#  pragma message "fail zero" // expected-error-0 {{"fail zero"}}
+#elif TEST_ERR == 1
+#  pragma message "fail generic"
+#elif TEST_ERR == 2
+  static_assert(false, "fail two"); // expected-error-2 {{"fail two"}}
+#elif TEST_ERR == 3
+  static_assert(false, "fail generic");
+#endif
+}