12 files changed
+67
-11
lines changed
Submodule hipBLAS-common updated 1 file
- library/src/amd_detail/rocblaslt/src/Tensile/Logic/asm_full/aquavanjaram/gfx942/FreeSize/aquavanjaram_Cijk_Ailk_Bjlk_S_MX_B_Bias_HAS_SAV_UserArgs.yaml-1204.7k
- library/src/amd_detail/rocblaslt/src/Tensile/Logic/asm_full/aquavanjaram/gfx942/FreeSize/aquavanjaram_Cijk_Ailk_Bljk_S_MX_B_Bias_HAS_SAV_UserArgs.yaml-1184.9k
- library/src/amd_detail/rocblaslt/src/Tensile/Logic/asm_full/aquavanjaram/gfx942/FreeSize/aquavanjaram_Cijk_Alik_Bjlk_S_MX_B_Bias_HAS_SAV_UserArgs.yaml-1184.9k
- library/src/amd_detail/rocblaslt/src/Tensile/Logic/asm_full/aquavanjaram/gfx942/FreeSize/aquavanjaram_Cijk_Alik_Bljk_BBS_BH_Bias_HAS_SAV_UserArgs.yaml-264
- library/src/amd_detail/rocblaslt/src/Tensile/Logic/asm_full/aquavanjaram/gfx942/FreeSize/aquavanjaram_Cijk_Alik_Bljk_BBS_STA_BH_Bias_HAS_SAV_UserArgs.yaml-289.1k
- library/src/amd_detail/rocblaslt/src/Tensile/Logic/asm_full/aquavanjaram/gfx942/FreeSize/aquavanjaram_Cijk_Alik_Bljk_S_MX_B_Bias_HAS_SAV_UserArgs.yaml-1175.1k
- tensilelite/Tensile/Components/LocalRead.py+83-64
- tensilelite/Tensile/KernelWriter.py+3-3
- tensilelite/Tensile/Source/lib/source/ContractionSolution.cpp+1-1
- lib/include/rocRoller/CodeGen/Instruction.hpp+10-7
- lib/include/rocRoller/CodeGen/Instruction_impl.hpp+33-33
- lib/include/rocRoller/Scheduling/Costs/Cost.hpp+2
- lib/include/rocRoller/Scheduling/Scheduler.hpp+72-11
- lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp+16-7
- lib/source/Costs/Cost.cpp+8-3
- lib/source/Expression_generate.cpp+39-18
- lib/source/Scheduler.cpp+218-55
- lib/source/Schedulers/CooperativeScheduler.cpp+15-9
- lib/source/Schedulers/PriorityScheduler.cpp+7-4
- lib/source/Schedulers/RandomScheduler.cpp+40-22
- lib/source/Schedulers/RoundRobinScheduler.cpp+4-3
- lib/source/Schedulers/SequentialScheduler.cpp+6-2
- test/unit/DependencyTest.cpp+6-1
- test/unit/EnumToStringTest.cpp-1
- test/unit/LockTest.cpp+132-31
- test/unit/SchedulerTest.cpp+67-57
- CHANGELOG.md+6
- benchmark/benchmark_device_merge_sort.cpp+12-4
- benchmark/benchmark_device_merge_sort.hpp+15-1
- benchmark/benchmark_utils.hpp+12-2
- common/utils_custom_type.hpp+83
- rocprim/include/rocprim/config.hpp+4-2
- rocprim/include/rocprim/device/config_types.hpp+2-2
- rocprim/include/rocprim/device/detail/device_batch_memcpy.hpp+15-4
- rocprim/include/rocprim/device/detail/device_histogram.hpp+21-20
- rocprim/include/rocprim/device/detail/device_merge_sort.hpp+1-1
- rocprim/include/rocprim/device/detail/device_merge_sort_mergepath.hpp+5-5
- rocprim/include/rocprim/device/detail/device_partition.hpp+15-4
- rocprim/include/rocprim/device/detail/ordered_block_id.hpp+100-4
- rocprim/include/rocprim/device/device_histogram.hpp+17-17
- rocprim/include/rocprim/device/device_partition.hpp+93-41
- rocprim/include/rocprim/device/device_select.hpp+75-49
- rocprim/include/rocprim/iterator/arg_index_iterator.hpp+4-4
- rocprim/include/rocprim/iterator/texture_cache_iterator.hpp+4-4
- rocprim/include/rocprim/thread/thread_reduce.hpp+4-3
- test/rocprim/CMakeLists.txt+10
- test/rocprim/test_arg_index_iterator.cpp+88-47
- test/rocprim/test_config_dispatch.cpp+2-2
- test/rocprim/test_constant_iterator.cpp+86
- test/rocprim/test_counting_iterator.cpp+88-5
- test/rocprim/test_device_histogram.cpp+191-370
- test/rocprim/test_device_merge_sort.cpp+113-86
- test/rocprim/test_device_partition.cpp+228-200
- test/rocprim/test_device_scan.cpp+454-466
- test/rocprim/test_discard_iterator.cpp+77-79
- test/rocprim/test_predicate_iterator.cpp+133
- test/rocprim/test_rocprim_tuple.cpp+384
- test/rocprim/test_rocprim_types.cpp+235
- test/rocprim/test_texture_cache_iterator.cpp+110
- test/rocprim/test_thread_algos.cpp+261-47
- test/rocprim/test_transform_iterator.cpp+109-29
- test/rocprim/test_utils.hpp+39-5
- test/rocprim/test_utils_data_generation.hpp+62-43
- test/rocprim/test_utils_data_generation_with_rocrand.hpp+20-20
- test/rocprim/test_utils_get_random_data.hpp+5
- test/rocprim/test_warp_scan.hpp+2-2
- test/rocprim/test_zip_iterator.cpp+22-39
0 commit comments