Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/test/Driver/clang-linker-wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
// CHK-CMDS-AOT-NV-NEXT: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
// CHK-CMDS-AOT-NV-NEXT: clang{{.*}} -o [[CLANGOUT:.*]] -dumpdir a.out.nvptx64.sm_50.img. --target=nvptx64-nvidia-cuda -march={{.*}}
// CHK-CMDS-AOT-NV-NEXT: ptxas{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
// CHK-CMDS-AOT-NV-NEXT: fatbinary{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
// CHK-CMDS-AOT-NV-NEXT: fatbinary{{.*}} --create [[FATBINOUT:.*]] --image3=kind=ptx,sm={{(compute_)?50|pute_50}},file=[[CLANGOUT]] --image3=kind=elf,sm=50,file=[[PTXASOUT]]
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: output: [[WRAPPEROUT:.*]].bc, input: [[FATBINOUT]]
// CHK-CMDS-AOT-NV-NEXT: clang{{.*}} -c -o [[LLCOUT:.*]] [[WRAPPEROUT]]
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}/ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
Expand Down Expand Up @@ -166,7 +166,7 @@
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -c -o [[LLCOUT1:.*]] [[WRAPPEROUT1]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -o [[CLANGOUT:.*]] -dumpdir a.out.nvptx64.sm_50.img. --target=nvptx64-nvidia-cuda -march={{.*}}
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: ptxas{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: fatbinary{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: fatbinary{{.*}} --create [[FATBINOUT:.*]] --image3=kind=ptx,sm={{(compute_)?50|pute_50}},file=[[CLANGOUT]] --image3=kind=elf,sm=50,file=[[PTXASOUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: output: [[WRAPPEROUT:.*]].bc, input: [[FATBINOUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -c -o [[LLCOUT2:.*]] [[WRAPPEROUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}/ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/linker-wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ __attribute__((visibility("protected"), used)) int x;

// CUDA: clang{{.*}} -o [[IMG_SM70:.+]] -dumpdir a.out.nvptx64.sm_70.img. --target=nvptx64-nvidia-cuda -march=sm_70
// CUDA: clang{{.*}} -o [[IMG_SM52:.+]] -dumpdir a.out.nvptx64.sm_52.img. --target=nvptx64-nvidia-cuda -march=sm_52
// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]]
// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image3=kind=elf,sm=70,file=[[IMG_SM70]] --image3=kind=elf,sm=52,file=[[IMG_SM52]]
// CUDA: usr/bin/ld{{.*}} {{.*}}.openmp.image.{{.*}}.o {{.*}}.cuda.image.{{.*}}.o

// RUN: llvm-offload-binary -o %t.out \
Expand Down Expand Up @@ -240,7 +240,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-CUDA

// RELOCATABLE-LINK-CUDA: clang{{.*}} -o {{.*}}.img -dumpdir a.out.nvptx64.sm_89.img. --target=nvptx64-nvidia-cuda
// RELOCATABLE-LINK-CUDA: fatbinary{{.*}} -64 --create {{.*}}.fatbin --image=profile=sm_89,file={{.*}}.img
// RELOCATABLE-LINK-CUDA: fatbinary{{.*}} -64 --create {{.*}}.fatbin --image3=kind=elf,sm=89,file={{.*}}.img
// RELOCATABLE-LINK-CUDA: /usr/bin/ld.lld{{.*}}-r
// RELOCATABLE-LINK-CUDA: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading

Expand Down
19 changes: 16 additions & 3 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,9 +481,22 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
CmdArgs.push_back(Triple.isArch64Bit() ? "-64" : "-32");
CmdArgs.push_back("--create");
CmdArgs.push_back(*TempFileOrErr);
for (const auto &[File, Arch] : InputFiles)
CmdArgs.push_back(
Args.MakeArgString("--image=profile=" + Arch + ",file=" + File));

for (const auto &[File, Arch] : InputFiles) {
// When bundling SYCL NVPTX, we may get both a cubin (sm_*) and a PTX
// assembly image (compute_*). `fatbinary` needs the right kind for each.
StringRef Kind = "elf";
StringRef ArchId = Arch;
if (Arch.starts_with("sm_")) {
ArchId = Arch.drop_front(3);
} else if (Arch.starts_with("compute_")) {
Kind = "ptx";
ArchId = Arch.drop_front(8);
}

CmdArgs.push_back(Args.MakeArgString("--image3=kind=" + Kind +
",sm=" + ArchId + ",file=" + File));
}

if (Error Err = executeCommands(*FatBinaryPath, CmdArgs))
return std::move(Err);
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// NOTE: Tests fetch_add for acquire and release memory ordering.
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

#include "atomic_memory_order.h"
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/GroupAlgorithm/root_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// XFAIL: (opencl && !cpu)
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641

// RUN: %{build} -I . -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{build} -I . -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// Disabled temporarily while investigation into the failure is ongoing.
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Matrix/joint_matrix_tensorcores_sm70.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//

// REQUIRES: target-nvidia
// RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_70 -o %t.out
// RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_75 -o %t.out
// RUN: %{run} %t.out
//
// This tests the unified matrix extension interfaces for the cuda backend.
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Matrix/joint_matrix_tensorcores_sm72.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//

// REQUIRES: target-nvidia
// RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_72 -o %t.out
// RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_75 -o %t.out
// RUN: %{run} %t.out
//
// This tests the unified matrix extension interfaces for the cuda backend.
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -DENABLE_64_BIT=false -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -DENABLE_64_BIT=false -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

#include "reduction_utils.hpp"
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -DENABLE_64_BIT=true -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -DENABLE_64_BIT=true -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

#include "reduction_range_1d_dw.cpp"
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// This test performs basic checks of parallel_for(range<1>, reduction, func)
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// This test performs basic checks of parallel_for(range<1>, reduction, func)
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// This test performs basic checks of parallel_for(range<2>, reduction, func)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// This test performs basic checks of parallel_for(range<2>, reduction, func)
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// This test performs basic checks of parallel_for(range<2>, reduction, func)
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// This test performs basic checks of parallel_for(range<3>, reduction, func)
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// This test performs basic checks of parallel_for(range<3>, reduction, func)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

// This test performs basic checks of parallel_for(range<3>, reduction, func)
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %}
// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %}
// RUN: %{run} %t.out

#include "reduction_utils.hpp"
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/USM/P2P/p2p_atomics.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// REQUIRES: cuda || hip || level_zero
// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_61 %} -o %t.out
// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %} -o %t.out
// RUN: %{run} %t.out

#include <cassert>
Expand Down