Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions clang/lib/DPCT/AnalysisInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5034,9 +5034,12 @@ void DeviceFunctionDecl::buildTextureObjectParamsInfo(
return;
for (unsigned Idx = 0; Idx < Parms.size(); ++Idx) {
auto Param = Parms[Idx];
if (DpctGlobalInfo::getUnqualifiedTypeName(Param->getType()) ==
"cudaTextureObject_t")
std::string ParamName =
DpctGlobalInfo::getUnqualifiedTypeName(Param->getType());
if (ParamName == "cudaTextureObject_t" ||
ParamName == "cudaSurfaceObject_t") {
TextureObjectList[Idx] = std::make_shared<TextureObjectInfo>(Param);
}
}
}
std::string DeviceFunctionDecl::getExtraParameters(LocInfo LI) {
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/DPCT/AnalysisInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -2051,7 +2051,7 @@ class TextureInfo {
bool isUseHelperFunc() { return true; }
};

// texture handle info
// texture object info can be used for CUDA texture and surface objects.
class TextureObjectInfo : public TextureInfo {
static const int ReplaceTypeLength;

Expand Down
7 changes: 7 additions & 0 deletions clang/lib/DPCT/RulesLang/APINamesTexture.inc
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,10 @@ FEATURE_REQUEST_FACTORY(
"cuTexRefSetFormat", true, false, true, true,
MEMBER_CALL(ARG(0), true, "set_channel_type", ARG(1)),
MEMBER_CALL(ARG(0), true, "set_channel_num", ARG(2))))

FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext,
ENTRY_TEXTURE("surf1Dread", 0x01, 1))
FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext,
ENTRY_TEXTURE("surf2Dread", 0x02, 1, 2))
FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext,
ENTRY_TEXTURE("surf3Dread", 0x03, 1, 2, 3))
40 changes: 25 additions & 15 deletions clang/lib/DPCT/RulesLang/CallExprRewriterTexture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {

template <class BaseT>
std::shared_ptr<CallExprRewriter>
createRewriter(const CallExpr *C, bool RetAssign, BaseT Base) const {
const static std::string MemberName = "read";
createRewriter(const CallExpr *C, bool RetAssign, bool IsSurfAPI,
BaseT Base) const {
const static std::string MemberName = IsSurfAPI ? "read_byte" : "read";

using ReaderPrinter = decltype(makeMemberCallCreator<false>(
std::declval<std::function<BaseT(const CallExpr *)>>(), false,
MemberName, makeCallArgCreatorWithCall(Idx)...)(C));
Expand All @@ -41,10 +43,14 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {
template <typename VecType>
std::shared_ptr<CallExprRewriter>
createbindlessRewriterNormal(const CallExpr *C, bool RetAssign,
const TemplateArgumentInfo &TAI,
bool IsSurfAPI, const TemplateArgumentInfo &TAI,
const std::string &VecTypeName) const {
const static std::string FuncName =
MapNames::getClNamespace() + "ext::oneapi::experimental::sample_image";
const static std::string FuncName = [=]() -> std::string {
if (IsSurfAPI)
return MapNames::getLibraryHelperNamespace() + "experimental::sample_image";
return MapNames::getClNamespace() +
"ext::oneapi::experimental::sample_image";
}();
using FuncNamePrinter =
TemplatedNamePrinter<StringRef, std::vector<TemplateArgumentInfo>>;
using ReaderPrinter =
Expand All @@ -66,7 +72,7 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {

template <typename VecType>
std::shared_ptr<CallExprRewriter>
createbindlessRewriterLod(const CallExpr *C, bool RetAssign,
createbindlessRewriterLod(const CallExpr *C, bool RetAssign, bool IsSurfAPI,
const TemplateArgumentInfo &TAI,
const std::string &VecTypeName) const {
const static std::string FuncName =
Expand Down Expand Up @@ -96,7 +102,7 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {
template <typename VecType>
std::shared_ptr<CallExprRewriter>
createbindlessRewriterLayered(const CallExpr *C, bool RetAssign,
const TemplateArgumentInfo &TAI,
bool IsSurfAPI, const TemplateArgumentInfo &TAI,
const std::string &VecTypeName) const {
const static std::string FuncName =
MapNames::getClNamespace() +
Expand Down Expand Up @@ -124,7 +130,7 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {
}

std::shared_ptr<CallExprRewriter>
createbindlessRewriter(const CallExpr *C, bool RetAssign,
createbindlessRewriter(const CallExpr *C, bool RetAssign, bool IsSurfAPI,
QualType TargetType) const {
TemplateArgumentInfo TAI;
auto TAL = getTemplateArgsList(C);
Expand All @@ -151,11 +157,12 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {
CallExprPrinter<std::string,
decltype(std::make_pair(C, C->getArg(Idx)))...>;
if ((TexType & 0xf0) == 0x10)
return createbindlessRewriterLod<VecType>(C, RetAssign, TAI, VecTypeName);
return createbindlessRewriterLod<VecType>(C, RetAssign, IsSurfAPI, TAI,
VecTypeName);
if ((TexType & 0xf0) == 0xf0)
return createbindlessRewriterLayered<VecType>(C, RetAssign, TAI,
VecTypeName);
return createbindlessRewriterNormal<VecType>(C, RetAssign, TAI,
return createbindlessRewriterLayered<VecType>(C, RetAssign, IsSurfAPI,
TAI, VecTypeName);
return createbindlessRewriterNormal<VecType>(C, RetAssign, IsSurfAPI, TAI,
VecTypeName);
}

Expand All @@ -169,6 +176,9 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {
QualType TargetType = Call->getType();
StringRef SourceName;
bool RetAssign = false;
bool IsSurfAPI = false;
if (Source.find("surf") != std::string::npos)
IsSurfAPI = true;
if (SourceExpr->getType()->isPointerType()) {
TargetType = SourceExpr->getType()->getPointeeType();
SourceExpr = Call->getArg(1);
Expand All @@ -181,7 +191,7 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {
}
}
if (DpctGlobalInfo::useExtBindlessImages()) {
return createbindlessRewriter(Call, RetAssign, TargetType);
return createbindlessRewriter(Call, RetAssign, IsSurfAPI, TargetType);
}
SourceExpr = SourceExpr->IgnoreImpCasts();
if (auto FD = DpctGlobalInfo::getParentFunction(Call)) {
Expand All @@ -196,7 +206,7 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {
MemberInfo->setType(
DpctGlobalInfo::getUnqualifiedTypeName(TargetType), TexType);
SourceName = MemberInfo->getName();
return createRewriter(Call, RetAssign, SourceName);
return createRewriter(Call, RetAssign, IsSurfAPI, SourceName);
}
} else if (auto DRE = dyn_cast<DeclRefExpr>(SourceExpr)) {
auto CallDefRange =
Expand All @@ -213,7 +223,7 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase {
}
}

return createRewriter(Call, RetAssign,
return createRewriter(Call, RetAssign, IsSurfAPI,
std::make_pair(Call, Call->getArg(RetAssign & 0x01)));
}
};
Expand Down
7 changes: 3 additions & 4 deletions clang/lib/DPCT/RulesLang/RulesLang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9706,10 +9706,6 @@ void TextureRule::registerMatcher(MatchFinder &MF) {
"cudaTextureObject_t", "cudaSurfaceObject_t", "CUtexObject"))))))
.bind("texObj"),
this);
MF.addMatcher(typeLoc(loc(qualType(hasDeclaration(typedefDecl(hasAnyName(
"cudaTextureObject_t", "CUtexObject"))))))
.bind("texObj"),
this);
MF.addMatcher(
memberExpr(hasObjectExpression(hasType(
type(hasUnqualifiedDesugaredType(recordType(hasDeclaration(
Expand Down Expand Up @@ -9764,6 +9760,9 @@ void TextureRule::registerMatcher(MatchFinder &MF) {
"tex1Dfetch",
"tex1DLayered",
"tex2DLayered",
"surf1Dread",
"surf2Dread",
"surf3Dread",
"cudaCreateTextureObject",
"cudaDestroyTextureObject",
"cudaGetTextureObjectResourceDesc",
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/DPCT/SrcAPI/APINames.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1484,11 +1484,11 @@ ENTRY(texCubemapLod, texCubemapLod, false, NO_FLAG, P4, "comment")
ENTRY(texCubemapLayered, texCubemapLayered, false, NO_FLAG, P4, "comment")
ENTRY(texCubemapLayeredLod, texCubemapLayeredLod, false, NO_FLAG, P4, "comment")
ENTRY(tex2Dgather, tex2Dgather, false, NO_FLAG, P0, "comment")
ENTRY(surf1Dread, surf1Dread, false, NO_FLAG, P4, "comment")
ENTRY(surf1Dread, surf1Dread, true, NO_FLAG, P4, "Successful")
ENTRY(surf1Dwrite, surf1Dwrite, false, NO_FLAG, P0, "comment")
ENTRY(surf2Dread, surf2Dread, false, NO_FLAG, P4, "comment")
ENTRY(surf2Dread, surf2Dread, true, NO_FLAG, P4, "Successful")
ENTRY(surf2Dwrite, surf2Dwrite, false, NO_FLAG, P0, "comment")
ENTRY(surf3Dread, surf3Dread, false, NO_FLAG, P4, "comment")
ENTRY(surf3Dread, surf3Dread, true, NO_FLAG, P4, "Successful")
ENTRY(surf3Dwrite, surf3Dwrite, false, NO_FLAG, P0, "comment")
ENTRY(surf1DLayeredread, surf1DLayeredread, false, NO_FLAG, P4, "comment")
ENTRY(surf1DLayeredwrite, surf1DLayeredwrite, false, NO_FLAG, P4, "comment")
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Headers/__clang_cuda_runtime_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,9 @@ __host__ __device__ void __nv_tex_surf_handler(const char *name, T *ptr,
#endif // __cplusplus >= 201103L && CUDA_VERSION >= 9000
#include "texture_fetch_functions.h"
#include "texture_indirect_functions.h"
#ifdef SYCLomatic_CUSTOMIZATION
#include "surface_indirect_functions.h"
#endif // SYCLomatic_CUSTOMIZATION

// Restore state of __CUDA_ARCH__ and __THROW we had on entry.
#pragma pop_macro("__CUDA_ARCH__")
Expand Down
17 changes: 16 additions & 1 deletion clang/runtime/dpct-rt/include/dpct/bindless_images.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace dpct {
namespace experimental {

#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
#ifndef SYCL_EXT_ONEAPI_BINDLESS_IMAGES

/// The wrapper class of bindless image memory handle.
class image_mem_wrapper {
Expand Down Expand Up @@ -1352,6 +1352,21 @@ static inline void dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest,
sycl::free(temp, q);
}

// A wrapper for sycl sample_image function for the byte addressing image.
template <typename DataT, typename HintT = DataT, typename CoordT>
DataT sample_image(
const sycl::ext::oneapi::experimental::sampled_image_handle &imageHandle,
CoordT &&coords) {
if constexpr (std::is_scalar_v<CoordT>) {
return sycl::ext::oneapi::experimental::sample_image<DataT, HintT, CoordT>(
imageHandle, coords / sizeof(DataT));
} else {
coords[0] = coords[0] / sizeof(DataT);
return sycl::ext::oneapi::experimental::sample_image<DataT, HintT, CoordT>(
imageHandle, coords);
}
}

using image_mem_wrapper_ptr = image_mem_wrapper *;
#ifdef _WIN32
using external_mem_wrapper_ptr = external_mem_wrapper *;
Expand Down
45 changes: 45 additions & 0 deletions clang/runtime/dpct-rt/include/dpct/image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,51 @@ class image_accessor_ext {
typename std::enable_if<Available, data_t>::type read(CoordT x) {
return detail::fetch_data<T>()(_img_acc.read(x, _sampler));
}

/// Read data from accessor.
template <bool Available = dimensions == 3>
typename std::enable_if<Available, data_t>::type read_byte(float x, float y,
float z) {
return detail::fetch_data<T>()(
_img_acc.read(sycl::float4(x / sizeof(T), y, z, 0), _sampler));
}
/// Read data from accessor.
template <class Coord0, class Coord1, class Coord2,
bool Available = dimensions == 3 &&
std::is_integral<Coord0>::value
&&std::is_integral<Coord1>::value
&&std::is_integral<Coord2>::value>
typename std::enable_if<Available, data_t>::type read_byte(Coord0 x, Coord1 y,
Coord2 z) {
return detail::fetch_data<T>()(
_img_acc.read(sycl::int4(x / sizeof(T), y, z, 0), _sampler));
}
/// Read data from accessor.
template <bool Available = dimensions == 2>
typename std::enable_if<Available, data_t>::type read_byte(float x, float y) {
return detail::fetch_data<T>()(
_img_acc.read(sycl::float2(x / sizeof(T), y), _sampler));
}
/// Read data from accessor.
template <class Coord0, class Coord1,
bool Available = dimensions == 2 &&
std::is_integral<Coord0>::value
&&std::is_integral<Coord1>::value>
typename std::enable_if<Available, data_t>::type read_byte(Coord0 x, Coord1 y) {
return detail::fetch_data<T>()(
_img_acc.read(sycl::int2(x / sizeof(T), y), _sampler));
}
/// Read data from accessor.
template <bool Available = dimensions == 1>
typename std::enable_if<Available, data_t>::type read_byte(float x) {
return detail::fetch_data<T>()(_img_acc.read(x / sizeof(T), _sampler));
}
/// Read data from accessor.
template <class CoordT,
bool Available = dimensions == 1 && std::is_integral<CoordT>::value>
typename std::enable_if<Available, data_t>::type read_byte(CoordT x) {
return detail::fetch_data<T>()(_img_acc.read(x / sizeof(T), _sampler));
}
};

template <class T, int dimensions> class image_accessor_ext<T, dimensions, true> {
Expand Down
34 changes: 34 additions & 0 deletions clang/test/dpct/surface.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// RUN: dpct --format-range=none -out-root %T/surface %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only -std=c++14
// RUN: FileCheck --input-file %T/surface/surface.dp.cpp --match-full-lines %s
// RUN: %if build_lit %{icpx -c -fsycl %T/surface/surface.dp.cpp -o %T/surface/surface.dp.o %}

// CHECK: template<typename T> void kernel(dpct::image_accessor_ext<dpct_placeholder/*Fix the type manually*/, 1> surf) {
template<typename T> __global__ void kernel(cudaSurfaceObject_t surf) {
int i;
float j, k, l, m;
// CHECK: surf.read_byte(i);
surf1Dread<T>(surf, i);
// CHECK: i = surf.read_byte(i);
surf1Dread<T>(&i, surf, i);
// CHECK: surf.read_byte(j, i);
surf2Dread<T>(surf, j, i);
// CHECK: i = surf.read_byte(j, i);
surf2Dread<T>(&i, surf, j, i);
// CHECK: surf.read_byte(k, j, i);
surf3Dread<T>(surf, k, j, i);
// CHECK: i = surf.read_byte(k, j, i);
surf3Dread<T>(&i, surf, k, j, i);
}

int main() {
// CHECK: dpct::image_wrapper_base_p surf;
cudaSurfaceObject_t surf;
// CHECK: dpct::image_data resDesc;
cudaResourceDesc resDesc;
// CHECK: surf = dpct::create_image_wrapper(resDesc);
cudaCreateSurfaceObject(&surf, &resDesc);

kernel<int><<<1,1>>>(surf);
cudaDestroySurfaceObject(surf);
cudaGetSurfaceObjectResourceDesc(&resDesc, surf);
}
18 changes: 18 additions & 0 deletions clang/test/dpct/texture/surface_object_bindless_image.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@
// RUN: FileCheck --input-file %T/texture/surface_object_bindless_image/surface_object_bindless_image.dp.cpp --match-full-lines %s
// RUN: %if build_lit %{icpx -c -fsycl %T/texture/surface_object_bindless_image/surface_object_bindless_image.dp.cpp -o %T/texture/surface_object_bindless_image/surface_object_bindless_image.dp.o %}

// CHECK: template<typename T> void kernel(sycl::ext::oneapi::experimental::sampled_image_handle surf) {
template<typename T> __global__ void kernel(cudaSurfaceObject_t surf) {
int i;
float j, k, l, m;
// CHECK: dpct::experimental::sample_image<T>(surf, float(i));
surf1Dread<T>(surf, i);
// CHECK: i = dpct::experimental::sample_image<T>(surf, float(i));
surf1Dread<T>(&i, surf, i);
// CHECK: dpct::experimental::sample_image<T>(surf, sycl::float2(j, i));
surf2Dread<T>(surf, j, i);
// CHECK: i = dpct::experimental::sample_image<T>(surf, sycl::float2(j, i));
surf2Dread<T>(&i, surf, j, i);
// CHECK: dpct::experimental::sample_image<T>(surf, sycl::float3(k, j, i));
surf3Dread<T>(surf, k, j, i);
// CHECK: i = dpct::experimental::sample_image<T>(surf, sycl::float3(k, j, i));
surf3Dread<T>(&i, surf, k, j, i);
}

int main() {
// CHECK: sycl::ext::oneapi::experimental::sampled_image_handle surf;
cudaSurfaceObject_t surf;
Expand Down