diff --git a/src/include/OSL/platform.h b/src/include/OSL/platform.h index c133ef89f..cf0ed36f1 100644 --- a/src/include/OSL/platform.h +++ b/src/include/OSL/platform.h @@ -488,7 +488,11 @@ /// to use regular assert() for this purpose if you need to eliminate the /// dependency on this header from a particular place (and don't mind that /// assert won't format identically on all platforms). -#ifndef NDEBUG +/// +/// These macros are no-ops when compiling for CUDA because they were found +/// to cause strange issues in device code (e.g., function bodies being +/// eliminated when OSL_DASSERT is used). +#if !defined(NDEBUG) && !defined(__CUDACC__) # define OSL_DASSERT OSL_ASSERT # define OSL_DASSERT_MSG OSL_ASSERT_MSG #else diff --git a/src/testrender/CMakeLists.txt b/src/testrender/CMakeLists.txt index 5f72bfbdb..7e89188e2 100644 --- a/src/testrender/CMakeLists.txt +++ b/src/testrender/CMakeLists.txt @@ -22,13 +22,23 @@ if (OSL_USE_OPTIX) ) # We need to make sure that the PTX files are regenerated whenever these - # headers change. + # files change. set (testrender_cuda_headers - cuda/rend_lib.h) + cuda/rend_lib.h + background.h + optics.h + render_params.h + raytracer.h + sampling.h + shading.h + shading.cpp + simpleraytracer.cpp + cuda/vec_math.h + ) # Generate PTX for all of the CUDA files foreach (cudasrc ${testrender_cuda_srcs}) - NVCC_COMPILE ( ${cudasrc} "" ptx_generated "" ) + NVCC_COMPILE ( ${cudasrc} "${testrender_cuda_headers}" ptx_generated "" ) list (APPEND ptx_list ${ptx_generated}) endforeach () @@ -48,7 +58,7 @@ if (OSL_USE_OPTIX) list (APPEND ptx_list ${rend_lib_ptx}) add_custom_target (testrender_ptx ALL - DEPENDS ${ptx_list} + DEPENDS ${ptx_list} ${testrender_cuda_headers} SOURCES ${testrender_cuda_srcs} ) # Install the PTX files in a fixed location so that they can be diff --git a/src/testrender/background.h b/src/testrender/background.h index ac196cf27..3c79dd927 100644 --- a/src/testrender/background.h +++ b/src/testrender/background.h @@ -10,17 +10,51 @@ OSL_NAMESPACE_ENTER + +#ifdef __CUDACC__ +// std::upper_bound is not supported in device code, so define a version of it here. +// Adapted from the LLVM Project, see https://llvm.org/LICENSE.txt for license information. +template +inline OSL_HOSTDEVICE const T* +upper_bound_cuda(const T* data, int count, const T value) +{ + const T* first = data; + const T value_ = value; + int len = count; + while (len != 0) { + int l2 = len / 2; + const T* m = first; + m += l2; + if (value_ < *m) + len = l2; + else { + first = ++m; + len -= l2 + 1; + } + } + return first; +} +#endif + + struct Background { + OSL_HOSTDEVICE Background() : values(0), rows(0), cols(0) {} + + OSL_HOSTDEVICE ~Background() { +#ifndef __CUDACC__ delete[] values; delete[] rows; delete[] cols; +#endif } - template void prepare(int resolution, F cb, T* data) + template + void prepare(int resolution, F cb, T* data) { + // These values are set via set_variables() in CUDA res = resolution; if (res < 32) res = 32; // validate @@ -29,6 +63,7 @@ struct Background { values = new Vec3[res * res]; rows = new float[res]; cols = new float[res * res]; + for (int y = 0, i = 0; y < res; y++) { for (int x = 0; x < res; x++, i++) { values[i] = cb(map(x + 0.5f, y + 0.5f), data); @@ -43,8 +78,9 @@ struct Background { cols[i - res + x] /= cols[i - 1]; } // normalize the pdf across all scanlines - for (int y = 0; y < res; y++) + for (int y = 0; y < res; y++) { rows[y] /= rows[res - 1]; + } // both eval and sample below return a "weight" that is // value[i] / row*col_pdf, so might as well bake it into the table @@ -65,6 +101,7 @@ struct Background { #endif } + OSL_HOSTDEVICE Vec3 eval(const Vec3& dir, float& pdf) const { // map from sphere to unit-square @@ -90,6 +127,7 @@ struct Background { return values[i]; } + OSL_HOSTDEVICE Vec3 sample(float rx, float ry, Dual2& dir, float& pdf) const { float row_pdf, col_pdf; @@ -101,8 +139,96 @@ struct Background { return values[y * res + x]; } +#ifdef __CUDACC__ + OSL_HOSTDEVICE + void set_variables(Vec3* values_in, float* rows_in, float* cols_in, + int res_in) + { + values = values_in; + rows = rows_in; + cols = cols_in; + res = res_in; + invres = __frcp_rn(res); + invjacobian = __fdiv_rn(res * res, float(4 * M_PI)); + assert(res >= 32); + } + + template + OSL_HOSTDEVICE void prepare_cuda(int stride, int idx, F cb) + { + prepare_cuda_01(stride, idx, cb); + if (idx == 0) + prepare_cuda_02(); + prepare_cuda_03(stride, idx); + } + + // Pre-compute the 'values' table in parallel + template + OSL_HOSTDEVICE void prepare_cuda_01(int stride, int idx, F cb) + { + for (int y = 0; y < res; y++) { + const int row_start = y * res; + const int row_end = row_start + res; + int i = row_start + idx; + for (int x = idx; x < res; x += stride, i += stride) { + if (i >= row_end) + continue; + values[i] = cb(map(x + 0.5f, y + 0.5f)); + } + } + } + + // Compute 'cols' and 'rows' using a single thread + OSL_HOSTDEVICE void prepare_cuda_02() + { + for (int y = 0, i = 0; y < res; y++) { + for (int x = 0; x < res; x++, i++) { + cols[i] = std::max(std::max(values[i].x, values[i].y), + values[i].z) + + ((x > 0) ? cols[i - 1] : 0.0f); + } + rows[y] = cols[i - 1] + ((y > 0) ? rows[y - 1] : 0.0f); + // normalize the pdf for this scanline (if it was non-zero) + if (cols[i - 1] > 0) { + for (int x = 0; x < res; x++) { + cols[i - res + x] = __fdiv_rn(cols[i - res + x], + cols[i - 1]); + } + } + } + } + + // Normalize the row PDFs and finalize the 'values' table + OSL_HOSTDEVICE void prepare_cuda_03(int stride, int idx) + { + // normalize the pdf across all scanlines + for (int y = idx; y < res; y += stride) { + rows[y] = __fdiv_rn(rows[y], rows[res - 1]); + } + + // both eval and sample below return a "weight" that is + // value[i] / row*col_pdf, so might as well bake it into the table + for (int y = 0; y < res; y++) { + float row_pdf = rows[y] - (y > 0 ? rows[y - 1] : 0.0f); + const int row_start = y * res; + const int row_end = row_start + res; + int i = row_start + idx; + for (int x = idx; x < res; x += stride, i += stride) { + if (i >= row_end) + continue; + float col_pdf = cols[i] - (x > 0 ? cols[i - 1] : 0.0f); + const float divisor = __fmul_rn(__fmul_rn(row_pdf, col_pdf), + invjacobian); + values[i].x = __fdiv_rn(values[i].x, divisor); + values[i].y = __fdiv_rn(values[i].y, divisor); + values[i].z = __fdiv_rn(values[i].z, divisor); + } + } + } +#endif + private: - Dual2 map(float x, float y) const + OSL_HOSTDEVICE Dual2 map(float x, float y) const { // pixel coordinates of entry (x,y) Dual2 u = Dual2(x, 1, 0) * invres; @@ -115,14 +241,20 @@ struct Background { return make_Vec3(sin_phi * ct, sin_phi * st, cos_phi); } - static float sample_cdf(const float* data, unsigned int n, float x, - unsigned int* idx, float* pdf) + static OSL_HOSTDEVICE float sample_cdf(const float* data, unsigned int n, + float x, unsigned int* idx, + float* pdf) { - OSL_DASSERT(x >= 0); - OSL_DASSERT(x < 1); + OSL_DASSERT(x >= 0.0f); + OSL_DASSERT(x < 1.0f); +#ifndef __CUDACC__ *idx = std::upper_bound(data, data + n, x) - data; +#else + *idx = upper_bound_cuda(data, n, x) - data; +#endif OSL_DASSERT(*idx < n); OSL_DASSERT(x < data[*idx]); + float scaled_sample; if (*idx == 0) { *pdf = data[0]; @@ -137,12 +269,12 @@ struct Background { return std::min(scaled_sample, 0.99999994f); } - Vec3* values; // actual map - float* rows; // probability of choosing a given row 'y' - float* cols; // probability of choosing a given column 'x', given that we've chosen row 'y' - int res; // resolution in pixels of the precomputed table - float invres; // 1 / resolution - float invjacobian; + Vec3* values = nullptr; // actual map + float* rows = nullptr; // probability of choosing a given row 'y' + float* cols = nullptr; // probability of choosing a given column 'x', given that we've chosen row 'y' + int res = -1; // resolution in pixels of the precomputed table + float invres = 0.0f; // 1 / resolution + float invjacobian = 0.0f; }; OSL_NAMESPACE_EXIT diff --git a/src/testrender/cuda/optix_raytracer.cu b/src/testrender/cuda/optix_raytracer.cu index 43e910db7..67acc3eb8 100644 --- a/src/testrender/cuda/optix_raytracer.cu +++ b/src/testrender/cuda/optix_raytracer.cu @@ -4,15 +4,26 @@ #include - -#include "util.h" - #include #include +#include "optix_raytracer.h" #include "rend_lib.h" -#include "render_params.h" +#include "vec_math.h" + +#include "../background.h" +#include "../raytracer.h" +#include "../render_params.h" +#include "../sampling.h" + +// clang-format off +// These files must be included in this specific order +#include "../shading.h" +#include "../shading.cpp" +// clang-format on + +#include OSL_NAMESPACE_ENTER @@ -34,6 +45,208 @@ __device__ __constant__ RenderParams render_params; } +static __device__ void +globals_from_hit(ShaderGlobalsType& sg, float radius = 0.0f, float spread = 0.0f, + Ray::RayType raytype = Ray::RayType::CAMERA) +{ + ShaderGlobalsType local_sg; + // hit-kind 0: quad hit + // 1: sphere hit + optixDirectCall( + optixGetHitKind(), optixGetPrimitiveIndex(), optixGetRayTmax(), + optixGetWorldRayOrigin(), optixGetWorldRayDirection(), &local_sg); + // Setup the ShaderGlobals + const float3 ray_direction = optixGetWorldRayDirection(); + const float3 ray_origin = optixGetWorldRayOrigin(); + const float t_hit = optixGetRayTmax(); + + // Construct a Ray in order to calculate P and its derivatives + Ray ray(F3_TO_V3(ray_origin), F3_TO_V3(ray_direction), radius, spread, + Ray::RayType::CAMERA); + Dual2 t(t_hit); + Dual2 P = ray.point(t); + + sg.I = ray_direction; + sg.N = normalize(optixTransformNormalFromObjectToWorldSpace(V3_TO_F3(local_sg.N))); + sg.Ng = normalize(optixTransformNormalFromObjectToWorldSpace(V3_TO_F3(local_sg.Ng))); + sg.P = V3_TO_F3(P.val()); + sg.dPdx = V3_TO_F3(P.dx()); + sg.dPdy = V3_TO_F3(P.dy()); + sg.dPdu = local_sg.dPdu; + sg.dPdv = local_sg.dPdv; + sg.u = local_sg.u; + sg.v = local_sg.v; + sg.Ci = nullptr; + sg.surfacearea = local_sg.surfacearea; + sg.backfacing = dot(V3_TO_F3(sg.N), V3_TO_F3(sg.I)) > 0.0f; + sg.shaderID = local_sg.shaderID; + + if (sg.backfacing) { + sg.N = -sg.N; + sg.Ng = -sg.Ng; + } + + sg.raytype = raytype; + sg.flipHandedness = dot(V3_TO_F3(sg.N), cross(V3_TO_F3(sg.dPdx), V3_TO_F3(sg.dPdy))) < 0.0f; +} + + +static inline __device__ void +execute_shader(ShaderGlobalsType& sg, char* closure_pool) +{ + if (sg.shaderID < 0) { + // TODO: should probably never get here ... + return; + } + + // Pack the "closure pool" into one of the ShaderGlobals pointers + *(int*)&closure_pool[0] = 0; + sg.renderstate = &closure_pool[0]; + + // Create some run-time options structs. The OSL shader fills in the structs + // as it executes, based on the options specified in the shader source. + NoiseOptCUDA noiseopt; + TextureOptCUDA textureopt; + TraceOptCUDA traceopt; + + // Pack the pointers to the options structs in a faux "context", + // which is a rough stand-in for the host ShadingContext. + ShadingContextCUDA shading_context = { &noiseopt, &textureopt, &traceopt }; + sg.context = &shading_context; + + // Run the OSL callable + void* interactive_ptr = reinterpret_cast( + render_params.interactive_params)[sg.shaderID]; + const unsigned int shaderIdx = 2u + sg.shaderID + 0u; + optixDirectCall( + shaderIdx, &sg /*shaderglobals_ptr*/, + nullptr /*groupdata_ptr*/, + nullptr /*userdata_base_ptr*/, + nullptr /*output_base_ptr*/, + 0 /*shadeindex - unused*/, + interactive_ptr /*interactive_params_ptr*/ + ); +} + + +static inline __device__ void +trace_ray(OptixTraversableHandle handle, const Payload& payload, const float3& origin, + const float3& direction) +{ + uint32_t p0 = payload.raw[0]; + uint32_t p1 = payload.raw[1]; + uint32_t p2 = __float_as_uint(payload.radius); + uint32_t p3 = __float_as_uint(payload.spread); + uint32_t p4 = payload.raytype; + + optixTrace(handle, // handle + origin, // origin + direction, // direction + 1e-3f, // tmin + 1e13f, // tmax + 0, // ray time + OptixVisibilityMask(1), // visibility mask + OPTIX_RAY_FLAG_DISABLE_ANYHIT, // ray flags + 0, // SBT offset + 1, // SBT stride + 0, // miss SBT offset + p0, p1, p2, p3, p4); +}; + +// +// CudaScene +// + +OSL_HOSTDEVICE bool +CudaScene::intersect(const Ray& r, Dual2& t, int& primID, void* sg) const +{ + Payload payload; + payload.sg_ptr = reinterpret_cast(sg); + payload.radius = r.radius; + payload.spread = r.spread; + payload.raytype = *reinterpret_cast(&r.raytype); + TraceData tracedata(*payload.sg_ptr, primID); + trace_ray(handle, payload, V3_TO_F3(r.origin), V3_TO_F3(r.direction)); + primID = tracedata.hit_id; + t = tracedata.hit_t; + return (payload.sg_ptr->shaderID >= 0); +} + + +OSL_HOSTDEVICE float +CudaScene::shapepdf(int primID, const Vec3& x, const Vec3& p) const +{ + SphereParams* spheres = (SphereParams*)spheres_buffer; + QuadParams* quads = (QuadParams*)quads_buffer; + if (primID < num_spheres) { + const SphereParams& params = spheres[primID]; + const OSL::Sphere sphere(F3_TO_V3(params.c), params.r, 0, false); + return sphere.shapepdf(x, p); + } else { + const QuadParams& params = quads[primID - num_spheres]; + const OSL::Quad quad(F3_TO_V3(params.p), F3_TO_V3(params.ex), + F3_TO_V3(params.ey), 0, false); + return quad.shapepdf(x, p); + } +} + + +OSL_HOSTDEVICE bool +CudaScene::islight(int primID) const +{ + SphereParams* spheres = (SphereParams*)spheres_buffer; + QuadParams* quads = (QuadParams*)quads_buffer; + + if (primID < num_spheres) + return spheres[primID].isLight; + return quads[primID - num_spheres].isLight; +} + + +OSL_HOSTDEVICE Vec3 +CudaScene::sample(int primID, const Vec3& x, float xi, float yi, + float& pdf) const +{ + SphereParams* spheres = (SphereParams*)spheres_buffer; + QuadParams* quads = (QuadParams*)quads_buffer; + if (primID < num_spheres) { + const SphereParams& params = spheres[primID]; + const OSL::Sphere sphere(F3_TO_V3(params.c), params.r, 0, false); + return sphere.sample(x, xi, yi, pdf); + } else { + const QuadParams& params = quads[primID - num_spheres]; + const OSL::Quad quad(F3_TO_V3(params.p), F3_TO_V3(params.ex), + F3_TO_V3(params.ey), 0, false); + return quad.sample(x, xi, yi, pdf); + } +} + + +OSL_HOSTDEVICE int +CudaScene::num_prims() const +{ + return num_spheres + num_quads; +} + +//------------------------------------------------------------------------------ + +// Because clang++ 9.0 seems to have trouble with some of the texturing "intrinsics" +// let's do the texture look-ups in this file. +extern "C" __device__ float4 +osl_tex2DLookup(void* handle, float s, float t, float dsdx, float dtdx, float dsdy, float dtdy) +{ + const float2 dx = {dsdx, dtdx}; + const float2 dy = {dsdy, dtdy}; + cudaTextureObject_t texID = cudaTextureObject_t(handle); + return tex2DGrad(texID, s, t, dx, dy); +} + + +// +// OptiX Programs +// + + extern "C" __global__ void __miss__() { @@ -51,12 +264,45 @@ __miss__() extern "C" __global__ void __raygen__setglobals() { + uint3 launch_dims = optixGetLaunchDimensions(); + uint3 launch_index = optixGetLaunchIndex(); + // Set global variables - OSL::pvt::osl_printf_buffer_start = render_params.osl_printf_buffer_start; - OSL::pvt::osl_printf_buffer_end = render_params.osl_printf_buffer_end; - OSL::pvt::s_color_system = render_params.color_system; - OSL::pvt::test_str_1 = render_params.test_str_1; - OSL::pvt::test_str_2 = render_params.test_str_2; + if (launch_index.x == 0 && launch_index.y == 0) { + OSL::pvt::osl_printf_buffer_start + = render_params.osl_printf_buffer_start; + OSL::pvt::osl_printf_buffer_end = render_params.osl_printf_buffer_end; + OSL::pvt::s_color_system = render_params.color_system; + OSL::pvt::test_str_1 = render_params.test_str_1; + OSL::pvt::test_str_2 = render_params.test_str_2; + } + + Background background; + background.set_variables((Vec3*)render_params.bg_values, + (float*)render_params.bg_rows, + (float*)render_params.bg_cols, + render_params.bg_res); + + SimpleRaytracer raytracer; + raytracer.background = background; + raytracer.backgroundResolution = render_params.bg_id >= 0 + ? render_params.bg_res + : 0; + raytracer.backgroundShaderID = render_params.bg_id; + raytracer.max_bounces = render_params.max_bounces; + raytracer.rr_depth = 5; + raytracer.show_albedo_scale = render_params.show_albedo_scale; + + if (render_params.bg_id < 0) + return; + + auto evaler = [&](const Dual2& dir) { + return raytracer.eval_background(dir, nullptr); + }; + + // Background::prepare_cuda must run on a single warp + assert(launch_index.x < 32 && launch_index.y == 0); + background.prepare_cuda(launch_dims.x, launch_index.x, evaler); } @@ -67,37 +313,102 @@ __miss__setglobals() extern "C" __global__ void -__raygen__() +__closesthit__deferred() { - uint3 launch_dims = optixGetLaunchDimensions(); - uint3 launch_index = optixGetLaunchIndex(); - const float3 eye = render_params.eye; - const float3 dir = render_params.dir; - const float3 cx = render_params.cx; - const float3 cy = render_params.cy; - const float invw = render_params.invw; - const float invh = render_params.invh; - - // Compute the pixel coordinates - const float2 d = make_float2(static_cast(launch_index.x) + 0.5f, - static_cast(launch_index.y) + 0.5f); - - // Make the ray for the current pixel - RayGeometry r; - r.origin = eye; - r.direction = normalize(cx * (d.x * invw - 0.5f) + cy * (0.5f - d.y * invh) - + dir); - optixTrace(render_params.traversal_handle, r.origin, r.direction, 1e-3f, - 1e13f, 0, OptixVisibilityMask(1), OPTIX_RAY_FLAG_DISABLE_ANYHIT, - 0, 1, 0); + Payload payload; + payload.get(); + ShaderGlobalsType* sg_ptr = payload.sg_ptr; + TraceData* tracedata = reinterpret_cast(sg_ptr->tracedata); + globals_from_hit(*sg_ptr, payload.radius, payload.spread, payload.raytype); + + const unsigned int hit_idx = optixGetPrimitiveIndex(); + const unsigned int hit_kind = optixGetHitKind(); + if (hit_kind == 0) { + const QuadParams* quads = reinterpret_cast( + render_params.quads_buffer); + tracedata->hit_id = quads[hit_idx].objID; + } else if (hit_kind == 1) { + const SphereParams* spheres = reinterpret_cast( + render_params.spheres_buffer); + tracedata->hit_id = spheres[hit_idx].objID; + } + const float hit_t = optixGetRayTmax(); + tracedata->hit_t = *(uint32_t*)&hit_t; } -// Because clang++ 9.0 seems to have trouble with some of the texturing "intrinsics" -// let's do the texture look-ups in this file. -extern "C" __device__ float4 -osl_tex2DLookup(void* handle, float s, float t) +extern "C" __global__ void +__raygen__deferred() { - cudaTextureObject_t texID = cudaTextureObject_t(handle); - return tex2D(texID, s, t); + Background background; + background.set_variables((Vec3*)render_params.bg_values, + (float*)render_params.bg_rows, + (float*)render_params.bg_cols, + render_params.bg_res); + + Color3 result(0, 0, 0); + const int aa = render_params.aa; + for (int si = 0, n = aa * aa; si < n; si++) { + uint3 launch_index = optixGetLaunchIndex(); + Sampler sampler(launch_index.x, launch_index.y, si); + Vec3 j = sampler.get(); + // warp distribution to approximate a tent filter [-1,+1)^2 + j.x *= 2; + j.x = j.x < 1 ? sqrtf(j.x) - 1 : 1 - sqrtf(2 - j.x); + j.y *= 2; + j.y = j.y < 1 ? sqrtf(j.y) - 1 : 1 - sqrtf(2 - j.y); + + if (render_params.no_jitter) { + j *= 0.0f; + } + + // Compute the pixel coordinates + const float2 d + = make_float2(static_cast(launch_index.x) + 0.5f + j.x, + static_cast(launch_index.y) + 0.5f + j.y); + + SimpleRaytracer raytracer; + raytracer.background = background; + raytracer.backgroundResolution = render_params.bg_id >= 0 + ? render_params.bg_res + : 0; + raytracer.backgroundShaderID = render_params.bg_id; + raytracer.max_bounces = render_params.max_bounces; + raytracer.rr_depth = 5; + raytracer.show_albedo_scale = render_params.show_albedo_scale; + + const Vec3 eye = F3_TO_V3(render_params.eye); + const Vec3 dir = F3_TO_V3(render_params.dir); + const Vec3 up = F3_TO_V3(render_params.up); + const float fov = render_params.fov; + + uint3 launch_dims = optixGetLaunchDimensions(); + raytracer.camera.resolution(launch_dims.x, launch_dims.y); + raytracer.camera.lookat(eye, dir, up, fov); + raytracer.camera.finalize(); + + raytracer.scene = { render_params.num_spheres, render_params.num_quads, + render_params.spheres_buffer, + render_params.quads_buffer, + render_params.traversal_handle }; + + Color3 r = raytracer.subpixel_radiance(d.x, d.y, sampler, nullptr); + + result = OIIO::lerp(result, r, 1.0f / (si + 1)); + } + + uint3 launch_dims = optixGetLaunchDimensions(); + uint3 launch_index = optixGetLaunchIndex(); + float3* output_buffer = reinterpret_cast( + render_params.output_buffer); + int pixel = launch_index.y * launch_dims.x + launch_index.x; + output_buffer[pixel] = C3_TO_F3(result); } + +//------------------------------------------------------------------------------ + +// We need to pull in the definition of SimpleRaytracer::subpixel_radiance(), +// which is shared between the host and CUDA renderers. +#include "../simpleraytracer.cpp" + +//------------------------------------------------------------------------------ diff --git a/src/testrender/cuda/optix_raytracer.h b/src/testrender/cuda/optix_raytracer.h new file mode 100644 index 000000000..76d1f691d --- /dev/null +++ b/src/testrender/cuda/optix_raytracer.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include + +#include "rend_lib.h" +#include "../background.h" +#include "../raytracer.h" +#include "../sampling.h" + +#include + +#ifdef __CUDACC__ + +struct Payload { + union { + uint32_t raw[2]; + OSL_CUDA::ShaderGlobals* sg_ptr; + }; + float radius; + float spread; + OSL::Ray::RayType raytype; + + __forceinline__ __device__ void set() + { + optixSetPayload_0(raw[0]); + optixSetPayload_1(raw[1]); + optixSetPayload_2(__float_as_uint(radius)); + optixSetPayload_3(__float_as_uint(spread)); + optixSetPayload_4((uint32_t)raytype); + } + + __forceinline__ __device__ void get() + { + raw[0] = optixGetPayload_0(); + raw[1] = optixGetPayload_1(); + radius = __uint_as_float(optixGetPayload_2()); + spread = __uint_as_float(optixGetPayload_3()); + raytype = (OSL::Ray::RayType)optixGetPayload_4(); + } +}; + +OSL_NAMESPACE_ENTER + +struct CudaScene { + OSL_HOSTDEVICE bool intersect(const Ray& r, Dual2& t, int& primID, + void* sg = nullptr) const; + OSL_HOSTDEVICE float shapepdf(int primID, const Vec3& x, + const Vec3& p) const; + OSL_HOSTDEVICE bool islight(int primID) const; + OSL_HOSTDEVICE Vec3 sample(int primID, const Vec3& x, float xi, float yi, + float& pdf) const; + OSL_HOSTDEVICE int num_prims() const; + + uint64_t num_spheres; + uint64_t num_quads; + CUdeviceptr spheres_buffer; + CUdeviceptr quads_buffer; + OptixTraversableHandle handle; +}; + +struct SimpleRaytracer { + using ShadingContext = ShadingContextCUDA; + + Background background; + Camera camera; + CudaScene scene; + int aa = 1; + int backgroundResolution = 1024; + int backgroundShaderID = -1; + int max_bounces = 1000000; + int rr_depth = 5; + float show_albedo_scale = 0.0f; + + OSL_HOSTDEVICE Vec3 eval_background(const Dual2& dir, + ShadingContext* ctx, int bounce = -1); + OSL_HOSTDEVICE Color3 subpixel_radiance(float x, float y, Sampler& sampler, + ShadingContext* ctx = nullptr); +}; + +OSL_NAMESPACE_EXIT + +#endif // #ifdef __CUDACC__ diff --git a/src/testrender/cuda/quad.cu b/src/testrender/cuda/quad.cu index ce6ad4e30..8d1ad2fa5 100644 --- a/src/testrender/cuda/quad.cu +++ b/src/testrender/cuda/quad.cu @@ -5,33 +5,45 @@ #include +#include "../raytracer.h" +#include "optix_raytracer.h" #include "rend_lib.h" -#include "render_params.h" -#include "wrapper.h" +#include "vec_math.h" extern "C" __device__ void __direct_callable__quad_shaderglobals(const unsigned int idx, const float t_hit, const float3 ray_origin, const float3 ray_direction, - ShaderGlobals* sg) + OSL_CUDA::ShaderGlobals* sg) { const GenericData* g_data = reinterpret_cast( optixGetSbtDataPointer()); const QuadParams* g_quads = reinterpret_cast( g_data->data); - const QuadParams& quad = g_quads[idx]; - const float3 P = ray_origin + t_hit * ray_direction; - - float3 h = P - quad.p; - - sg->N = sg->Ng = quad.n; - sg->u = dot(h, quad.ex) * quad.eu; - sg->v = dot(h, quad.ey) * quad.ev; - sg->dPdu = quad.ey; - sg->dPdv = quad.ex; - sg->surfacearea = quad.a; - sg->shaderID = quad.shaderID; + const QuadParams& params = g_quads[idx]; + const float3 P = ray_origin + t_hit * ray_direction; + + sg->I = ray_direction; + sg->N = sg->Ng = params.n; + sg->surfacearea = params.a; + sg->shaderID = params.shaderID; + sg->backfacing = dot(V3_TO_F3(sg->N), V3_TO_F3(sg->I)) > 0.0f; + + if (sg->backfacing) { + sg->N = -sg->N; + sg->Ng = -sg->Ng; + } + + const OSL::Quad quad(F3_TO_V3(params.p), F3_TO_V3(params.ex), + F3_TO_V3(params.ey), 0, false); + OSL::Vec3 dPdu, dPdv; + OSL::Dual2 uv = quad.uv(F3_TO_V3(P), F3_TO_V3(sg->N), dPdu, + dPdv); + sg->u = uv.val().x; + sg->v = uv.val().y; + sg->dPdu = V3_TO_F3(dPdu); + sg->dPdv = V3_TO_F3(dPdv); } @@ -42,21 +54,29 @@ __intersection__quad() optixGetSbtDataPointer()); const QuadParams* g_quads = reinterpret_cast( g_data->data); + + Payload payload; + payload.get(); + const OSL_CUDA::ShaderGlobals* sg_ptr + = reinterpret_cast(payload.sg_ptr); + const TraceData* tracedata = reinterpret_cast( + sg_ptr->tracedata); + const int obj_id = tracedata->obj_id; const unsigned int idx = optixGetPrimitiveIndex(); - const QuadParams& quad = g_quads[idx]; + const QuadParams& params = g_quads[idx]; const float3 ray_origin = optixGetObjectRayOrigin(); const float3 ray_direction = optixGetObjectRayDirection(); + const bool self = obj_id == params.objID; - float dn = dot(ray_direction, quad.n); - float en = dot(quad.p - ray_origin, quad.n); - if (dn * en > 0) { - float t = en / dn; - float3 h = (ray_origin + ray_direction * t) - quad.p; - float dx = dot(h, quad.ex) * quad.eu; - float dy = dot(h, quad.ey) * quad.ev; - - if (dx >= 0 && dx < 1.0f && dy >= 0 && dy < 1.0f - && t < optixGetRayTmax()) - optixReportIntersection(t, RAYTRACER_HIT_QUAD); - } + if (self) + return; + + const OSL::Quad quad(F3_TO_V3(params.p), F3_TO_V3(params.ex), + F3_TO_V3(params.ey), 0, false); + const OSL::Ray ray(F3_TO_V3(ray_origin), F3_TO_V3(ray_direction), + payload.radius, payload.spread, payload.raytype); + const OSL::Dual2 t = quad.intersect(ray, self); + + if (t.val() != 0.0f && t.val() < optixGetRayTmax()) + optixReportIntersection(t.val(), RAYTRACER_HIT_QUAD); } diff --git a/src/testrender/cuda/rend_lib.cu b/src/testrender/cuda/rend_lib.cu index 9d59345e7..165a1f9d2 100644 --- a/src/testrender/cuda/rend_lib.cu +++ b/src/testrender/cuda/rend_lib.cu @@ -58,7 +58,7 @@ closure_component_allot(void* pool, int id, size_t prim_size, ((OSL::ClosureComponent*)pool)->id = id; ((OSL::ClosureComponent*)pool)->w = w; - size_t needed = (sizeof(OSL::ClosureComponent) - sizeof(void*) + prim_size + size_t needed = (sizeof(OSL::ClosureComponent) + prim_size + (alignof(OSL::ClosureComponent) - 1)) & ~(alignof(OSL::ClosureComponent) - 1); char* char_ptr = (char*)pool; @@ -120,7 +120,7 @@ closure_add_allot(void* pool, OSL::ClosureColor* a, OSL::ClosureColor* b) __device__ void* osl_allocate_closure_component(void* sg_, int id, int size) { - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; OSL::Color3 w = OSL::Color3(1, 1, 1); // Fix up the alignment @@ -141,7 +141,7 @@ __device__ void* osl_allocate_weighted_closure_component(void* sg_, int id, int size, const void* w) { - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; const OSL::Color3* wc = (const OSL::Color3*)__builtin_assume_aligned(w, alignof(float)); @@ -166,7 +166,7 @@ osl_allocate_weighted_closure_component(void* sg_, int id, int size, __device__ void* osl_mul_closure_color(void* sg_, void* a, const void* w) { - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; const OSL::Color3* wc = (const OSL::Color3*)__builtin_assume_aligned(w, alignof(float)); @@ -198,7 +198,7 @@ osl_mul_closure_float(void* sg_, void* a, float w) { a = __builtin_assume_aligned(a, alignof(float)); - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; if (a == NULL || w == 0.0f) { return NULL; @@ -226,7 +226,7 @@ osl_add_closure_closure(void* sg_, void* a, void* b) a = __builtin_assume_aligned(a, alignof(float)); b = __builtin_assume_aligned(b, alignof(float)); - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; if (a == NULL) { return b; @@ -359,7 +359,7 @@ osl_printf(void* sg_, OSL::ustringhash_pod fmt_str_hash, void* args) __device__ void* osl_get_noise_options(void* sg_) { - ShaderGlobals* sg = ((ShaderGlobals*)sg_); + OSL_CUDA::ShaderGlobals* sg = ((OSL_CUDA::ShaderGlobals*)sg_); NoiseOptCUDA* opt = (NoiseOptCUDA*)((ShadingContextCUDA*)sg->context)->noise_options_ptr(); new (opt) NoiseOptCUDA; @@ -403,7 +403,7 @@ make_float3(const float4& a) // optix_raytrace.cu). // (clang++ 9.0 error 'undefined __nv_tex_surf_handler') extern __device__ float4 -osl_tex2DLookup(void* handle, float s, float t); +osl_tex2DLookup(void* handle, float s, float t, float dsdx, float dtdx, float dsdy, float dtdy); __device__ int osl_texture(void* sg_, OSL::ustringhash_pod name, void* handle, void* opt_, @@ -415,7 +415,7 @@ osl_texture(void* sg_, OSL::ustringhash_pod name, void* handle, void* opt_, if (!handle) return 0; // cudaTextureObject_t texID = cudaTextureObject_t(handle); - float4 fromTexture = osl_tex2DLookup(handle, s, t); + float4 fromTexture = osl_tex2DLookup(handle, s, t, dsdx, dtdx, dsdy, dtdy); // see note above // float4 fromTexture = tex2D(texID, s, t); *((float3*)result) = make_float3(fromTexture.x, fromTexture.y, @@ -444,9 +444,9 @@ osl_range_check_err(int indexvalue, int length, OSL::ustringhash_pod symname, __device__ int osl_get_matrix(void* sg_, void* r, OSL::ustringhash_pod from_) { - r = __builtin_assume_aligned(r, alignof(float)); - OSL::ustringhash from = OSL::ustringhash_from(from_); - ShaderGlobals* sg = (ShaderGlobals*)sg_; + r = __builtin_assume_aligned(r, alignof(float)); + OSL::ustringhash from = OSL::ustringhash_from(from_); + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; if (from == OSL::Hashes::common) { MAT(r).makeIdentity(); return true; @@ -489,9 +489,9 @@ osl_get_matrix(void* sg_, void* r, OSL::ustringhash_pod from_) __device__ int osl_get_inverse_matrix(void* sg_, void* r, OSL::ustringhash_pod to_) { - r = __builtin_assume_aligned(r, alignof(float)); - OSL::ustringhash to = OSL::ustringhash_from(to_); - ShaderGlobals* sg = (ShaderGlobals*)sg_; + r = __builtin_assume_aligned(r, alignof(float)); + OSL::ustringhash to = OSL::ustringhash_from(to_); + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; if (to == OSL::Hashes::common) { MAT(r).makeIdentity(); return true; diff --git a/src/testrender/cuda/rend_lib.h b/src/testrender/cuda/rend_lib.h index 0fe9a027d..4be40a15b 100644 --- a/src/testrender/cuda/rend_lib.h +++ b/src/testrender/cuda/rend_lib.h @@ -17,6 +17,12 @@ #include #include +#include "../raytracer.h" + + +#define RAYTRACER_HIT_QUAD 0 +#define RAYTRACER_HIT_SPHERE 1 + OSL_NAMESPACE_ENTER @@ -87,19 +93,20 @@ struct ShadingContextCUDA { }; +namespace OSL_CUDA { struct ShaderGlobals { - float3 P, dPdx, dPdy; - float3 dPdz; - float3 I, dIdx, dIdy; - float3 N; - float3 Ng; + OSL::Vec3 P, dPdx, dPdy; + OSL::Vec3 dPdz; + OSL::Vec3 I, dIdx, dIdy; + OSL::Vec3 N; + OSL::Vec3 Ng; float u, dudx, dudy; float v, dvdx, dvdy; - float3 dPdu, dPdv; + OSL::Vec3 dPdu, dPdv; float time; float dtime; - float3 dPdtime; - float3 Ps, dPsdx, dPsdy; + OSL::Vec3 dPdtime; + OSL::Vec3 Ps, dPsdx, dPsdy; void* renderstate; void* tracedata; void* objdata; @@ -117,86 +124,25 @@ struct ShaderGlobals { int backfacing; int shaderID; }; - - -enum RayType { - CAMERA = 1, - SHADOW = 2, - REFLECTION = 4, - REFRACTION = 8, - DIFFUSE = 16, - GLOSSY = 32, - SUBSURFACE = 64, - DISPLACEMENT = 128 -}; - - -// Closures supported by the OSL sample renderer. This list is mostly aspirational. -enum ClosureIDs { - EMISSION_ID = 1, - BACKGROUND_ID, - DIFFUSE_ID, - OREN_NAYAR_ID, - TRANSLUCENT_ID, - PHONG_ID, - WARD_ID, - MICROFACET_ID, - REFLECTION_ID, - FRESNEL_REFLECTION_ID, - REFRACTION_ID, - TRANSPARENT_ID, - DEBUG_ID, - HOLDOUT_ID, -}; - - -// ======================================== -// -// Some helper vector functions -// -static __forceinline__ __device__ float3 -operator*(const float a, const float3& b) -{ - return make_float3(a * b.x, a * b.y, a * b.z); -} - -static __forceinline__ __device__ float3 -operator*(const float3& a, const float b) -{ - return make_float3(a.x * b, a.y * b, a.z * b); } -static __forceinline__ __device__ float3 -operator+(const float3& a, const float3& b) -{ - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -} - -static __forceinline__ __device__ float3 -operator-(const float3& a, const float3& b) -{ - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -} - -static __forceinline__ __device__ float3 -operator-(const float3& a) -{ - return make_float3(-a.x, -a.y, -a.z); -} - -static __forceinline__ __device__ float -dot(const float3& a, const float3& b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} - -static __forceinline__ __device__ float3 -normalize(const float3& v) -{ - float invLen = 1.0f / sqrtf(dot(v, v)); - return invLen * v; -} -// -// ======================================== +struct TraceData { + // OUT: The ID of the object that was hit + int32_t hit_id; + union { + // IN: Set before calling trace to avoid self-intersection + int32_t obj_id; + // OUT: The hit distance + float hit_t; + }; + + OSL_HOSTDEVICE TraceData(OSL_CUDA::ShaderGlobals& sg, int id) + { + hit_id = -1; + obj_id = id; + sg.shaderID = -1; // Set in the CH program + sg.tracedata = (void*)this; + } +}; } // anonymous namespace diff --git a/src/testrender/cuda/sphere.cu b/src/testrender/cuda/sphere.cu index c09a7ae32..fdefd366b 100644 --- a/src/testrender/cuda/sphere.cu +++ b/src/testrender/cuda/sphere.cu @@ -6,42 +6,10 @@ #include +#include "../raytracer.h" +#include "optix_raytracer.h" #include "rend_lib.h" -#include "render_params.h" -#include "wrapper.h" - - -static __device__ __inline__ void -calc_uv(float3 shading_normal, float& u, float& v, float3& dPdu, float3& dPdv) -{ - const float3 n = shading_normal; - - const float nx = n.x; - const float ny = n.y; - const float nz = n.z; - - u = (atan2(nx, nz) + M_PI) * 0.5f * float(M_1_PI); - v = acos(ny) * float(M_1_PI); - - float xz2 = nx * nx + nz * nz; - if (xz2 > 0.0f) { - const float PI = float(M_PI); - const float TWOPI = float(2 * M_PI); - float xz = sqrtf(xz2); - float inv = 1.0f / xz; - dPdu = make_float3(-TWOPI * nx, TWOPI * nz, 0.0f); - dPdv = make_float3(-PI * nz * inv * ny, -PI * nx * inv * ny, PI * xz); - } else { - // pick arbitrary axes for poles to avoid division by 0 - if (ny > 0.0f) { - dPdu = make_float3(0.0f, 0.0f, 1.0f); - dPdv = make_float3(1.0f, 0.0f, 0.0f); - } else { - dPdu = make_float3(0.0f, 0.0f, 1.0f); - dPdv = make_float3(-1.0f, 0.0f, 0.0f); - } - } -} +#include "vec_math.h" extern "C" __device__ void @@ -49,20 +17,27 @@ __direct_callable__sphere_shaderglobals(const unsigned int idx, const float t_hit, const float3 ray_origin, const float3 ray_direction, - ShaderGlobals* sg) + OSL_CUDA::ShaderGlobals* sg) { const GenericData* g_data = reinterpret_cast( optixGetSbtDataPointer()); const SphereParams* g_spheres = reinterpret_cast( g_data->data); - const SphereParams& sphere = g_spheres[idx]; + const SphereParams& params = g_spheres[idx]; const float3 P = ray_origin + t_hit * ray_direction; - sg->N = sg->Ng = normalize(P - sphere.c); - sg->surfacearea = sphere.a; - sg->shaderID = sphere.shaderID; - - calc_uv(sg->N, sg->u, sg->v, sg->dPdu, sg->dPdv); + sg->N = sg->Ng = normalize(P - params.c); + sg->surfacearea = params.a; + sg->shaderID = params.shaderID; + + const OSL::Sphere sphere(F3_TO_V3(params.c), params.r, 0, false); + OSL::Vec3 dPdu, dPdv; + OSL::Dual2 uv = sphere.uv(F3_TO_V3(P), F3_TO_V3(sg->N), dPdu, + dPdv); + sg->u = uv.val().x; + sg->v = uv.val().y; + sg->dPdu = V3_TO_F3(dPdu); + sg->dPdv = V3_TO_F3(dPdv); } @@ -73,24 +48,25 @@ __intersection__sphere() optixGetSbtDataPointer()); const SphereParams* g_spheres = reinterpret_cast( g_data->data); + + Payload payload; + payload.get(); + const OSL_CUDA::ShaderGlobals* sg_ptr + = reinterpret_cast(payload.sg_ptr); + const TraceData* tracedata = reinterpret_cast( + sg_ptr->tracedata); + const int obj_id = tracedata->obj_id; const unsigned int idx = optixGetPrimitiveIndex(); - const SphereParams& sphere = g_spheres[idx]; + const SphereParams& params = g_spheres[idx]; const float3 ray_origin = optixGetObjectRayOrigin(); const float3 ray_direction = optixGetObjectRayDirection(); + const bool self = obj_id == params.objID; - float3 oc = sphere.c - ray_origin; - float b = dot(oc, ray_direction); - float det = b * b - dot(oc, oc) + sphere.r2; - if (det >= 0.0f) { - det = sqrtf(det); - float x = b - det; - float y = b + det; - - // NB: this does not included the 'self' check from - // the testrender sphere intersection - float t = (x > 0) ? x : ((y > 0) ? y : 0); + const OSL::Sphere sphere(F3_TO_V3(params.c), params.r, 0, false); + const OSL::Ray ray(F3_TO_V3(ray_origin), F3_TO_V3(ray_direction), + payload.radius, payload.spread, payload.raytype); + const OSL::Dual2 t = sphere.Sphere::intersect(ray, self); - if (t < optixGetRayTmax()) - optixReportIntersection(t, RAYTRACER_HIT_SPHERE); - } + if (t.val() != 0.0f && t.val() < optixGetRayTmax()) + optixReportIntersection(t.val(), RAYTRACER_HIT_SPHERE); } diff --git a/src/testrender/cuda/util.h b/src/testrender/cuda/util.h deleted file mode 100644 index 2d6a18b24..000000000 --- a/src/testrender/cuda/util.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright Contributors to the Open Shading Language project. -// SPDX-License-Identifier: BSD-3-Clause -// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage - - -#pragma once - -#include - -#include - - -struct PRD_radiance { - float3 result; -}; - - -struct RayGeometry { - float3 origin; - float3 direction; -}; - - -static __device__ __inline__ uchar4 -make_color(const float3& c) -{ - return make_uchar4( - static_cast(__saturatef(c.z) * 255.99f), /* B */ - static_cast(__saturatef(c.y) * 255.99f), /* G */ - static_cast(__saturatef(c.x) * 255.99f), /* R */ - 255u); /* A */ -} diff --git a/src/testrender/cuda/vec_math.h b/src/testrender/cuda/vec_math.h new file mode 100644 index 000000000..ca8f5e157 --- /dev/null +++ b/src/testrender/cuda/vec_math.h @@ -0,0 +1,95 @@ +// Copyright Contributors to the Open Shading Language project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + + +#pragma once + +#include + +#include +#include + +#if !defined(__CUDACC_RTC__) +#include +#include +#endif + +namespace { // anonymous namespace + +// ======================================== +// +// Some helper vector functions +// +static __forceinline__ __device__ float3 +operator*(const float a, const float3& b) +{ + return make_float3(a * b.x, a * b.y, a * b.z); +} + +static __forceinline__ __device__ float3 +operator*(const float3& a, const float b) +{ + return make_float3(a.x * b, a.y * b, a.z * b); +} + +static __forceinline__ __device__ float3 +operator+(const float3& a, const float3& b) +{ + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); +} + +static __forceinline__ __device__ float3 +operator-(const float3& a, const float3& b) +{ + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); +} + +static __forceinline__ __device__ float3 +operator-(const float3& a) +{ + return make_float3(-a.x, -a.y, -a.z); +} + +static __forceinline__ __device__ float +dot(const float3& a, const float3& b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +static __forceinline__ __device__ float3 +normalize(const float3& v) +{ + float invLen = 1.0f / sqrtf(dot(v, v)); + return invLen * v; +} + +static __forceinline__ __device__ +float3 cross(const float3& a, const float3& b) +{ + return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); +} + + +static __forceinline__ __device__ +float length(const float3& v) +{ + return __fsqrt_rn((v.x * v.x) + (v.y * v.y) + (v.z * v.z)); +} + + +static __forceinline__ __device__ +void ortho(const float3& n, float3& x, float3& y) +{ + x = normalize(fabsf(n.x) > .01f ? make_float3(n.z, 0, -n.x) : make_float3(0, -n.z, n.y)); + y = cross(n, x); +} + + +} // anonymous namespace + +// Conversion macros for casting between vector types +#define F3_TO_V3(f3) (*reinterpret_cast(&f3)) +#define F3_TO_C3(f3) (*reinterpret_cast(&f3)) +#define V3_TO_F3(v3) (*reinterpret_cast(&v3)) +#define C3_TO_F3(c3) (*reinterpret_cast(&c3)) diff --git a/src/testrender/cuda/wrapper.cu b/src/testrender/cuda/wrapper.cu index 79c1dbce9..c14105c9e 100644 --- a/src/testrender/cuda/wrapper.cu +++ b/src/testrender/cuda/wrapper.cu @@ -10,10 +10,14 @@ #include +#include "optix_raytracer.h" #include "rend_lib.h" -#include "util.h" +#include "vec_math.h" #include "../render_params.h" +#include "../shading.h" + +using OSL_CUDA::ShaderGlobals; extern "C" { @@ -30,25 +34,25 @@ __anyhit__any_hit_shadow() static __device__ void -globals_from_hit(ShaderGlobals& sg) +globals_from_hit(OSL_CUDA::ShaderGlobals& sg) { const GenericRecord* record = reinterpret_cast( optixGetSbtDataPointer()); - ShaderGlobals local_sg; + OSL_CUDA::ShaderGlobals local_sg; // hit-kind 0: quad hit // 1: sphere hit - optixDirectCall( + optixDirectCall( optixGetHitKind(), optixGetPrimitiveIndex(), optixGetRayTmax(), optixGetWorldRayOrigin(), optixGetWorldRayDirection(), &local_sg); // Setup the ShaderGlobals const float3 ray_direction = optixGetWorldRayDirection(); const float3 ray_origin = optixGetWorldRayOrigin(); - const float t_hit = optixGetRayTmin(); + const float t_hit = optixGetRayTmax(); sg.I = ray_direction; - sg.N = normalize(optixTransformNormalFromObjectToWorldSpace(local_sg.N)); - sg.Ng = normalize(optixTransformNormalFromObjectToWorldSpace(local_sg.Ng)); + sg.N = normalize(optixTransformNormalFromObjectToWorldSpace(V3_TO_F3(local_sg.N))); + sg.Ng = normalize(optixTransformNormalFromObjectToWorldSpace(V3_TO_F3(local_sg.Ng))); sg.P = ray_origin + t_hit * ray_direction; sg.dPdu = local_sg.dPdu; sg.dPdv = local_sg.dPdv; @@ -56,7 +60,7 @@ globals_from_hit(ShaderGlobals& sg) sg.v = local_sg.v; sg.Ci = NULL; sg.surfacearea = local_sg.surfacearea; - sg.backfacing = dot(sg.N, sg.I) > 0.0f; + sg.backfacing = dot(V3_TO_F3(sg.N), V3_TO_F3(sg.I)) > 0.0f; sg.shaderID = local_sg.shaderID; if (sg.backfacing) { @@ -65,7 +69,7 @@ globals_from_hit(ShaderGlobals& sg) } // NB: These variables are not used in the current iteration of the sample - sg.raytype = CAMERA; + sg.raytype = OSL::Ray::CAMERA; sg.flipHandedness = 0; } @@ -95,38 +99,39 @@ process_closure(const OSL::ClosureColor* closure_tree) const void* cur = closure_tree; while (cur) { - switch (((OSL::ClosureColor*)cur)->id) { - case OSL::ClosureColor::ADD: { + ClosureIDs id = static_cast(((OSL::ClosureColor*)cur)->id); + switch (id) { + case ClosureIDs::ADD: { ptr_stack[stack_idx] = ((OSL::ClosureAdd*)cur)->closureB; weight_stack[stack_idx++] = weight; cur = ((OSL::ClosureAdd*)cur)->closureA; break; } - case OSL::ClosureColor::MUL: { + case ClosureIDs::MUL: { weight *= ((OSL::ClosureMul*)cur)->weight; cur = ((OSL::ClosureMul*)cur)->closure; break; } - case EMISSION_ID: { + case ClosureIDs::EMISSION_ID: { cur = NULL; break; } - case DIFFUSE_ID: - case OREN_NAYAR_ID: - case PHONG_ID: - case WARD_ID: - case REFLECTION_ID: - case REFRACTION_ID: - case FRESNEL_REFLECTION_ID: { + case ClosureIDs::DIFFUSE_ID: + case ClosureIDs::OREN_NAYAR_ID: + case ClosureIDs::PHONG_ID: + case ClosureIDs::WARD_ID: + case ClosureIDs::REFLECTION_ID: + case ClosureIDs::REFRACTION_ID: + case ClosureIDs::FRESNEL_REFLECTION_ID: { result += ((OSL::ClosureComponent*)cur)->w * weight; cur = NULL; break; } - case MICROFACET_ID: { + case ClosureIDs::MICROFACET_ID: { const char* mem = (const char*)((OSL::ClosureComponent*)cur)->data(); OSL::ustringhash dist_uh = *(OSL::ustringhash*)&mem[0]; @@ -161,7 +166,7 @@ __closesthit__closest_hit_osl() // exceeded. alignas(8) char closure_pool[256]; - ShaderGlobals sg; + OSL_CUDA::ShaderGlobals sg; globals_from_hit(sg); // Pack the "closure pool" into one of the ShaderGlobals pointers @@ -184,7 +189,7 @@ __closesthit__closest_hit_osl() void* interactive_ptr = reinterpret_cast( render_params.interactive_params)[sg.shaderID]; const unsigned int shaderIdx = 2u + sg.shaderID + 0u; - optixDirectCall( + optixDirectCall( shaderIdx, &sg /*shaderglobals_ptr*/, nullptr /*groupdata_ptr*/, nullptr /*userdata_base_ptr*/, nullptr /*output_base_ptr*/, 0 /*shadeindex - unused*/, interactive_ptr /*interactive_params_ptr*/); diff --git a/src/testrender/cuda/wrapper.h b/src/testrender/cuda/wrapper.h deleted file mode 100644 index c939f6064..000000000 --- a/src/testrender/cuda/wrapper.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -#define RAYTRACER_HIT_QUAD 0 -#define RAYTRACER_HIT_SPHERE 1 diff --git a/src/testrender/optics.h b/src/testrender/optics.h index 5a709a065..c8f6ae166 100644 --- a/src/testrender/optics.h +++ b/src/testrender/optics.h @@ -9,7 +9,8 @@ OSL_NAMESPACE_ENTER -inline float + +inline OSL_HOSTDEVICE float fresnel_dielectric(float cosi, float eta) { // special case: ignore fresnel @@ -30,7 +31,7 @@ fresnel_dielectric(float cosi, float eta) return 1.0f; // TIR (no refracted component) } -inline float +inline OSL_HOSTDEVICE float fresnel_refraction(const Vec3& I, const Vec3& N, float eta, Vec3& T) { // compute refracted direction and fresnel term @@ -63,7 +64,7 @@ fresnel_refraction(const Vec3& I, const Vec3& N, float eta, Vec3& T) return 0; } -Color3 +OSL_HOSTDEVICE Color3 fresnel_conductor(float cos_theta, Color3 n, Color3 k) { cos_theta = OIIO::clamp(cos_theta, 0.0f, 1.0f); @@ -89,7 +90,7 @@ fresnel_conductor(float cos_theta, Color3 n, Color3 k) return 0.5f * (rp + rs); } -inline float +inline OSL_HOSTDEVICE float fresnel_schlick(float cos_theta, float F0, float F90) { float x = OIIO::clamp(1.0f - cos_theta, 0.0f, 1.0f); @@ -99,7 +100,7 @@ fresnel_schlick(float cos_theta, float F0, float F90) return OIIO::lerp(F0, F90, x5); } -inline Color3 +inline OSL_HOSTDEVICE Color3 fresnel_generalized_schlick(float cos_theta, Color3 F0, Color3 F90, float exponent) { diff --git a/src/testrender/optixraytracer.cpp b/src/testrender/optixraytracer.cpp index 98c6ff191..59d7f7da0 100644 --- a/src/testrender/optixraytracer.cpp +++ b/src/testrender/optixraytracer.cpp @@ -11,9 +11,11 @@ #include "optixraytracer.h" +#include "cuda/optix_raytracer.h" #include "render_params.h" #include +#include #include #include #include @@ -115,6 +117,10 @@ OptixRaytracer::~OptixRaytracer() { if (m_optix_ctx) OPTIX_CHECK(optixDeviceContextDestroy(m_optix_ctx)); + for (void* ptr : device_ptrs) + cudaFree(ptr); + for (void* arr : array_ptrs) + cudaFreeArray(reinterpret_cast(arr)); } @@ -237,11 +243,13 @@ OptixRaytracer::synch_attributes() podDataSize + sizeof(ustringhash_pod) * numStrings)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_color_system), colorSys, podDataSize, cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_color_system)); CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_osl_printf_buffer), OSL_PRINTF_BUFFER_SIZE)); CUDA_CHECK(cudaMemset(reinterpret_cast(d_osl_printf_buffer), 0, OSL_PRINTF_BUFFER_SIZE)); + device_ptrs.push_back(reinterpret_cast(d_osl_printf_buffer)); // then copy the device string to the end, first strings starting at dataPtr - (numStrings) // FIXME -- Should probably handle alignment better. @@ -309,72 +317,48 @@ OptixRaytracer::create_optix_pg(const OptixProgramGroupDesc* pg_desc, -bool -OptixRaytracer::make_optix_materials() +void +OptixRaytracer::create_modules(State& state) { - // Stand-in: names of shader outputs to preserve - std::vector outputs { "Cout" }; - - std::vector modules; - - // Space for message logging char msg_log[8192]; size_t sizeof_msg_log; - // Make module that contains programs we'll use in this scene - OptixModuleCompileOptions module_compile_options = {}; + // Set the pipeline compile options + state.pipeline_compile_options.traversableGraphFlags + = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY; + state.pipeline_compile_options.usesMotionBlur = false; + state.pipeline_compile_options.numPayloadValues = 5; + state.pipeline_compile_options.numAttributeValues = 3; + state.pipeline_compile_options.exceptionFlags + = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW; + state.pipeline_compile_options.pipelineLaunchParamsVariableName + = "render_params"; - module_compile_options.maxRegisterCount + // Set the module compile options + state.module_compile_options.maxRegisterCount = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT; - module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT; + state.module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT; #if OPTIX_VERSION >= 70400 - module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL; + state.module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL; #else - module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; + state.module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; #endif - OptixPipelineCompileOptions pipeline_compile_options = {}; - - pipeline_compile_options.traversableGraphFlags - = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY; - pipeline_compile_options.usesMotionBlur = false; - pipeline_compile_options.numPayloadValues = 3; - pipeline_compile_options.numAttributeValues = 3; - pipeline_compile_options.exceptionFlags - = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW; - pipeline_compile_options.pipelineLaunchParamsVariableName = "render_params"; - - // Create 'raygen' program - - // Load the renderer CUDA source and generate PTX for it - OptixModule program_module; - load_optix_module("optix_raytracer.ptx", &module_compile_options, - &pipeline_compile_options, &program_module); - - // Record it so we can destroy it later - modules.push_back(program_module); - - OptixModule quad_module; - load_optix_module("quad.ptx", &module_compile_options, - &pipeline_compile_options, &quad_module); - - OptixModule sphere_module; - load_optix_module("sphere.ptx", &module_compile_options, - &pipeline_compile_options, &sphere_module); - - OptixModule wrapper_module; - load_optix_module("wrapper.ptx", &module_compile_options, - &pipeline_compile_options, &wrapper_module); - - OptixModule rend_lib_module; - load_optix_module("rend_lib_testrender.ptx", &module_compile_options, - &pipeline_compile_options, &rend_lib_module); + load_optix_module("optix_raytracer.ptx", &state.module_compile_options, + &state.pipeline_compile_options, &state.program_module); + load_optix_module("quad.ptx", &state.module_compile_options, + &state.pipeline_compile_options, &state.quad_module); + load_optix_module("sphere.ptx", &state.module_compile_options, + &state.pipeline_compile_options, &state.sphere_module); + load_optix_module("wrapper.ptx", &state.module_compile_options, + &state.pipeline_compile_options, &state.wrapper_module); + load_optix_module("rend_lib_testrender.ptx", &state.module_compile_options, + &state.pipeline_compile_options, &state.rend_lib_module); // Retrieve the compiled shadeops PTX const char* shadeops_ptx = nullptr; shadingsys->getattribute("shadeops_cuda_ptx", OSL::TypeDesc::PTR, &shadeops_ptx); - int shadeops_ptx_size = 0; shadingsys->getattribute("shadeops_cuda_ptx_size", OSL::TypeDesc::INT, &shadeops_ptx_size); @@ -382,143 +366,166 @@ OptixRaytracer::make_optix_materials() if (shadeops_ptx == nullptr || shadeops_ptx_size == 0) { errhandler().severefmt( "Could not retrieve PTX for the shadeops library"); - return false; + exit(EXIT_FAILURE); } - // Create the shadeops library program group - OptixModule shadeops_module; + // Create the shadeops module sizeof_msg_log = sizeof(msg_log); - OPTIX_CHECK_MSG(optixModuleCreateFn(m_optix_ctx, &module_compile_options, - &pipeline_compile_options, shadeops_ptx, - shadeops_ptx_size, msg_log, - &sizeof_msg_log, &shadeops_module), - fmtformat("Creating module for shadeops library: {}", - msg_log)); - modules.push_back(shadeops_module); + OPTIX_CHECK_MSG( + optixModuleCreateFn(m_optix_ctx, &state.module_compile_options, + &state.pipeline_compile_options, shadeops_ptx, + shadeops_ptx_size, msg_log, &sizeof_msg_log, + &state.shadeops_module), + fmtformat("Creating module for shadeops library: {}", msg_log)); +} - OptixProgramGroupOptions program_options = {}; - std::vector shader_groups; - // Raygen group - OptixProgramGroupDesc raygen_desc = {}; - raygen_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; - raygen_desc.raygen.module = program_module; - raygen_desc.raygen.entryFunctionName = "__raygen__"; - OptixProgramGroup raygen_group; - create_optix_pg(&raygen_desc, 1, &program_options, &raygen_group); +void +OptixRaytracer::create_programs(State& state) +{ + // Raygen group + { + OptixProgramGroupDesc raygen_desc = {}; + raygen_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; + raygen_desc.raygen.module = state.program_module; + raygen_desc.raygen.entryFunctionName = "__raygen__deferred"; + create_optix_pg(&raygen_desc, 1, &state.program_options, + &state.raygen_group); + } // Set Globals Raygen group - OptixProgramGroupDesc setglobals_raygen_desc = {}; - setglobals_raygen_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; - setglobals_raygen_desc.raygen.module = program_module; - setglobals_raygen_desc.raygen.entryFunctionName = "__raygen__setglobals"; - - OptixProgramGroup setglobals_raygen_group; - sizeof_msg_log = sizeof(msg_log); - OPTIX_CHECK_MSG( - optixProgramGroupCreate(m_optix_ctx, &setglobals_raygen_desc, - 1, // number of program groups - &program_options, // program options - msg_log, &sizeof_msg_log, - &setglobals_raygen_group), - fmtformat("Creating set-globals 'ray-gen' program group: {}", msg_log)); + { + OptixProgramGroupDesc raygen_desc = {}; + raygen_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; + raygen_desc.raygen.module = state.program_module; + raygen_desc.raygen.entryFunctionName = "__raygen__setglobals"; + create_optix_pg(&raygen_desc, 1, &state.program_options, + &state.setglobals_raygen_group); + } // Miss group - OptixProgramGroupDesc miss_desc = {}; - miss_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; - miss_desc.miss.module - = program_module; // raygen file/module contains miss program - miss_desc.miss.entryFunctionName = "__miss__"; - - OptixProgramGroup miss_group; - create_optix_pg(&miss_desc, 1, &program_options, &miss_group); + { + OptixProgramGroupDesc miss_desc = {}; + miss_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; + miss_desc.miss.module + = state.program_module; // raygen file/module contains miss program + miss_desc.miss.entryFunctionName = "__miss__"; + create_optix_pg(&miss_desc, 1, &state.program_options, + &state.miss_group); + } // Set Globals Miss group - OptixProgramGroupDesc setglobals_miss_desc = {}; - setglobals_miss_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; - setglobals_miss_desc.miss.module = program_module; - setglobals_miss_desc.miss.entryFunctionName = "__miss__setglobals"; - OptixProgramGroup setglobals_miss_group; - create_optix_pg(&setglobals_miss_desc, 1, &program_options, - &setglobals_miss_group); + { + OptixProgramGroupDesc setglobals_miss_desc = {}; + setglobals_miss_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; + setglobals_miss_desc.miss.module = state.program_module; + setglobals_miss_desc.miss.entryFunctionName = "__miss__setglobals"; + create_optix_pg(&setglobals_miss_desc, 1, &state.program_options, + &state.setglobals_miss_group); + } // Hitgroup -- quads - OptixProgramGroupDesc quad_hitgroup_desc = {}; - quad_hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; - quad_hitgroup_desc.hitgroup.moduleCH = wrapper_module; - quad_hitgroup_desc.hitgroup.entryFunctionNameCH - = "__closesthit__closest_hit_osl"; - quad_hitgroup_desc.hitgroup.moduleAH = wrapper_module; - quad_hitgroup_desc.hitgroup.entryFunctionNameAH = "__anyhit__any_hit_shadow"; - quad_hitgroup_desc.hitgroup.moduleIS = quad_module; - quad_hitgroup_desc.hitgroup.entryFunctionNameIS = "__intersection__quad"; - OptixProgramGroup quad_hitgroup; - create_optix_pg(&quad_hitgroup_desc, 1, &program_options, &quad_hitgroup); + { + OptixProgramGroupDesc quad_hitgroup_desc = {}; + quad_hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; + quad_hitgroup_desc.hitgroup.moduleCH = state.program_module; + quad_hitgroup_desc.hitgroup.entryFunctionNameCH + = "__closesthit__deferred"; + quad_hitgroup_desc.hitgroup.moduleAH = state.wrapper_module; + quad_hitgroup_desc.hitgroup.entryFunctionNameAH + = "__anyhit__any_hit_shadow"; + quad_hitgroup_desc.hitgroup.moduleIS = state.quad_module; + quad_hitgroup_desc.hitgroup.entryFunctionNameIS = "__intersection__quad"; + create_optix_pg(&quad_hitgroup_desc, 1, &state.program_options, + &state.quad_hit_group); + } // Direct-callable -- renderer-specific support functions for OSL on the device - OptixProgramGroupDesc rend_lib_desc = {}; - rend_lib_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; - rend_lib_desc.callables.moduleDC = rend_lib_module; - rend_lib_desc.callables.entryFunctionNameDC - = "__direct_callable__dummy_rend_lib"; - rend_lib_desc.callables.moduleCC = 0; - rend_lib_desc.callables.entryFunctionNameCC = nullptr; - OptixProgramGroup rend_lib_group; - create_optix_pg(&rend_lib_desc, 1, &program_options, &rend_lib_group); + { + OptixProgramGroupDesc rend_lib_desc = {}; + rend_lib_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + rend_lib_desc.callables.moduleDC = state.rend_lib_module; + rend_lib_desc.callables.entryFunctionNameDC + = "__direct_callable__dummy_rend_lib"; + rend_lib_desc.callables.moduleCC = 0; + rend_lib_desc.callables.entryFunctionNameCC = nullptr; + create_optix_pg(&rend_lib_desc, 1, &state.program_options, + &state.rend_lib_group); + } // Direct-callable -- built-in support functions for OSL on the device - OptixProgramGroupDesc shadeops_desc = {}; - shadeops_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; - shadeops_desc.callables.moduleDC = shadeops_module; - shadeops_desc.callables.entryFunctionNameDC - = "__direct_callable__dummy_shadeops"; - shadeops_desc.callables.moduleCC = 0; - shadeops_desc.callables.entryFunctionNameCC = nullptr; - OptixProgramGroup shadeops_group; - create_optix_pg(&shadeops_desc, 1, &program_options, &shadeops_group); + { + OptixProgramGroupDesc shadeops_desc = {}; + shadeops_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + shadeops_desc.callables.moduleDC = state.shadeops_module; + shadeops_desc.callables.entryFunctionNameDC + = "__direct_callable__dummy_shadeops"; + shadeops_desc.callables.moduleCC = 0; + shadeops_desc.callables.entryFunctionNameCC = nullptr; + create_optix_pg(&shadeops_desc, 1, &state.program_options, + &state.shadeops_group); + } // Direct-callable -- fills in ShaderGlobals for Quads - OptixProgramGroupDesc quad_fillSG_desc = {}; - quad_fillSG_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; - quad_fillSG_desc.callables.moduleDC = quad_module; - quad_fillSG_desc.callables.entryFunctionNameDC - = "__direct_callable__quad_shaderglobals"; - quad_fillSG_desc.callables.moduleCC = 0; - quad_fillSG_desc.callables.entryFunctionNameCC = nullptr; - OptixProgramGroup quad_fillSG_dc; - create_optix_pg(&quad_fillSG_desc, 1, &program_options, &quad_fillSG_dc); + { + OptixProgramGroupDesc quad_fillSG_desc = {}; + quad_fillSG_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + quad_fillSG_desc.callables.moduleDC = state.quad_module; + quad_fillSG_desc.callables.entryFunctionNameDC + = "__direct_callable__quad_shaderglobals"; + quad_fillSG_desc.callables.moduleCC = 0; + quad_fillSG_desc.callables.entryFunctionNameCC = nullptr; + create_optix_pg(&quad_fillSG_desc, 1, &state.program_options, + &state.quad_fillSG_dc_group); + } // Hitgroup -- sphere - OptixProgramGroupDesc sphere_hitgroup_desc = {}; - sphere_hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; - sphere_hitgroup_desc.hitgroup.moduleCH = wrapper_module; - sphere_hitgroup_desc.hitgroup.entryFunctionNameCH - = "__closesthit__closest_hit_osl"; - sphere_hitgroup_desc.hitgroup.moduleAH = wrapper_module; - sphere_hitgroup_desc.hitgroup.entryFunctionNameAH - = "__anyhit__any_hit_shadow"; - sphere_hitgroup_desc.hitgroup.moduleIS = sphere_module; - sphere_hitgroup_desc.hitgroup.entryFunctionNameIS = "__intersection__sphere"; - OptixProgramGroup sphere_hitgroup; - create_optix_pg(&sphere_hitgroup_desc, 1, &program_options, - &sphere_hitgroup); + { + OptixProgramGroupDesc sphere_hitgroup_desc = {}; + sphere_hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; + sphere_hitgroup_desc.hitgroup.moduleCH = state.program_module; + sphere_hitgroup_desc.hitgroup.entryFunctionNameCH + = "__closesthit__deferred"; + sphere_hitgroup_desc.hitgroup.moduleAH = state.wrapper_module; + sphere_hitgroup_desc.hitgroup.entryFunctionNameAH + = "__anyhit__any_hit_shadow"; + sphere_hitgroup_desc.hitgroup.moduleIS = state.sphere_module; + sphere_hitgroup_desc.hitgroup.entryFunctionNameIS + = "__intersection__sphere"; + create_optix_pg(&sphere_hitgroup_desc, 1, &state.program_options, + &state.sphere_hit_group); + } // Direct-callable -- fills in ShaderGlobals for Sphere - OptixProgramGroupDesc sphere_fillSG_desc = {}; - sphere_fillSG_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; - sphere_fillSG_desc.callables.moduleDC = sphere_module; - sphere_fillSG_desc.callables.entryFunctionNameDC - = "__direct_callable__sphere_shaderglobals"; - sphere_fillSG_desc.callables.moduleCC = 0; - sphere_fillSG_desc.callables.entryFunctionNameCC = nullptr; - OptixProgramGroup sphere_fillSG_dc; - create_optix_pg(&sphere_fillSG_desc, 1, &program_options, - &sphere_fillSG_dc); - - // Create materials + { + OptixProgramGroupDesc sphere_fillSG_desc = {}; + sphere_fillSG_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + sphere_fillSG_desc.callables.moduleDC = state.sphere_module; + sphere_fillSG_desc.callables.entryFunctionNameDC + = "__direct_callable__sphere_shaderglobals"; + sphere_fillSG_desc.callables.moduleCC = 0; + sphere_fillSG_desc.callables.entryFunctionNameCC = nullptr; + create_optix_pg(&sphere_fillSG_desc, 1, &state.program_options, + &state.sphere_fillSG_dc_group); + } +} + + + +void +OptixRaytracer::create_shaders(State& state) +{ + // Space for message logging + char msg_log[8192]; + size_t sizeof_msg_log; + + // Stand-in: names of shader outputs to preserve + std::vector outputs { "Cout" }; int mtl_id = 0; + std::vector material_interactive_params; + for (const auto& groupref : shaders()) { std::string group_name, fused_name; shadingsys->getattribute(groupref.get(), "groupname", group_name); @@ -546,7 +553,7 @@ OptixRaytracer::make_optix_materials() if (osl_ptx.empty()) { errhandler().errorfmt("Failed to generate PTX for ShaderGroup {}", group_name); - return false; + exit(EXIT_FAILURE); } if (options.get_int("saveptx")) { @@ -566,14 +573,14 @@ OptixRaytracer::make_optix_materials() // can be executed by the closest hit program in the wrapper sizeof_msg_log = sizeof(msg_log); OPTIX_CHECK_MSG(optixModuleCreateFn(m_optix_ctx, - &module_compile_options, - &pipeline_compile_options, + &state.module_compile_options, + &state.pipeline_compile_options, osl_ptx.c_str(), osl_ptx.size(), msg_log, &sizeof_msg_log, &optix_module), fmtformat("Creating module for PTX group {}: {}", group_name, msg_log)); - modules.push_back(optix_module); + state.shader_modules.push_back(optix_module); // Create program groups (for direct callables) OptixProgramGroupDesc pgDesc[1] = {}; @@ -583,60 +590,70 @@ OptixRaytracer::make_optix_materials() pgDesc[0].callables.moduleCC = 0; pgDesc[0].callables.entryFunctionNameCC = nullptr; - shader_groups.resize(shader_groups.size() + 1); + state.shader_groups.resize(state.shader_groups.size() + 1); sizeof_msg_log = sizeof(msg_log); OPTIX_CHECK_MSG( optixProgramGroupCreate(m_optix_ctx, &pgDesc[0], 1, - &program_options, msg_log, &sizeof_msg_log, - &shader_groups[shader_groups.size() - 1]), + &state.program_options, msg_log, &sizeof_msg_log, + &state.shader_groups[state.shader_groups.size() - 1]), fmtformat("Creating 'shader' group for group {}: {}", group_name, msg_log)); } - OptixPipelineLinkOptions pipeline_link_options; - pipeline_link_options.maxTraceDepth = 1; -#if (OPTIX_VERSION < 70700) - pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; -#endif -#if (OPTIX_VERSION < 70100) - pipeline_link_options.overrideUsesMotionBlur = false; -#endif - - // Set up OptiX pipeline - std::vector final_groups = { rend_lib_group, - raygen_group, miss_group }; + // Upload per-material interactive buffer table + CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_interactive_params), + sizeof(void*) * material_interactive_params.size())); + CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_interactive_params), + material_interactive_params.data(), + sizeof(void*) * material_interactive_params.size(), + cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_interactive_params)); +} - if (scene.quads.size() > 0) - final_groups.push_back(quad_hitgroup); - if (scene.spheres.size() > 0) - final_groups.push_back(sphere_hitgroup); - final_groups.push_back(quad_fillSG_dc); - final_groups.push_back(sphere_fillSG_dc); - // append the shader groups to our "official" list of program groups - // size_t shader_groups_start_index = final_groups.size(); - final_groups.insert(final_groups.end(), shader_groups.begin(), - shader_groups.end()); +void +OptixRaytracer::create_pipeline(State& state) +{ + char msg_log[8192]; + size_t sizeof_msg_log; - // append the program group for the built-in shadeops module - final_groups.push_back(shadeops_group); + // Set the pipeline link options + state.pipeline_link_options.maxTraceDepth = 1; +#if (OPTIX_VERSION < 70700) + state.pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; +#endif +#if (OPTIX_VERSION < 70100) + state.pipeline_link_options.overrideUsesMotionBlur = false; +#endif - // append set-globals groups - final_groups.push_back(setglobals_raygen_group); - final_groups.push_back(setglobals_miss_group); + // Gather all of the program groups + state.final_groups.push_back(state.raygen_group); + state.final_groups.push_back(state.miss_group); + state.final_groups.push_back(state.quad_hit_group); + state.final_groups.push_back(state.sphere_hit_group); + state.final_groups.push_back(state.quad_fillSG_dc_group); + state.final_groups.push_back(state.sphere_fillSG_dc_group); + state.final_groups.push_back(state.rend_lib_group); + state.final_groups.push_back(state.shadeops_group); + state.final_groups.push_back(state.setglobals_raygen_group); + state.final_groups.push_back(state.setglobals_miss_group); + state.final_groups.insert(state.final_groups.end(), + state.shader_groups.begin(), + state.shader_groups.end()); sizeof_msg_log = sizeof(msg_log); - OPTIX_CHECK_MSG(optixPipelineCreate(m_optix_ctx, &pipeline_compile_options, - &pipeline_link_options, - final_groups.data(), - int(final_groups.size()), msg_log, + OPTIX_CHECK_MSG(optixPipelineCreate(m_optix_ctx, + &state.pipeline_compile_options, + &state.pipeline_link_options, + state.final_groups.data(), + int(state.final_groups.size()), msg_log, &sizeof_msg_log, &m_optix_pipeline), fmtformat("Creating optix pipeline: {}", msg_log)); // Set the pipeline stack size OptixStackSizes stack_sizes = {}; - for (OptixProgramGroup& program_group : final_groups) { + for (OptixProgramGroup& program_group : state.final_groups) { #if (OPTIX_VERSION < 70700) OPTIX_CHECK(optixUtilAccumulateStackSizes(program_group, &stack_sizes)); #else @@ -671,129 +688,207 @@ OptixRaytracer::make_optix_materials() direct_callable_stack_size_from_state, continuation_stack_size, max_traversal_depth)); - // Build OptiX Shader Binding Table (SBT) +} + + + +void +OptixRaytracer::create_sbt(State& state) +{ + // Raygen + { + GenericRecord raygen_record; + CUdeviceptr d_raygen_record; + OPTIX_CHECK( + optixSbtRecordPackHeader(state.raygen_group, &raygen_record)); + CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_raygen_record), + sizeof(GenericRecord))); + CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_raygen_record), + &raygen_record, sizeof(GenericRecord), + cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_raygen_record)); + + m_optix_sbt.raygenRecord = d_raygen_record; + } - std::vector sbt_records(final_groups.size()); + // Miss + { + GenericRecord miss_record; + CUdeviceptr d_miss_record; - CUdeviceptr d_raygen_record; - CUdeviceptr d_miss_record; - CUdeviceptr d_hitgroup_records; - CUdeviceptr d_callable_records; - CUdeviceptr d_setglobals_raygen_record; - CUdeviceptr d_setglobals_miss_record; + OPTIX_CHECK(optixSbtRecordPackHeader(state.miss_group, &miss_record)); - std::vector d_sbt_records(final_groups.size()); + CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_miss_record), + sizeof(GenericRecord))); + CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_miss_record), + &miss_record, sizeof(GenericRecord), + cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_miss_record)); - for (size_t i = 0; i < final_groups.size(); i++) { - OPTIX_CHECK(optixSbtRecordPackHeader(final_groups[i], &sbt_records[i])); + m_optix_sbt.missRecordBase = d_miss_record; + m_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord); + m_optix_sbt.missRecordCount = 1; } - int sbtIndex = 3; - const int hitRecordStart = sbtIndex; - size_t setglobals_start = final_groups.size() - 2; + // Hitgroups + { + const bool have_quads = scene.quads.size() > 0; + const bool have_spheres = scene.spheres.size() > 0; + const int num_geom_types = have_quads + have_spheres; + const int num_hit_groups = num_geom_types; + + std::vector hitgroup_records; + CUdeviceptr d_hitgroup_records; + hitgroup_records.reserve(num_hit_groups); + + size_t hg_idx = 0; + if (have_quads) { + OPTIX_CHECK(optixSbtRecordPackHeader(state.quad_hit_group, + &hitgroup_records[hg_idx])); + hitgroup_records[hg_idx].data = reinterpret_cast( + d_quads_list); + hitgroup_records[hg_idx].sbtGeoIndex = 0; + ++hg_idx; + } + + if (have_spheres) { + OPTIX_CHECK(optixSbtRecordPackHeader(state.sphere_hit_group, + &hitgroup_records[hg_idx])); + hitgroup_records[hg_idx].data = reinterpret_cast(d_spheres_list); + hitgroup_records[hg_idx].sbtGeoIndex = 1; + } - // Copy geometry data to appropriate SBT records - if (scene.quads.size() > 0) { - sbt_records[sbtIndex].data = reinterpret_cast(d_quads_list); - sbt_records[sbtIndex].sbtGeoIndex - = 0; // DC index for filling in Quad ShaderGlobals - ++sbtIndex; + // copy to device + CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_hitgroup_records), + num_hit_groups * sizeof(GenericRecord))); + CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_hitgroup_records), + hitgroup_records.data(), + num_hit_groups * sizeof(GenericRecord), + cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_hitgroup_records)); + + m_optix_sbt.hitgroupRecordBase = d_hitgroup_records; + m_optix_sbt.hitgroupRecordStrideInBytes = sizeof(GenericRecord); + m_optix_sbt.hitgroupRecordCount = num_hit_groups; } - if (scene.spheres.size() > 0) { - sbt_records[sbtIndex].data = reinterpret_cast(d_spheres_list); - sbt_records[sbtIndex].sbtGeoIndex - = 1; // DC index for filling in Sphere ShaderGlobals - ++sbtIndex; + // Callable programs + { + const int ncallables = 2; // ShaderGlobals setup for quad & sphere + const int nshaders = int(state.shader_groups.size()); + + std::vector callable_records(ncallables + nshaders); + CUdeviceptr d_callable_records; + OPTIX_CHECK(optixSbtRecordPackHeader(state.quad_fillSG_dc_group, + &callable_records[0])); + callable_records[0].data = reinterpret_cast(d_quads_list); + callable_records[0].sbtGeoIndex = 0; + + OPTIX_CHECK(optixSbtRecordPackHeader(state.sphere_fillSG_dc_group, + &callable_records[1])); + callable_records[1].data = reinterpret_cast(d_spheres_list); + callable_records[1].sbtGeoIndex = 1; + + for (size_t idx = 0; idx < state.shader_groups.size(); ++idx) { + OPTIX_CHECK( + optixSbtRecordPackHeader(state.shader_groups[idx], + &callable_records[ncallables + idx])); + } + + CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_callable_records), + (ncallables + nshaders) * sizeof(GenericRecord))); + CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_callable_records), + callable_records.data(), + (ncallables + nshaders) * sizeof(GenericRecord), + cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_callable_records)); + + m_optix_sbt.callablesRecordBase = d_callable_records; + m_optix_sbt.callablesRecordStrideInBytes = sizeof(GenericRecord); + m_optix_sbt.callablesRecordCount = ncallables + nshaders; + + m_setglobals_optix_sbt.callablesRecordBase = d_callable_records; + m_setglobals_optix_sbt.callablesRecordStrideInBytes = sizeof(GenericRecord); + m_setglobals_optix_sbt.callablesRecordCount = ncallables + nshaders; } - const int callableRecordStart = sbtIndex; - - // Copy geometry data to our DC (direct-callable) funcs that fill ShaderGlobals - sbt_records[sbtIndex++].data = reinterpret_cast(d_quads_list); - sbt_records[sbtIndex++].data = reinterpret_cast(d_spheres_list); - - const int nshaders = int(shader_groups.size()); - const int nhitgroups = (scene.quads.size() > 0) - + (scene.spheres.size() > 0); - - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_raygen_record), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_miss_record), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_hitgroup_records), - nhitgroups * sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_callable_records), - (2 + nshaders) * sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_setglobals_raygen_record), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_setglobals_miss_record), - sizeof(GenericRecord))); - - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_raygen_record), - &sbt_records[1], sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_miss_record), - &sbt_records[2], sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_hitgroup_records), - &sbt_records[hitRecordStart], - nhitgroups * sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_callable_records), - &sbt_records[callableRecordStart], - (2 + nshaders) * sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_setglobals_raygen_record), - &sbt_records[setglobals_start + 0], - sizeof(GenericRecord), cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_setglobals_miss_record), - &sbt_records[setglobals_start + 1], - sizeof(GenericRecord), cudaMemcpyHostToDevice)); - - // Looks like OptixShadingTable needs to be filled out completely - m_optix_sbt.raygenRecord = d_raygen_record; - m_optix_sbt.missRecordBase = d_miss_record; - m_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord); - m_optix_sbt.missRecordCount = 1; - m_optix_sbt.hitgroupRecordBase = d_hitgroup_records; - m_optix_sbt.hitgroupRecordStrideInBytes = sizeof(GenericRecord); - m_optix_sbt.hitgroupRecordCount = nhitgroups; - m_optix_sbt.callablesRecordBase = d_callable_records; - m_optix_sbt.callablesRecordStrideInBytes = sizeof(GenericRecord); - m_optix_sbt.callablesRecordCount = 2 + nshaders; - - // Shader binding table for SetGlobals stage - m_setglobals_optix_sbt = {}; - m_setglobals_optix_sbt.raygenRecord = d_setglobals_raygen_record; - m_setglobals_optix_sbt.missRecordBase = d_setglobals_miss_record; - m_setglobals_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord); - m_setglobals_optix_sbt.missRecordCount = 1; + // SetGlobals raygen + { + GenericRecord record; + CUdeviceptr d_setglobals_raygen_record; + OPTIX_CHECK( + optixSbtRecordPackHeader(state.setglobals_raygen_group, &record)); + CUDA_CHECK( + cudaMalloc(reinterpret_cast(&d_setglobals_raygen_record), + sizeof(GenericRecord))); + CUDA_CHECK( + cudaMemcpy(reinterpret_cast(d_setglobals_raygen_record), + &record, sizeof(GenericRecord), cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_setglobals_raygen_record)); + + m_setglobals_optix_sbt.raygenRecord = d_setglobals_raygen_record; + } + + // SetGlobals miss + { + GenericRecord record; + CUdeviceptr d_setglobals_miss_record; + OPTIX_CHECK( + optixSbtRecordPackHeader(state.setglobals_miss_group, &record)); + CUDA_CHECK( + cudaMalloc(reinterpret_cast(&d_setglobals_miss_record), + sizeof(GenericRecord))); + CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_setglobals_miss_record), + &record, sizeof(GenericRecord), + cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_setglobals_miss_record)); + + m_setglobals_optix_sbt.missRecordBase = d_setglobals_miss_record; + m_setglobals_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord); + m_setglobals_optix_sbt.missRecordCount = 1; + } +} + - // Upload per-material interactive buffer table - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_interactive_params), - sizeof(void*) * material_interactive_params.size())); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_interactive_params), - material_interactive_params.data(), - sizeof(void*) * material_interactive_params.size(), - cudaMemcpyHostToDevice)); - // Pipeline has been created so we can clean some things up - for (auto&& i : final_groups) { +void +OptixRaytracer::cleanup_programs(State& state) +{ + for (auto&& i : state.final_groups) { optixProgramGroupDestroy(i); } - for (auto&& i : modules) { + for (auto&& i : state.shader_modules) { optixModuleDestroy(i); } - modules.clear(); + state.shader_modules.clear(); + + optixModuleDestroy(state.program_module); + optixModuleDestroy(state.quad_module); + optixModuleDestroy(state.sphere_module); + optixModuleDestroy(state.wrapper_module); + optixModuleDestroy(state.rend_lib_module); + optixModuleDestroy(state.shadeops_module); +} + + +bool +OptixRaytracer::make_optix_materials() +{ + State state; + create_modules(state); + create_programs(state); + create_shaders(state); + create_pipeline(state); + create_sbt(state); + cleanup_programs(state); return true; } -bool -OptixRaytracer::finalize_scene() +void +OptixRaytracer::build_accel() { // Build acceleration structures OptixAccelBuildOptions accelOptions; @@ -813,13 +908,14 @@ OptixRaytracer::finalize_scene() quadsParams.reserve(scene.quads.size()); std::vector quadShaders; quadShaders.reserve(scene.quads.size()); + int objID = static_cast(scene.spheres.size()); for (const auto& quad : scene.quads) { OptixAabb aabb; quad.getBounds(aabb.minX, aabb.minY, aabb.minZ, aabb.maxX, aabb.maxY, aabb.maxZ); quadsAabb.push_back(aabb); QuadParams quad_params; - quad.setOptixVariables(&quad_params); + quad.setOptixVariables(&quad_params, objID++); quadsParams.push_back(quad_params); } // Copy Quads bounding boxes to cuda device @@ -828,6 +924,7 @@ OptixRaytracer::finalize_scene() CUDA_CHECK(cudaMemcpy(d_quadsAabb, quadsAabb.data(), sizeof(OptixAabb) * scene.quads.size(), cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_quadsAabb)); // Copy Quads to cuda device CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_quads_list), @@ -836,12 +933,9 @@ OptixRaytracer::finalize_scene() quadsParams.data(), sizeof(QuadParams) * scene.quads.size(), cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_quads_list)); // Fill in Quad shaders - CUdeviceptr d_quadsIndexOffsetBuffer; - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_quadsIndexOffsetBuffer), - scene.quads.size() * sizeof(int))); - int numBuildInputs = 0; unsigned int quadSbtRecord; @@ -862,7 +956,7 @@ OptixRaytracer::finalize_scene() quadsInput.numSbtRecords = 1; quadsInput.sbtIndexOffsetSizeInBytes = sizeof(int); quadsInput.sbtIndexOffsetStrideInBytes = sizeof(int); - quadsInput.sbtIndexOffsetBuffer = 0; // d_quadsIndexOffsetBuffer; + quadsInput.sbtIndexOffsetBuffer = 0; ++numBuildInputs; } @@ -874,6 +968,7 @@ OptixRaytracer::finalize_scene() spheresParams.reserve(scene.spheres.size()); std::vector sphereShaders; sphereShaders.reserve(scene.spheres.size()); + objID = 0; for (const auto& sphere : scene.spheres) { OptixAabb aabb; sphere.getBounds(aabb.minX, aabb.minY, aabb.minZ, aabb.maxX, aabb.maxY, @@ -881,7 +976,7 @@ OptixRaytracer::finalize_scene() spheresAabb.push_back(aabb); SphereParams sphere_params; - sphere.setOptixVariables(&sphere_params); + sphere.setOptixVariables(&sphere_params, objID++); spheresParams.push_back(sphere_params); } // Copy Spheres bounding boxes to cuda device @@ -890,6 +985,7 @@ OptixRaytracer::finalize_scene() CUDA_CHECK(cudaMemcpy(d_spheresAabb, spheresAabb.data(), sizeof(OptixAabb) * scene.spheres.size(), cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_spheresAabb)); // Copy Spheres to cuda device CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_spheres_list), @@ -898,12 +994,9 @@ OptixRaytracer::finalize_scene() spheresParams.data(), sizeof(SphereParams) * scene.spheres.size(), cudaMemcpyHostToDevice)); + device_ptrs.push_back(reinterpret_cast(d_spheres_list)); // Fill in Sphere shaders - CUdeviceptr d_spheresIndexOffsetBuffer; - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_spheresIndexOffsetBuffer), - scene.spheres.size() * sizeof(int))); - unsigned int sphereSbtRecord; sphereSbtRecord = OPTIX_GEOMETRY_FLAG_NONE; if (scene.spheres.size() > 0) { @@ -923,7 +1016,7 @@ OptixRaytracer::finalize_scene() spheresInput.numSbtRecords = 1; spheresInput.sbtIndexOffsetSizeInBytes = sizeof(int); spheresInput.sbtIndexOffsetStrideInBytes = sizeof(int); - spheresInput.sbtIndexOffsetBuffer = 0; // d_spheresIndexOffsetBuffer; + spheresInput.sbtIndexOffsetBuffer = 0; ++numBuildInputs; } @@ -954,13 +1047,42 @@ OptixRaytracer::finalize_scene() CUDA_CHECK(cudaMemcpy((void*)&h_aabb, reinterpret_cast(d_aabb), sizeof(OptixAabb), cudaMemcpyDeviceToHost)); cudaFree(d_aabb); + cudaFree(d_temp); + + // We need to free the output buffer after rendering + device_ptrs.push_back(d_output); // Sanity check the AS bounds // printf ("AABB min: [%0.6f, %0.6f, %0.6f], max: [%0.6f, %0.6f, %0.6f]\n", // h_aabb.minX, h_aabb.minY, h_aabb.minZ, // h_aabb.maxX, h_aabb.maxY, h_aabb.maxZ ); +} + + + +void +OptixRaytracer::prepare_background() +{ + if (getBackgroundShaderID() >= 0) { + const int bg_res = std::max(32, getBackgroundResolution()); + CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_bg_values), 3 * sizeof(float) * bg_res * bg_res)); + CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_bg_rows), sizeof(float) * bg_res)); + CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_bg_cols), sizeof(float) * bg_res * bg_res)); + + device_ptrs.push_back(reinterpret_cast(d_bg_values)); + device_ptrs.push_back(reinterpret_cast(d_bg_rows)); + device_ptrs.push_back(reinterpret_cast(d_bg_cols)); + } +} + + +bool +OptixRaytracer::finalize_scene() +{ + build_accel(); make_optix_materials(); + prepare_background(); return true; } @@ -986,7 +1108,6 @@ OptixRaytracer::get_texture_handle(ustring filename, { auto itr = m_samplers.find(filename); if (itr == m_samplers.end()) { - // Open image OIIO::ImageBuf image; if (!image.init_spec(filename, 0, 0)) { errhandler().errorfmt("Could not load: {} (hash {})", filename, @@ -994,42 +1115,75 @@ OptixRaytracer::get_texture_handle(ustring filename, return (TextureHandle*)nullptr; } - OIIO::ROI roi = OIIO::get_roi_full(image.spec()); - int32_t width = roi.width(), height = roi.height(); - std::vector pixels(width * height * 4); - - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - image.getpixel(i, j, 0, &pixels[((j * width) + i) * 4 + 0]); - } - } - cudaResourceDesc res_desc = {}; + int32_t nmiplevels = image.nmiplevels(); + int32_t img_width = image.xmax() + 1; + int32_t img_height = image.ymax() + 1; // hard-code textures to 4 channels - int32_t pitch = width * 4 * sizeof(float); cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); - cudaArray_t pixelArray; - CUDA_CHECK(cudaMallocArray(&pixelArray, &channel_desc, width, height)); + cudaMipmappedArray_t mipmapArray; + cudaExtent extent = make_cudaExtent(img_width, img_height, 0); + CUDA_CHECK(cudaMallocMipmappedArray(&mipmapArray, &channel_desc, extent, + nmiplevels)); + + // Copy the pixel data for each mip level + std::vector> level_pixels(nmiplevels); + for (int32_t level = 0; level < nmiplevels; ++level) { + image.reset(filename, 0, level); + OIIO::ROI roi = OIIO::get_roi_full(image.spec()); + if (!roi.defined()) { + errhandler().errorfmt( + "Could not load mip level {}: {} (hash {})", level, + filename, filename); + return (TextureHandle*)nullptr; + } - CUDA_CHECK(cudaMemcpy2DToArray(pixelArray, 0, 0, pixels.data(), pitch, - pitch, height, cudaMemcpyHostToDevice)); + int32_t width = roi.width(), height = roi.height(); + level_pixels[level].resize(width * height * 4); + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + image.getpixel(i, j, 0, + &level_pixels[level][((j * width) + i) * 4]); + } + } - res_desc.resType = cudaResourceTypeArray; - res_desc.res.array.array = pixelArray; + cudaArray_t miplevelArray; + CUDA_CHECK( + cudaGetMipmappedArrayLevel(&miplevelArray, mipmapArray, level)); - cudaTextureDesc tex_desc = {}; - tex_desc.addressMode[0] = cudaAddressModeWrap; - tex_desc.addressMode[1] = cudaAddressModeWrap; - tex_desc.filterMode = cudaFilterModeLinear; - tex_desc.readMode - = cudaReadModeElementType; //cudaReadModeNormalizedFloat; + // Copy the texel data into the miplevel array + int32_t pitch = width * 4 * sizeof(float); + CUDA_CHECK(cudaMemcpy2DToArray(miplevelArray, 0, 0, + level_pixels[level].data(), pitch, + pitch, height, + cudaMemcpyHostToDevice)); + } + + int32_t pitch = img_width * 4 * sizeof(float); + cudaArray_t pixelArray; + CUDA_CHECK( + cudaMallocArray(&pixelArray, &channel_desc, img_width, img_height)); + CUDA_CHECK(cudaMemcpy2DToArray(pixelArray, 0, 0, level_pixels[0].data(), + pitch, pitch, img_height, + cudaMemcpyHostToDevice)); + array_ptrs.push_back(reinterpret_cast(pixelArray)); + + cudaResourceDesc res_desc = {}; + res_desc.resType = cudaResourceTypeMipmappedArray; + res_desc.res.mipmap.mipmap = mipmapArray; + + cudaTextureDesc tex_desc = {}; + tex_desc.addressMode[0] = cudaAddressModeWrap; + tex_desc.addressMode[1] = cudaAddressModeWrap; + tex_desc.filterMode = cudaFilterModeLinear; + tex_desc.readMode = cudaReadModeElementType; tex_desc.normalizedCoords = 1; tex_desc.maxAnisotropy = 1; - tex_desc.maxMipmapLevelClamp = 99; + tex_desc.maxMipmapLevelClamp = float(nmiplevels - 1); tex_desc.minMipmapLevelClamp = 0; - tex_desc.mipmapFilterMode = cudaFilterModePoint; + tex_desc.mipmapFilterMode = cudaFilterModeLinear; tex_desc.borderColor[0] = 1.0f; tex_desc.sRGB = 0; @@ -1076,10 +1230,17 @@ OptixRaytracer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED) xres * yres * 4 * sizeof(float))); CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_launch_params), sizeof(RenderParams))); + device_ptrs.push_back(reinterpret_cast(d_output_buffer)); + device_ptrs.push_back(reinterpret_cast(d_launch_params)); m_xres = xres; m_yres = yres; + const int aa = std::max(1, options.get_int("aa")); + const int max_bounces = options.get_int("max_bounces"); + const float show_albedo_scale = options.get_float("show_albedo_scale"); + const bool no_jitter = options.get_int("no_jitter"); + RenderParams params; params.eye.x = camera.eye.x; params.eye.y = camera.eye.y; @@ -1087,14 +1248,14 @@ OptixRaytracer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED) params.dir.x = camera.dir.x; params.dir.y = camera.dir.y; params.dir.z = camera.dir.z; - params.cx.x = camera.cx.x; - params.cx.y = camera.cx.y; - params.cx.z = camera.cx.z; - params.cy.x = camera.cy.x; - params.cy.y = camera.cy.y; - params.cy.z = camera.cy.z; - params.invw = 1.0f / m_xres; - params.invh = 1.0f / m_yres; + params.up.x = camera.up.x; + params.up.y = camera.up.y; + params.up.z = camera.up.z; + params.fov = camera.fov; + params.aa = aa; + params.max_bounces = max_bounces; + params.show_albedo_scale = show_albedo_scale; + params.no_jitter = no_jitter; params.interactive_params = d_interactive_params; params.output_buffer = d_output_buffer; params.traversal_handle = m_travHandle; @@ -1104,13 +1265,24 @@ OptixRaytracer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED) params.color_system = d_color_system; params.test_str_1 = test_str_1; params.test_str_2 = test_str_2; + params.num_quads = scene.quads.size(); + params.quads_buffer = d_quads_list; + params.num_spheres = scene.spheres.size(); + params.spheres_buffer = d_spheres_list; + + // For the background shader + params.bg_res = std::max(32, getBackgroundResolution()); + params.bg_id = getBackgroundShaderID(); + params.bg_values = d_bg_values; + params.bg_rows = d_bg_rows; + params.bg_cols = d_bg_cols; CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_launch_params), ¶ms, sizeof(RenderParams), cudaMemcpyHostToDevice)); // Set up global variables OPTIX_CHECK(optixLaunch(m_optix_pipeline, m_cuda_stream, d_launch_params, - sizeof(RenderParams), &m_setglobals_optix_sbt, 1, 1, + sizeof(RenderParams), &m_setglobals_optix_sbt, 32, 1, 1)); CUDA_SYNC_CHECK(); @@ -1254,8 +1426,14 @@ void OptixRaytracer::clear() { SimpleRaytracer::clear(); - OPTIX_CHECK(optixDeviceContextDestroy(m_optix_ctx)); - m_optix_ctx = 0; + if (m_optix_pipeline) { + OPTIX_CHECK(optixPipelineDestroy(m_optix_pipeline)); + m_optix_pipeline = 0; + } + if (m_optix_ctx) { + OPTIX_CHECK(optixDeviceContextDestroy(m_optix_ctx)); + m_optix_ctx = 0; + } } OSL_NAMESPACE_EXIT diff --git a/src/testrender/optixraytracer.h b/src/testrender/optixraytracer.h index b331a8422..4c73f762b 100644 --- a/src/testrender/optixraytracer.h +++ b/src/testrender/optixraytracer.h @@ -14,7 +14,37 @@ #include "render_params.h" #include "simpleraytracer.h" -OSL_NAMESPACE_ENTER +OSL_NAMESPACE_ENTER; + + +struct State { + OptixModuleCompileOptions module_compile_options = {}; + OptixPipelineCompileOptions pipeline_compile_options = {}; + OptixPipelineLinkOptions pipeline_link_options = {}; + OptixProgramGroupOptions program_options = {}; + + OptixModule program_module; + OptixModule quad_module; + OptixModule sphere_module; + OptixModule wrapper_module; + OptixModule rend_lib_module; + OptixModule shadeops_module; + + OptixProgramGroup raygen_group; + OptixProgramGroup miss_group; + OptixProgramGroup rend_lib_group; + OptixProgramGroup shadeops_group; + OptixProgramGroup setglobals_raygen_group; + OptixProgramGroup setglobals_miss_group; + OptixProgramGroup quad_hit_group; + OptixProgramGroup sphere_hit_group; + OptixProgramGroup quad_fillSG_dc_group; + OptixProgramGroup sphere_fillSG_dc_group; + + std::vector shader_modules; + std::vector shader_groups; + std::vector final_groups; +}; class OptixRaytracer final : public SimpleRaytracer { @@ -37,6 +67,8 @@ class OptixRaytracer final : public SimpleRaytracer { bool init_optix_context(int xres, int yres); bool make_optix_materials(); + void build_accel(); + void prepare_background(); bool finalize_scene(); void prepare_render() override; void warmup() override; @@ -44,6 +76,13 @@ class OptixRaytracer final : public SimpleRaytracer { void finalize_pixel_buffer() override; void clear() override; + void create_modules(State& state); + void create_programs(State& state); + void create_shaders(State& state); + void create_pipeline(State& state); + void create_sbt(State& state); + void cleanup_programs(State& state); + /// Return true if the texture handle (previously returned by /// get_texture_handle()) is a valid texture that can be subsequently /// read or sampled. @@ -80,7 +119,11 @@ class OptixRaytracer final : public SimpleRaytracer { CUdeviceptr d_quads_list = 0; CUdeviceptr d_spheres_list = 0; CUdeviceptr d_interactive_params = 0; - int m_xres, m_yres; + CUdeviceptr d_bg_values = 0; + CUdeviceptr d_bg_rows = 0; + CUdeviceptr d_bg_cols = 0; + int m_xres = 0; + int m_yres = 0; CUdeviceptr d_osl_printf_buffer; CUdeviceptr d_color_system; uint64_t test_str_1; @@ -98,6 +141,9 @@ class OptixRaytracer final : public SimpleRaytracer { std::string m_materials_ptx; std::unordered_map m_samplers; + + std::vector device_ptrs; + std::vector array_ptrs; }; diff --git a/src/testrender/raytracer.h b/src/testrender/raytracer.h index 5308f1870..1d912fadc 100644 --- a/src/testrender/raytracer.h +++ b/src/testrender/raytracer.h @@ -20,6 +20,11 @@ # include // from CUDA #endif +#ifdef __CUDACC__ +# include "cuda/rend_lib.h" +# include "cuda/vec_math.h" +#endif + // The primitives don't included the intersection routines, etc., from the // versions in testrender, since those operations are performed on the GPU. // @@ -32,7 +37,7 @@ class OptixRenderer; // FIXME -- should not be here // build two vectors orthogonal to the first, assumes n is normalized -inline void +inline OSL_HOSTDEVICE void ortho(const Vec3& n, Vec3& x, Vec3& y) { x = (fabsf(n.x) > .01f ? Vec3(n.z, 0, -n.x) : Vec3(0, -n.z, n.y)) @@ -41,7 +46,6 @@ ortho(const Vec3& n, Vec3& x, Vec3& y) } -// Note: not used in OptiX mode struct Ray { enum RayType { CAMERA = 1, @@ -54,6 +58,7 @@ struct Ray { DISPLACEMENT = 128 }; + OSL_HOSTDEVICE Ray(const Vec3& o, const Vec3& d, float radius, float spread, RayType raytype) : origin(o) @@ -64,7 +69,9 @@ struct Ray { { } + OSL_HOSTDEVICE Vec3 point(float t) const { return origin + direction * t; } + Dual2 dual_direction() const { Dual2 v; @@ -75,6 +82,7 @@ struct Ray { return v; } + OSL_HOSTDEVICE Dual2 point(Dual2 t) const { const float r = radius + spread * t.val(); @@ -94,9 +102,10 @@ struct Ray { struct Camera { - Camera() {} + OSL_HOSTDEVICE Camera() {} // Set where the camera sits and looks at. + OSL_HOSTDEVICE void lookat(const Vec3& eye, const Vec3& dir, const Vec3& up, float fov) { this->eye = eye; @@ -107,6 +116,7 @@ struct Camera { } // Set resolution + OSL_HOSTDEVICE void resolution(int w, int h) { xres = w; @@ -117,6 +127,7 @@ struct Camera { } // Compute all derived values based on camera parameters. + OSL_HOSTDEVICE void finalize() { float k = OIIO::fast_tan(fov * float(M_PI / 360)); @@ -126,10 +137,21 @@ struct Camera { } // Get a ray for the given screen coordinates. + OSL_HOSTDEVICE Ray get(float x, float y) const { + // TODO: On CUDA devices, the normalize() operation can result in vector + // components with magnitudes slightly greater than 1.0, which can cause + // downstream computations to blow up and produce NaNs. Normalizing the + // vector again avoids this issue. const Vec3 v = (cx * (x * invw - 0.5f) + cy * (0.5f - y * invh) + dir) +#ifndef __CUDACC__ .normalize(); +#else + .normalize() + .normalized(); +#endif + const float cos_a = dir.dot(v); const float spread = sqrtf(invw * invh * cx.length() * cy.length() * cos_a) * cos_a; @@ -157,18 +179,22 @@ struct Camera { struct Primitive { + OSL_HOSTDEVICE Primitive(int shaderID, bool isLight) : shaderID(shaderID), isLight(isLight) { } + +#ifndef __CUDACC__ virtual ~Primitive() {} +#endif int shaderid() const { return shaderID; } bool islight() const { return isLight; } void getBounds(float& minx, float& miny, float& minz, float& maxx, float& maxy, float& maxz) const; -#if OSL_USE_OPTIX - virtual void setOptixVariables(void* data) const = 0; +#if defined(OSL_USE_OPTIX) && !defined(__CUDACC__) + virtual void setOptixVariables(void* data, int objID) const = 0; #endif private: @@ -178,6 +204,7 @@ struct Primitive { struct Sphere final : public Primitive { + OSL_HOSTDEVICE Sphere(Vec3 c, float r, int shaderID, bool isLight) : Primitive(shaderID, isLight), c(c), r(r), r2(r * r) { @@ -196,6 +223,7 @@ struct Sphere final : public Primitive { } // returns distance to nearest hit or 0 + OSL_HOSTDEVICE Dual2 intersect(const Ray& r, bool self) const { Dual2 oc = c - r.origin; @@ -217,6 +245,7 @@ struct Sphere final : public Primitive { Dual2 normal(const Dual2& p) const { return normalize(p - c); } + OSL_HOSTDEVICE Dual2 uv(const Dual2& /*p*/, const Dual2& n, Vec3& dPdu, Vec3& dPdv) const { @@ -254,6 +283,7 @@ struct Sphere final : public Primitive { } // return a direction towards a point on the sphere + OSL_HOSTDEVICE Vec3 sample(const Vec3& x, float xi, float yi, float& pdf) const { const float TWOPI = float(2 * M_PI); @@ -270,6 +300,7 @@ struct Sphere final : public Primitive { return (su * (cp * sin_a) + sv * (sp * sin_a) + sw * cos_a).normalize(); } + OSL_HOSTDEVICE float shapepdf(const Vec3& x, const Vec3& /*p*/) const { const float TWOPI = float(2 * M_PI); @@ -278,14 +309,17 @@ struct Sphere final : public Primitive { return 1 / (TWOPI * (1 - cmax)); } -#if OSL_USE_OPTIX - virtual void setOptixVariables(void* data) const +#if defined(OSL_USE_OPTIX) && !defined(__CUDACC__) + virtual void setOptixVariables(void* data, int objID) const { SphereParams* sphere_data = reinterpret_cast(data); sphere_data->c = make_float3(c.x, c.y, c.z); + sphere_data->r = r; sphere_data->r2 = r2; - sphere_data->a = M_PI * (r2 * r2); + sphere_data->a = M_PI * r2; sphere_data->shaderID = shaderid(); + sphere_data->isLight = islight(); + sphere_data->objID = objID; } #endif @@ -297,6 +331,7 @@ struct Sphere final : public Primitive { struct Quad final : public Primitive { + OSL_HOSTDEVICE Quad(const Vec3& p, const Vec3& ex, const Vec3& ey, int shaderID, bool isLight) : Primitive(shaderID, isLight), p(p), ex(ex), ey(ey) @@ -324,6 +359,7 @@ struct Quad final : public Primitive { } // returns distance to nearest hit or 0 + OSL_HOSTDEVICE Dual2 intersect(const Ray& r, bool self) const { if (self) @@ -348,6 +384,7 @@ struct Quad final : public Primitive { return Dual2(n, Vec3(0, 0, 0), Vec3(0, 0, 0)); } + OSL_HOSTDEVICE Dual2 uv(const Dual2& p, const Dual2& /*n*/, Vec3& dPdu, Vec3& dPdv) const { @@ -360,6 +397,7 @@ struct Quad final : public Primitive { } // return a direction towards a point on the sphere + OSL_HOSTDEVICE Vec3 sample(const Vec3& x, float xi, float yi, float& pdf) const { Vec3 l = (p + xi * ex + yi * ey) - x; @@ -369,6 +407,7 @@ struct Quad final : public Primitive { return dir; } + OSL_HOSTDEVICE float shapepdf(const Vec3& x, const Vec3& p) const { Vec3 l = p - x; @@ -377,8 +416,8 @@ struct Quad final : public Primitive { return d2 / (a * fabsf(dir.dot(n))); } -#if OSL_USE_OPTIX - virtual void setOptixVariables(void* data) const +#if defined(OSL_USE_OPTIX) && !defined(__CUDACC__) + virtual void setOptixVariables(void* data, int objID) const { QuadParams* quad_data = reinterpret_cast(data); quad_data->p = make_float3(p.x, p.y, p.z); @@ -389,6 +428,8 @@ struct Quad final : public Primitive { quad_data->ev = ev; quad_data->a = a; quad_data->shaderID = shaderid(); + quad_data->isLight = islight(); + quad_data->objID = objID; } #endif @@ -406,7 +447,8 @@ struct Scene { int num_prims() const { return spheres.size() + quads.size(); } - bool intersect(const Ray& r, Dual2& t, int& primID) const + bool intersect(const Ray& r, Dual2& t, int& primID, + const void* sg = nullptr) const { const int ns = spheres.size(); const int nq = quads.size(); diff --git a/src/testrender/render_params.h b/src/testrender/render_params.h index faba736c2..85e1fb159 100644 --- a/src/testrender/render_params.h +++ b/src/testrender/render_params.h @@ -15,11 +15,12 @@ struct RenderParams { float3 eye; float3 dir; - float3 cx; - float3 cy; - - float invw; - float invh; + float3 up; + float fov; + int aa; + int max_bounces; + float show_albedo_scale; + bool no_jitter; CUdeviceptr traversal_handle; CUdeviceptr output_buffer; @@ -38,19 +39,34 @@ struct RenderParams { // for used-data tests uint64_t test_str_1; uint64_t test_str_2; + + uint64_t num_spheres; + uint64_t num_quads; + CUdeviceptr spheres_buffer; + CUdeviceptr quads_buffer; + + // for the background + int bg_res; + int bg_id; + CUdeviceptr bg_values; + CUdeviceptr bg_rows; + CUdeviceptr bg_cols; }; struct PrimitiveParams { float a; // area - unsigned int shaderID; + int32_t shaderID; + int32_t objID; + bool isLight; }; struct SphereParams : PrimitiveParams { float3 c; // center + float r; // radius float r2; // radius ^2 }; diff --git a/src/testrender/sampling.h b/src/testrender/sampling.h index 3266e72d9..5f1a7bc47 100644 --- a/src/testrender/sampling.h +++ b/src/testrender/sampling.h @@ -14,7 +14,7 @@ OSL_NAMESPACE_ENTER struct TangentFrame { // build frame from unit normal - TangentFrame(const Vec3& n) : w(n) + OSL_HOSTDEVICE TangentFrame(const Vec3& n) : w(n) { u = (fabsf(w.x) > .01f ? Vec3(w.z, 0, -w.x) : Vec3(0, -w.z, w.y)) .normalize(); @@ -22,25 +22,25 @@ struct TangentFrame { } // build frame from unit normal and unit tangent - TangentFrame(const Vec3& n, const Vec3& t) : w(n) + OSL_HOSTDEVICE TangentFrame(const Vec3& n, const Vec3& t) : w(n) { v = w.cross(t); u = v.cross(w); } // transform vector - Vec3 get(float x, float y, float z) const { return x * u + y * v + z * w; } + OSL_HOSTDEVICE Vec3 get(float x, float y, float z) const { return x * u + y * v + z * w; } // untransform vector - float getx(const Vec3& a) const { return a.dot(u); } - float gety(const Vec3& a) const { return a.dot(v); } - float getz(const Vec3& a) const { return a.dot(w); } + OSL_HOSTDEVICE float getx(const Vec3& a) const { return a.dot(u); } + OSL_HOSTDEVICE float gety(const Vec3& a) const { return a.dot(v); } + OSL_HOSTDEVICE float getz(const Vec3& a) const { return a.dot(w); } - Vec3 tolocal(const Vec3& a) const + OSL_HOSTDEVICE Vec3 tolocal(const Vec3& a) const { return Vec3(a.dot(u), a.dot(v), a.dot(w)); } - Vec3 toworld(const Vec3& a) const { return get(a.x, a.y, a.z); } + OSL_HOSTDEVICE Vec3 toworld(const Vec3& a) const { return get(a.x, a.y, a.z); } private: Vec3 u, v, w; @@ -49,7 +49,8 @@ struct TangentFrame { struct Sampling { /// Warp the unit disk onto the unit sphere /// http://psgraphics.blogspot.com/2011/01/improved-code-for-concentric-map.html - static void to_unit_disk(float& x, float& y) + static OSL_HOSTDEVICE void + to_unit_disk(float& x, float& y) { const float PI_OVER_4 = float(M_PI_4); const float PI_OVER_2 = float(M_PI_2); @@ -71,8 +72,9 @@ struct Sampling { y *= r; } - static void sample_cosine_hemisphere(const Vec3& N, float rndx, float rndy, - Vec3& out, float& pdf) + static OSL_HOSTDEVICE void + sample_cosine_hemisphere(const Vec3& N, float rndx, float rndy, + Vec3& out, float& pdf) { to_unit_disk(rndx, rndy); float cos_theta = sqrtf(std::max(1 - rndx * rndx - rndy * rndy, 0.0f)); @@ -81,8 +83,9 @@ struct Sampling { pdf = cos_theta * float(M_1_PI); } - static void sample_uniform_hemisphere(const Vec3& N, float rndx, float rndy, - Vec3& out, float& pdf) + static OSL_HOSTDEVICE void + sample_uniform_hemisphere(const Vec3& N, float rndx, float rndy, + Vec3& out, float& pdf) { float phi = float(2 * M_PI) * rndx; float cos_theta = rndy; @@ -107,7 +110,8 @@ struct MIS { // Centralizing the handling of the pdfs this way ensures that all numerical // cases can be enumerated and handled robustly without arbitrary epsilons. template - static inline float power_heuristic(float sampled_pdf, float other_pdf) + static inline OSL_HOSTDEVICE float + power_heuristic(float sampled_pdf, float other_pdf) { // NOTE: inf is ok! assert(sampled_pdf >= 0); @@ -148,14 +152,25 @@ struct MIS { // such as a BRDF mixture. This updates a (weight, pdf) pair with a new one // to represent the sum of both. b is the probability of choosing the provided // weight. A running sum should be started with a weight and pdf of 0. - static inline void update_eval(Color3* w, float* pdf, Color3 ow, float opdf, - float b) + static inline OSL_HOSTDEVICE void + update_eval(Color3* w, float* pdf, Color3 ow, float opdf, + float b) { + // TODO: Need to handle these cases for CUDA. Clamping might be okay if the + // values are only slightly out of range + +#ifdef __CUDACC__ + // Check for those pesky NaNs + assert(*pdf == *pdf); + assert(b == b); + assert(opdf == opdf); +#endif + // NOTE: inf is ok! - assert(*pdf >= 0); - assert(opdf >= 0); - assert(b >= 0); - assert(b <= 1); + assert(*pdf >= 0.0f); + assert(opdf >= 0.0f); + assert(b >= 0.0f); + assert(b <= 1.0f); // make sure 1 / b is not inf // note that if the weight has components > 1 ow can still overflow, but @@ -182,6 +197,7 @@ struct MIS { // "Practical Hash-based Owen Scrambling" - Brent Burley - JCGT 2020 // https://jcgt.org/published/0009/04/01/ struct Sampler { + OSL_HOSTDEVICE Sampler(int px, int py, int si) : seed(((px & 2047) << 22) | ((py & 2047) << 11)) , index(reversebits(si)) @@ -189,6 +205,7 @@ struct Sampler { assert(si < (1 << 24)); } + OSL_HOSTDEVICE Vec3 get() { static const uint32_t zmatrix[24] = { @@ -223,7 +240,7 @@ struct Sampler { private: uint32_t seed, index; - static uint32_t hash(uint32_t s) + static OSL_HOSTDEVICE uint32_t hash(uint32_t s) { // https://github.com/skeeto/hash-prospector s ^= s >> 16; @@ -234,7 +251,7 @@ struct Sampler { return s; } - static uint32_t reversebits(uint32_t x) + static OSL_HOSTDEVICE uint32_t reversebits(uint32_t x) { #if defined(__clang__) return __builtin_bitreverse32(x); @@ -248,7 +265,7 @@ struct Sampler { #endif } - static uint32_t owen_scramble(uint32_t p, uint32_t s) + static OSL_HOSTDEVICE uint32_t owen_scramble(uint32_t p, uint32_t s) { // https://psychopath.io/post/2021_01_30_building_a_better_lk_hash // assumes reversed input diff --git a/src/testrender/shading.cpp b/src/testrender/shading.cpp index 73821a73c..154f50e61 100644 --- a/src/testrender/shading.cpp +++ b/src/testrender/shading.cpp @@ -8,238 +8,79 @@ #include "optics.h" #include "sampling.h" +#ifdef __CUDACC__ +# include "cuda/vec_math.h" +#endif + + using namespace OSL; + +// TODO: This is a little clumsy. Is it necessary? +#ifndef __CUDACC__ +using ShaderGlobalsType = OSL::ShaderGlobals; +#else +using ShaderGlobalsType = OSL_CUDA::ShaderGlobals; +#endif + + namespace { // anonymous namespace using OIIO::clamp; +using OSL::dot; -Color3 +OSL_HOSTDEVICE Color3 clamp(const Color3& c, float min, float max) { return Color3(clamp(c.x, min, max), clamp(c.y, min, max), clamp(c.z, min, max)); } -bool +OSL_HOSTDEVICE bool is_black(const Color3& c) { return c.x == 0 && c.y == 0 && c.z == 0; } +OSL_HOSTDEVICE Color3 +MxDielectricParams::evalR(float cos_theta) const +{ + return reflection_tint * fresnel_dielectric(cos_theta, ior); +} -// unique identifier for each closure supported by testrender -enum ClosureIDs { - EMISSION_ID = 1, - BACKGROUND_ID, - DIFFUSE_ID, - OREN_NAYAR_ID, - TRANSLUCENT_ID, - PHONG_ID, - WARD_ID, - MICROFACET_ID, - REFLECTION_ID, - FRESNEL_REFLECTION_ID, - REFRACTION_ID, - TRANSPARENT_ID, - // See MATERIALX_CLOSURES in stdosl.h - MX_OREN_NAYAR_DIFFUSE_ID, - MX_BURLEY_DIFFUSE_ID, - MX_DIELECTRIC_ID, - MX_CONDUCTOR_ID, - MX_GENERALIZED_SCHLICK_ID, - MX_TRANSLUCENT_ID, - MX_TRANSPARENT_ID, - MX_SUBSURFACE_ID, - MX_SHEEN_ID, - MX_UNIFORM_EDF_ID, - MX_ANISOTROPIC_VDF_ID, - MX_MEDIUM_VDF_ID, - MX_LAYER_ID, - // TODO: adding vdfs would require extending testrender with volume support ... -}; - -// these structures hold the parameters of each closure type -// they will be contained inside ClosureComponent -struct EmptyParams {}; -struct DiffuseParams { - Vec3 N; -}; -struct OrenNayarParams { - Vec3 N; - float sigma; -}; -struct PhongParams { - Vec3 N; - float exponent; -}; -struct WardParams { - Vec3 N, T; - float ax, ay; -}; -struct ReflectionParams { - Vec3 N; - float eta; -}; -struct RefractionParams { - Vec3 N; - float eta; -}; -struct MicrofacetParams { - ustringhash dist; - Vec3 N, U; - float xalpha, yalpha, eta; - int refract; -}; - -// MATERIALX_CLOSURES - -struct MxOrenNayarDiffuseParams { - Vec3 N; - Color3 albedo; - float roughness; - // optional - ustringhash label; - int energy_compensation; -}; - -struct MxBurleyDiffuseParams { - Vec3 N; - Color3 albedo; - float roughness; - // optional - ustringhash label; -}; - -// common to all MaterialX microfacet closures -struct MxMicrofacetBaseParams { - Vec3 N, U; - float roughness_x; - float roughness_y; - ustringhash distribution; - // optional - ustringhash label; -}; - -struct MxDielectricParams : public MxMicrofacetBaseParams { - Color3 reflection_tint; - Color3 transmission_tint; - float ior; - // optional - float thinfilm_thickness; - float thinfilm_ior; - - Color3 evalR(float cos_theta) const - { - return reflection_tint * fresnel_dielectric(cos_theta, ior); - } - - Color3 evalT(float cos_theta) const - { - return transmission_tint * (1.0f - fresnel_dielectric(cos_theta, ior)); - } -}; - -struct MxConductorParams : public MxMicrofacetBaseParams { - Color3 ior; - Color3 extinction; - // optional - float thinfilm_thickness; - float thinfilm_ior; - - Color3 evalR(float cos_theta) const - { - return fresnel_conductor(cos_theta, ior, extinction); - } - - Color3 evalT(float cos_theta) const { return Color3(0.0f); } - - // Avoid function was declared but never referenced - // float get_ior() const - // { - // return 0; // no transmission possible - // } -}; - -struct MxGeneralizedSchlickParams : public MxMicrofacetBaseParams { - Color3 reflection_tint; - Color3 transmission_tint; - Color3 f0; - Color3 f90; - float exponent; - // optional - float thinfilm_thickness; - float thinfilm_ior; - - Color3 evalR(float cos_theta) const - { - return reflection_tint - * fresnel_generalized_schlick(cos_theta, f0, f90, exponent); - } - - Color3 evalT(float cos_theta) const - { - return transmission_tint - * (Color3(1.0f) - - fresnel_generalized_schlick(cos_theta, f0, f90, exponent)); - } -}; - -struct MxTranslucentParams { - Vec3 N; - Color3 albedo; - // optional - ustringhash label; -}; - -struct MxSubsurfaceParams { - Vec3 N; - Color3 albedo; - float transmission_depth; - Color3 transmission_color; - float anisotropy; - // optional - ustringhash label; -}; - -struct MxSheenParams { - Vec3 N; - Color3 albedo; - float roughness; - // optional - ustringhash label; -}; +OSL_HOSTDEVICE Color3 +MxDielectricParams::evalT(float cos_theta) const +{ + return transmission_tint * (1.0f - fresnel_dielectric(cos_theta, ior)); +} -struct MxUniformEdfParams { - Color3 emittance; - // optional - ustringhash label; -}; +OSL_HOSTDEVICE Color3 +MxConductorParams::evalR(float cos_theta) const +{ + return fresnel_conductor(cos_theta, ior, extinction); +} -struct MxLayerParams { - OSL::ClosureColor* top; - OSL::ClosureColor* base; -}; +OSL_HOSTDEVICE Color3 +MxConductorParams::evalT(float cos_theta) const +{ + return Color3(0.0f); +} -struct MxAnisotropicVdfParams { - Color3 albedo; - Color3 extinction; - float anisotropy; - // optional - ustringhash label; -}; +OSL_HOSTDEVICE Color3 +MxGeneralizedSchlickParams::evalR(float cos_theta) const +{ + return reflection_tint + * fresnel_generalized_schlick(cos_theta, f0, f90, exponent); +} -struct MxMediumVdfParams { - Color3 albedo; - float transmission_depth; - Color3 transmission_color; - float anisotropy; - float ior; - int priority; - // optional - ustringhash label; -}; +OSL_HOSTDEVICE Color3 +MxGeneralizedSchlickParams::evalT(float cos_theta) const +{ + return transmission_tint + * (Color3(1.0f) + - fresnel_generalized_schlick(cos_theta, f0, f90, exponent)); +} } // anonymous namespace @@ -247,6 +88,7 @@ struct MxMediumVdfParams { OSL_NAMESPACE_ENTER +#if !defined(__CUDACC__) void register_closures(OSL::ShadingSystem* shadingsys) { @@ -433,24 +275,25 @@ register_closures(OSL::ShadingSystem* shadingsys) for (const BuiltinClosures& b : builtins) shadingsys->register_closure(b.name, b.id, b.params, nullptr, nullptr); } +#endif // !defined(__CUDACC__) OSL_NAMESPACE_EXIT namespace { // anonymous namespace template struct Diffuse final : public BSDF, DiffuseParams { - Diffuse(const DiffuseParams& params) : BSDF(), DiffuseParams(params) + OSL_HOSTDEVICE Diffuse(const DiffuseParams& params) : BSDF(DIFFUSE_ID), DiffuseParams(params) { if (trans) N = -N; } - Sample eval(const Vec3& /*wo*/, const OSL::Vec3& wi) const override + OSL_HOSTDEVICE Sample eval(const Vec3& /*wo*/, const OSL::Vec3& wi) const { const float pdf = std::max(N.dot(wi), 0.0f) * float(M_1_PI); return { wi, Color3(1.0f), pdf, 1.0f }; } - Sample sample(const Vec3& /*wo*/, float rx, float ry, - float /*rz*/) const override + OSL_HOSTDEVICE Sample sample(const Vec3& /*wo*/, float rx, float ry, + float /*rz*/) const { Vec3 out_dir; float pdf; @@ -460,10 +303,10 @@ template struct Diffuse final : public BSDF, DiffuseParams { }; struct OrenNayar final : public BSDF, OrenNayarParams { - OrenNayar(const OrenNayarParams& params) : BSDF(), OrenNayarParams(params) + OSL_HOSTDEVICE OrenNayar(const OrenNayarParams& params) : BSDF(OREN_NAYAR_ID), OrenNayarParams(params) { } - Sample eval(const Vec3& wo, const OSL::Vec3& wi) const override + OSL_HOSTDEVICE Sample eval(const Vec3& wo, const OSL::Vec3& wi) const { float NL = N.dot(wi); float NV = N.dot(wo); @@ -485,8 +328,8 @@ struct OrenNayar final : public BSDF, OrenNayarParams { } return {}; } - Sample sample(const Vec3& wo, float rx, float ry, - float /*rz*/) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, + float /*rz*/) const { Vec3 out_dir; float pdf; @@ -496,11 +339,14 @@ struct OrenNayar final : public BSDF, OrenNayarParams { }; struct EnergyCompensatedOrenNayar : public BSDF, MxOrenNayarDiffuseParams { + OSL_HOSTDEVICE EnergyCompensatedOrenNayar(const MxOrenNayarDiffuseParams& params) - : BSDF(), MxOrenNayarDiffuseParams(params) + : BSDF(MX_OREN_NAYAR_DIFFUSE_ID), MxOrenNayarDiffuseParams(params) { } - Sample eval(const Vec3& wo, const OSL::Vec3& wi) const override + + OSL_HOSTDEVICE + Sample eval(const Vec3& wo, const OSL::Vec3& wi) const { float NL = N.dot(wi); float NV = N.dot(wo); @@ -537,8 +383,9 @@ struct EnergyCompensatedOrenNayar : public BSDF, MxOrenNayarDiffuseParams { return {}; } + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, - float /*rz*/) const override + float /*rz*/) const { Vec3 out_dir; float pdf; @@ -551,6 +398,7 @@ struct EnergyCompensatedOrenNayar : public BSDF, MxOrenNayarDiffuseParams { static constexpr float constant2_FON = float(2.0 / 3.0 - 28.0 / (15.0 * M_PI)); + OSL_HOSTDEVICE float E_FON_analytic(float mu) const { const float sigma = roughness; @@ -567,8 +415,8 @@ struct EnergyCompensatedOrenNayar : public BSDF, MxOrenNayarDiffuseParams { }; struct Phong final : public BSDF, PhongParams { - Phong(const PhongParams& params) : BSDF(), PhongParams(params) {} - Sample eval(const Vec3& wo, const Vec3& wi) const override + OSL_HOSTDEVICE Phong(const PhongParams& params) : BSDF(PHONG_ID), PhongParams(params) {} + OSL_HOSTDEVICE Sample eval(const Vec3& wo, const Vec3& wi) const { float cosNI = N.dot(wi); float cosNO = N.dot(wo); @@ -585,8 +433,8 @@ struct Phong final : public BSDF, PhongParams { } return {}; } - Sample sample(const Vec3& wo, float rx, float ry, - float /*rz*/) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, + float /*rz*/) const { float cosNO = N.dot(wo); if (cosNO > 0) { @@ -607,8 +455,8 @@ struct Phong final : public BSDF, PhongParams { }; struct Ward final : public BSDF, WardParams { - Ward(const WardParams& params) : BSDF(), WardParams(params) {} - Sample eval(const Vec3& wo, const OSL::Vec3& wi) const override + OSL_HOSTDEVICE Ward(const WardParams& params) : BSDF(WARD_ID), WardParams(params) {} + OSL_HOSTDEVICE Sample eval(const Vec3& wo, const OSL::Vec3& wi) const { float cosNO = N.dot(wo); float cosNI = N.dot(wi); @@ -632,8 +480,8 @@ struct Ward final : public BSDF, WardParams { } return {}; } - Sample sample(const Vec3& wo, float rx, float ry, - float /*rz*/) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, + float /*rz*/) const { float cosNO = N.dot(wo); if (cosNO > 0) { @@ -710,17 +558,17 @@ struct Ward final : public BSDF, WardParams { * is sufficient). */ struct GGXDist { - static float F(const float tan_m2) + static OSL_HOSTDEVICE float F(const float tan_m2) { return 1 / (float(M_PI) * (1 + tan_m2) * (1 + tan_m2)); } - static float Lambda(const float a2) + static OSL_HOSTDEVICE float Lambda(const float a2) { return 0.5f * (-1.0f + sqrtf(1.0f + 1.0f / a2)); } - static Vec2 sampleSlope(float cos_theta, float randu, float randv) + static OSL_HOSTDEVICE Vec2 sampleSlope(float cos_theta, float randu, float randv) { // GGX Vec2 slope; @@ -747,12 +595,12 @@ struct GGXDist { }; struct BeckmannDist { - static float F(const float tan_m2) + static OSL_HOSTDEVICE float F(const float tan_m2) { return float(1 / M_PI) * OIIO::fast_exp(-tan_m2); } - static float Lambda(const float a2) + static OSL_HOSTDEVICE float Lambda(const float a2) { const float a = sqrtf(a2); return a < 1.6f ? (1.0f - 1.259f * a + 0.396f * a2) @@ -760,7 +608,7 @@ struct BeckmannDist { : 0.0f; } - static Vec2 sampleSlope(float cos_theta, float randu, float randv) + static OSL_HOSTDEVICE Vec2 sampleSlope(float cos_theta, float randu, float randv) { const float SQRT_PI_INV = 1 / sqrtf(float(M_PI)); float ct = cos_theta < 1e-6f ? 1e-6f : cos_theta; @@ -808,14 +656,14 @@ struct BeckmannDist { template struct Microfacet final : public BSDF, MicrofacetParams { - Microfacet(const MicrofacetParams& params) - : BSDF() + OSL_HOSTDEVICE Microfacet(const MicrofacetParams& params) + : BSDF(MICROFACET_ID) , MicrofacetParams(params) , tf(U == Vec3(0) || xalpha == yalpha ? TangentFrame(N) : TangentFrame(N, U)) { } - Color3 get_albedo(const Vec3& wo) const override + OSL_HOSTDEVICE Color3 get_albedo(const Vec3& wo) const { if (Refract == 2) return Color3(1.0f); @@ -824,7 +672,7 @@ struct Microfacet final : public BSDF, MicrofacetParams { float fr = fresnel_dielectric(N.dot(wo), eta); return Color3(Refract ? 1 - fr : fr); } - Sample eval(const Vec3& wo, const OSL::Vec3& wi) const override + OSL_HOSTDEVICE Sample eval(const Vec3& wo, const OSL::Vec3& wi) const { const Vec3 wo_l = tf.tolocal(wo); const Vec3 wi_l = tf.tolocal(wi); @@ -891,7 +739,7 @@ struct Microfacet final : public BSDF, MicrofacetParams { return {}; } - Sample sample(const Vec3& wo, float rx, float ry, float rz) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, float rz) const { const Vec3 wo_l = tf.tolocal(wo); const float cosNO = wo_l.z; @@ -917,8 +765,9 @@ struct Microfacet final : public BSDF, MicrofacetParams { if (Refract == 2) { pdf *= F; return { wi, Color3(out), pdf, std::max(xalpha, yalpha) }; - } else + } else { return { wi, Color3(F * out), pdf, std::max(xalpha, yalpha) }; + } } else { const Vec3 M = tf.toworld(m); Vec3 wi; @@ -938,6 +787,7 @@ struct Microfacet final : public BSDF, MicrofacetParams { float pdf = (fabsf(cosHI * cosHO) * (eta * eta) * (G1 * D) * invHt2) / fabsf(wo_l.z); + float out = G2 / G1; if (Refract == 2) { pdf *= Ft; @@ -949,9 +799,9 @@ struct Microfacet final : public BSDF, MicrofacetParams { } private: - static float SQR(float x) { return x * x; } + static OSL_HOSTDEVICE float SQR(float x) { return x * x; } - float evalLambda(const Vec3 w) const + OSL_HOSTDEVICE float evalLambda(const Vec3 w) const { float cosTheta2 = SQR(w.z); /* Have these two multiplied by sinTheta^2 for convenience */ @@ -960,15 +810,15 @@ struct Microfacet final : public BSDF, MicrofacetParams { return Distribution::Lambda(cosTheta2 / (cosPhi2st2 + sinPhi2st2)); } - static float evalG2(float Lambda_i, float Lambda_o) + static OSL_HOSTDEVICE float evalG2(float Lambda_i, float Lambda_o) { // correlated masking-shadowing return 1 / (Lambda_i + Lambda_o + 1); } - static float evalG1(float Lambda_v) { return 1 / (Lambda_v + 1); } + static OSL_HOSTDEVICE float evalG1(float Lambda_v) { return 1 / (Lambda_v + 1); } - float evalD(const Vec3 Hr) const + OSL_HOSTDEVICE float evalD(const Vec3 Hr) const { float cosThetaM = Hr.z; if (cosThetaM > 0) { @@ -985,7 +835,7 @@ struct Microfacet final : public BSDF, MicrofacetParams { return 0; } - Vec3 sampleMicronormal(const Vec3 wo, float randu, float randv) const + OSL_HOSTDEVICE Vec3 sampleMicronormal(const Vec3 wo, float randu, float randv) const { /* Project wo and stretch by alpha values */ Vec3 swo = wo; @@ -993,6 +843,14 @@ struct Microfacet final : public BSDF, MicrofacetParams { swo.y *= yalpha; swo = swo.normalize(); +#ifdef __CUDACC__ + // TODO: For some reason, after being normalized, swo can end up with components + // with magnitudes slightly greater than 1.0, which makes subsequent operations + // start producing NaNs. Normalizing this vector again fixes the issue, but it's + // a pretty ugly hack... + swo = swo.normalize(); +#endif + // figure out angles for the incoming vector float cos_theta = std::max(swo.z, 0.0f); float cos_phi = 1; @@ -1015,6 +873,7 @@ struct Microfacet final : public BSDF, MicrofacetParams { float mlen = sqrtf(s.x * s.x + s.y * s.y + 1); Vec3 m(fabsf(s.x) < mlen ? -s.x / mlen : 1.0f, fabsf(s.y) < mlen ? -s.y / mlen : 1.0f, 1.0f / mlen); + return m; } @@ -1030,11 +889,12 @@ typedef Microfacet MicrofacetBeckmannBoth; // We use the CRTP to inherit the parameters because each MaterialX closure uses a different set of parameters -template struct MxMicrofacet final : public BSDF, MxMicrofacetParams { - MxMicrofacet(const MxMicrofacetParams& params, float refraction_ior) - : BSDF() + OSL_HOSTDEVICE MxMicrofacet(const MxMicrofacetParams& params, + float refraction_ior) + : BSDF(ID) , MxMicrofacetParams(params) , tf(MxMicrofacetParams::U == Vec3(0) || MxMicrofacetParams::roughness_x @@ -1045,7 +905,7 @@ struct MxMicrofacet final : public BSDF, MxMicrofacetParams { { } - float get_fresnel_angle(float cos_theta) const + OSL_HOSTDEVICE float get_fresnel_angle(float cos_theta) const { if (EnableTransmissionLobe && refraction_ior < 1) { // handle TIR if we are on the backside @@ -1059,7 +919,7 @@ struct MxMicrofacet final : public BSDF, MxMicrofacetParams { return cos_theta; } - Color3 get_albedo(const Vec3& wo) const override + OSL_HOSTDEVICE Color3 get_albedo(const Vec3& wo) const { // if transmission is enabled, punt on if (EnableTransmissionLobe) @@ -1071,7 +931,7 @@ struct MxMicrofacet final : public BSDF, MxMicrofacetParams { get_fresnel_angle(MxMicrofacetParams::N.dot(wo))); } - Sample eval(const Vec3& wo, const OSL::Vec3& wi) const override + OSL_HOSTDEVICE Sample eval(const Vec3& wo, const OSL::Vec3& wi) const { const Vec3 wo_l = tf.tolocal(wo); const Vec3 wi_l = tf.tolocal(wi); @@ -1150,7 +1010,7 @@ struct MxMicrofacet final : public BSDF, MxMicrofacetParams { } - Sample sample(const Vec3& wo, float rx, float ry, float rz) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, float rz) const { const Vec3 wo_l = tf.tolocal(wo); const float cosNO = wo_l.z; @@ -1225,9 +1085,9 @@ struct MxMicrofacet final : public BSDF, MxMicrofacetParams { } private: - static float SQR(float x) { return x * x; } + static OSL_HOSTDEVICE float SQR(float x) { return x * x; } - float evalLambda(const Vec3 w) const + OSL_HOSTDEVICE float evalLambda(const Vec3 w) const { float cosTheta2 = SQR(w.z); /* Have these two multiplied by sinTheta^2 for convenience */ @@ -1236,15 +1096,15 @@ struct MxMicrofacet final : public BSDF, MxMicrofacetParams { return Distribution::Lambda(cosTheta2 / (cosPhi2st2 + sinPhi2st2)); } - static float evalG2(float Lambda_i, float Lambda_o) + static OSL_HOSTDEVICE float evalG2(float Lambda_i, float Lambda_o) { // correlated masking-shadowing return 1 / (Lambda_i + Lambda_o + 1); } - static float evalG1(float Lambda_v) { return 1 / (Lambda_v + 1); } + static OSL_HOSTDEVICE float evalG1(float Lambda_v) { return 1 / (Lambda_v + 1); } - float evalD(const Vec3 Hr) const + OSL_HOSTDEVICE float evalD(const Vec3 Hr) const { float cosThetaM = Hr.z; if (cosThetaM > 0) { @@ -1256,14 +1116,26 @@ struct MxMicrofacet final : public BSDF, MxMicrofacetParams { float tanThetaM2 = (cosPhi2st2 + sinPhi2st2) / cosThetaM2; +#ifndef __CUDACC__ return Distribution::F(tanThetaM2) / (MxMicrofacetParams::roughness_x * MxMicrofacetParams::roughness_y * cosThetaM4); +#else + // Division can result in NaN if both inputs are 0.0, so only + // perform the divide if the numerator is not 0.0. + const float dist_val = Distribution::F(tanThetaM2); + if (dist_val == 0.0f) + return 0.0f; + else + return dist_val + / (MxMicrofacetParams::roughness_x + * MxMicrofacetParams::roughness_y * cosThetaM4); +#endif } return 0; } - Vec3 sampleMicronormal(const Vec3 wo, float randu, float randv) const + OSL_HOSTDEVICE Vec3 sampleMicronormal(const Vec3 wo, float randu, float randv) const { /* Project wo and stretch by alpha values */ Vec3 swo = wo; @@ -1301,23 +1173,23 @@ struct MxMicrofacet final : public BSDF, MxMicrofacetParams { }; struct Reflection final : public BSDF, ReflectionParams { - Reflection(const ReflectionParams& params) - : BSDF(), ReflectionParams(params) + OSL_HOSTDEVICE Reflection(const ReflectionParams& params) + : BSDF(REFLECTION_ID), ReflectionParams(params) { } - Color3 get_albedo(const Vec3& wo) const override + OSL_HOSTDEVICE Color3 get_albedo(const Vec3& wo) const { float cosNO = N.dot(wo); if (cosNO > 0) return Color3(fresnel_dielectric(cosNO, eta)); return Color3(1); } - Sample eval(const Vec3& /*wo*/, const OSL::Vec3& /*wi*/) const override + OSL_HOSTDEVICE Sample eval(const Vec3& /*wo*/, const OSL::Vec3& /*wi*/) const { return {}; } - Sample sample(const Vec3& wo, float /*rx*/, float /*ry*/, - float /*rz*/) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float /*rx*/, float /*ry*/, + float /*rz*/) const { // only one direction is possible float cosNO = dot(N, wo); @@ -1331,21 +1203,21 @@ struct Reflection final : public BSDF, ReflectionParams { }; struct Refraction final : public BSDF, RefractionParams { - Refraction(const RefractionParams& params) - : BSDF(), RefractionParams(params) + OSL_HOSTDEVICE Refraction(const RefractionParams& params) + : BSDF(REFRACTION_ID), RefractionParams(params) { } - Color3 get_albedo(const Vec3& wo) const override + OSL_HOSTDEVICE Color3 get_albedo(const Vec3& wo) const { float cosNO = N.dot(wo); return Color3(1 - fresnel_dielectric(cosNO, eta)); } - Sample eval(const Vec3& /*wo*/, const OSL::Vec3& /*wi*/) const override + OSL_HOSTDEVICE Sample eval(const Vec3& /*wo*/, const OSL::Vec3& /*wi*/) const { return {}; } - Sample sample(const Vec3& wo, float /*rx*/, float /*ry*/, - float /*rz*/) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float /*rx*/, float /*ry*/, + float /*rz*/) const { float pdf = std::numeric_limits::infinity(); Vec3 wi; @@ -1355,13 +1227,13 @@ struct Refraction final : public BSDF, RefractionParams { }; struct Transparent final : public BSDF { - Transparent() : BSDF() {} - Sample eval(const Vec3& /*wo*/, const Vec3& /*wi*/) const override + OSL_HOSTDEVICE Transparent() : BSDF(TRANSPARENT_ID) {} + OSL_HOSTDEVICE Sample eval(const Vec3& /*wo*/, const Vec3& /*wi*/) const { return {}; } - Sample sample(const Vec3& wo, float /*rx*/, float /*ry*/, - float /*rz*/) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float /*rx*/, float /*ry*/, + float /*rz*/) const { Vec3 wi = -wo; float pdf = std::numeric_limits::infinity(); @@ -1370,14 +1242,14 @@ struct Transparent final : public BSDF { }; struct MxBurleyDiffuse final : public BSDF, MxBurleyDiffuseParams { - MxBurleyDiffuse(const MxBurleyDiffuseParams& params) - : BSDF(), MxBurleyDiffuseParams(params) + OSL_HOSTDEVICE MxBurleyDiffuse(const MxBurleyDiffuseParams& params) + : BSDF(MX_BURLEY_DIFFUSE_ID), MxBurleyDiffuseParams(params) { } - Color3 get_albedo(const Vec3& wo) const override { return albedo; } + OSL_HOSTDEVICE Color3 get_albedo(const Vec3& wo) const { return albedo; } - Sample eval(const Vec3& wo, const Vec3& wi) const override + OSL_HOSTDEVICE Sample eval(const Vec3& wo, const Vec3& wi) const { const Vec3 L = wi, V = wo; const Vec3 H = (L + V).normalize(); @@ -1391,7 +1263,7 @@ struct MxBurleyDiffuse final : public BSDF, MxBurleyDiffuseParams { return { wi, albedo * refL * refV, pdf, 1.0f }; } - Sample sample(const Vec3& wo, float rx, float ry, float rz) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, float rz) const { Vec3 out_dir; float pdf; @@ -1401,9 +1273,12 @@ struct MxBurleyDiffuse final : public BSDF, MxBurleyDiffuseParams { }; struct MxSheen final : public BSDF, MxSheenParams { - MxSheen(const MxSheenParams& params) : BSDF(), MxSheenParams(params) {} + OSL_HOSTDEVICE MxSheen(const MxSheenParams& params) + : BSDF(MX_SHEEN_ID), MxSheenParams(params) + { + } - Color3 get_albedo(const Vec3& wo) const override + OSL_HOSTDEVICE Color3 get_albedo(const Vec3& wo) const { const float NdotV = clamp(N.dot(wo), 0.0f, 1.0f); // Rational fit from the Material X project @@ -1417,7 +1292,7 @@ struct MxSheen final : public BSDF, MxSheenParams { return clamp(albedo * (r.x / r.y), 0.0f, 1.0f); } - Sample eval(const Vec3& wo, const Vec3& wi) const override + OSL_HOSTDEVICE Sample eval(const Vec3& wo, const Vec3& wi) const { const Vec3 L = wi, V = wo; const Vec3 H = (L + V).normalize(); @@ -1437,7 +1312,7 @@ struct MxSheen final : public BSDF, MxSheenParams { pdf, 1.0f }; } - Sample sample(const Vec3& wo, float rx, float ry, float rz) const override + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, float rz) const { Vec3 out_dir; float pdf; @@ -1446,393 +1321,800 @@ struct MxSheen final : public BSDF, MxSheenParams { } }; -Color3 -evaluate_layer_opacity(const OSL::ShaderGlobals& sg, + +OSL_HOSTDEVICE Color3 +evaluate_layer_opacity(const ShaderGlobalsType& sg, const ClosureColor* closure) { // Null closure, the layer is fully transparent if (closure == nullptr) return Color3(0); + // Non-recursive traversal stack + const int STACK_SIZE = 16; + int stack_idx = 0; + const ClosureColor* ptr_stack[STACK_SIZE]; + Color3 weight_stack[STACK_SIZE]; + Color3 weight = Color3(1.0f); + + while (closure) { switch (closure->id) { - case ClosureColor::MUL: - return closure->as_mul()->weight - * evaluate_layer_opacity(sg, closure->as_mul()->closure); - case ClosureColor::ADD: - return evaluate_layer_opacity(sg, closure->as_add()->closureA) - + evaluate_layer_opacity(sg, closure->as_add()->closureB); - default: { - const ClosureComponent* comp = closure->as_comp(); - Color3 w = comp->w; - switch (comp->id) { + case ClosureColor::MUL: + weight *= closure->as_mul()->weight; + closure = closure->as_mul()->closure; + break; + case ClosureColor::ADD: + ptr_stack[stack_idx] = closure->as_add()->closureB; + weight_stack[stack_idx++] = weight; + closure = closure->as_add()->closureA; + break; + default: { + const ClosureComponent* comp = closure->as_comp(); + Color3 w = comp->w; + switch (comp->id) { + case MX_LAYER_ID: { + const MxLayerParams* srcparams = comp->as(); + closure = srcparams->top; + ptr_stack[stack_idx] = srcparams->base; + weight_stack[stack_idx++] = weight * w; + break; + } + case REFLECTION_ID: + case FRESNEL_REFLECTION_ID: { + Reflection bsdf(*comp->as()); + const Vec3& I = *reinterpret_cast(&sg.I); + weight *= w * bsdf.get_albedo(-I); + closure = nullptr; + break; + } + case MX_DIELECTRIC_ID: { + const MxDielectricParams& params = *comp->as(); + // Transmissive dielectrics are opaque + if (!is_black(params.transmission_tint)) { + // return Color3(1); + closure = nullptr; + break; + } + MxMicrofacet + mf(params, 1.0f); + const Vec3& I = *reinterpret_cast(&sg.I); + weight *= w * mf.get_albedo(-I); + closure = nullptr; + break; + } + case MX_GENERALIZED_SCHLICK_ID: { + const MxGeneralizedSchlickParams& params + = *comp->as(); + // Transmissive dielectrics are opaque + if (!is_black(params.transmission_tint)) { + // return Color3(1); + closure = nullptr; + break; + } + MxMicrofacet + mf(params, 1.0f); + const Vec3& I = *reinterpret_cast(&sg.I); + weight *= w * mf.get_albedo(-I); + closure = nullptr; + break; + } + case MX_SHEEN_ID: { + MxSheen bsdf(*comp->as()); + const Vec3& I = *reinterpret_cast(&sg.I); + weight *= w * bsdf.get_albedo(-I); + closure = nullptr; + break; + } + default: // Assume unhandled BSDFs are opaque + closure = nullptr; + break; + } + } + } + if (closure == nullptr && stack_idx > 0) { + closure = ptr_stack[--stack_idx]; + weight = weight_stack[stack_idx]; + } + } + return weight; +} + + +OSL_HOSTDEVICE void +process_medium_closure(const ShaderGlobalsType& sg, ShadingResult& result, + const ClosureColor* closure, const Color3& w_in) +{ + if (!closure) + return; + + // Non-recursive traversal stack + const int STACK_SIZE = 16; + int stack_idx = 0; + const ClosureColor* ptr_stack[STACK_SIZE]; + Color3 weight_stack[STACK_SIZE]; + Color3 weight = w_in; + + while (closure) { + switch (closure->id) { + case ClosureColor::MUL: { + weight *= closure->as_mul()->weight; + closure = closure->as_mul()->closure; + break; + } + case ClosureColor::ADD: { + weight_stack[stack_idx] = weight; + ptr_stack[stack_idx++] = closure->as_add()->closureB; + closure = closure->as_add()->closureA; + break; + } case MX_LAYER_ID: { - const MxLayerParams* srcparams = comp->as(); - return w - * (evaluate_layer_opacity(sg, srcparams->top) - + evaluate_layer_opacity(sg, srcparams->base)); + const ClosureComponent* comp = closure->as_comp(); + const MxLayerParams* params = comp->as(); + Color3 base_w + = weight + * (Color3(1) + - clamp(evaluate_layer_opacity(sg, params->top), 0.f, 1.f)); + closure = params->top; + ptr_stack[stack_idx] = params->base; + weight_stack[stack_idx++] = weight * base_w; + break; } - case REFLECTION_ID: - case FRESNEL_REFLECTION_ID: { - Reflection bsdf(*comp->as()); - return w * bsdf.get_albedo(-sg.I); + case MX_ANISOTROPIC_VDF_ID: { + const ClosureComponent* comp = closure->as_comp(); + Color3 cw = weight * comp->w; + const auto& params = *comp->as(); + result.sigma_t = cw * params.extinction; + result.sigma_s = params.albedo * result.sigma_t; + result.medium_g = params.anisotropy; + result.refraction_ior = 1.0f; + result.priority = 0; // TODO: should this closure have a priority? + closure = nullptr; + break; + } + case MX_MEDIUM_VDF_ID: { + const ClosureComponent* comp = closure->as_comp(); + Color3 cw = weight * comp->w; + const auto& params = *comp->as(); + result.sigma_t = { -OIIO::fast_log(params.transmission_color.x), + -OIIO::fast_log(params.transmission_color.y), + -OIIO::fast_log(params.transmission_color.z) }; + // NOTE: closure weight scales the extinction parameter + result.sigma_t *= cw / params.transmission_depth; + result.sigma_s = params.albedo * result.sigma_t; + result.medium_g = params.anisotropy; + // TODO: properly track a medium stack here ... + result.refraction_ior = sg.backfacing ? 1.0f / params.ior : params.ior; + result.priority = params.priority; + closure = nullptr; + break; } case MX_DIELECTRIC_ID: { - const MxDielectricParams& params = *comp->as(); - // Transmissive dielectrics are opaque - if (!is_black(params.transmission_tint)) - return Color3(1); - MxMicrofacet mf(params, 1.0f); - return w * mf.get_albedo(-sg.I); + const ClosureComponent* comp = closure->as_comp(); + const auto& params = *comp->as(); + if (!is_black(weight * comp->w * params.transmission_tint)) { + // TODO: properly track a medium stack here ... + result.refraction_ior = sg.backfacing ? 1.0f / params.ior + : params.ior; + } + closure = nullptr; + break; } case MX_GENERALIZED_SCHLICK_ID: { - const MxGeneralizedSchlickParams& params - = *comp->as(); - // Transmissive dielectrics are opaque - if (!is_black(params.transmission_tint)) - return Color3(1); - MxMicrofacet mf(params, - 1.0f); - return w * mf.get_albedo(-sg.I); + const ClosureComponent* comp = closure->as_comp(); + const auto& params = *comp->as(); + if (!is_black(weight * comp->w * params.transmission_tint)) { + // TODO: properly track a medium stack here ... + float avg_F0 = clamp((params.f0.x + params.f0.y + params.f0.z) + / 3.0f, + 0.0f, 0.99f); + float sqrt_F0 = sqrtf(avg_F0); + float ior = (1 + sqrt_F0) / (1 - sqrt_F0); + result.refraction_ior = sg.backfacing ? 1.0f / ior : ior; + } + closure = nullptr; + break; } - case MX_SHEEN_ID: { - MxSheen bsdf(*comp->as()); - return w * bsdf.get_albedo(-sg.I); + default: + closure = nullptr; + break; } - default: // Assume unhandled BSDFs are opaque - return Color3(1); + if (closure == nullptr && stack_idx > 0) { + closure = ptr_stack[--stack_idx]; + weight = weight_stack[stack_idx]; } } - } - OSL_ASSERT(false && "Layer opacity evaluation failed"); - return Color3(0); } -void -process_medium_closure(const OSL::ShaderGlobals& sg, ShadingResult& result, - const ClosureColor* closure, const Color3& w) +// walk through the closure tree, creating bsdfs as we go +OSL_HOSTDEVICE void +process_bsdf_closure(const ShaderGlobalsType& sg, ShadingResult& result, + const ClosureColor* closure, const Color3& w_in, + bool light_only) { + static const ustringhash uh_ggx("ggx"); + static const ustringhash uh_beckmann("beckmann"); + static const ustringhash uh_default("default"); if (!closure) return; - switch (closure->id) { - case ClosureColor::MUL: { - process_medium_closure(sg, result, closure->as_mul()->closure, - w * closure->as_mul()->weight); - break; + + // Non-recursive traversal stack + const int STACK_SIZE = 16; + int stack_idx = 0; + const ClosureColor* ptr_stack[STACK_SIZE]; + Color3 weight_stack[STACK_SIZE]; + Color3 weight = w_in; + + while (closure) { + switch (closure->id) { + case ClosureColor::MUL: { + weight *= closure->as_mul()->weight; + closure = closure->as_mul()->closure; + break; + } + case ClosureColor::ADD: { + ptr_stack[stack_idx] = closure->as_add()->closureB; + weight_stack[stack_idx++] = weight; + closure = closure->as_add()->closureA; + break; + } + default: { + const ClosureComponent* comp = closure->as_comp(); + Color3 cw = weight * comp->w; + closure = nullptr; + if (comp->id == EMISSION_ID) { + result.Le += cw; + } + else if (comp->id == MX_UNIFORM_EDF_ID) { + result.Le += cw * comp->as()->emittance; + } + else if (!light_only) { + bool ok = false; + switch (comp->id) { + case DIFFUSE_ID: + ok = result.bsdf.add_bsdf>( + cw, *comp->as()); + break; + case OREN_NAYAR_ID: + ok = result.bsdf.add_bsdf( + cw, *comp->as()); + break; + case TRANSLUCENT_ID: + ok = result.bsdf.add_bsdf>( + cw, *comp->as()); + break; + case PHONG_ID: + ok = result.bsdf.add_bsdf(cw, *comp->as()); + break; + case WARD_ID: + ok = result.bsdf.add_bsdf(cw, *comp->as()); + break; + case MICROFACET_ID: { + const MicrofacetParams* mp = comp->as(); + if (mp->dist == uh_ggx) { + switch (mp->refract) { + case 0: + ok = result.bsdf.add_bsdf(cw, *mp); + break; + case 1: + ok = result.bsdf.add_bsdf(cw, *mp); + break; + case 2: + ok = result.bsdf.add_bsdf(cw, *mp); + break; + } + } else if (mp->dist == uh_beckmann || mp->dist == uh_default) { + switch (mp->refract) { + case 0: + ok = result.bsdf.add_bsdf(cw, + *mp); + break; + case 1: + ok = result.bsdf.add_bsdf(cw, + *mp); + break; + case 2: + ok = result.bsdf.add_bsdf(cw, + *mp); + break; + } + } + break; + } + case REFLECTION_ID: + case FRESNEL_REFLECTION_ID: + ok = result.bsdf.add_bsdf( + cw, *comp->as()); + break; + case REFRACTION_ID: + ok = result.bsdf.add_bsdf( + cw, *comp->as()); + break; + case TRANSPARENT_ID: + ok = result.bsdf.add_bsdf(cw); + break; + case MX_OREN_NAYAR_DIFFUSE_ID: { + const MxOrenNayarDiffuseParams* srcparams + = comp->as(); + if (srcparams->energy_compensation) { + // energy compensation handled by its own BSDF + ok = result.bsdf.add_bsdf( + cw, *srcparams); + } else { + // translate MaterialX parameters into existing closure + OrenNayarParams params = {}; + params.N = srcparams->N; + params.sigma = srcparams->roughness; + ok = result.bsdf.add_bsdf(cw * srcparams->albedo, + params); + } + break; + } + case MX_BURLEY_DIFFUSE_ID: { + const MxBurleyDiffuseParams& params + = *comp->as(); + ok = result.bsdf.add_bsdf(cw, params); + break; + } + case MX_DIELECTRIC_ID: { + const MxDielectricParams& params + = *comp->as(); + if (is_black(params.transmission_tint)) + ok = result.bsdf.add_bsdf>( + cw, params, 1.0f); + else + ok = result.bsdf.add_bsdf>( + cw, params, result.refraction_ior); + break; + } + case MX_CONDUCTOR_ID: { + const MxConductorParams& params = *comp->as(); + ok = result.bsdf.add_bsdf>( + cw, params, 1.0f); + break; + }; + case MX_GENERALIZED_SCHLICK_ID: { + const MxGeneralizedSchlickParams& params + = *comp->as(); + if (is_black(params.transmission_tint)) + ok = result.bsdf.add_bsdf< + MxMicrofacet>(cw, + params, + 1.0f); + else + ok = result.bsdf.add_bsdf< + MxMicrofacet>( + cw, params, result.refraction_ior); + break; + }; + case MX_TRANSLUCENT_ID: { + const MxTranslucentParams* srcparams + = comp->as(); + DiffuseParams params = {}; + params.N = srcparams->N; + ok = result.bsdf.add_bsdf>(cw * srcparams->albedo, + params); + break; + } + case MX_TRANSPARENT_ID: { + ok = result.bsdf.add_bsdf(cw); + break; + } + case MX_SUBSURFACE_ID: { + // TODO: implement BSSRDF support? + const MxSubsurfaceParams* srcparams + = comp->as(); + DiffuseParams params = {}; + params.N = srcparams->N; + ok = result.bsdf.add_bsdf>(cw * srcparams->albedo, + params); + break; + } + case MX_SHEEN_ID: { + const MxSheenParams& params = *comp->as(); + ok = result.bsdf.add_bsdf(cw, params); + break; + } + case MX_LAYER_ID: { + const MxLayerParams* srcparams = comp->as(); + Color3 base_w + = weight + * (Color3(1, 1, 1) + - clamp(evaluate_layer_opacity(sg, srcparams->top), + 0.f, 1.f)); + closure = srcparams->top; + weight = cw; + if (!is_black(base_w)) { + ptr_stack[stack_idx] = srcparams->base; + weight_stack[stack_idx++] = base_w; + } + ok = true; + break; + } + case MX_ANISOTROPIC_VDF_ID: + case MX_MEDIUM_VDF_ID: { + // already processed by process_medium_closure + ok = true; + break; + default: + // TODO: Add a diagnostic message? + break; + } + } +#ifndef __CUDACC__ + OSL_ASSERT(ok && "Invalid closure invoked in surface shader"); +#else + if (!ok) { + printf("Invalid closure invoked in surface shader\n"); + } +#endif + } + break; + } + } + if (closure == nullptr && stack_idx > 0) { + closure = ptr_stack[--stack_idx]; + weight = weight_stack[stack_idx]; + } } - case ClosureColor::ADD: { - process_medium_closure(sg, result, closure->as_add()->closureA, w); - process_medium_closure(sg, result, closure->as_add()->closureB, w); - break; +} + +} // anonymous namespace + +OSL_NAMESPACE_ENTER + +OSL_HOSTDEVICE void +process_closure(const ShaderGlobalsType& sg, ShadingResult& result, + const ClosureColor* Ci, bool light_only) +{ + if (!light_only) + process_medium_closure(sg, result, Ci, Color3(1)); + process_bsdf_closure(sg, result, Ci, Color3(1), light_only); +} + +OSL_HOSTDEVICE Vec3 +process_background_closure(const ClosureColor* closure) +{ + if (!closure) + return Vec3(0, 0, 0); + + // Non-recursive traversal stack + const int STACK_SIZE = 16; + int stack_idx = 0; + const ClosureColor* ptr_stack[STACK_SIZE]; + Color3 weight_stack[STACK_SIZE]; + Color3 weight = Color3(1.0f); + + while (closure) { + switch (closure->id) { + case ClosureColor::MUL: { + weight *= closure->as_mul()->weight; + closure = closure->as_mul()->closure; + break; + } + case ClosureColor::ADD: { + ptr_stack[stack_idx] = closure->as_add()->closureB; + weight_stack[stack_idx++] = weight; + closure = closure->as_add()->closureA; + break; + } + case BACKGROUND_ID: { + weight *= closure->as_comp()->w; + closure = nullptr; + break; + } + } + if (closure == nullptr && stack_idx > 0) { + closure = ptr_stack[--stack_idx]; + weight = weight_stack[stack_idx]; + } } - case MX_LAYER_ID: { - const ClosureComponent* comp = closure->as_comp(); - const MxLayerParams* params = comp->as(); - Color3 base_w - = w - * (Color3(1) - - clamp(evaluate_layer_opacity(sg, params->top), 0.f, 1.f)); - process_medium_closure(sg, result, params->top, w); - process_medium_closure(sg, result, params->base, base_w); + return weight; +} + + +typedef MxMicrofacet + MxConductor; +typedef MxMicrofacet + MxDielectric; +typedef MxMicrofacet + MxDielectricOpaque; +typedef MxMicrofacet + MxGeneralizedSchlick; +typedef MxMicrofacet + MxGeneralizedSchlickOpaque; + + +OSL_HOSTDEVICE Color3 +CompositeBSDF::get_albedo(const BSDF* bsdf, const Vec3& wo) const +{ + static const ustringhash uh_ggx("ggx"); + static const ustringhash uh_beckmann("beckmann"); + static const ustringhash uh_default("default"); + + Color3 albedo(0); + switch (bsdf->id) { + case DIFFUSE_ID: + albedo = BSDF_CAST(Diffuse<0>, bsdf)->get_albedo(wo); break; - } - case MX_ANISOTROPIC_VDF_ID: { - const ClosureComponent* comp = closure->as_comp(); - Color3 cw = w * comp->w; - const auto& params = *comp->as(); - result.sigma_t = cw * params.extinction; - result.sigma_s = params.albedo * result.sigma_t; - result.medium_g = params.anisotropy; - result.refraction_ior = 1.0f; - result.priority = 0; // TODO: should this closure have a priority? + case TRANSPARENT_ID: + case MX_TRANSPARENT_ID: + albedo = BSDF_CAST(Transparent, bsdf)->get_albedo(wo); break; - } - case MX_MEDIUM_VDF_ID: { - const ClosureComponent* comp = closure->as_comp(); - Color3 cw = w * comp->w; - const auto& params = *comp->as(); - result.sigma_t = { -OIIO::fast_log(params.transmission_color.x), - -OIIO::fast_log(params.transmission_color.y), - -OIIO::fast_log(params.transmission_color.z) }; - // NOTE: closure weight scales the extinction parameter - result.sigma_t *= cw / params.transmission_depth; - result.sigma_s = params.albedo * result.sigma_t; - result.medium_g = params.anisotropy; - // TODO: properly track a medium stack here ... - result.refraction_ior = sg.backfacing ? 1.0f / params.ior : params.ior; - result.priority = params.priority; + case OREN_NAYAR_ID: + albedo = BSDF_CAST(OrenNayar, bsdf)->get_albedo(wo); break; - } - case MX_DIELECTRIC_ID: { - const ClosureComponent* comp = closure->as_comp(); - const auto& params = *comp->as(); - if (!is_black(w * comp->w * params.transmission_tint)) { - // TODO: properly track a medium stack here ... - result.refraction_ior = sg.backfacing ? 1.0f / params.ior - : params.ior; + case TRANSLUCENT_ID: + albedo = BSDF_CAST(Diffuse<1>, bsdf)->get_albedo(wo); + break; + case PHONG_ID: albedo = BSDF_CAST(Phong, bsdf)->get_albedo(wo); break; + case WARD_ID: albedo = BSDF_CAST(Ward, bsdf)->get_albedo(wo); break; + case REFLECTION_ID: + case FRESNEL_REFLECTION_ID: + albedo = BSDF_CAST(Reflection, bsdf)->get_albedo(wo); + break; + case REFRACTION_ID: + albedo = BSDF_CAST(Refraction, bsdf)->get_albedo(wo); + break; + case MICROFACET_ID: { + const int refract = ((MicrofacetBeckmannRefl*)bsdf)->refract; + const ustringhash dist = ((MicrofacetBeckmannRefl*)bsdf)->dist; + if (dist == uh_default || dist == uh_beckmann) { + switch (refract) { + case 0: + albedo = BSDF_CAST(MicrofacetBeckmannRefl, bsdf)->get_albedo(wo); + break; + case 1: + albedo = BSDF_CAST(MicrofacetBeckmannRefr, bsdf)->get_albedo(wo); + break; + case 2: + albedo = BSDF_CAST(MicrofacetBeckmannBoth, bsdf)->get_albedo(wo); + break; + } + } else if (dist == uh_ggx) { + switch (refract) { + case 0: + albedo = BSDF_CAST(MicrofacetGGXRefl, bsdf)->get_albedo(wo); + break; + case 1: + albedo = BSDF_CAST(MicrofacetGGXRefr, bsdf)->get_albedo(wo); + break; + case 2: + albedo = BSDF_CAST(MicrofacetGGXBoth, bsdf)->get_albedo(wo); + break; + } } break; } + case MX_CONDUCTOR_ID: + albedo = BSDF_CAST(MxConductor, bsdf)->get_albedo(wo); + break; + case MX_DIELECTRIC_ID: + if (is_black(((MxDielectricOpaque*)bsdf)->transmission_tint)) + albedo = BSDF_CAST(MxDielectricOpaque, bsdf)->get_albedo(wo); + else + albedo = BSDF_CAST(MxDielectric, bsdf)->get_albedo(wo); + break; + case MX_OREN_NAYAR_DIFFUSE_ID: + albedo = BSDF_CAST(EnergyCompensatedOrenNayar, bsdf)->get_albedo(wo); + break; + case MX_BURLEY_DIFFUSE_ID: + albedo = BSDF_CAST(MxBurleyDiffuse, bsdf)->get_albedo(wo); + break; + case MX_SHEEN_ID: albedo = BSDF_CAST(MxSheen, bsdf)->get_albedo(wo); break; case MX_GENERALIZED_SCHLICK_ID: { - const ClosureComponent* comp = closure->as_comp(); - const auto& params = *comp->as(); - if (!is_black(w * comp->w * params.transmission_tint)) { - // TODO: properly track a medium stack here ... - float avg_F0 = clamp((params.f0.x + params.f0.y + params.f0.z) - / 3.0f, - 0.0f, 0.99f); - float sqrt_F0 = sqrtf(avg_F0); - float ior = (1 + sqrt_F0) / (1 - sqrt_F0); - result.refraction_ior = sg.backfacing ? 1.0f / ior : ior; - } + const Color3& tint = ((MxGeneralizedSchlick*)bsdf)->transmission_tint; + if (is_black(tint)) + albedo = BSDF_CAST(MxGeneralizedSchlickOpaque, bsdf)->get_albedo(wo); + else + albedo = BSDF_CAST(MxGeneralizedSchlick, bsdf)->get_albedo(wo); break; } + default: break; } + return albedo; } -// recursively walk through the closure tree, creating bsdfs as we go -void -process_bsdf_closure(const OSL::ShaderGlobals& sg, ShadingResult& result, - const ClosureColor* closure, const Color3& w, - bool light_only) + +OSL_HOSTDEVICE BSDF::Sample +CompositeBSDF::sample(const BSDF* bsdf, const Vec3& wo, float rx, float ry, + float rz) const { static const ustringhash uh_ggx("ggx"); static const ustringhash uh_beckmann("beckmann"); static const ustringhash uh_default("default"); - if (!closure) - return; - switch (closure->id) { - case ClosureColor::MUL: { - Color3 cw = w * closure->as_mul()->weight; - process_bsdf_closure(sg, result, closure->as_mul()->closure, cw, - light_only); + + BSDF::Sample sample = {}; + switch (bsdf->id) { + case DIFFUSE_ID: + sample = BSDF_CAST(Diffuse<0>, bsdf)->sample(wo, rx, ry, rz); break; - } - case ClosureColor::ADD: { - process_bsdf_closure(sg, result, closure->as_add()->closureA, w, - light_only); - process_bsdf_closure(sg, result, closure->as_add()->closureB, w, - light_only); + case TRANSPARENT_ID: + case MX_TRANSPARENT_ID: + sample = BSDF_CAST(Transparent, bsdf)->sample(wo, rx, ry, rz); break; - } - default: { - const ClosureComponent* comp = closure->as_comp(); - Color3 cw = w * comp->w; - if (comp->id == EMISSION_ID) - result.Le += cw; - else if (comp->id == MX_UNIFORM_EDF_ID) - result.Le += cw * comp->as()->emittance; - else if (!light_only) { - bool ok = false; - switch (comp->id) { - case DIFFUSE_ID: - ok = result.bsdf.add_bsdf>( - cw, *comp->as()); - break; - case OREN_NAYAR_ID: - ok = result.bsdf.add_bsdf( - cw, *comp->as()); - break; - case TRANSLUCENT_ID: - ok = result.bsdf.add_bsdf>( - cw, *comp->as()); - break; - case PHONG_ID: - ok = result.bsdf.add_bsdf(cw, *comp->as()); + case OREN_NAYAR_ID: + sample = BSDF_CAST(OrenNayar, bsdf)->sample(wo, rx, ry, rz); + break; + case TRANSLUCENT_ID: + sample = BSDF_CAST(Diffuse<1>, bsdf)->sample(wo, rx, ry, rz); + break; + case PHONG_ID: + sample = BSDF_CAST(Phong, bsdf)->sample(wo, rx, ry, rz); + break; + case WARD_ID: sample = BSDF_CAST(Ward, bsdf)->sample(wo, rx, ry, rz); break; + case REFLECTION_ID: + case FRESNEL_REFLECTION_ID: + sample = BSDF_CAST(Reflection, bsdf)->sample(wo, rx, ry, rz); + break; + case REFRACTION_ID: + sample = BSDF_CAST(Refraction, bsdf)->sample(wo, rx, ry, rz); + break; + case MICROFACET_ID: { + const int refract = ((MicrofacetBeckmannRefl*)bsdf)->refract; + const ustringhash dist = ((MicrofacetBeckmannRefl*)bsdf)->dist; + if (dist == uh_default || dist == uh_beckmann) { + switch (refract) { + case 0: + sample = BSDF_CAST(MicrofacetBeckmannRefl, bsdf) + ->sample(wo, rx, ry, rz); break; - case WARD_ID: - ok = result.bsdf.add_bsdf(cw, *comp->as()); + case 1: + sample = BSDF_CAST(MicrofacetBeckmannRefr, bsdf) + ->sample(wo, rx, ry, rz); break; - case MICROFACET_ID: { - const MicrofacetParams* mp = comp->as(); - if (mp->dist == uh_ggx) { - switch (mp->refract) { - case 0: - ok = result.bsdf.add_bsdf(cw, *mp); - break; - case 1: - ok = result.bsdf.add_bsdf(cw, *mp); - break; - case 2: - ok = result.bsdf.add_bsdf(cw, *mp); - break; - } - } else if (mp->dist == uh_beckmann || mp->dist == uh_default) { - switch (mp->refract) { - case 0: - ok = result.bsdf.add_bsdf(cw, - *mp); - break; - case 1: - ok = result.bsdf.add_bsdf(cw, - *mp); - break; - case 2: - ok = result.bsdf.add_bsdf(cw, - *mp); - break; - } - } + case 2: + sample = BSDF_CAST(MicrofacetBeckmannBoth, bsdf) + ->sample(wo, rx, ry, rz); break; } - case REFLECTION_ID: - case FRESNEL_REFLECTION_ID: - ok = result.bsdf.add_bsdf( - cw, *comp->as()); - break; - case REFRACTION_ID: - ok = result.bsdf.add_bsdf( - cw, *comp->as()); - break; - case TRANSPARENT_ID: - ok = result.bsdf.add_bsdf(cw); - break; - case MX_OREN_NAYAR_DIFFUSE_ID: { - const MxOrenNayarDiffuseParams* srcparams - = comp->as(); - if (srcparams->energy_compensation) { - // energy compensation handled by its own BSDF - ok = result.bsdf.add_bsdf( - cw, *srcparams); - } else { - // translate MaterialX parameters into existing closure - OrenNayarParams params = {}; - params.N = srcparams->N; - params.sigma = srcparams->roughness; - ok = result.bsdf.add_bsdf(cw * srcparams->albedo, - params); - } + } else if (dist == uh_ggx) { + switch (refract) { + case 0: + sample + = BSDF_CAST(MicrofacetGGXRefl, bsdf)->sample(wo, rx, ry, rz); break; - } - case MX_BURLEY_DIFFUSE_ID: { - const MxBurleyDiffuseParams& params - = *comp->as(); - ok = result.bsdf.add_bsdf(cw, params); + case 1: + sample + = BSDF_CAST(MicrofacetGGXRefr, bsdf)->sample(wo, rx, ry, rz); break; - } - case MX_DIELECTRIC_ID: { - const MxDielectricParams& params - = *comp->as(); - if (is_black(params.transmission_tint)) - ok = result.bsdf.add_bsdf< - MxMicrofacet>( - cw, params, 1.0f); - else - ok = result.bsdf.add_bsdf< - MxMicrofacet>( - cw, params, result.refraction_ior); + case 2: + sample + = BSDF_CAST(MicrofacetGGXBoth, bsdf)->sample(wo, rx, ry, rz); break; } - case MX_CONDUCTOR_ID: { - const MxConductorParams& params = *comp->as(); - ok = result.bsdf.add_bsdf< - MxMicrofacet>(cw, params, - 1.0f); - break; - }; - case MX_GENERALIZED_SCHLICK_ID: { - const MxGeneralizedSchlickParams& params - = *comp->as(); - if (is_black(params.transmission_tint)) - ok = result.bsdf.add_bsdf>(cw, params, - 1.0f); - else - ok = result.bsdf.add_bsdf< - MxMicrofacet>( - cw, params, result.refraction_ior); - break; - }; - case MX_TRANSLUCENT_ID: { - const MxTranslucentParams* srcparams - = comp->as(); - DiffuseParams params = {}; - params.N = srcparams->N; - ok = result.bsdf.add_bsdf>(cw * srcparams->albedo, - params); + } + break; + } + case MX_CONDUCTOR_ID: + sample = BSDF_CAST(MxConductor, bsdf)->sample(wo, rx, ry, rz); + break; + case MX_DIELECTRIC_ID: + if (is_black(((MxDielectricOpaque*)bsdf)->transmission_tint)) + sample = BSDF_CAST(MxDielectricOpaque, bsdf)->sample(wo, rx, ry, rz); + else + sample = BSDF_CAST(MxDielectric, bsdf)->sample(wo, rx, ry, rz); + break; + case MX_BURLEY_DIFFUSE_ID: + sample = BSDF_CAST(MxBurleyDiffuse, bsdf)->sample(wo, rx, ry, rz); + break; + case MX_OREN_NAYAR_DIFFUSE_ID: + sample = BSDF_CAST(EnergyCompensatedOrenNayar, bsdf)->sample(wo, rx, ry, rz); + break; + case MX_SHEEN_ID: + sample = BSDF_CAST(MxSheen, bsdf)->sample(wo, rx, ry, rz); + break; + case MX_GENERALIZED_SCHLICK_ID: { + const Color3& tint = ((MxGeneralizedSchlick*)bsdf)->transmission_tint; + if (is_black(tint)) { + sample = BSDF_CAST(MxGeneralizedSchlickOpaque, bsdf) + ->sample(wo, rx, ry, rz); + } else { + sample + = BSDF_CAST(MxGeneralizedSchlick, bsdf)->sample(wo, rx, ry, rz); + } + break; + } + default: break; + } + return sample; +} + + +OSL_HOSTDEVICE BSDF::Sample +CompositeBSDF::eval(const BSDF* bsdf, const Vec3& wo, const Vec3& wi) const +{ + static const ustringhash uh_ggx("ggx"); + static const ustringhash uh_beckmann("beckmann"); + static const ustringhash uh_default("default"); + + BSDF::Sample sample = {}; + switch (bsdf->id) { + case DIFFUSE_ID: sample = BSDF_CAST(Diffuse<0>, bsdf)->eval(wo, wi); break; + case TRANSPARENT_ID: + case MX_TRANSPARENT_ID: + sample = BSDF_CAST(Transparent, bsdf)->eval(wo, wi); + break; + case OREN_NAYAR_ID: + sample = BSDF_CAST(OrenNayar, bsdf)->eval(wo, wi); + break; + case TRANSLUCENT_ID: + sample = BSDF_CAST(Diffuse<1>, bsdf)->eval(wo, wi); + break; + case PHONG_ID: sample = BSDF_CAST(Phong, bsdf)->eval(wo, wi); break; + case WARD_ID: sample = BSDF_CAST(Ward, bsdf)->eval(wo, wi); break; + case REFLECTION_ID: + case FRESNEL_REFLECTION_ID: + sample = BSDF_CAST(Reflection, bsdf)->eval(wo, wi); + break; + case REFRACTION_ID: + sample = BSDF_CAST(Refraction, bsdf)->eval(wo, wi); + break; + case MICROFACET_ID: { + const int refract = ((MicrofacetBeckmannRefl*)bsdf)->refract; + const ustringhash dist = ((MicrofacetBeckmannRefl*)bsdf)->dist; + if (dist == uh_default || dist == uh_beckmann) { + switch (refract) { + case 0: + sample = BSDF_CAST(MicrofacetBeckmannRefl, bsdf)->eval(wo, wi); break; - } - case MX_TRANSPARENT_ID: { - ok = result.bsdf.add_bsdf(cw); + case 1: + sample = BSDF_CAST(MicrofacetBeckmannRefr, bsdf)->eval(wo, wi); break; - } - case MX_SUBSURFACE_ID: { - // TODO: implement BSSRDF support? - const MxSubsurfaceParams* srcparams - = comp->as(); - DiffuseParams params = {}; - params.N = srcparams->N; - ok = result.bsdf.add_bsdf>(cw * srcparams->albedo, - params); + case 2: + sample = BSDF_CAST(MicrofacetBeckmannBoth, bsdf)->eval(wo, wi); break; } - case MX_SHEEN_ID: { - const MxSheenParams& params = *comp->as(); - ok = result.bsdf.add_bsdf(cw, params); + } else if (dist == uh_ggx) { + switch (refract) { + case 0: + sample = BSDF_CAST(MicrofacetGGXRefl, bsdf)->eval(wo, wi); break; - } - case MX_LAYER_ID: { - const MxLayerParams* srcparams = comp->as(); - Color3 base_w - = w - * (Color3(1, 1, 1) - - clamp(evaluate_layer_opacity(sg, srcparams->top), - 0.f, 1.f)); - process_bsdf_closure(sg, result, srcparams->top, w, light_only); - if (!is_black(base_w)) - process_bsdf_closure(sg, result, srcparams->base, base_w, - light_only); - ok = true; + case 1: + sample = BSDF_CAST(MicrofacetGGXRefr, bsdf)->eval(wo, wi); break; - } - case MX_ANISOTROPIC_VDF_ID: - case MX_MEDIUM_VDF_ID: { - // already processed by process_medium_closure - ok = true; + case 2: + sample = BSDF_CAST(MicrofacetGGXBoth, bsdf)->eval(wo, wi); break; } - } - OSL_ASSERT(ok && "Invalid closure invoked in surface shader"); } break; } + case MX_CONDUCTOR_ID: + sample = BSDF_CAST(MxConductor, bsdf)->eval(wo, wi); + break; + case MX_DIELECTRIC_ID: + if (is_black(((MxDielectricOpaque*)bsdf)->transmission_tint)) + sample = BSDF_CAST(MxDielectricOpaque, bsdf)->eval(wo, wi); + else + sample = BSDF_CAST(MxDielectric, bsdf)->eval(wo, wi); + break; + case MX_BURLEY_DIFFUSE_ID: + sample = BSDF_CAST(MxBurleyDiffuse, bsdf)->eval(wo, wi); + break; + case MX_OREN_NAYAR_DIFFUSE_ID: + sample = BSDF_CAST(EnergyCompensatedOrenNayar, bsdf)->eval(wo, wi); + break; + case MX_SHEEN_ID: sample = ((MxSheen*)bsdf)->MxSheen::eval(wo, wi); break; + case MX_GENERALIZED_SCHLICK_ID: { + const Color3& tint = ((MxGeneralizedSchlick*)bsdf)->transmission_tint; + if (is_black(tint)) { + sample = BSDF_CAST(MxGeneralizedSchlickOpaque, bsdf)->eval(wo, wi); + } else { + sample = BSDF_CAST(MxGeneralizedSchlick, bsdf)->eval(wo, wi); + } + break; } -} - -} // anonymous namespace - -OSL_NAMESPACE_ENTER - -void -process_closure(const OSL::ShaderGlobals& sg, ShadingResult& result, - const ClosureColor* Ci, bool light_only) -{ - if (!light_only) - process_medium_closure(sg, result, Ci, Color3(1)); - process_bsdf_closure(sg, result, Ci, Color3(1), light_only); -} - -Vec3 -process_background_closure(const ClosureColor* closure) -{ - if (!closure) - return Vec3(0, 0, 0); - switch (closure->id) { - case ClosureColor::MUL: { - return closure->as_mul()->weight - * process_background_closure(closure->as_mul()->closure); - } - case ClosureColor::ADD: { - return process_background_closure(closure->as_add()->closureA) - + process_background_closure(closure->as_add()->closureB); - } - case BACKGROUND_ID: { - return closure->as_comp()->w; - } + default: break; } - // should never happen - OSL_ASSERT(false && "Invalid closure invoked in background shader"); - return Vec3(0, 0, 0); + return sample; } - OSL_NAMESPACE_EXIT diff --git a/src/testrender/shading.h b/src/testrender/shading.h index d7fdb190d..c7e6c67dc 100644 --- a/src/testrender/shading.h +++ b/src/testrender/shading.h @@ -6,24 +6,234 @@ #pragma once #include +#include #include #include #include #include "sampling.h" +// TODO: This used to be in the anonymous namespace ... +// unique identifier for each closure supported by testrender +enum ClosureIDs { + COMPONENT_BASE_ID = 0, MUL = -1, ADD = -2, + EMISSION_ID = 1, + BACKGROUND_ID, + DIFFUSE_ID, + OREN_NAYAR_ID, + TRANSLUCENT_ID, + PHONG_ID, + WARD_ID, + MICROFACET_ID, + REFLECTION_ID, + FRESNEL_REFLECTION_ID, + REFRACTION_ID, + TRANSPARENT_ID, + DEBUG_ID, + HOLDOUT_ID, + // See MATERIALX_CLOSURES in stdosl.h + MX_OREN_NAYAR_DIFFUSE_ID, + MX_BURLEY_DIFFUSE_ID, + MX_DIELECTRIC_ID, + MX_CONDUCTOR_ID, + MX_GENERALIZED_SCHLICK_ID, + MX_TRANSLUCENT_ID, + MX_TRANSPARENT_ID, + MX_SUBSURFACE_ID, + MX_SHEEN_ID, + MX_UNIFORM_EDF_ID, + MX_ANISOTROPIC_VDF_ID, + MX_MEDIUM_VDF_ID, + MX_LAYER_ID, + // TODO: adding vdfs would require extending testrender with volume support ... + EMPTY_ID +}; + + +// Closure params +namespace { + +// these structures hold the parameters of each closure type +// they will be contained inside ClosureComponent +struct EmptyParams { +}; +struct DiffuseParams { + OSL::Vec3 N; +}; +struct OrenNayarParams { + OSL::Vec3 N; + float sigma; +}; +struct PhongParams { + OSL::Vec3 N; + float exponent; +}; +struct WardParams { + OSL::Vec3 N, T; + float ax, ay; +}; +struct ReflectionParams { + OSL::Vec3 N; + float eta; +}; +struct RefractionParams { + OSL::Vec3 N; + float eta; +}; +struct MicrofacetParams { + OSL::ustringhash dist; + OSL::Vec3 N, U; + float xalpha, yalpha, eta; + int refract; +}; + +// MATERIALX_CLOSURES + +struct MxOrenNayarDiffuseParams { + OSL::Vec3 N; + OSL::Color3 albedo; + float roughness; + // optional + OSL::ustringhash label; + int energy_compensation; +}; + +struct MxBurleyDiffuseParams { + OSL::Vec3 N; + OSL::Color3 albedo; + float roughness; + // optional + OSL::ustringhash label; +}; + +// common to all MaterialX microfacet closures +struct MxMicrofacetBaseParams { + OSL::Vec3 N, U; + float roughness_x; + float roughness_y; + OSL::ustringhash distribution; + // optional + OSL::ustringhash label; +}; + +struct MxDielectricParams : public MxMicrofacetBaseParams { + OSL::Color3 reflection_tint; + OSL::Color3 transmission_tint; + float ior; + // optional + float thinfilm_thickness; + float thinfilm_ior; + + OSL_HOSTDEVICE OSL::Color3 evalR(float cos_theta) const; + OSL_HOSTDEVICE OSL::Color3 evalT(float cos_theta) const; +}; + +struct MxConductorParams : public MxMicrofacetBaseParams { + OSL::Color3 ior; + OSL::Color3 extinction; + // optional + float thinfilm_thickness; + float thinfilm_ior; + + OSL_HOSTDEVICE OSL::Color3 evalR(float cos_theta) const; + OSL_HOSTDEVICE OSL::Color3 evalT(float cos_theta) const; + + // Avoid function was declared but never referenced + // float get_ior() const + // { + // return 0; // no transmission possible + // } +}; + +struct MxGeneralizedSchlickParams : public MxMicrofacetBaseParams { + OSL::Color3 reflection_tint; + OSL::Color3 transmission_tint; + OSL::Color3 f0; + OSL::Color3 f90; + float exponent; + // optional + float thinfilm_thickness; + float thinfilm_ior; + + OSL_HOSTDEVICE OSL::Color3 evalR(float cos_theta) const; + OSL_HOSTDEVICE OSL::Color3 evalT(float cos_theta) const; +}; + +struct MxTranslucentParams { + OSL::Vec3 N; + OSL::Color3 albedo; + // optional + OSL::ustringhash label; +}; + +struct MxSubsurfaceParams { + OSL::Vec3 N; + OSL::Color3 albedo; + float transmission_depth; + OSL::Color3 transmission_color; + float anisotropy; + // optional + OSL::ustringhash label; +}; + +struct MxSheenParams { + OSL::Vec3 N; + OSL::Color3 albedo; + float roughness; + // optional + OSL::ustringhash label; +}; + +struct MxUniformEdfParams { + OSL::Color3 emittance; + // optional + OSL::ustringhash label; +}; + +struct MxLayerParams { + OSL::ClosureColor* top; + OSL::ClosureColor* base; +}; + +struct MxAnisotropicVdfParams { + OSL::Color3 albedo; + OSL::Color3 extinction; + float anisotropy; + // optional + OSL::ustringhash label; +}; + +struct MxMediumVdfParams { + OSL::Color3 albedo; + float transmission_depth; + OSL::Color3 transmission_color; + float anisotropy; + float ior; + int priority; + // optional + OSL::ustringhash label; +}; + +} + OSL_NAMESPACE_ENTER + +struct ShadingResult; + +// Cast a BSDF* to the specified sub-type +#define BSDF_CAST(BSDF_TYPE, bsdf) reinterpret_cast(bsdf) + /// Individual BSDF (diffuse, phong, refraction, etc ...) /// Actual implementations of this class are private struct BSDF { struct Sample { - Sample() : wi(0.0f), weight(0.0f), pdf(0.0f), roughness(0.0f) {} - Sample(const Sample& o) + OSL_HOSTDEVICE Sample() : wi(0.0f), weight(0.0f), pdf(0.0f), roughness(0.0f) {} + OSL_HOSTDEVICE Sample(const Sample& o) : wi(o.wi), weight(o.weight), pdf(o.pdf), roughness(o.roughness) { } - Sample(Vec3 wi, Color3 w, float pdf, float r) + OSL_HOSTDEVICE Sample(Vec3 wi, Color3 w, float pdf, float r) : wi(wi), weight(w), pdf(pdf), roughness(r) { } @@ -32,48 +242,82 @@ struct BSDF { float pdf; float roughness; }; - BSDF() {} - virtual Color3 get_albedo(const Vec3& /*wo*/) const { return Color3(1); } - virtual Sample eval(const Vec3& wo, const Vec3& wi) const = 0; - virtual Sample sample(const Vec3& wo, float rx, float ry, float rz) const - = 0; + OSL_HOSTDEVICE BSDF(ClosureIDs id=EMPTY_ID) : id(id) {} + + OSL_HOSTDEVICE Color3 get_albedo(const Vec3& /*wo*/) const + { + return Color3(1); + } + + OSL_HOSTDEVICE Sample eval(const Vec3& wo, const Vec3& wi) const + { + return {}; + } + + OSL_HOSTDEVICE Sample sample(const Vec3& wo, float rx, float ry, + float rz) const + { + return {}; + } + + ClosureIDs id; }; + /// Represents a weighted sum of BSDFS /// NOTE: no need to inherit from BSDF here because we use a "flattened" representation and therefore never nest these /// struct CompositeBSDF { - CompositeBSDF() : num_bsdfs(0), num_bytes(0) {} + OSL_HOSTDEVICE CompositeBSDF() : num_bsdfs(0), num_bytes(0) {} + OSL_HOSTDEVICE void prepare(const Vec3& wo, const Color3& path_weight, bool absorb) { float total = 0; for (int i = 0; i < num_bsdfs; i++) { - pdfs[i] = weights[i].dot(path_weight * bsdfs[i]->get_albedo(wo)) + pdfs[i] = weights[i].dot(path_weight * get_albedo(bsdfs[i], wo)) / (path_weight.x + path_weight.y + path_weight.z); + +#ifndef __CUDACC__ + // TODO: Figure out what to do with weights/albedos with negative + // components (e.g., as might happen when bipolar noise is + // used as a color). + + // The PDF is out-of-range in some test scenes on the CPU path, but + // these asserts are no-ops in release builds. The asserts are active + // on the CUDA path, so we need to skip them. assert(pdfs[i] >= 0); assert(pdfs[i] <= 1); +#endif total += pdfs[i]; } if ((!absorb && total > 0) || total > 1) { - for (int i = 0; i < num_bsdfs; i++) + for (int i = 0; i < num_bsdfs; i++) { +#ifndef __CUDACC__ pdfs[i] /= total; +#else + pdfs[i] = __fdiv_rn(pdfs[i], total); +#endif + } } } + OSL_HOSTDEVICE Color3 get_albedo(const Vec3& wo) const { Color3 result(0, 0, 0); - for (int i = 0; i < num_bsdfs; i++) - result += weights[i] * bsdfs[i]->get_albedo(wo); + for (int i = 0; i < num_bsdfs; i++) { + result += weights[i] * get_albedo(bsdfs[i], wo); + } return result; } + OSL_HOSTDEVICE BSDF::Sample eval(const Vec3& wo, const Vec3& wi) const { BSDF::Sample s = {}; for (int i = 0; i < num_bsdfs; i++) { - BSDF::Sample b = bsdfs[i]->eval(wo, wi); + BSDF::Sample b = eval(bsdfs[i], wo, wi); b.weight *= weights[i]; MIS::update_eval(&s.weight, &s.pdf, b.weight, b.pdf, pdfs[i]); s.roughness += b.roughness * pdfs[i]; @@ -81,6 +325,7 @@ struct CompositeBSDF { return s; } + OSL_HOSTDEVICE BSDF::Sample sample(const Vec3& wo, float rx, float ry, float rz) const { float accum = 0; @@ -88,7 +333,7 @@ struct CompositeBSDF { if (rx < (pdfs[i] + accum)) { rx = (rx - accum) / pdfs[i]; rx = std::min(rx, 0.99999994f); // keep result in [0,1) - BSDF::Sample s = bsdfs[i]->sample(wo, rx, ry, rz); + BSDF::Sample s = sample(bsdfs[i], wo, rx, ry, rz); s.weight *= weights[i] * (1 / pdfs[i]); s.pdf *= pdfs[i]; if (s.pdf == 0.0f) @@ -96,7 +341,7 @@ struct CompositeBSDF { // we sampled PDF i, now figure out how much the other bsdfs contribute to the chosen direction for (int j = 0; j < num_bsdfs; j++) { if (i != j) { - BSDF::Sample b = bsdfs[j]->eval(wo, s.wi); + BSDF::Sample b = eval(bsdfs[j], wo, s.wi); b.weight *= weights[j]; MIS::update_eval(&s.weight, &s.pdf, b.weight, b.pdf, pdfs[j]); @@ -110,9 +355,8 @@ struct CompositeBSDF { } template - bool add_bsdf(const Color3& w, BSDF_Args&&... args) + OSL_HOSTDEVICE bool add_bsdf(const Color3& w, BSDF_Args&&... args) { - // make sure we have enough space if (num_bsdfs >= MaxEntries) return false; if (num_bytes + sizeof(BSDF_Type) > MaxSize) @@ -127,8 +371,14 @@ struct CompositeBSDF { private: /// Never try to copy this struct because it would invalidate the bsdf pointers - CompositeBSDF(const CompositeBSDF& c); - CompositeBSDF& operator=(const CompositeBSDF& c); + OSL_HOSTDEVICE CompositeBSDF(const CompositeBSDF& c); + OSL_HOSTDEVICE CompositeBSDF& operator=(const CompositeBSDF& c); + + OSL_HOSTDEVICE Color3 get_albedo(const BSDF* bsdf, const Vec3& wo) const; + OSL_HOSTDEVICE BSDF::Sample eval(const BSDF* bsdf, const Vec3& wo, + const Vec3& wi) const; + OSL_HOSTDEVICE BSDF::Sample sample(const BSDF* bsdf, const Vec3& wo, + float rx, float ry, float rz) const; enum { MaxEntries = 8 }; enum { MaxSize = 256 * sizeof(float) }; @@ -151,12 +401,15 @@ struct ShadingResult { int priority = 0; }; +#ifndef __CUDACC__ void register_closures(ShadingSystem* shadingsys); -void +#endif + +OSL_HOSTDEVICE void process_closure(const OSL::ShaderGlobals& sg, ShadingResult& result, const ClosureColor* Ci, bool light_only); -Vec3 +OSL_HOSTDEVICE Vec3 process_background_closure(const ClosureColor* Ci); OSL_NAMESPACE_EXIT diff --git a/src/testrender/simpleraytracer.cpp b/src/testrender/simpleraytracer.cpp index 13c92b88f..477bc4a05 100644 --- a/src/testrender/simpleraytracer.cpp +++ b/src/testrender/simpleraytracer.cpp @@ -2,19 +2,21 @@ // SPDX-License-Identifier: BSD-3-Clause // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage -#include -#include +#ifndef __CUDACC__ +# include +# include -#include +# include -#ifdef USING_OIIO_PUGI +# ifdef USING_OIIO_PUGI namespace pugi = OIIO::pugi; -#endif +# endif -#include -#include "raytracer.h" -#include "shading.h" -#include "simpleraytracer.h" +# include +# include "raytracer.h" +# include "shading.h" +# include "simpleraytracer.h" +#endif // Create ustrings for all strings used by the free function renderer services. // Required to allow the reverse mapping of hash->string to work when processing messages @@ -41,6 +43,7 @@ using namespace OSL; OSL_NAMESPACE_ENTER +#ifndef __CUDACC__ static TypeDesc TypeFloatArray2(TypeDesc::FLOAT, 2); static TypeDesc TypeFloatArray4(TypeDesc::FLOAT, 4); static TypeDesc TypeIntArray2(TypeDesc::INT, 2); @@ -846,36 +849,60 @@ SimpleRaytracer::globals_from_hit(ShaderGlobals& sg, const Ray& r, sg.renderstate = &sg; } -Vec3 +#endif // #ifndef __CUDACC__ + + +#ifndef __CUDACC__ + using ShaderGlobalsType = OSL::ShaderGlobals; +#else + using ShaderGlobalsType = OSL_CUDA::ShaderGlobals; +#endif + + +OSL_HOSTDEVICE Vec3 SimpleRaytracer::eval_background(const Dual2& dir, ShadingContext* ctx, int bounce) { - ShaderGlobals sg; - memset((char*)&sg, 0, sizeof(ShaderGlobals)); + ShaderGlobalsType sg; + memset((char*)&sg, 0, sizeof(ShaderGlobalsType)); sg.I = dir.val(); sg.dIdx = dir.dx(); sg.dIdy = dir.dy(); if (bounce >= 0) sg.raytype = bounce > 0 ? Ray::DIFFUSE : Ray::CAMERA; +#ifndef __CUDACC__ shadingsys->execute(*ctx, *m_shaders[backgroundShaderID], sg); - return process_background_closure(sg.Ci); +#else + alignas(8) char closure_pool[256]; + sg.shaderID = render_params.bg_id; + execute_shader(sg, closure_pool); +#endif + return process_background_closure((const ClosureColor*) sg.Ci); } -Color3 -SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, - ShadingContext* ctx) + +OSL_HOSTDEVICE Color3 +SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, ShadingContext* ctx) { +#ifdef __CUDACC__ + // Scratch space for the output closures + alignas(8) char closure_pool[256]; + alignas(8) char light_closure_pool[256]; +#endif + Ray r = camera.get(x, y); Color3 path_weight(1, 1, 1); Color3 path_radiance(0, 0, 0); int prev_id = -1; float bsdf_pdf = std::numeric_limits< float>::infinity(); // camera ray has only one possible direction + for (int b = 0; b <= max_bounces; b++) { // trace the ray against the scene Dual2 t; - int id = prev_id; - if (!scene.intersect(r, t, id)) { + int id = prev_id; + ShaderGlobalsType sg; + if (!scene.intersect(r, t, id, &sg)) { // we hit nothing? check background shader if (backgroundShaderID >= 0) { if (b > 0 && backgroundResolution > 0) { @@ -894,8 +921,8 @@ SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, break; } +#ifndef __CUDACC__ // construct a shader globals for the hit point - ShaderGlobals sg; globals_from_hit(sg, r, t, id); const float radius = r.radius + r.spread * t.val(); int shaderID = scene.shaderid(id); @@ -904,9 +931,16 @@ SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, // execute shader and process the resulting list of closures shadingsys->execute(*ctx, *m_shaders[shaderID], sg); +#else + // The ShaderGlobals are populated in the closest-hit program, + // so we don't need to do that here. + execute_shader(sg, closure_pool); + const float radius = r.radius + r.spread * t.val(); +#endif + ShadingResult result; bool last_bounce = b == max_bounces; - process_closure(sg, result, sg.Ci, last_bounce); + process_closure(sg, result, (const ClosureColor*)sg.Ci, last_bounce); // add self-emission float k = 1; @@ -953,8 +987,10 @@ SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, Ray shadow_ray = Ray(sg.P, bg_dir.val(), radius, 0, Ray::SHADOW); Dual2 shadow_dist; - if (!scene.intersect(shadow_ray, shadow_dist, - shadow_id)) // ray reached the background? + ShaderGlobalsType shadow_sg; + + if (!scene.intersect(shadow_ray, shadow_dist, shadow_id, + &shadow_sg)) // ray reached the background? path_radiance += contrib; } } @@ -965,9 +1001,12 @@ SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, continue; // skip self if (!scene.islight(lid)) continue; // doesn't want to be sampled as a light + +#ifndef __CUDACC__ int shaderID = scene.shaderid(lid); if (shaderID < 0 || !m_shaders[shaderID]) continue; // no shader attached to this light +#endif // sample a random direction towards the object float light_pdf; Vec3 ldir = scene.sample(lid, sg.P, xi, yi, light_pdf); @@ -975,21 +1014,26 @@ SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, Color3 contrib = path_weight * b.weight * MIS::power_heuristic(light_pdf, b.pdf); + if ((contrib.x + contrib.y + contrib.z) > 0) { Ray shadow_ray = Ray(sg.P, ldir, radius, 0, Ray::SHADOW); // trace a shadow ray and see if we actually hit the target // in this tiny renderer, tracing a ray is probably cheaper than evaluating the light shader int shadow_id = id; // ignore self hit Dual2 shadow_dist; - if (scene.intersect(shadow_ray, shadow_dist, shadow_id) + ShaderGlobalsType light_sg; + if (scene.intersect(shadow_ray, shadow_dist, shadow_id, &light_sg) && shadow_id == lid) { +#ifndef __CUDACC__ // setup a shader global for the point on the light - ShaderGlobals light_sg; globals_from_hit(light_sg, shadow_ray, shadow_dist, lid); // execute the light shader (for emissive closures only) shadingsys->execute(*ctx, *m_shaders[shaderID], light_sg); +#else + execute_shader(light_sg, light_closure_pool); +#endif ShadingResult light_result; - process_closure(light_sg, light_result, light_sg.Ci, true); + process_closure(light_sg, light_result, (const ClosureColor*) light_sg.Ci, true); // accumulate contribution path_radiance += contrib * light_result.Le; } @@ -1008,12 +1052,14 @@ SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, if (!(path_weight.x > 0) && !(path_weight.y > 0) && !(path_weight.z > 0)) break; // filter out all 0's or NaNs - prev_id = id; - r.origin = sg.P; + prev_id = id; + r.origin = sg.P; } return path_radiance; } +#ifndef __CUDACC__ + Color3 SimpleRaytracer::antialias_pixel(int x, int y, ShadingContext* ctx) { @@ -1107,4 +1153,6 @@ SimpleRaytracer::clear() shaders().clear(); } +#endif // #ifndef __CUDACC__ + OSL_NAMESPACE_EXIT diff --git a/src/testrender/simpleraytracer.h b/src/testrender/simpleraytracer.h index e9cae486b..ea70e950f 100644 --- a/src/testrender/simpleraytracer.h +++ b/src/testrender/simpleraytracer.h @@ -99,6 +99,9 @@ class SimpleRaytracer : public RendererServices { OIIO::ParamValueList options; OIIO::ImageBuf pixelbuf; + int getBackgroundShaderID() const { return backgroundShaderID; } + int getBackgroundResolution() const { return backgroundResolution; } + private: // Camera parameters Matrix44 m_world_to_camera; diff --git a/src/testrender/testrender.cpp b/src/testrender/testrender.cpp index aa807953e..d5ef062cd 100644 --- a/src/testrender/testrender.cpp +++ b/src/testrender/testrender.cpp @@ -51,6 +51,7 @@ static std::string texoptions; static int xres = 640, yres = 480; static int aa = 1, max_bounces = 1000000, rr_depth = 5; static float show_albedo_scale = 0.0f; +static bool no_jitter = false; static int num_threads = 0; static int iters = 1; static std::string scenefile, imagefile; @@ -174,6 +175,8 @@ getargs(int argc, const char* argv[]) .help("Trace NxN rays per pixel"); ap.arg("-albedo %f:SCALE", &show_albedo_scale) .help("Visualize the albedo of each pixel instead of path tracing"); + ap.arg("--no-jitter", &no_jitter) + .help("Disable AA pixel jitter (OptiX mode only)"); ap.arg("--iters %d:N", &iters) .help("Number of iterations"); ap.arg("-O0", &O0) @@ -281,6 +284,7 @@ main(int argc, const char* argv[]) rend->attribute("rr_depth", rr_depth); rend->attribute("aa", aa); rend->attribute("show_albedo_scale", show_albedo_scale); + rend->attribute("no_jitter", (int)no_jitter); OIIO::attribute("threads", num_threads); #if OSL_USE_OPTIX @@ -368,6 +372,8 @@ main(int argc, const char* argv[]) // We're done with the shading system now, destroy it rend->clear(); + // We need to destroy the shading system before the renderer, because the + // shading system destructor may call into the renderer. delete shadingsys; delete rend; return EXIT_SUCCESS; diff --git a/src/testshade/CMakeLists.txt b/src/testshade/CMakeLists.txt index 88d710166..da7471620 100644 --- a/src/testshade/CMakeLists.txt +++ b/src/testshade/CMakeLists.txt @@ -24,7 +24,9 @@ if (OSL_USE_OPTIX) ) set ( testshade_cuda_headers - ../testrender/cuda/rend_lib.h ) + ../testrender/cuda/rend_lib.h + ../testrender/raytracer.h + ) # We need to make sure that the PTX files are regenerated whenever these # headers change. diff --git a/src/testshade/cuda/optix_grid_renderer.cu b/src/testshade/cuda/optix_grid_renderer.cu index 1304e9bfa..fdc44fc4b 100644 --- a/src/testshade/cuda/optix_grid_renderer.cu +++ b/src/testshade/cuda/optix_grid_renderer.cu @@ -29,7 +29,7 @@ OSL_NAMESPACE_EXIT extern "C" { -__device__ __constant__ RenderParams render_params; +__device__ __constant__ testshade::RenderParams render_params; } extern "C" __global__ void @@ -101,7 +101,7 @@ __raygen__() alignas(8) char closure_pool[256]; alignas(8) char params[256]; - ShaderGlobals sg; + OSL_CUDA::ShaderGlobals sg; // Setup the ShaderGlobals sg.I = make_float3(0, 0, 1); sg.N = make_float3(0, 0, 1); @@ -130,7 +130,7 @@ __raygen__() sg.backfacing = 0; // NB: These variables are not used in the current iteration of the sample - sg.raytype = CAMERA; + sg.raytype = OSL::Ray::CAMERA; sg.flipHandedness = 0; sg.shader2common = reinterpret_cast(render_params.shader2common); @@ -143,20 +143,20 @@ __raygen__() // Run the OSL group and init functions if (render_params.fused_callable) // call osl_init_func - optixDirectCall( + optixDirectCall( 0u, &sg /*shaderglobals_ptr*/, params /*groupdata_ptr*/, nullptr /*userdata_base_ptr*/, nullptr /*output_base_ptr*/, 0 /*shadeindex - unused*/, sbtdata->data /*interactive_params_ptr*/); else { // call osl_init_func - optixDirectCall( + optixDirectCall( 0u, &sg /*shaderglobals_ptr*/, params /*groupdata_ptr*/, nullptr /*userdata_base_ptr*/, nullptr /*output_base_ptr*/, 0 /*shadeindex - unused*/, sbtdata->data /*interactive_params_ptr*/); // call osl_group_func - optixDirectCall( + optixDirectCall( 1u, &sg /*shaderglobals_ptr*/, params /*groupdata_ptr*/, nullptr /*userdata_base_ptr*/, nullptr /*output_base_ptr*/, 0 /*shadeindex - unused*/, @@ -173,8 +173,10 @@ __raygen__() // Because clang++ 9.0 seems to have trouble with some of the texturing "intrinsics" // let's do the texture look-ups in this file. extern "C" __device__ float4 -osl_tex2DLookup(void* handle, float s, float t) +osl_tex2DLookup(void* handle, float s, float t, float dsdx, float dtdx, float dsdy, float dtdy) { + const float2 dx = {dsdx, dtdx}; + const float2 dy = {dsdy, dtdy}; cudaTextureObject_t texID = cudaTextureObject_t(handle); - return tex2D(texID, s, t); + return tex2DGrad(texID, s, t, dx, dy); } diff --git a/src/testshade/optixgridrender.cpp b/src/testshade/optixgridrender.cpp index ff64029cc..d066c777f 100644 --- a/src/testshade/optixgridrender.cpp +++ b/src/testshade/optixgridrender.cpp @@ -14,7 +14,7 @@ #include "render_params.h" #include -#include +#include #include #include #include @@ -33,6 +33,9 @@ const auto optixModuleCreateFn = optixModuleCreate; #endif +using namespace testshade; + + OSL_NAMESPACE_ENTER @@ -179,6 +182,8 @@ OptixGridRenderer::~OptixGridRenderer() { for (void* p : m_ptrs_to_free) cudaFree(p); + for (void* p : m_arrays_to_free) + cudaFreeArray(reinterpret_cast(p)); if (m_optix_ctx) OPTIX_CHECK(optixDeviceContextDestroy(m_optix_ctx)); } @@ -817,53 +822,82 @@ OptixGridRenderer::get_texture_handle(ustring filename, { auto itr = m_samplers.find(filename); if (itr == m_samplers.end()) { - // Open image + // Open image to check the number of mip levels OIIO::ImageBuf image; if (!image.init_spec(filename, 0, 0)) { errhandler().errorfmt("Could not load: {} (hash {})", filename, filename); return (TextureHandle*)nullptr; } - - OIIO::ROI roi = OIIO::get_roi_full(image.spec()); - int32_t width = roi.width(), height = roi.height(); - std::vector pixels(width * height * 4); - - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - image.getpixel(i, j, 0, &pixels[((j * width) + i) * 4 + 0]); - } - } - cudaResourceDesc res_desc = {}; + int32_t nmiplevels = image.nmiplevels(); + int32_t img_width = image.xmax() + 1; + int32_t img_height = image.ymax() + 1; // hard-code textures to 4 channels - int32_t pitch = width * 4 * sizeof(float); cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); - cudaArray_t pixelArray; - CUDA_CHECK(cudaMallocArray(&pixelArray, &channel_desc, width, height)); + cudaMipmappedArray_t mipmapArray; + cudaExtent extent = make_cudaExtent(img_width, img_height, 0); + CUDA_CHECK(cudaMallocMipmappedArray(&mipmapArray, &channel_desc, extent, + nmiplevels)); + + // Copy the pixel data for each mip level + std::vector> level_pixels(nmiplevels); + for (int32_t level = 0; level < nmiplevels; ++level) { + image.reset(filename, 0, level); + OIIO::ROI roi = OIIO::get_roi_full(image.spec()); + if (!roi.defined()) { + errhandler().errorfmt( + "Could not load mip level {}: {} (hash {})", level, + filename, filename); + return (TextureHandle*)nullptr; + } - m_ptrs_to_free.push_back(reinterpret_cast(pixelArray)); + int32_t width = roi.width(), height = roi.height(); + level_pixels[level].resize(width * height * 4); + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + image.getpixel(i, j, 0, + &level_pixels[level][((j * width) + i) * 4]); + } + } - CUDA_CHECK(cudaMemcpy2DToArray(pixelArray, - /* offset */ 0, 0, pixels.data(), pitch, - pitch, height, cudaMemcpyHostToDevice)); + cudaArray_t miplevelArray; + CUDA_CHECK( + cudaGetMipmappedArrayLevel(&miplevelArray, mipmapArray, level)); - res_desc.resType = cudaResourceTypeArray; - res_desc.res.array.array = pixelArray; + // Copy the texel data into the miplevel array + int32_t pitch = width * 4 * sizeof(float); + CUDA_CHECK(cudaMemcpy2DToArray(miplevelArray, 0, 0, + level_pixels[level].data(), pitch, + pitch, height, + cudaMemcpyHostToDevice)); + } - cudaTextureDesc tex_desc = {}; - tex_desc.addressMode[0] = cudaAddressModeWrap; - tex_desc.addressMode[1] = cudaAddressModeWrap; - tex_desc.filterMode = cudaFilterModeLinear; - tex_desc.readMode - = cudaReadModeElementType; //cudaReadModeNormalizedFloat; + int32_t pitch = img_width * 4 * sizeof(float); + cudaArray_t pixelArray; + CUDA_CHECK( + cudaMallocArray(&pixelArray, &channel_desc, img_width, img_height)); + CUDA_CHECK(cudaMemcpy2DToArray(pixelArray, 0, 0, level_pixels[0].data(), + pitch, pitch, img_height, + cudaMemcpyHostToDevice)); + m_arrays_to_free.push_back(reinterpret_cast(pixelArray)); + + cudaResourceDesc res_desc = {}; + res_desc.resType = cudaResourceTypeMipmappedArray; + res_desc.res.mipmap.mipmap = mipmapArray; + + cudaTextureDesc tex_desc = {}; + tex_desc.addressMode[0] = cudaAddressModeWrap; + tex_desc.addressMode[1] = cudaAddressModeWrap; + tex_desc.filterMode = cudaFilterModeLinear; + tex_desc.readMode = cudaReadModeElementType; tex_desc.normalizedCoords = 1; tex_desc.maxAnisotropy = 1; - tex_desc.maxMipmapLevelClamp = 99; + tex_desc.maxMipmapLevelClamp = float(nmiplevels - 1); tex_desc.minMipmapLevelClamp = 0; - tex_desc.mipmapFilterMode = cudaFilterModePoint; + tex_desc.mipmapFilterMode = cudaFilterModeLinear; tex_desc.borderColor[0] = 1.0f; tex_desc.sRGB = 0; diff --git a/src/testshade/optixgridrender.h b/src/testshade/optixgridrender.h index 5f630f922..f689217d2 100644 --- a/src/testshade/optixgridrender.h +++ b/src/testshade/optixgridrender.h @@ -105,6 +105,7 @@ class OptixGridRenderer final : public SimpleRenderer { // CUdeviceptrs that need to be freed after we are done std::vector m_ptrs_to_free; + std::vector m_arrays_to_free; }; diff --git a/src/testshade/render_params.h b/src/testshade/render_params.h index c2e9c26c9..6670fe099 100644 --- a/src/testshade/render_params.h +++ b/src/testshade/render_params.h @@ -4,6 +4,8 @@ #pragma once +namespace testshade { + struct RenderParams { float invw; float invh; @@ -40,3 +42,5 @@ struct GenericRecord { // What follows should duplicate GenericData void* data; }; + +} // namespace testshade diff --git a/testsuite/testoptix-noise/ref/out.exr b/testsuite/testoptix-noise/ref/out.exr index a1a3cefe1..3c38e37e1 100644 Binary files a/testsuite/testoptix-noise/ref/out.exr and b/testsuite/testoptix-noise/ref/out.exr differ diff --git a/testsuite/testoptix-noise/ref/out_02.exr b/testsuite/testoptix-noise/ref/out_02.exr index 7f29b6185..c97803b2a 100644 Binary files a/testsuite/testoptix-noise/ref/out_02.exr and b/testsuite/testoptix-noise/ref/out_02.exr differ diff --git a/testsuite/testoptix-noise/run.py b/testsuite/testoptix-noise/run.py index 865b73183..3a787e3dd 100755 --- a/testsuite/testoptix-noise/run.py +++ b/testsuite/testoptix-noise/run.py @@ -7,5 +7,5 @@ failthresh = 0.03 # allow a little more LSB noise between platforms failpercent = .5 outputs = [ "out.exr", "out_02.exr" ] -command = testrender("-optix -res 320 240 scene.xml out.exr") -command += testrender("-optix -res 320 240 scene_02.xml out_02.exr") +command = testrender("-optix -res 320 240 -no-jitter -albedo 1.0 scene.xml out.exr") +command += testrender("-optix -res 320 240 -no-jitter -albedo 1.0 scene_02.xml out_02.exr") diff --git a/testsuite/testoptix-reparam/ref/out.exr b/testsuite/testoptix-reparam/ref/out.exr index 895e33842..deb4c66a2 100644 Binary files a/testsuite/testoptix-reparam/ref/out.exr and b/testsuite/testoptix-reparam/ref/out.exr differ diff --git a/testsuite/testoptix-reparam/run.py b/testsuite/testoptix-reparam/run.py index ef900eae8..b196355b0 100755 --- a/testsuite/testoptix-reparam/run.py +++ b/testsuite/testoptix-reparam/run.py @@ -7,4 +7,4 @@ failthresh = 0.03 # allow a little more LSB noise between platforms failpercent = .5 outputs = [ "out.exr" ] -command = testrender("-optix -res 320 240 scene.xml out.exr") +command = testrender("-optix -res 320 240 -no-jitter -albedo 1.0 scene.xml out.exr") diff --git a/testsuite/testoptix/ref/out.exr b/testsuite/testoptix/ref/out.exr index 9ab3ac64b..b3f22b78a 100644 Binary files a/testsuite/testoptix/ref/out.exr and b/testsuite/testoptix/ref/out.exr differ diff --git a/testsuite/testoptix/ref/out.txt b/testsuite/testoptix/ref/out.txt index e8fc1c646..c545d4b61 100644 --- a/testsuite/testoptix/ref/out.txt +++ b/testsuite/testoptix/ref/out.txt @@ -16,8 +16,6 @@ temp: abracadabra temp2: open sesame str: default hash(str): 3d6b922d -strlen(str): 7 -getchar(str, 0): 100 str: userdata string Output Cout to test_spline.exr diff --git a/testsuite/testoptix/ref/test_microfacet_dist.exr b/testsuite/testoptix/ref/test_microfacet_dist.exr index 34317b655..350f9c3fc 100644 Binary files a/testsuite/testoptix/ref/test_microfacet_dist.exr and b/testsuite/testoptix/ref/test_microfacet_dist.exr differ diff --git a/testsuite/testoptix/ref/test_spline.exr b/testsuite/testoptix/ref/test_spline.exr index 6d19261e9..636870be2 100644 Binary files a/testsuite/testoptix/ref/test_spline.exr and b/testsuite/testoptix/ref/test_spline.exr differ diff --git a/testsuite/testoptix/ref/test_texture.exr b/testsuite/testoptix/ref/test_texture.exr index cc780678b..0026f1084 100644 Binary files a/testsuite/testoptix/ref/test_texture.exr and b/testsuite/testoptix/ref/test_texture.exr differ diff --git a/testsuite/testoptix/run.py b/testsuite/testoptix/run.py index 833915c27..660e7e867 100755 --- a/testsuite/testoptix/run.py +++ b/testsuite/testoptix/run.py @@ -7,14 +7,14 @@ failthresh = 0.03 # allow a little more LSB noise between platforms failpercent = .5 outputs = [ "out.exr", "test_microfacet_dist.exr", "test_texture.exr", "test_spline.exr", "out.txt" ] -command = testrender("-optix -res 320 240 scene.xml out.exr") -command += testrender("-optix -res 320 240 test_microfacet_dist.xml test_microfacet_dist.exr") -command += testrender("-optix -res 1 1 test_print.xml dummy.exr") -command += testrender("-optix -res 1 1 test_compare.xml dummy.exr") -command += testrender("-optix -res 1 1 test_assign.xml dummy.exr") -command += testrender("-optix -res 1 1 test_assign_02.xml dummy.exr") -command += testrender("-optix -res 1 1 test_str_ops.xml dummy.exr") -command += testrender("-optix -res 1 1 test_userdata_string.xml dummy.exr") +command = testrender("-optix -res 320 240 -no-jitter -albedo 1.0 scene.xml out.exr") +command += testrender("-optix -res 320 240 -no-jitter -albedo 1.0 test_microfacet_dist.xml test_microfacet_dist.exr") +command += testrender("-optix -res 1 1 -no-jitter -albedo 1.0 test_print.xml dummy.exr") +command += testrender("-optix -res 1 1 -no-jitter -albedo 1.0 test_compare.xml dummy.exr") +command += testrender("-optix -res 1 1 -no-jitter -albedo 1.0 test_assign.xml dummy.exr") +command += testrender("-optix -res 1 1 -no-jitter -albedo 1.0 test_assign_02.xml dummy.exr") +command += testrender("-optix -res 1 1 -no-jitter -albedo 1.0 test_str_ops.xml dummy.exr") +command += testrender("-optix -res 1 1 -no-jitter -albedo 1.0 test_userdata_string.xml dummy.exr") command += testshade("-optix -res 256 256 test_spline -o Cout test_spline.exr") command += testshade("-optix -res 512 512 test_texture -o Cout test_texture.exr") diff --git a/testsuite/testoptix/test_str_ops.osl b/testsuite/testoptix/test_str_ops.osl index b23c79576..e7c72463e 100644 --- a/testsuite/testoptix/test_str_ops.osl +++ b/testsuite/testoptix/test_str_ops.osl @@ -6,7 +6,9 @@ surface test_str_ops (string str = "default", color Cs = 1) { printf("str: %s\n", str); printf("hash(str): %x\n", hash(str)); - printf("strlen(str): %d\n", strlen(str)); - printf("getchar(str, 0): %d\n", getchar(str, 0)); + // NB: These string operations don't work with the current + // device string implementation. + // printf("strlen(str): %d\n", strlen(str)); + // printf("getchar(str, 0): %d\n", getchar(str, 0)); Ci = Cs * diffuse (N); }