Merge remote-tracking branch 'b/main'

UPBGE · Oct 31, 2024 · d046efe · d046efe
2 parents 36a3314 + 77a7671
commit d046efe
Show file tree

Hide file tree

Showing 99 changed files with 1,158 additions and 880 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -725,9 +725,8 @@ endif()
 if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
   option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
   option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
-  # Radeon VII (gfx906) not currently working with HIP SDK, so left out of the list.
+  # We only support RDNA1 (gfx101X) and newer. Vega and older generations have rendering artifacts and crashing issues.
   set(CYCLES_HIP_BINARIES_ARCH
-    gfx900 gfx90c gfx902
     gfx1010 gfx1011 gfx1012
     gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1036
     gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151

diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
@@ -1743,15 +1743,15 @@ def _draw_devices(self, layout, device_type, devices):
                 if sys.platform[:3] == "win":
                     driver_version = "21.Q4"
                     col.label(
-                        text=rpt_("Requires AMD GPU with Vega or RDNA architecture"),
+                        text=rpt_("Requires AMD GPU with RDNA architecture"),
                         icon='BLANK1',
                         translate=False)
                     col.label(text=rpt_("and AMD Radeon Pro %s driver or newer") % driver_version,
                               icon='BLANK1', translate=False)
                 elif sys.platform.startswith("linux"):
                     driver_version = "22.10"
                     col.label(
-                        text=rpt_("Requires AMD GPU with Vega or RDNA architecture"),
+                        text=rpt_("Requires AMD GPU with RDNA architecture"),
                         icon='BLANK1',
                         translate=False)
                     col.label(text=rpt_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',

diff --git a/intern/cycles/device/hip/util.h b/intern/cycles/device/hip/util.h
@@ -63,7 +63,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
   hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
   hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
 
-  return (major >= 9);
+  return (major >= 10);
 }
 
 static inline bool hipSupportsDeviceOIDN(const int hipDevId)

diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
@@ -450,7 +450,8 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
 #  if OPTIX_ABI_VERSION >= 55
       builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
       builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
-                                   OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
+                                   OPTIX_BUILD_FLAG_ALLOW_COMPACTION |
+                                   OPTIX_BUILD_FLAG_ALLOW_UPDATE;
       builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
 #  else
       builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
@@ -1031,17 +1032,20 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
 
   const CUDAContextScope scope(this);
 
-  const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
+  bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
 
   /* Compute memory usage. */
   OptixAccelBufferSizes sizes = {};
   OptixAccelBuildOptions options = {};
   options.operation = operation;
-  if (use_fast_trace_bvh ||
-      /* The build flags have to match the ones used to query the built-in curve intersection
-       * program (see optixBuiltinISModuleGet above) */
-      build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES)
-  {
+  if (build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES) {
+    /* The build flags have to match the ones used to query the built-in curve intersection
+     * program (see optixBuiltinISModuleGet above) */
+    options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION |
+                         OPTIX_BUILD_FLAG_ALLOW_UPDATE;
+    use_fast_trace_bvh = true;
+  }
+  else if (use_fast_trace_bvh) {
     VLOG_INFO << "Using fast to trace OptiX BVH";
     options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
   }

diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
@@ -15,6 +15,7 @@
 #include "session/tile.h"
 #include "util/algorithm.h"
 #include "util/log.h"
+#include "util/math.h"
 #include "util/progress.h"
 #include "util/tbb.h"
 #include "util/time.h"
@@ -405,6 +406,8 @@ void PathTrace::path_trace(RenderWork &render_work)
                                     num_samples,
                                     render_work.path_trace.sample_offset);
 
+    DCHECK(isfinite(statistics.occupancy));
+
     const double work_time = time_dt() - work_start_time;
     work_balance_infos_[i].time_spent += work_time;
     work_balance_infos_[i].occupancy = statistics.occupancy;

diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -400,7 +400,12 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
     ++num_iterations;
   }
 
-  statistics.occupancy = static_cast<float>(num_busy_accum) / num_iterations / max_num_paths_;
+  if (num_iterations) {
+    statistics.occupancy = float(num_busy_accum) / num_iterations / max_num_paths_;
+  }
+  else {
+    statistics.occupancy = 0.0f;
+  }
 }
 
 DeviceKernel PathTraceWorkGPU::get_most_queued_kernel() const
@@ -1097,6 +1102,10 @@ int PathTraceWorkGPU::adaptive_sampling_convergence_check_count_active(float thr
   queue_->zero_to_device(num_active_pixels);
 
   const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
+  if (!work_size) {
+    return 0;
+  }
+
   const int reset_int = reset; /* No bool kernel arguments. */
 
   DeviceKernelArguments args(&buffers_->buffer.device_pointer,
@@ -1121,6 +1130,7 @@ int PathTraceWorkGPU::adaptive_sampling_convergence_check_count_active(float thr
 void PathTraceWorkGPU::enqueue_adaptive_sampling_filter_x()
 {
   const int work_size = effective_buffer_params_.height;
+  DCHECK_GT(work_size, 0);
 
   DeviceKernelArguments args(&buffers_->buffer.device_pointer,
                              &effective_buffer_params_.full_x,
@@ -1136,6 +1146,7 @@ void PathTraceWorkGPU::enqueue_adaptive_sampling_filter_x()
 void PathTraceWorkGPU::enqueue_adaptive_sampling_filter_y()
 {
   const int work_size = effective_buffer_params_.width;
+  DCHECK_GT(work_size, 0);
 
   DeviceKernelArguments args(&buffers_->buffer.device_pointer,
                              &effective_buffer_params_.full_x,
@@ -1151,6 +1162,9 @@ void PathTraceWorkGPU::enqueue_adaptive_sampling_filter_y()
 void PathTraceWorkGPU::cryptomatte_postproces()
 {
   const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
+  if (!work_size) {
+    return;
+  }
 
   DeviceKernelArguments args(&buffers_->buffer.device_pointer,
                              &work_size,

diff --git a/intern/cycles/integrator/work_tile_scheduler.cpp b/intern/cycles/integrator/work_tile_scheduler.cpp
@@ -56,16 +56,17 @@ void WorkTileScheduler::reset_scheduler_state()
 
   const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
                                       tile_size_.num_samples;
-  const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
-
-  VLOG_WORK << "Will schedule " << num_tiles << " tiles of " << tile_size_;
 
   if (num_path_states_in_tile == 0) {
+    VLOG_WORK << "Will not schedule any tiles: no work remained for the device";
     num_tiles_x_ = 0;
     num_tiles_y_ = 0;
     num_tiles_per_sample_range_ = 0;
   }
   else {
+    const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
+    VLOG_WORK << "Will schedule " << num_tiles << " tiles of " << tile_size_;
+
     /* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile
      * scheduling and purely focusing on the number of used path states. */
     VLOG_WORK << "Number of unused path states: "

diff --git a/intern/cycles/kernel/osl/services_gpu.h b/intern/cycles/kernel/osl/services_gpu.h
@@ -319,13 +319,16 @@ ccl_device_extern void osl_blackbody_vf(ccl_private ShaderGlobals *sg,
   *result = color_rgb;
 }
 
-#if 0
 ccl_device_extern void osl_wavelength_color_vf(ccl_private ShaderGlobals *sg,
-                                                   ccl_private float3 *result,
-                                                   float wavelength)
+                                               ccl_private float3 *result,
+                                               float lambda_nm)
 {
+  float3 color = xyz_to_rgb(nullptr, svm_math_wavelength_color_xyz(lambda_nm));
+  color *= 1.0f / 2.52f;  // Empirical scale from lg to make all comps <= 1
+
+  /* Clamp negative components to zero. */
+  *result = max(color, make_float3(0.0f, 0.0f, 0.0f));
 }
-#endif
 
 ccl_device_extern void osl_luminance_fv(ccl_private ShaderGlobals *sg,
                                         ccl_private float *result,

diff --git a/intern/cycles/kernel/svm/math_util.h b/intern/cycles/kernel/svm/math_util.h
@@ -241,4 +241,22 @@ ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)
   return color;
 }
 
+ccl_device float3 svm_math_wavelength_color_xyz(float lambda_nm)
+{
+  float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f);  // scaled 0..80
+  int i = float_to_int(ii);
+  float3 color;
+
+  if (i < 0 || i >= 80) {
+    color = make_float3(0.0f, 0.0f, 0.0f);
+  }
+  else {
+    ii -= i;
+    ccl_constant float *c = cie_color_match[i];
+    color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii);
+  }
+
+  return color;
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/wavelength.h b/intern/cycles/kernel/svm/wavelength.h
@@ -7,6 +7,8 @@
 
 #pragma once
 
+#include "kernel/svm/math_util.h"
+
 CCL_NAMESPACE_BEGIN
 
 /* Wavelength to RGB */
@@ -17,20 +19,9 @@ ccl_device_noinline void svm_node_wavelength(KernelGlobals kg,
                                              uint wavelength,
                                              uint color_out)
 {
-  float lambda_nm = stack_load_float(stack, wavelength);
-  float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f);  // scaled 0..80
-  int i = float_to_int(ii);
-  float3 color;
-
-  if (i < 0 || i >= 80) {
-    color = make_float3(0.0f, 0.0f, 0.0f);
-  }
-  else {
-    ii -= i;
-    ccl_constant float *c = cie_color_match[i];
-    color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii);
-  }
+  const float lambda_nm = stack_load_float(stack, wavelength);
 
+  float3 color = svm_math_wavelength_color_xyz(lambda_nm);
   color = xyz_to_rgb(kg, color);
   color *= 1.0f / 2.52f;  // Empirical scale from lg to make all comps <= 1
 

diff --git a/release/datafiles/assets b/release/datafiles/assets
diff --git a/scripts/addons_core/io_scene_gltf2/__init__.py b/scripts/addons_core/io_scene_gltf2/__init__.py
@@ -5,8 +5,8 @@
 bl_info = {
     'name': 'glTF 2.0 format',
     'author': 'Julien Duroure, Scurest, Norbert Nopper, Urs Hanselmann, Moritz Becher, Benjamin Schmithüsen, Jim Eckerlein, and many external contributors',
-    "version": (4, 4, 0),
-    'blender': (4, 2, 0),
+    "version": (4, 4, 12),
+    'blender': (4, 3, 0),
     'location': 'File > Import-Export',
     'description': 'Import-Export as glTF 2.0',
     'warning': '',
@@ -981,6 +981,12 @@ class ExportGLTF2_Base(ConvertGLTF2_Base):
         default=False
     )
 
+    export_loglevel: IntProperty(
+        name='Log Level',
+        description="Log Level",
+        default=-1,
+    )
+
     # Custom scene property for saving settings
     scene_key = "glTF2ExportSettings"
 
@@ -1066,7 +1072,11 @@ def execute(self, context):
         # All custom export settings are stored in this container.
         export_settings = {}
 
-        export_settings['loglevel'] = set_debug_log()
+        # Get the log level from the operator's export_loglevel parameter.
+        # A negative value (the default is -1) means "not set": get it from the Blender app debug value instead.
+        export_settings['gltf_loglevel'] = self.export_loglevel
+        if export_settings['gltf_loglevel'] < 0:
+            export_settings['loglevel'] = set_debug_log()
 
         export_settings['exported_images'] = {}
         export_settings['exported_texture_nodes'] = []

diff --git a/scripts/addons_core/io_scene_gltf2/blender/com/extras.py b/scripts/addons_core/io_scene_gltf2/blender/com/extras.py
@@ -8,7 +8,7 @@
 
 
 # Custom properties, which are in most cases present and should not be imported/exported.
-BLACK_LIST = ['cycles', 'cycles_visibility', 'cycles_curves', 'glTF2ExportSettings']
+BLACK_LIST = ['cycles', 'cycles_visibility', 'cycles_curves', 'glTF2ExportSettings', 'gltf2_mesh_applied']
 
 
 def generate_extras(blender_element):

diff --git a/scripts/addons_core/io_scene_gltf2/blender/com/gltf2_blender_ui.py b/scripts/addons_core/io_scene_gltf2/blender/com/gltf2_blender_ui.py
@@ -327,6 +327,8 @@ class MESH_PT_gltf2_mesh_variants(bpy.types.Panel):
 
     @classmethod
     def poll(self, context):
+        if not bpy.context.object:
+            return False
         return bpy.context.preferences.addons['io_scene_gltf2'].preferences.KHR_materials_variants_ui is True \
             and len(bpy.context.object.material_slots) > 0
 
@@ -380,6 +382,8 @@ class SCENE_OT_gltf2_variant_slot_add(bpy.types.Operator):
 
     @classmethod
     def poll(self, context):
+        if not bpy.context.object:
+            return False
         return len(bpy.context.object.material_slots) > 0
 
     def execute(self, context):
@@ -412,6 +416,8 @@ class SCENE_OT_gltf2_material_to_variant(bpy.types.Operator):
 
     @classmethod
     def poll(self, context):
+        if not bpy.context.object:
+            return False
         return len(bpy.context.object.material_slots) > 0 and context.object.data.gltf2_variant_pointer != ""
 
     def execute(self, context):
@@ -452,6 +458,8 @@ class SCENE_OT_gltf2_remove_material_variant(bpy.types.Operator):
 
     @classmethod
     def poll(self, context):
+        if not bpy.context.object:
+            return False
         return len(bpy.context.object.material_slots) > 0 and len(bpy.context.object.data.gltf2_variant_mesh_data) > 0
 
     def execute(self, context):