diff --git a/CMakeLists.txt b/CMakeLists.txt index 867d14fe..8beedb84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -221,6 +221,7 @@ blt_add_library( "src/Kripke/Kernel/Scattering.cpp" "src/Kripke/Kernel/Source.cpp" "src/Kripke/Kernel/SweepSubdomain.cpp" + "src/Kripke/Kernel/ErrorNorms.cpp" "src/Kripke/ParallelComm/BlockJacobiComm.cpp" "src/Kripke/ParallelComm/SweepComm.cpp" "src/Kripke/ParallelComm.cpp" diff --git a/README.md b/README.md index a4a3f009..a06f7b66 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ The steady-state solution method uses the source-iteration technique, where each 4. Rhs = LPlusTimes(PhiOut) 5. Psi = Sweep(Rhs, Psi) which is solving Psi=(Hinverse * Rhs) a.k.a _"Inverting H"_ - +Kripke can compute error norms for the type 3i problem by passing the "--compute_errors" flag on the command line. Problem 3i does not have scattering, so the scattering terms should be turned off by passing Kripke "--sigs 0,0,0". In the future we may add comparisons to the 3ii benchmark points given in the original paper for specific scattering coefficients. Building and Running ==================== diff --git a/src/Kripke/Arch/ErrorNorms.h b/src/Kripke/Arch/ErrorNorms.h new file mode 100644 index 00000000..828ae719 --- /dev/null +++ b/src/Kripke/Arch/ErrorNorms.h @@ -0,0 +1,308 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_ARCH_ERRORNORMS +#define KRIPKE_ARCH_ERRORNORMS + +#include +#include + +namespace Kripke { +namespace Arch { + +template +struct Policy_ErrorNorms; + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = seq_reduce; + + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // direction + For<1, loop_exec, // group + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = seq_reduce; + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // direction + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<1, loop_exec, // group + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = seq_reduce; + using ExecPolicy = + KernelPolicy< + For<1, loop_exec, // group + For<0, loop_exec, // direction + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = seq_reduce; + using ExecPolicy = + KernelPolicy< + For<1, loop_exec, // group + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<0, loop_exec, // direction + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = seq_reduce; + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<0, loop_exec, // direction + For<1, loop_exec, // group + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = seq_reduce; + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<1, loop_exec, // group + For<0, loop_exec, // direction + Lambda<0> + > + > + > + > + > + >; +}; + + + + +#ifdef KRIPKE_USE_OPENMP + + +template<> +struct Policy_ErrorNorms> { + + using ReducePolicy = omp_reduce; + + using ExecPolicy = + KernelPolicy< + Collapse, // direction, group + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = omp_reduce; + using ExecPolicy = + KernelPolicy< + For<0, omp_parallel_for_exec, // direction + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<1, loop_exec, // group + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = omp_reduce; + using ExecPolicy = + KernelPolicy< + Collapse, // group, direction + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = omp_reduce; + using ExecPolicy = + KernelPolicy< + For<1, omp_parallel_for_exec, // group + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<0, loop_exec, // direction + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = omp_reduce; + using ExecPolicy = + KernelPolicy< + Hyperplane<2, seq_exec, ArgList<3,4>, omp_parallel_collapse_exec, + For<0, loop_exec, // direction + For<1, loop_exec, // group + Lambda<0> + > + > + > + >; +}; + + +template<> +struct Policy_ErrorNorms> { + using ReducePolicy = omp_reduce; + using ExecPolicy = + KernelPolicy< + Hyperplane<2, seq_exec, ArgList<3,4>, omp_parallel_collapse_exec, + For<1, loop_exec, // group + For<0, loop_exec, // direction + Lambda<0> + > + > + > + >; +}; + +#endif // KRIPKE_USE_OPENMP + + +#ifdef KRIPKE_USE_CUDA + +template<> +struct Policy_ErrorNorms> : public Policy_ErrorNorms> {}; + +template<> +struct Policy_ErrorNorms> : public Policy_ErrorNorms> {}; + +template<> +struct Policy_ErrorNorms> : public Policy_ErrorNorms> {}; + +template<> +struct Policy_ErrorNorms> : public Policy_ErrorNorms> {}; + +template<> +struct Policy_ErrorNorms> : public Policy_ErrorNorms> {}; + +template<> +struct Policy_ErrorNorms> : public Policy_ErrorNorms> {}; + +#endif // KRIPKE_USE_CUDA + +} +} + +#endif diff --git a/src/Kripke/ClipLiangBarsky3D.hh b/src/Kripke/ClipLiangBarsky3D.hh new file mode 100644 index 00000000..14486d3d --- /dev/null +++ b/src/Kripke/ClipLiangBarsky3D.hh @@ -0,0 +1,170 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef __CLIPLIANGBARSKY3D_HH__ +#define __CLIPLIANGBARSKY3D_HH__ + +#include +#include +#include + +namespace KKC_CLIP_3D { + + // Implmentation of Liang Barsky algorithm for clipping against coordinate aligned Cartesian boxes + // This code essentially follows that in : + // Foley, Van Dam, Feiner, and Hughes, "Computer Graphics", 2nd Edition in C, 1996. + /* We make a slight modification in an attempt to make the clipping more robust to degenerate intersections and lines parallel to edges. + Basically, we "fuzz" up the box near machine epsilon (relative to the scale of the box coordinates). The box can either be a bit bigger than it + really is or a bit smaller depending on the sign of a template argument. Note this also affects whether the box boundary closes the box volume or not. + If we were really worried about this, we would use either robust predicates (adaptive precision math) + or an implementation of Simulation of Simplicity. The implementation here is basically using tolerances. + */ + + // this is just a helper to clean things up a bit in the code, there is an interface that avoids calling codes needing to know about this + template + using Vector3D = std::array; + + template + inline + auto + clip_or_reject(const T &norm_dot_line, // input: the cutting plane's outward normal dotted with the line + const T &norm_dot_dir, // input: the same outward normal dotted with a line going from the start point to a point on the cutting plane + const T &line_scale, // input: the scaling used in the tolerance of the normal dotted with the line + const T &dir_scale, // input: the scaling used in the tolerance of the normal dotted with the direction vector + T &t_min, T &t_max) // input and output: the parameter space endpoints of the clipped line, if it is clipped + { + // return output: true if we can accept this line or false if we might be able to reject it + bool accept = true; + T t; + + T denom = -norm_dot_line; // I like making the input to this function clear, so do this to make the logic follow Foley et al. + if ( denom > std::abs(line_scale) ) + { // entering itersection + t = norm_dot_dir/denom; + if ( (t-t_max)>std::numeric_limits::epsilon() ) // note that t\in[0,1] is usually an O(1) number, so this comparison is appropriate, however we should really use a robust predicate + accept=false; + else if ( (t-t_min)>std::numeric_limits::epsilon() ) + t_min = t; // found a new t_min on the line segment + } + else if ( denom < -std::abs(line_scale) ) + { // leaving intersection + t = norm_dot_dir/denom; + if ( (t_min-t)>std::numeric_limits::epsilon() ) // note that t\in[0,1] is usually an O(1) number, so this comparison is appropriate, however we should really use a robust predicate + accept=false; + else if ( (t_max-t)>std::numeric_limits::epsilon() ) + t_max = t; // found a new t_max on the line segment + } + else if ( norm_dot_dir > dir_scale ) + { + accept = false; + } + return accept; + } + + template // increase REL_EPS to make the inersection calculation more "sloppy", note making REL_EPS negative effectively shrinks the box + inline + auto + LiangBarskyClip3DBox(const Vector3D &box_lower_bounds, const Vector3D &box_upper_bounds, // input: bounding points for the clipping box + const Vector3D &p0, const Vector3D &p1, // input: the end points of the line segment to clip + T& t_min, T&t_max) // output: the line paramter coordinates of the clipping endpoints + { + // return output: true if the line has a segment inside the box, false if not + bool segment_inside_box = false; + + t_min = 0.0; + t_max = 1.0; + + Vector3D dx, dx_box; + T unperturbed_l1_norm=0, coord_norm=0, box_norm=0; // norms used for various tolerances + for ( size_t i=0; i<3; i++ ) + { + unperturbed_l1_norm += std::abs(p1[i] - p0[i]); + + // strictly speaking we don't need the following stuff yet, but lets compute it anyways + coord_norm += (std::abs(p1[i])+std::abs(p0[i]))/6.0; // this will give us an estimate for the average scale of the coordinates + box_norm += (std::abs(box_lower_bounds[i]) + std::abs(box_upper_bounds[i]) + p0[i])/9.0; + dx[i] = p1[i] - p0[i]; + dx_box[i] = box_upper_bounds[i] - box_lower_bounds[i]; + } + + // check to see if the line segment is degenerate + if (unperturbed_l1_norm < std::numeric_limits::epsilon()*coord_norm || unperturbed_l1_norm::min() ) + { // this line segment is degenerate, really a point + // check to see if this thing is in or out of the box, use only one of the unperturbed points + bool is_inside_box = true; + for ( size_t i=0; i<3 && is_inside_box; i++ ) + is_inside_box = (p0[i]>=box_lower_bounds[i]) && (p0[i]<=box_upper_bounds[i]); //yeah, sigh, should use scaled epsilon here too... + + // now get the heck out of here + if (is_inside_box) + return true; + else + return false; + } + + const T line_scale = coord_norm * REL_EPS * std::numeric_limits::epsilon(); + const T box_scale = box_norm * REL_EPS * std::numeric_limits::epsilon(); + // continue with the line segment clipping since we now know we have an actual line segment + // note that for the left, bottom and back faces we use the box lower bounds for the direction calculation + // for the right, top and front faces we use the box upper bounds for the direction calculation + if (clip_or_reject( -dx[0], -(p0[0]-box_lower_bounds[0]), line_scale, box_scale, t_min, t_max) ) // left face + if (clip_or_reject( dx[0], (p0[0]-box_upper_bounds[0]), line_scale, box_scale, t_min, t_max) ) // right face + if (clip_or_reject( -dx[1], -(p0[1]-box_lower_bounds[1]), line_scale, box_scale, t_min, t_max) ) // bottom face + if (clip_or_reject( dx[1], (p0[1]-box_upper_bounds[1]), line_scale, box_scale, t_min, t_max) ) // top face + if (clip_or_reject( -dx[2], -(p0[2]-box_lower_bounds[2]), line_scale, box_scale, t_min, t_max) ) // back face + if (clip_or_reject( dx[2], (p0[2]-box_upper_bounds[2]), line_scale, box_scale, t_min, t_max) ) // front face + segment_inside_box = std::abs(t_max-t_min)>std::numeric_limits::epsilon(); // Foley et al don't check this but I do... + + return segment_inside_box; + } + + // here is an interface that does not use "internal" data structures like Vector3D + + template // increase REL_EPS to make the inersection calculation more "sloppy", note making REL_EPS negative effectively shrinks the box + inline + auto + LiangBarskyClip3DBox_plain_types(const T&x_lo, const T&y_lo, const T&z_lo, // input: box lower bounds + const T&x_hi, const T&y_hi, const T&z_hi, // input: box upper bounds + const T&x_p0, const T&y_p0, const T&z_p0, // input: starting point + const T&x_p1, const T&y_p1, const T&z_p1, // input: ending point + T& t_min, T&t_max) // output: the line paramter coordinates of the clipping endpoints + { + return LiangBarskyClip3DBox(Vector3D{{x_lo,y_lo,z_lo}}, + Vector3D{{x_hi,y_hi,z_hi}}, + Vector3D{{x_p0,y_p0,z_p0}}, + Vector3D{{x_p1,y_p1,z_p1}}, + t_min, t_max); + } + +} + +#endif diff --git a/src/Kripke/Core/Comm.h b/src/Kripke/Core/Comm.h index 9b1b15f7..f944afcd 100644 --- a/src/Kripke/Core/Comm.h +++ b/src/Kripke/Core/Comm.h @@ -217,6 +217,18 @@ class Comm : public Kripke::Core::BaseVar { return value; } + /** + * Allreduce MAX a single value. + * Without MPI, this is a NOP returning the value passed in + */ + RAJA_INLINE + double allReduceMaxDouble(double value) const { +#ifdef KRIPKE_USE_MPI + MPI_Allreduce(MPI_IN_PLACE, &value, 1, MPI_DOUBLE, MPI_MAX, m_comm); +#endif + return value; + } + private: #ifdef KRIPKE_USE_MPI MPI_Comm m_comm; diff --git a/src/Kripke/InputVariables.cpp b/src/Kripke/InputVariables.cpp index df4b39aa..21cf51f9 100644 --- a/src/Kripke/InputVariables.cpp +++ b/src/Kripke/InputVariables.cpp @@ -56,7 +56,8 @@ InputVariables::InputVariables() : niter(10), parallel_method(PMETHOD_SWEEP), num_material_subsamples(4), - run_name("kripke") + run_name("kripke"), + compute_errors(false) { num_zonesets_dim[0] = 1; num_zonesets_dim[1] = 1; diff --git a/src/Kripke/InputVariables.h b/src/Kripke/InputVariables.h index cd1cacbf..6de1eb69 100644 --- a/src/Kripke/InputVariables.h +++ b/src/Kripke/InputVariables.h @@ -71,6 +71,7 @@ struct InputVariables { // Output Options std::string run_name; // Name to use when generating output files + bool compute_errors; // compute error and solution norms for the no-scattering (type i) problem }; #endif diff --git a/src/Kripke/KSN_AnalyticalSolutions.hh b/src/Kripke/KSN_AnalyticalSolutions.hh new file mode 100644 index 00000000..1ef6423e --- /dev/null +++ b/src/Kripke/KSN_AnalyticalSolutions.hh @@ -0,0 +1,679 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + + +#ifndef KSN_ANALYTICAL_SOLUTIONS_HH +#define KSN_ANALYTICAL_SOLUTIONS_HH + +//#define KKC_DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ClipLiangBarsky3D.hh" + +/* + +This file provides utilities for computing exact solutions of the KSN "type i" (i.e. no scattering) benchmark problems from: + +It requires a companion file called ClipLiangBarsky3D.hh. + +The easiest way to use this code is something like: +-------------------------------------------------- +#include "KSN_AnalyticalSolutions.hh" + +using namespace KSN_AnalyticalSolutions; + +Problem_3 problem; +double x = -4.5; +double y = -10.5; +double z = -9.5; +double xdir = -0.14451; +double ydir = -0.014233; +double zdir = -0.989401; + +double flux = compute_exact_solution(x,y,z, + xdir, ydir, zdir, + problem); +-------------------------------------------------- +where x,y and z are the coordinates where you would like the solution evaluated and +and xdir, ydir, zdir are components of the unit vector pointing in the streaming direction. + +You can also ask for the integrated flux at a point (not normalized by 4pi) by doing something like: +-------------------------------------------------- +double integral = integrate_total_flux(x,y,z, + problem, + N_azimuthal,N_polar); +-------------------------------------------------- +where N_azimuthal and N_polar are the number of quadrature cells in the azimuthal and polar directions. Note +4-pt Gauss quadrature is used for the surface integral. + */ + + +namespace KSN_AnalyticalSolutions { + + enum RegionType { + Source = 0, + Void = 1, + Shield = 2, + NumberOfRegionTypes + }; + + // a region keeps track of a 3D box, basically. A collection of these boxes is used to define each KSN test problem's domain. + template + struct RegionDefinition { + RegionDefinition(const T & x_mn, const T &x_mx, + const T & y_mn, const T &y_mx, + const T & z_mn, const T &z_mx, + const RegionType &type, + const std::string &desc) : x_min(x_mn), x_max(x_mx), + y_min(y_mn), y_max(y_mx), + z_min(z_mn), z_max(z_mx), + region_type(type), + description(desc) {} + + RegionDefinition(const RegionDefinition &rd) : x_min(rd.x_min), x_max(rd.x_max), + y_min(rd.y_min), y_max(rd.y_max), + z_min(rd.z_min), z_max(rd.z_max), + region_type(rd.region_type), + description(rd.description) {} + + virtual ~RegionDefinition() {} + + public: + T x_min, x_max, y_min, y_max, z_min, z_max; + RegionType region_type; + std::string description; + + private: + inline RegionDefinition() {}; + + }; + + template + class TestProblem { + public: + // this is basically a class that holds the list of regions for each test problem + // Note that we define a region as a box, each box can have boxes nexted in it from other regions, the nested regions are effectively carved out of + // the enclosing region when the ray-trace is performed. + // HOWEVER: this is not a generic constructive solid geometry code, it has some shortcuts specific to this application because the developer is lazy + // do not think you can simply lay out a bunch of arbitrarily overlapping boxes and expect it to work... + using iterator = typename std::list >::iterator; + using const_iterator = typename std::list >::const_iterator; + + TestProblem() {} + virtual ~TestProblem() {} + std::list > regions; + }; + + template + class Problem_TEST_GEOM : public TestProblem { + public: + Problem_TEST_GEOM() + { + this->regions.push_back(RegionDefinition(0, 10, + 0, 10, + 0, 10, + Source, + "Source")); + + this->regions.push_back(RegionDefinition(0, 50, + 0, 50, + 0, 50, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(0, 100, + 0, 100, + 0, 100, + Shield, + "Shield")); + } + + }; + + + template + class Problem_TEST_GEOM_2 : public TestProblem { + public: + Problem_TEST_GEOM_2() + { + this->regions.push_back(RegionDefinition(0, 10, + 0, 10, + 0, 10, + Source, + "Source")); + + this->regions.push_back(RegionDefinition(0, 10, + 10, 50, + 0, 10, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(0, 10, + 50, 100, + 0, 10, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(0, 100, + 0, 100, + 0, 100, + Shield, + "Shield")); + } + + }; + + + // The test problems define reflection boundary condtions at the x=0, y=0 and z=0 planes. We just compute it over the whole (unreflected) geometry. + template + class Problem_1 : public TestProblem { + public: + Problem_1() + { + this->regions.push_back(RegionDefinition(-10, 10, + -10, 10, + -10, 10, + Source, + "Source")); + + this->regions.push_back(RegionDefinition(-50, 50, + -50, 50, + -50, 50, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-100, 100, + -100, 100, + -100, 100, + Shield, + "Shield")); + } + + }; + + template + class Problem_2 : public TestProblem { + public: + Problem_2() + { + this->regions.push_back(RegionDefinition(-10, 10, + -10, 10, + -10, 10, + Source, + "Source")); + + this->regions.push_back(RegionDefinition(-10, 10, + -100,-10, + -10, 10, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-10, 10, + 10, 100, + -10, 10, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-60, 60, + -100, 100, + -60, 60, + Shield, + "Shield")); + } + + }; + + template + class Problem_3 : public TestProblem { + public: + Problem_3() + { + this->regions.push_back(RegionDefinition(-10, 10, + -10, 10, + -10, 10, + Source, + "Source")); + + // first leg of the channel, along the y axis + this->regions.push_back(RegionDefinition(-10, 10, + 10, 50, + -10, 10, + Void, + "Void")); + this->regions.push_back(RegionDefinition(-10, 10, + -50,-10, + -10, 10, + Void, + "Void")); + // second leg of the channel, parallel to x axis + this->regions.push_back(RegionDefinition(-40, 40, + 50, 60, + -10, 10, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40, 40, + -60,-50, + -10, 10, + Void, + "Void")); + + // third leg of the channel, there is one in each octant + this->regions.push_back(RegionDefinition(30,40, + 50,60, + 10,30, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40,-30, + 50,60, + 10,30, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(30,40, + 50,60, + -30,-10, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40,-30, + 50,60, + -30,-10, + Void, + "Void")); + + // - negative y legs + this->regions.push_back(RegionDefinition(30,40, + -60,-50, + 10,30, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40,-30, + -60,-50, + 10,30, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(30,40, + -60,-50, + -30,-10, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40,-30, + -60,-50, + -30,-10, + Void, + "Void")); + + + // fourth leg of the channel, also one in each octant + this->regions.push_back(RegionDefinition(30,40, + 50,100, + 30,40, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40,-30, + 50,100, + 30,40, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(30,40, + 50,100, + -40,-30, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40,-30, + 50,100, + -40,-30, + Void, + "Void")); + + // negative y axis part of fourth channel + this->regions.push_back(RegionDefinition(30,40, + -100,-50, + 30,40, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40,-30, + -100,-50, + 30,40, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(30,40, + -100,-50, + -40,-30, + Void, + "Void")); + + this->regions.push_back(RegionDefinition(-40,-30, + -100,-50, + -40,-30, + Void, + "Void")); + + + + // Shield, there is only one region + this->regions.push_back(RegionDefinition(-60, 60, + -100, 100, + -60, 60, + Shield, + "Shield")); + } + + }; + + template + auto + compute_exact_solution(const T &x_start, const T &y_start, const T &z_start, //input: x,y,z coordinates to compute the solution at + const T &dir_x, const T& dir_y, const T &dir_z, // direction of the flux + const TestProblem &test_problem) //input: the specific test problem to use + { + // return output: the angular flux at the given point and direction in units of number/cm^2 + using namespace KSN_AnalyticalSolutions; + + const T source[NumberOfRegionTypes] = {1.0, 0.0, 0.0}; // units are number/(cm^3 s) + const T sigmat[NumberOfRegionTypes] = {0.1, 1e-4, 0.1}; // units are 1/cm + + // first we loop through the regions to find the longest length scale, we will use this for our starting ray + T len_scale = 0.0; + for (typename TestProblem::const_iterator region=test_problem.regions.begin(); + region!=test_problem.regions.end(); + region++) + { + len_scale = std::max(len_scale, + (region->x_max-region->x_min)*(region->x_max-region->x_min) + + (region->y_max-region->y_min)*(region->y_max-region->y_min) + + (region->z_max-region->z_min)*(region->z_max-region->z_min) ); + } + len_scale = std::sqrt(len_scale); + + #ifdef KKC_DEBUG + std::cout<<"max len = "<"<"<"< segments; + + // loop through all the regions and compute an initial set of segments. Note these segments will generally overlap, so we fix that up afterwards + for (typename TestProblem::const_iterator region=test_problem.regions.begin(); + region!=test_problem.regions.end(); + region++) + { + T t_min=0; + T t_max=1; + + bool segment_inside = KKC_CLIP_3D::LiangBarskyClip3DBox_plain_types(region->x_min, region->y_min, region->z_min, + region->x_max, region->y_max, region->z_max, + x_start, y_start, z_start, + x_end, y_end, z_end, + t_min, t_max); + if (segment_inside) + { + #ifdef KKC_DEBUG + std::cout<<"found a segment with region type "<region_type<<", t_min = "<::iterator i=segments.begin(); + i!=segments.end(); + i++) + { + std::cout<<"segment t_min="<t_s<<", t_max = "<t_e<<", type = "<type<std::numeric_limits::epsilon() ) { + T sig_i = sigmat[segments[i].type]; + T sigma_sum = 0.0; + for ( size_t k=0; k + inline + auto + compute_exact_solution(const T &x_start, const T &y_start, const T &z_start, //input: x,y,z coordinates to compute the solution at + const T &theta, const T&phi, //input: azimuthal and polar coordinates (in radians) for the direction to compute the solution with + const TestProblem &test_problem) //input: the specific test problem to use + { + // NOTE: the polar angle, phi, is *measured from the equator* so \phi \in [-pi/2, pi/2] + // Why, you ask? Because. + const T& dir_x = std::cos(theta) * std::cos(phi);; + const T& dir_y = std::sin(theta) * std::cos(phi);; + const T& dir_z = std::sin(phi); + + return compute_exact_solution(x_start, y_start, z_start, dir_x, dir_y, dir_z, test_problem); + } + + + template + auto + integrate_total_flux(const T &x, const T &y, const T &z, //input: x,y,z coordinates to compute the solution at + const TestProblem &problem,//input: the specific test problem to use + const size_t &N_theta, const size_t &N_phi) //input: the number of integration intervals in each angular direction + { + // return output: an approximation of the total intergated flux + + // we use 4-point gauss quadrature + + const T pi = std::acos(-1); + + T d_theta = 2*pi/N_theta; + T d_phi = pi/N_phi; + + const T x1 = std::sqrt(525.0 - 70.0*sqrt(30.))/35.0; + const T x2 = std::sqrt(525.0 + 70.0*sqrt(30.))/35.0; + + const T w1 = (18.0 + std::sqrt(30))/36.0; + const T w2 = (18.0 - std::sqrt(30))/36.0; + + constexpr int N = 4; + T weights[] = { w2, w1, w1, w2 }; + T abscissa[] = { -x2, -x1, x1, x2 }; + T integral = 0; + for ( size_t i_theta = 0; i_theta(x,y,z, + theta[i], phi[j], + problem); + } + } + return integral; + } +} + +namespace KSN_Type_ii_reference_solutions { + // These are NOT exact solutions, but the reported reference results at specific points given in the original paper, computed with a monte carlo method + + template + class TestProblem { + public: + using SolutionPoint = std::tuple; + using iterator = typename std::vector; + using const_iterator = typename std::vector; + + TestProblem() {} + virtual ~TestProblem() {} + std::vector points; + }; + + template + class Problem_3 : public KSN_Type_ii_reference_solutions::TestProblem { + public: + Problem_3() : TestProblem::points{ + {5, 5,5,8.61578e0}, + {5,15,5,2.16130e0}, + {5,25,5,8.93784e-1}, + {5,35,5,4.78052e-1}, + {5,45,5,2.89424e-1}, + {5,55,5,1.92698e-1}, + {5,65,5,1.04982e-1}, + {5,75,5,3.37544e-2}, + {5,85,5,1.08158e-2}, + {5,95,5,3.39632e-3}, + + { 5,55,5,1.92698e-1}, + {15,55,5,6.72147e-2}, + {25,55,5,2.21799e-2}, + {35,55,5,9.90646e-3}, + {45,55,5,3.39066e-3}, + {55,55,5,1.05629e-3}, + + { 5,95,35,3.44804e-4}, + {15,95,35,2.91825e-4}, + {25,95,35,2.05793e-4}, + {35,95,35,2.62086e-4}, + {45,95,35,1.05367e-4}, + {55,95,35,4.44962e-5} + } {} + + }; +} + +#endif diff --git a/src/Kripke/Kernel.h b/src/Kripke/Kernel.h index 09d68468..c72cacfd 100644 --- a/src/Kripke/Kernel.h +++ b/src/Kripke/Kernel.h @@ -58,6 +58,7 @@ namespace Kripke { void sweepSubdomain(Kripke::Core::DataStore &data_store, Kripke::SdomId sdom_id); + void error_norms(Kripke::Core::DataStore &data_store); template RAJA_INLINE diff --git a/src/Kripke/Kernel/ErrorNorms.cpp b/src/Kripke/Kernel/ErrorNorms.cpp new file mode 100644 index 00000000..04c30c24 --- /dev/null +++ b/src/Kripke/Kernel/ErrorNorms.cpp @@ -0,0 +1,219 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include +#include + +#include + +using namespace Kripke; +using namespace Kripke::Core; + +using namespace KSN_AnalyticalSolutions; + +struct ErrorNormsSdom { + + template + void operator()(AL al, + Kripke::SdomId sdom_id, + Kripke::Core::DataStore &data_store, + double *max_nrm_ptr, + double *l1_nrm_ptr, + double *l2_nrm_ptr, + double *max_sol_nrm_ptr, + double *l1_sol_nrm_ptr, + double *l2_sol_nrm_ptr, + double *vol_grp) const + { + + using ErrorNormsPolicy = Kripke::Arch::Policy_ErrorNorms; // use population's policy to only to grab the reduction policy, avoids needing new policy for errornorms + using ReducePolicy = typename ErrorNormsPolicy::ReducePolicy; + + using ExecPolicy = typename ErrorNormsPolicy::ExecPolicy; // we can re-use this policy because this uses a similar loop structure + + auto sdom_al = getSdomAL(al, sdom_id); + + int num_directions = data_store.getVariable("Set/Direction").size(sdom_id); + int num_groups = data_store.getVariable("Set/Group").size(sdom_id); + int local_imax = data_store.getVariable("Set/ZoneI").size(sdom_id); + int local_jmax = data_store.getVariable("Set/ZoneJ").size(sdom_id); + int local_kmax = data_store.getVariable("Set/ZoneK").size(sdom_id); + + size_t global_i0 = data_store.getVariable("Set/ZoneI").lower(sdom_id); + size_t global_j0 = data_store.getVariable("Set/ZoneJ").lower(sdom_id); + size_t global_k0 = data_store.getVariable("Set/ZoneK").lower(sdom_id); + + auto xcos = sdom_al.getView(data_store.getVariable("quadrature/xcos")); + auto ycos = sdom_al.getView(data_store.getVariable("quadrature/ycos")); + auto zcos = sdom_al.getView(data_store.getVariable("quadrature/zcos")); + auto view_id = sdom_al.getView(data_store.getVariable("quadrature/id")); + auto view_jd = sdom_al.getView(data_store.getVariable("quadrature/jd")); + auto view_kd = sdom_al.getView(data_store.getVariable("quadrature/kd")); + + auto dx = sdom_al.getView(data_store.getVariable("dx")); + auto dy = sdom_al.getView(data_store.getVariable("dy")); + auto dz = sdom_al.getView(data_store.getVariable("dz")); + + auto &field_w = data_store.getVariable("quadrature/w"); + auto &field_volume = data_store.getVariable("volume"); + auto w = sdom_al.getView(field_w); + auto volume = sdom_al.getView(field_volume); + + auto psi = sdom_al.getView(data_store.getVariable("psi")); + + auto zone_layout = data_store.getVariable>("Set/Zone").getLayout(sdom_id); + + ZoneI start_i(0); + ZoneJ start_j(0); + ZoneK start_k(0); + + ZoneI end_i(local_imax); + ZoneJ end_j(local_jmax); + ZoneK end_k(local_kmax); + + RAJA::ReduceMax max_nrm_red(0.0); + RAJA::ReduceSum l1_nrm_red(0.0); + RAJA::ReduceSum l2_nrm_red(0.0); + RAJA::ReduceMax max_sol_nrm_red(0.0); + RAJA::ReduceSum l1_sol_nrm_red(0.0); + RAJA::ReduceSum l2_sol_nrm_red(0.0); + RAJA::ReduceSum vol_grp_red(0.0); + + Problem_3 ksn_problem; + // Direction d0{0}; + //int id = view_id(d0); + //int jd = view_jd(d0); + //int kd = view_kd(d0); + + RAJA::kernel( + camp::make_tuple( + RAJA::TypedRangeSegment(0, num_directions), + RAJA::TypedRangeSegment(0, num_groups), + RAJA::TypedRangeStrideSegment(*start_k, *end_k, 1), + RAJA::TypedRangeStrideSegment(*start_j, *end_j, 1), + RAJA::TypedRangeStrideSegment(*start_i, *end_i, 1) + + + ), + [=,&ksn_problem] (Direction d, Group g, ZoneK k, ZoneJ j, ZoneI i) { // don't use KRIPKE_LAMBDA here because we won't (can't) execute this on a GPU + + + double xz = -60 + dx(i)*global_i0 + dx(i)*(double(*i)+0.5); + double yz = -100 + dy(j)*global_j0 + dy(j)*(double(*j)+0.5); + double zz = -60 + dz(k)*global_k0 + dz(k)*(double(*k)+0.5); + + Zone z(zone_layout(*i, *j, *k)); + + double psi_exact = compute_exact_solution(xz,yz,zz, + view_id(d)*xcos(d),view_jd(d)*ycos(d),view_kd(d)*zcos(d), + ksn_problem); + + double err = std::abs(psi(d,g,z) - psi_exact); + + + max_nrm_red.max(err); + l1_nrm_red += w(d) * volume(z) * err; + l2_nrm_red += w(d) * volume(z) * err * err; + + max_sol_nrm_red.max(std::abs(psi(d,g,z))); + l1_sol_nrm_red += w(d) * volume(z) * std::abs(psi(d,g,z)); + l2_sol_nrm_red += w(d) * volume(z) * std::abs(psi(d,g,z)) * std::abs(psi(d,g,z)); + + vol_grp_red += w(d) * volume(z); + } + ); + + *max_nrm_ptr = fmax(*max_nrm_ptr,(double) max_nrm_red); + *l1_nrm_ptr += (double) l1_nrm_red; + *l2_nrm_ptr += (double) l2_nrm_red; + + *max_sol_nrm_ptr = fmax(*max_sol_nrm_ptr,(double) max_sol_nrm_red); + *l1_sol_nrm_ptr += (double) l1_sol_nrm_red; + *l2_sol_nrm_ptr += (double) l2_sol_nrm_red; + + *vol_grp += (double) vol_grp_red; + } + +}; + + +/** + * Returns the integral of Psi over all phase-space, to look at convergence + */ +void Kripke::Kernel::error_norms(Kripke::Core::DataStore &data_store) +{ + KRIPKE_TIMER(data_store, ErrorNorms); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + // compute errors and solution norms for psi + double max_norm = 0.0; + double l1_norm = 0.0; + double l2_norm = 0.0; + double max_sol_norm = 0.0; + double l1_sol_norm = 0.0; + double l2_sol_norm = 0.0; + double vol_grp = 0.0; + auto &field_psi = data_store.getVariable("psi"); + + for (Kripke::SdomId sdom_id : field_psi.getWorkList()){ + Kripke::dispatch(al_v, ErrorNormsSdom{}, sdom_id, + data_store, + &max_norm, &l1_norm, &l2_norm, &max_sol_norm, &l1_sol_norm, &l2_sol_norm, &vol_grp); + } + + // reduce + auto const &comm = data_store.getVariable("comm"); + + double vol = comm.allReduceSumDouble(vol_grp); + double g_max_norm = comm.allReduceMaxDouble(max_norm); + double g_max_sol_norm = comm.allReduceMaxDouble(max_sol_norm); + + double g_l1_norm = comm.allReduceSumDouble(l1_norm)/vol; + double g_l2_norm = std::sqrt(comm.allReduceSumDouble(l2_norm)/vol); + + double g_l1_sol_norm = comm.allReduceSumDouble(l1_sol_norm)/vol; + double g_l2_sol_norm = std::sqrt(comm.allReduceSumDouble(l2_sol_norm)/vol); + + if(comm.rank() == 0){ + printf(" errors(max, l2, l1) : %18.12e, %18.12e, %18.12e\n", g_max_norm, g_l2_norm, g_l1_norm); + printf(" solution(max, l2, l1) : %18.12e, %18.12e, %18.12e\n", g_max_sol_norm, g_l2_sol_norm, g_l1_sol_norm); + fflush(stdout); + } + + return ; +} diff --git a/src/Kripke/Kernel/Source.cpp b/src/Kripke/Kernel/Source.cpp index 75d440aa..6af08521 100644 --- a/src/Kripke/Kernel/Source.cpp +++ b/src/Kripke/Kernel/Source.cpp @@ -87,10 +87,9 @@ struct SourceSdom { Material material = mixelem_to_material(mix); - if(material == 2){ + if(material == 0){ Zone z = mixelem_to_zone(mix); double fraction = mixelem_to_fraction(mix); - phi_out(nm, g, z) += source_strength * fraction; } diff --git a/src/Kripke/SteadyStateSolver.cpp b/src/Kripke/SteadyStateSolver.cpp index 65d593fb..bcb8672f 100644 --- a/src/Kripke/SteadyStateSolver.cpp +++ b/src/Kripke/SteadyStateSolver.cpp @@ -46,7 +46,7 @@ using namespace Kripke::Core; /** Run solver iterations. */ -int Kripke::SteadyStateSolver (Kripke::Core::DataStore &data_store, size_t max_iter, bool block_jacobi) +int Kripke::SteadyStateSolver (Kripke::Core::DataStore &data_store, size_t max_iter, bool block_jacobi, bool compute_errors) { KRIPKE_TIMER(data_store, Solve); @@ -132,6 +132,9 @@ int Kripke::SteadyStateSolver (Kripke::Core::DataStore &data_store, size_t max_i } + if (compute_errors) + Kripke::Kernel::error_norms(data_store); + if(comm.rank() == 0){ printf(" Solver terminated\n"); } diff --git a/src/Kripke/SteadyStateSolver.h b/src/Kripke/SteadyStateSolver.h index 210447f6..3609cd0b 100644 --- a/src/Kripke/SteadyStateSolver.h +++ b/src/Kripke/SteadyStateSolver.h @@ -40,7 +40,7 @@ namespace Kripke { class DataStore; - int SteadyStateSolver(Kripke::Core::DataStore &data_store, size_t max_iter, bool block_jacobi); + int SteadyStateSolver(Kripke::Core::DataStore &data_store, size_t max_iter, bool block_jacobi, bool compute_errors); } // namespace diff --git a/src/kripke.cpp b/src/kripke.cpp index 7f073171..1e66dbc2 100644 --- a/src/kripke.cpp +++ b/src/kripke.cpp @@ -137,6 +137,9 @@ void usage(void){ printf(" sweep: Full up-wind sweep (wavefront algorithm)\n"); printf(" bj: Block Jacobi\n"); printf(" Default: --pmethod sweep\n\n"); + printf(" --compute_errors compute and report error norms for the no-scattering problem\n"); + printf(" Currently this only makes sense when using --sigs 0,0,0\n"); + printf(" Default: no errors are computed\n\n"); printf("\n"); } @@ -394,6 +397,9 @@ int main(int argc, char **argv) { else if(opt == "--layout"){ vars.al_v.layout_v = Kripke::stringToLayout(cmd.pop()); } + else if(opt == "--compute_errors"){ + vars.compute_errors = true; + } else{ printf("Unknwon options %s\n", opt.c_str()); usage(); @@ -473,7 +479,7 @@ int main(int argc, char **argv) { Kripke::generateProblem(data_store, vars); // Run the solver - Kripke::SteadyStateSolver(data_store, vars.niter, vars.parallel_method == PMETHOD_BJ); + Kripke::SteadyStateSolver(data_store, vars.niter, vars.parallel_method == PMETHOD_BJ, vars.compute_errors); // Print Timing Info auto &timing = data_store.getVariable("timing");