diff --git a/.gitignore b/.gitignore index 4b00bc9..c1eade0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.slo *.lo *.o +*.deps # Compiled Dynamic libraries *.so @@ -25,8 +26,8 @@ sph/output/* # Build results -[Dd]ebug/ -[Rr]elease/ +[Dd]ebug*/ +[Rr]elease*/ x64/ build/ [Bb]in/ diff --git a/Part1/PROJ_WIN/Project3.sln b/Part1/PROJ_WIN/Project3.sln index 4bc27f6..6e2a4ab 100644 --- a/Part1/PROJ_WIN/Project3.sln +++ b/Part1/PROJ_WIN/Project3.sln @@ -7,12 +7,15 @@ Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 Release|Win32 = Release|Win32 + Release2|Win32 = Release2|Win32 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|Win32.ActiveCfg = Debug|Win32 {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|Win32.Build.0 = Debug|Win32 {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|Win32.ActiveCfg = Release|Win32 {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|Win32.Build.0 = Release|Win32 + {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release2|Win32.ActiveCfg = Release2|Win32 + {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release2|Win32.Build.0 = Release2|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Part1/PROJ_WIN/Project3/Project3.vcxproj b/Part1/PROJ_WIN/Project3/Project3.vcxproj index 20ddcbc..6a72489 100644 --- a/Part1/PROJ_WIN/Project3/Project3.vcxproj +++ b/Part1/PROJ_WIN/Project3/Project3.vcxproj @@ -5,6 +5,10 @@ Debug Win32 + + Release2 + Win32 + Release Win32 @@ -21,12 +25,18 @@ MultiByte v100 + + Application + true + MultiByte + v100 + Application false true MultiByte - v110 + v100 @@ -35,6 +45,9 @@ + + + @@ -42,6 +55,10 @@ false + + + + Level3 @@ -65,18 +82,60 @@ compute_10,sm_10;compute_20,sm_20;compute_30,sm_30 + + + Level3 + MaxSpeed + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include;C:/ProgramData/NVIDIA Corporation/CUDA 
Samples/v5.5/common/inc;../shared/glew/includes;../shared/freeglut/includes + WIN32;_CONSOLE;%(PreprocessorDefinitions) + true + Default + true + MultiThreadedDLL + ProgramDatabase + true + + + true + ../shared/glew/lib;../shared/freeglut/lib;%(AdditionalLibraryDirectories) + cudart.lib; glew32.lib;glu32.lib;opengl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + Console + mainCRTStartup + true + true + Default + + + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include;C:/ProgramData/NVIDIA Corporation/CUDA Samples/v5.5/common/inc;../shared/glew/includes;../shared/freeglut/includes + $(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj + false + false + false + compute_10,sm_10;compute_20,sm_20;compute_30,sm_30 + + Level3 MaxSpeed true true + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include;C:/ProgramData/NVIDIA Corporation/CUDA Samples/v5.5/common/inc;../shared/glew/includes;../shared/freeglut/includes;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + MultiThreadedDebugDLL true true true + ../shared/glew/lib;../shared/freeglut/lib;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) + cudart.lib; glew32.lib;glu32.lib;opengl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + mainCRTStartup + Console + + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include;C:/ProgramData/NVIDIA Corporation/CUDA Samples/v5.5/common/inc;../shared/glew/includes;../shared/freeglut/includes + $(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj + @@ -87,6 +146,7 @@ Document compute_10,sm_10;compute_20,sm_20 + compute_10,sm_10;compute_20,sm_20 diff --git a/Part1/PROJ_WIN/Project3/shaders/heightFS.glsl b/Part1/PROJ_WIN/Project3/shaders/heightFS.glsl index 
e36d53e..ae926e7 100644 --- a/Part1/PROJ_WIN/Project3/shaders/heightFS.glsl +++ b/Part1/PROJ_WIN/Project3/shaders/heightFS.glsl @@ -1,4 +1,11 @@ +varying float f_height; +varying vec2 v_Texcoords; + void main(void) { - gl_FragColor = vec4(0.05,0.15,0.3,1.0); -} + float shade = (1.0-2.0*sqrt(f_height)); + float alpha = float(mod(v_Texcoords.x+0.025, 0.05) > 0.046 || + mod(v_Texcoords.y+0.025, 0.05) > 0.046); + vec4 color = mix(vec4(0.75,0.55,0.3,1.0), vec4(0.05, 0.3, 0.4, 1.0), alpha); + gl_FragColor = shade*color; +} \ No newline at end of file diff --git a/Part1/PROJ_WIN/Project3/shaders/heightVS.glsl b/Part1/PROJ_WIN/Project3/shaders/heightVS.glsl index eda1b93..7e86950 100644 --- a/Part1/PROJ_WIN/Project3/shaders/heightVS.glsl +++ b/Part1/PROJ_WIN/Project3/shaders/heightVS.glsl @@ -1,9 +1,18 @@ uniform mat4 u_projMatrix; +uniform sampler2D u_height; + attribute vec4 Position; +attribute vec2 Texcoords; + +varying vec2 v_Texcoords; +varying float f_height; void main(void) { - vec4 pos = u_projMatrix * Position; - pos.z += 0.01; - gl_Position = pos; + v_Texcoords = Texcoords; + vec4 pos = Position; + f_height = texture2D(u_height, Texcoords).w; + pos.z = -0.01-clamp(f_height,0.0,2.0); + pos = u_projMatrix * pos; + gl_Position = pos; } diff --git a/Part1/PROJ_WIN/Project3/shaders/planetFS.glsl b/Part1/PROJ_WIN/Project3/shaders/planetFS.glsl index e2c1350..9d281ca 100644 --- a/Part1/PROJ_WIN/Project3/shaders/planetFS.glsl +++ b/Part1/PROJ_WIN/Project3/shaders/planetFS.glsl @@ -1,4 +1,28 @@ -void main(void) +#version 330 + +in vec3 WorldCoord; +in vec3 ToCam; +in vec3 Up; +in vec3 Right; +in vec2 TexCoord; +out vec4 FragColor; + +void main() { - gl_FragColor = vec4(1.0); -} + vec2 coord = 2.01 * (TexCoord - vec2(0.5)); + float r = length(coord); + if (r >= 1.0) { discard; } + + float dist = length(WorldCoord); + if(dist <= 0.01) + { + FragColor = vec4(1.0); + return; + } + + vec3 N = Right*-coord.x + Up*coord.y + ToCam*sqrt(1-r*r); + vec3 L = normalize(-WorldCoord); 
+ float light = 0.1 + 0.9*clamp(dot(N,L),0.0, 1.0)*exp(-dist); + vec3 color = vec3(0.4, 0.1, 0.6); + FragColor = vec4(color*light,1.0); +} diff --git a/Part1/PROJ_WIN/Project3/shaders/planetGS.glsl b/Part1/PROJ_WIN/Project3/shaders/planetGS.glsl index 88027d3..b6f4143 100644 --- a/Part1/PROJ_WIN/Project3/shaders/planetGS.glsl +++ b/Part1/PROJ_WIN/Project3/shaders/planetGS.glsl @@ -1,15 +1,49 @@ #version 330 uniform mat4 u_projMatrix; +uniform vec3 u_cameraPos; layout (points) in; -layout (points) out; -layout (max_vertices = 1) out; +layout (triangle_strip) out; +layout (max_vertices = 4) out; + +out vec3 WorldCoord; +out vec3 ToCam; +out vec3 Up; +out vec3 Right; +out vec2 TexCoord; + +const float scale = 0.03; void main() { vec3 Position = gl_in[0].gl_Position.xyz; - gl_Position = u_projMatrix * vec4(Position, 1.0); + WorldCoord = Position; + + ToCam = normalize(u_cameraPos - Position); + Up = vec3(0.0, 0.0, 1.0); + Right = cross(ToCam, Up); + Up = cross(Right, ToCam); + + vec3 Pos = Position + scale*Right - scale*Up; + gl_Position = u_projMatrix * vec4(Pos, 1.0); + TexCoord = vec2(0.0, 0.0); + EmitVertex(); + + Pos = Position + scale*Right + scale*Up; + gl_Position = u_projMatrix * vec4(Pos, 1.0); + TexCoord = vec2(0.0, 1.0); EmitVertex(); + + Pos = Position - scale*Right - scale*Up; + gl_Position = u_projMatrix * vec4(Pos, 1.0); + TexCoord = vec2(1.0, 0.0); + EmitVertex(); + + Pos = Position - scale*Right + scale*Up; + gl_Position = u_projMatrix * vec4(Pos, 1.0); + TexCoord = vec2(1.0, 1.0); + EmitVertex(); + EndPrimitive(); -} +} \ No newline at end of file diff --git a/Part1/PROJ_WIN/src/kernel.cu.deps b/Part1/PROJ_WIN/src/kernel.cu.deps index 35aaf16..b90df28 100644 --- a/Part1/PROJ_WIN/src/kernel.cu.deps +++ b/Part1/PROJ_WIN/src/kernel.cu.deps @@ -1,18 +1,18 @@ C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\cuda_runtime.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\host_config.h -C:\Program Files (x86)\Microsoft Visual 
Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\sal.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\sal.h c:\program files (x86)\microsoft visual studio 10.0\vc\include\codeanalysis\sourceannotations.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\vadefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\vadefs.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\builtin_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\device_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\host_defines.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\driver_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\host_defines.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\limits.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stddef.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\limits.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stddef.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\surface_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\driver_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\texture_types.h @@ -68,12 +68,12 @@ c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\surface_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\texture_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\vector_types.h c:\program files\nvidia gpu 
computing toolkit\cuda\v5.5\include\host_defines.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\string.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wtime.inl -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.inl +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\string.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wtime.inl +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.inl c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\math_functions.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\builtin_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\device_types.h @@ -82,16 +82,16 @@ c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\surface_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\texture_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\vector_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\host_defines.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\math.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdlib.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cmath -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\yvals.h -C:\Program Files (x86)\Microsoft Visual Studio 
10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\use_ansi.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\math.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstdlib +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\math.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdlib.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cmath +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\yvals.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\use_ansi.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\math.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstdlib c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\cuda_surface_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\builtin_types.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\device_types.h @@ -239,285 +239,286 @@ c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\host_defines.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\vector_functions.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\device_launch_parameters.h c:\program files\nvidia gpu computing toolkit\cuda\v5.5\include\vector_types.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\swprintf.inl +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 
10.0\VC\include\swprintf.inl C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\cuda.h -c:\users\liam\project3-simulation\part1\src\glm/glm.hpp -c:\users\liam\project3-simulation\part1\src\glm\core/_fixes.hpp -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\climits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cfloat -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\float.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtwrn.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\limits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ymath.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cwchar -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wchar.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xstddef -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstddef -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstdio -c:\users\liam\project3-simulation\part1\src\glm\core/setup.hpp -c:\users\liam\project3-simulation\part1\src\glm\./core/_detail.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\setup.hpp -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cassert -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\assert.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -c:\users\liam\project3-simulation\part1\src\glm\./core/_vectorize.hpp -c:\users\liam\project3-simulation\part1\src\glm\./core/type.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_half.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_half.inl -c:\users\liam\project3-simulation\part1\src\glm\core\_detail.hpp 
-c:\users\liam\project3-simulation\part1\src\glm\core\type_float.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_half.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\setup.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_int.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\setup.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\_detail.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_gentype.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_size.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec1.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_gentype.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_float.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_int.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_size.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\_swizzle.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\_swizzle_func.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec1.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec2.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_float.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_int.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_size.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\_swizzle.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec2.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec3.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_float.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_int.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_size.hpp 
-c:\users\liam\project3-simulation\part1\src\glm\core\_swizzle.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec3.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec4.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_float.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_int.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_size.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\_swizzle.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_vec4.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat2x2.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_gentype.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat2x2.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat2x3.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat2x3.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat2x4.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat2x4.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat3x2.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat3x2.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat3x3.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat3x3.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat3x4.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat3x4.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat4x2.hpp 
-c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat4x2.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat4x3.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat4x3.inl -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat4x4.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\type_mat4x4.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_trigonometric.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\func_trigonometric.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_exponential.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\func_exponential.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_common.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\_fixes.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\func_common.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_packing.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\func_packing.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_geometric.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\func_geometric.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_matrix.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\func_matrix.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_vector_relational.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\_detail.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\func_vector_relational.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_integer.hpp -c:\users\liam\project3-simulation\part1\src\glm\core\func_integer.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/func_noise.hpp 
-c:\users\liam\project3-simulation\part1\src\glm\core\func_noise.inl -c:\users\liam\project3-simulation\part1\src\glm\./core/_swizzle.hpp -c:\users\liam\project3-simulation\part1\src\utilities.h -c:\users\liam\project3-simulation\part1\src\glm/glm.hpp -c:\users\liam\project3-simulation\part1\src\glm\core/_fixes.hpp -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\algorithm -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\memory -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xmemory -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\new -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\exception -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\eh.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\malloc.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xutility -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\utility -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iosfwd -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstring -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdbg.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\type_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xtr1common -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 
-C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\typeinfo -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\intrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\setjmp.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\immintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wmmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\nmmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\smmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 
10.0\VC\include\tmmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\pmmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\emmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xmmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mm3dnow.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mmintrin.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 
10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files 
(x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\istream -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ostream -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ios -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocnum -C:\Program Files (x86)\Microsoft Visual Studio 
10.0\VC\include\streambuf -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xiosbase -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocale -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdexcept -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xstring -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocinfo -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocinfo.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ctype.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\locale.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xdebug -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\system_error -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cerrno -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\errno.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\share.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iterator -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\sstream -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\string -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\vector -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfunctional -c:\users\liam\project3-simulation\part1\src\cudaMat4.h -c:\users\liam\project3-simulation\part1\src\glm/glm.hpp -c:\users\liam\project3-simulation\part1\src\glm\core/_fixes.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm/glm.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core/_fixes.hpp +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\climits +c:\Program Files 
(x86)\Microsoft Visual Studio 10.0\VC\include\cfloat +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\float.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtwrn.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\limits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ymath.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cwchar +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wchar.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xstddef +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstddef +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstdio +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core/setup.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/_detail.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\setup.hpp +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cassert +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\assert.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/_vectorize.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/type.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_half.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_half.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_detail.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_float.hpp 
+c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_half.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\setup.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_int.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\setup.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_detail.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_gentype.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_size.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec1.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_gentype.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_float.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_int.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_size.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_swizzle.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_swizzle_func.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec1.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec2.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_float.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_int.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_size.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_swizzle.hpp 
+c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec2.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec3.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_float.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_int.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_size.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_swizzle.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec3.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec4.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_float.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_int.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_size.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_swizzle.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_vec4.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat2x2.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_gentype.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat2x2.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat2x3.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat2x3.inl 
+c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat2x4.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat2x4.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat3x2.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat3x2.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat3x3.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat3x3.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat3x4.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat3x4.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat4x2.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat4x2.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat4x3.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat4x3.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat4x4.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\type_mat4x4.inl 
+c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_trigonometric.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_trigonometric.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_exponential.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_exponential.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_common.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_fixes.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_common.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_packing.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_packing.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_geometric.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_geometric.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_matrix.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_matrix.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_vector_relational.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\_detail.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_vector_relational.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_integer.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_integer.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/func_noise.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core\func_noise.inl +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\./core/_swizzle.hpp 
+c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\utilities.h +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm/glm.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core/_fixes.hpp +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\algorithm +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\memory +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xmemory +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\new +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\exception +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\eh.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\malloc.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xutility +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\utility +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iosfwd +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstring +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdbg.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\type_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xtr1common +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 
10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\typeinfo +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\intrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\setjmp.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\immintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\nmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\smmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\tmmintrin.h +c:\Program Files 
(x86)\Microsoft Visual Studio 10.0\VC\include\pmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\emmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ammintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mm3dnow.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft 
Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared 
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxshared +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\istream +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ostream +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ios +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocnum +c:\Program Files (x86)\Microsoft Visual Studio 
10.0\VC\include\streambuf +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xiosbase +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocale +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdexcept +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xstring +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocinfo +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocinfo.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ctype.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\locale.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xdebug +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\system_error +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cerrno +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\errno.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\share.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iterator +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\sstream +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\string +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\vector +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfunctional +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\cudaMat4.h +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm/glm.hpp +c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\glm\core/_fixes.hpp C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\cuda_runtime.h -c:\users\liam\project3-simulation\part1\src\kernel.h 
+c:\users\hms\desktop\cis565-rohithc\project3-simulation\part1\src\kernel.h C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/random.h C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/detail/config.h C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/detail/config/config.h @@ -534,7 +535,7 @@ C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/detail/co C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/detail/config/hd_warning_disable.h C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/detail/cstdint.h C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/random/discard_block_engine.h -C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iostream +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iostream C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/random/detail/random_core_access.h C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/random/detail/discard_block_engine.inl C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include\thrust/random/linear_congruential_engine.h diff --git a/Part1/src/kernel.cu b/Part1/src/kernel.cu index 121db3c..65a562e 100644 --- a/Part1/src/kernel.cu +++ b/Part1/src/kernel.cu @@ -5,24 +5,36 @@ #include "utilities.h" #include "kernel.h" -#if SHARED == 1 - #define ACC(x,y,z) sharedMemAcc(x,y,z) -#else - #define ACC(x,y,z) naiveAcc(x,y,z) -#endif - //GLOBALS dim3 threadsPerBlock(blockSize); +#define CAMHEIGHT 75.0f +#define CAMFORWARD 30.0f + int numObjects; const float planetMass = 3e8; const __device__ float starMass = 5e10; - +const __device__ float GravConst = 6.67384e-11; +__device__ bool prefetch; +__device__ int attachedToIndex = 0; const float scene_scale = 2e2; //size of the height map in simulation space +bool camUpdate = false; + +#if SHARED == 1 +// if (prefetch) + #define ACC(x,y,z) pfSharedMemAcc(x,y,z) +// else 
+// #define ACC(x,y,z) sharedMemAcc(x,y,z) + #define FLOCK(p,q,r,s,t) FlockGlobal(p,q,r,s,t) +#else + #define ACC(x,y,z) naiveAcc(x,y,z) + #define FLOCK(p,q,r,s,t) FlockGlobal(p,q,r,s,t) +#endif glm::vec4 * dev_pos; glm::vec3 * dev_vel; glm::vec3 * dev_acc; +glm::vec4 * dev_campos; void checkCUDAError(const char *msg, int line = -1) { @@ -34,6 +46,7 @@ void checkCUDAError(const char *msg, int line = -1) fprintf(stderr, "Line %d: ", line); } fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + std::cin.get (); exit(EXIT_FAILURE); } } @@ -107,39 +120,7 @@ void generateRandomVelArray(int time, int N, glm::vec3 * arr, float scale) } } -//TODO: Determine force between two bodies -__device__ -glm::vec3 calculateAcceleration(glm::vec4 us, glm::vec4 them) -{ - // G*m_us*m_them - //F = ------------- - // r^2 - // - // G*m_us*m_them G*m_them - //a = ------------- = -------- - // m_us*r^2 r^2 - - return glm::vec3(0.0f); -} - -//TODO: Core force calc kernel global memory -__device__ -glm::vec3 naiveAcc(int N, glm::vec4 my_pos, glm::vec4 * their_pos) -{ - glm::vec3 acc = calculateAcceleration(my_pos, glm::vec4(0,0,0,starMass)); - return acc; -} - - -//TODO: Core force calc kernel shared memory -__device__ -glm::vec3 sharedMemAcc(int N, glm::vec4 my_pos, glm::vec4 * their_pos) -{ - glm::vec3 acc = calculateAcceleration(my_pos, glm::vec4(0,0,0,starMass)); - return acc; -} - -//Simple Euler integration scheme +// Calculate gravitational acceleration. 
__global__ void updateF(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc) { @@ -154,19 +135,61 @@ void updateF(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc) if(index < N) acc[index] = accel; } +//Simple Euler integration scheme __global__ -void updateS(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc) +void updateS(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc, glm::vec4 * dev_campos, bool cameraUpdate) { int index = threadIdx.x + (blockIdx.x * blockDim.x); if( index < N ) { + bool isCameraAttached = false; + glm::vec4 curPos = pos [index]; + if (cameraUpdate) + { + if (isApproximately (curPos.x, dev_campos->x) && + isApproximately (curPos.y, dev_campos->y+CAMFORWARD) && + isApproximately (curPos.z, dev_campos->z-CAMHEIGHT)) + isCameraAttached = true; + } + vel[index] += acc[index] * dt; pos[index].x += vel[index].x * dt; pos[index].y += vel[index].y * dt; pos[index].z += vel[index].z * dt; + + if (isCameraAttached) + { + dev_campos->z = curPos.z + CAMHEIGHT; + dev_campos->y = curPos.y - CAMFORWARD; + } } } +//TODO: Done! +__device__ +glm::vec3 calculateAcceleration(glm::vec4 us, glm::vec4 them) +{ + // G*m_us*m_them + //F = ------------- + // r^2 + // + // G*m_us*m_them G*m_them + //a = ------------- = -------- + // m_us*r^2 r^2 + glm::vec3 forceDir = glm::vec3 (them.x - us.x, them.y - us.y, them.z - us.z); + float dist = sqrt (glm::dot (forceDir, forceDir)); + + if (dist > 0) + { + forceDir /= dist; // Force direction is now normalized and we have distance between the two objects (r)! 
+ float accVal = (GravConst * them.w) / (dist*dist); + return forceDir * accVal; + } + + return glm::vec3 (0); +} + + //Update the vertex buffer object //(The VBO is where OpenGL looks for the positions for the planets) __global__ @@ -203,20 +226,346 @@ void sendToPBO(int N, glm::vec4 * pos, float4 * pbo, int width, int height, floa glm::vec3 color(0.05, 0.15, 0.3); glm::vec3 acc = ACC(N, glm::vec4((x-w2)/c_scale_w,(y-h2)/c_scale_h,0,1), pos); - if(x= (b - 0.001)) && (a <= (b + 0.001))) + return true; + return false; +} + +// Calculate acceleration for each object using shared memory, given the number of particles (N), +// their positions (their_pos) and this object's position (my_pos). +// Written by Rohith Chandran. +__device__ +glm::vec3 sharedMemAcc(int N, glm::vec4 my_pos, glm::vec4 * their_pos) +{ + extern __shared__ glm::vec4 shared_pos []; + int threadNo = blockDim.x * blockIdx.x + threadIdx.x; + + glm::vec3 acc = glm::vec3 (0); + + // Loop over each block (assuming parallelization of objects) and load objects from global to shared memory. + // Each block of threads will load blockDim.x no. of objects from global memory to shared memory; + // Thus, we load the entire set of positions in global memory into shared memory iteratively, one block at a time. + for (int j = 0; j < ceil (N / (float)blockDim.x); j ++) + { + // Calculate global memory index that should be accessed by this thread. + int index = blockDim.x * j + threadIdx.x; + // Load the value from global to shared. + if (index < N) + shared_pos [threadIdx.x] = their_pos [index]; + +// Synchronize here. + __syncthreads(); + + // Compute acceleration for all threads. + // Loop over each object, and calculate acceleration. + for (int i = 0; i < blockDim.x; i ++) + { + // If the block of global memory we're loading into shared mem corresponds to the last block in the grid, + // it can contain less than blockDim.x elements. 
In such a situation, break out of the loop once we pass + // the last element in that "block". + if (j == (floor (N / (float)blockDim.x))) + if (i >= (N%blockDim.x)) + break; + + // A body cannot exert a force on itself, so skip.. + if (isApproximately (shared_pos [i].x, my_pos.x) && + isApproximately (shared_pos [i].y, my_pos.y) && + isApproximately (shared_pos [i].z, my_pos.z)) + continue; + + acc += calculateAcceleration(my_pos, shared_pos [i]); + } + } + + // Calculate acceleration due to star. + acc += calculateAcceleration (my_pos, glm::vec4 (0, 0, 0, starMass)); + + return acc; +} + +// Shared memory acceleration calculation with prefetching. +// Written as a separate function to compare performance. +// Written by Rohith Chandran. +__device__ +glm::vec3 pfSharedMemAcc(int N, glm::vec4 my_pos, glm::vec4 * their_pos) +{ + extern __shared__ glm::vec4 shared_pos []; + int threadNo = blockDim.x * blockIdx.x + threadIdx.x; + + glm::vec3 acc = glm::vec3 (0); + glm::vec4 prefetcher = glm::vec4 (0); + + int index = threadIdx.x; + if (index < N) + prefetcher = their_pos [index]; // Prefetch first element into register. + + for (int j = 0; j < ceil (N / (float)blockDim.x); j ++) + { + index = blockDim.x * (j+1) + threadIdx.x; + + shared_pos [threadIdx.x] = prefetcher; // Copy prefetched element into shared memory. + prefetcher = glm::vec4 (0); + __syncthreads(); + + if (index < N) + if (j < floor (N / (float)blockDim.x)) // Prefetching to stop at the penultimate block, after the final + prefetcher = their_pos [index]; // block has been loaded. + + for (int i = 0; i < blockDim.x; i ++) + { + if (j == (floor (N / (float)blockDim.x))) + if (i >= (N%blockDim.x)) + break; + + if (isApproximately (shared_pos [i].x, my_pos.x) && + isApproximately (shared_pos [i].y, my_pos.y) && + isApproximately (shared_pos [i].z, my_pos.z)) + continue; + + acc += calculateAcceleration(my_pos, shared_pos [i]); + } + } + + // Calculate acceleration due to star. 
+ acc += calculateAcceleration (my_pos, glm::vec4 (0, 0, 0, starMass)); + + return acc; +} + +// Calculate flocking velocity. +// Written by Rohith Chandran. +__device__ glm::vec3 FlockGlobal (int N, float DT, glm::vec4 my_pos, glm::vec4 *pos, glm::vec3 *vel) +{ + glm::vec3 acc = glm::vec3 (0); + glm::vec3 my_vel; + + glm::vec3 sumVelocities = glm::vec3 (0); + glm::vec3 sumPositions = glm::vec3 (0); + glm::vec3 sumSepVelocities = glm::vec3 (0); + + int neighbours = 0; + + int index = threadIdx.x + (blockIdx.x * blockDim.x); + + if (index < N) + { + my_vel = vel [index]; + + for (int i = 0; i < N; i ++) + { + glm::vec4 curPos = pos [i]; + float distance = glm::length (curPos - my_pos); + + if (distance <= 5.0) + { + sumVelocities += vel [i]; + + sumPositions.x += curPos.x; + sumPositions.y += curPos.y; + sumPositions.z += curPos.z; + + sumSepVelocities.x += (my_pos.x - curPos.x); + sumSepVelocities.y += (my_pos.y - curPos.y); + sumSepVelocities.z += (my_pos.z - curPos.z); + + neighbours ++; + } + } + + if (neighbours > 0) + { + sumSepVelocities /= neighbours; + sumPositions /= neighbours; // Centre of mass. + sumVelocities /= neighbours; + } + + // Calculate total velocity: + glm::vec3 flockVel = (safeNormalize (sumVelocities) * glm::length (my_vel))// * 0.4f // Align component + + (safeNormalize (sumPositions - glm::vec3 (my_pos)) * glm::length (my_vel))// * 0.2f // Cohesion component + + (safeNormalize (sumSepVelocities) * glm::length (my_vel));// * 0.4f; // Separation component + + acc = ((glm::length (flockVel) - glm::length (my_vel))/DT) * safeNormalize (flockVel); + } + return acc; +} + +// normalize only if length > 0 +// Written by Rohith Chandran. +inline __device__ glm::vec3 safeNormalize (glm::vec3 vectorToBeNormalized) +{ + float len = glm::length (vectorToBeNormalized); + if (len > 0.01) + return vectorToBeNormalized / len; + return vectorToBeNormalized; +} + +// Calculate acceleration for Custom Simulation (flocking). +// Written by Rohith Chandran. 
+__global__ +void updateFCustom (int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc) +{ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + glm::vec4 my_pos; + glm::vec3 accel; + + if(index < N) my_pos = pos[index]; + + accel = FLOCK (N, dt, my_pos, pos, vel) + calculateAcceleration (my_pos, glm::vec4 (0, 0, 0, starMass)); + + if(index < N) acc[index] = accel; +} + +// Update state using Verlet Integration +// Written by Rohith Chandran. +__global__ +void updateS_V (int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc, glm::vec4 * dev_campos, bool cameraUpdate) +{ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if( index < N ) + { + bool isCameraAttached = false; + glm::vec4 curPos = pos [index]; + glm::vec3 curVel = vel [index]; + glm::vec4 prevPos = curPos - (dt * glm::vec4 (curVel.x, curVel.y, curVel.z, 0)); + + if (cameraUpdate) + { + if (isApproximately (curPos.x, dev_campos->x) && + isApproximately (curPos.y, dev_campos->y+CAMFORWARD) && + isApproximately (curPos.z, dev_campos->z-CAMHEIGHT)) + isCameraAttached = true; + } + + curPos = (2.0*curPos) - prevPos + (dt * dt * glm::vec4 (acc [index].x, acc [index].y, acc [index].z, 0)); + prevPos = (curPos - prevPos) / (2.0*dt); + curVel.x = prevPos.x; + curVel.y = prevPos.y; + curVel.z = prevPos.z; + + vel[index] = curVel; + pos[index] = curPos; + + if (isCameraAttached) + { + dev_campos->z = curPos.z + CAMHEIGHT; + dev_campos->y = curPos.y - CAMFORWARD; + } + } +} + +// Update state using Leapfrog Integration. +// Written by Rohith Chandran. 
+__global__ +void updateS_LF (int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc, glm::vec4 * dev_campos, bool cameraUpdate) +{ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if( index < N ) + { + /*bool isCameraAttached = false;*/ + glm::vec4 curPos = pos[index]; + /*if (cameraUpdate) + { + if (isApproximately (curPos.x, dev_campos->x) && + isApproximately (curPos.y, dev_campos->y+CAMFORWARD) && + isApproximately (curPos.z, dev_campos->z-CAMHEIGHT)) + isCameraAttached = true; + }*/ + + curPos += glm::vec4 ((vel [index] * dt), 0); + vel[index] += acc [index] * dt; + + if (/*isCameraAttached*/cameraUpdate) + { + if (index == attachedToIndex) + { + dev_campos->z = curPos.z + CAMHEIGHT; + dev_campos->y = curPos.y - CAMFORWARD; + } + } + pos [index] = curPos; + } +} + +// Sets up velocity for leapfrog integration. +// Written by Rohith Chandran. +__global__ +void setupVelocityLF (int N, float dt, glm::vec3 * vel, glm::vec3 * acc) +{ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if( index < N ) + vel[index] += acc [index] * dt; +} + +__global__ void moveCamera (int N, glm::vec4* campos, glm::vec4* pos) +{ + for (int i = 0; i < N; i ++) + { + if (i <= attachedToIndex) + { + /*if (attachedToIndex == N) + attachedToIndex = 0; + else*/ + continue; + } + + if (glm::length (pos [i] - pos [attachedToIndex]) > 5.0) + { + *campos = pos [i]; + campos->z += CAMHEIGHT; + campos->y -= CAMFORWARD; + + attachedToIndex = i; + + break; + } + } +} + +__global__ void attachedToIndexReset () +{ + attachedToIndex = 0; +} + /************************************* * Wrappers for the __global__ calls * *************************************/ //Initialize memory, update some globals -void initCuda(int N) +void initCuda(int N, const glm::vec4 &camera_position) { numObjects = N; dim3 fullBlocksPerGrid((int)ceil(float(N)/float(blockSize))); @@ -226,21 +575,36 @@ void initCuda(int N) cudaMalloc((void**)&dev_vel, N*sizeof(glm::vec3)); checkCUDAErrorWithLine("Kernel 
failed!"); cudaMalloc((void**)&dev_acc, N*sizeof(glm::vec3)); + checkCUDAErrorWithLine("Kernel failed!"); + cudaMalloc((void**)&dev_campos, sizeof(glm::vec4)); checkCUDAErrorWithLine("Kernel failed!"); + resetAttachedToIndex (); + checkCUDAErrorWithLine("Kernel failed!"); + cudaMemcpy (dev_campos, &camera_position, sizeof (camera_position), cudaMemcpyHostToDevice); generateRandomPosArray<<>>(1, numObjects, dev_pos, scene_scale, planetMass); checkCUDAErrorWithLine("Kernel failed!"); generateCircularVelArray<<>>(2, numObjects, dev_vel, dev_pos); checkCUDAErrorWithLine("Kernel failed!"); cudaThreadSynchronize(); + + float dt = 0.1; + updateF<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc); + setupVelocityLF<<>>(numObjects, dt, dev_vel, dev_acc); } -void cudaNBodyUpdateWrapper(float dt) +void cudaNBodyUpdateWrapper(float dt, bool customSimulation) { dim3 fullBlocksPerGrid((int)ceil(float(numObjects)/float(blockSize))); - updateF<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc); + if (customSimulation) + updateFCustom<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc); + else + updateF<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc); checkCUDAErrorWithLine("Kernel failed!"); - updateS<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc); + +// updateS<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc, dev_campos, camUpdate); +// updateS_V<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc, dev_campos, camUpdate); + updateS_LF<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc, dev_campos, camUpdate); checkCUDAErrorWithLine("Kernel failed!"); cudaThreadSynchronize(); } @@ -259,4 +623,50 @@ void cudaUpdatePBO(float4 * pbodptr, int width, int height) cudaThreadSynchronize(); } +void setDevicePrefetch (bool prefetchEnabled) +{ + cudaMemcpyToSymbol (&prefetch, &prefetchEnabled, sizeof (bool), 0); +} + +glm::vec4 getCurrentCameraPosition () +{ + glm::vec4 camera_position; + cudaMemcpy (&camera_position, dev_campos, sizeof (camera_position), cudaMemcpyDeviceToHost); + return camera_position/scene_scale; 
+} +glm::vec3 getCurrentCameraLookAt () +{ + int attachedTo = 0; + cudaMemcpy (&attachedTo, &attachedToIndex, sizeof (int), cudaMemcpyDeviceToHost); + checkCUDAErrorWithLine("Kernel failed!"); + glm::vec3 accelerationDir = glm::vec3 (0); + glm::vec3 pos = glm::vec3 (0); + cudaMemcpy (&accelerationDir, &dev_acc [attachedTo], sizeof (glm::vec3), cudaMemcpyDeviceToHost); + checkCUDAErrorWithLine("Kernel failed!"); + cudaMemcpy (&pos, &dev_pos [attachedTo], sizeof (glm::vec3), cudaMemcpyDeviceToHost); + checkCUDAErrorWithLine("Kernel failed!"); + pos += accelerationDir * 5.0f; + return pos/scene_scale; +} + +void setCurrentCameraPosition (const glm::vec4 &camera_position) +{ + glm::vec4 cPos = camera_position * scene_scale; + cudaMemcpy (dev_campos, &cPos, sizeof (glm::vec4), cudaMemcpyHostToDevice); +} + +void moveCameraToNextFlock (glm::vec3 &cameraPos) +{ + moveCamera<<<1,1>>> (numObjects, dev_campos, dev_pos); +} + +void setCameraUpdate (bool shouldCameraUpdate) +{ + camUpdate = shouldCameraUpdate; +} + +void resetAttachedToIndex () +{ + attachedToIndexReset<<<1,1>>> (); +} \ No newline at end of file diff --git a/Part1/src/kernel.cu.orig b/Part1/src/kernel.cu.orig new file mode 100644 index 0000000..143d992 --- /dev/null +++ b/Part1/src/kernel.cu.orig @@ -0,0 +1,374 @@ +#include +#include +#include +#include "glm/glm.hpp" +#include "utilities.h" +#include "kernel.h" + +#if SHARED == 1 + #define ACC(x,y,z) sharedMemAcc(x,y,z) +#else + #define ACC(x,y,z) naiveAcc(x,y,z) +#endif + +//GLOBALS +dim3 threadsPerBlock(blockSize); + +int numObjects; +const float planetMass = 3e8; +const __device__ float starMass = 5e10; +const __device__ float GravConst = 6.67384e-11; +const float scene_scale = 2e2; //size of the height map in simulation space + +glm::vec4 * dev_pos; +glm::vec3 * dev_vel; +glm::vec3 * dev_acc; + +void checkCUDAError(const char *msg, int line = -1) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) + { + if( line >= 0 ) + { + 
fprintf(stderr, "Line %d: ", line); + } + fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + exit(EXIT_FAILURE); + } +} + +__host__ __device__ +unsigned int hash(unsigned int a){ + a = (a+0x7ed55d16) + (a<<12); + a = (a^0xc761c23c) ^ (a>>19); + a = (a+0x165667b1) + (a<<5); + a = (a+0xd3a2646c) ^ (a<<9); + a = (a+0xfd7046c5) + (a<<3); + a = (a^0xb55a4f09) ^ (a>>16); + return a; +} + +//Function that generates static. +__host__ __device__ +glm::vec3 generateRandomNumberFromThread(float time, int index) +{ + thrust::default_random_engine rng(hash(index*time)); + thrust::uniform_real_distribution u01(0,1); + + return glm::vec3((float) u01(rng), (float) u01(rng), (float) u01(rng)); +} + +//Generate randomized starting positions for the planets in the XY plane +//Also initialized the masses +__global__ +void generateRandomPosArray(int time, int N, glm::vec4 * arr, float scale, float mass) +{ + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if(index < N) + { + glm::vec3 rand = scale*(generateRandomNumberFromThread(time, index)-0.5f); + arr[index].x = rand.x; + arr[index].y = rand.y; + arr[index].z = 0.0f;//rand.z; + arr[index].w = mass; + } +} + +//Determine velocity from the distance from the center star. 
Not super physically accurate because +//the mass ratio is too close, but it makes for an interesting looking scene +__global__ +void generateCircularVelArray(int time, int N, glm::vec3 * arr, glm::vec4 * pos) +{ + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if(index < N) + { + glm::vec3 R = glm::vec3(pos[index].x, pos[index].y, pos[index].z); + float r = glm::length(R) + EPSILON; + float s = sqrt(G*starMass/r); + glm::vec3 D = glm::normalize(glm::cross(R/r,glm::vec3(0,0,1))); + arr[index].x = s*D.x; + arr[index].y = s*D.y; + arr[index].z = s*D.z; + } +} + +//Generate randomized starting velocities in the XY plane +__global__ +void generateRandomVelArray(int time, int N, glm::vec3 * arr, float scale) +{ + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if(index < N) + { + glm::vec3 rand = scale*(generateRandomNumberFromThread(time, index) - 0.5f); + arr[index].x = rand.x; + arr[index].y = rand.y; + arr[index].z = 0.0;//rand.z; + } +} + +//TODO: Done! +__device__ +glm::vec3 calculateAcceleration(glm::vec4 us, glm::vec4 them) +{ + // G*m_us*m_them + //F = ------------- + // r^2 + // + // G*m_us*m_them G*m_them + //a = ------------- = -------- + // m_us*r^2 r^2 + glm::vec3 forceDir = glm::vec3 (them.x - us.x, them.y - us.y, them.z - us.z); + float dist = sqrt (glm::dot (forceDir, forceDir)); + forceDir /= dist; // Force direction is now normalized and we have distance between the two objects (r)! + + float accVal = (GravConst * them.w) / (dist*dist); + + return forceDir * accVal; +} + +//TODO: Done! +__device__ +glm::vec3 naiveAcc(int N, glm::vec4 my_pos, glm::vec4 * their_pos) +{ + // NOTE: their_pos is a pointer to global memory. 
+ glm::vec3 acc = glm::vec3 (0); + int index = threadIdx.x + (blockIdx.x * blockDim.x); + + if (index < N) + { + for (int i = 0; i < N; i ++) + { + if (their_pos [i] == my_pos) + continue; + acc += calculateAcceleration(my_pos, their_pos [i]); + } + acc += calculateAcceleration (my_pos, glm::vec4 (0, 0, 0, starMass)); + } + return acc; +} + +// For float comparisons. +__device__ bool isApproximately (const float &a, const float &b) +{ + if ((a >= (b - 0.001)) && (a <= (b + 0.001))) + return true; + return false; +} + +//TODO: Done! +__device__ +glm::vec3 sharedMemAcc(int N, glm::vec4 my_pos, glm::vec4 * their_pos) +{ + extern __shared__ glm::vec4 shared_pos []; + int threadNo = blockDim.x * blockIdx.x + threadIdx.x; + + glm::vec3 acc = glm::vec3 (0); + + int loopMax = ceil (N / (float)blockDim.x); + + // Loop over each block (assuming parallelization of objects) and load objects from global to shared memory. + // The first block of threads will load the first blockDim.x no. of objects from global memory to shared memory; + // The next block will load the next blockDim.x no. of objects from global and so on. Thus, we load the entire + // set of positions in global memory into shared memory iteratively, one block at a time. + for (int j = 0; j < loopMax; j ++) + { + // refBlockIndex is the block index of the block of memory locations we're trying to copy into shared. + int refblockIndex = blockIdx.x + j; + + // If trying to load a block beyond the grid boundary, wrap around. + if (refblockIndex >= loopMax) + refblockIndex -= loopMax; + + // Calculate global memory index that should be accessed by this thread. + int index = blockDim.x * refblockIndex + threadIdx.x; + // Load the value from global to shared. + if (index < N) + shared_pos [threadIdx.x] = their_pos [index]; + +// Synchronize here. + __syncthreads(); + + // Compute acceleration only for valid threads. + if (threadNo < N) + { + // Loop over each object, and calculate acceleration. 
+ for (int i = 0; i < blockDim.x; i ++) + { + // If the block of global memory we're loading into shared mem corresponds to the last block in the grid, + // it can contain less than blockDim.x elements. In such a situation, break out of the loop once we pass + // the last element in that "block". + if (refblockIndex == (loopMax-1)) + if (i >= (N%blockDim.x)) + break; + + // A body cannot exert a force on itself, so skip.. + if (isApproximately (shared_pos [i].x, my_pos.x) && + isApproximately (shared_pos [i].y, my_pos.y) && + isApproximately (shared_pos [i].z, my_pos.z)) + continue; + + acc += calculateAcceleration(my_pos, shared_pos [i]); + } + } + } + + // Calculate acceleration due to star. + if (threadNo < N) // Only for valid threads. + acc += calculateAcceleration (my_pos, glm::vec4 (0, 0, 0, starMass)); + return acc; +} + +//Simple Euler integration scheme +__global__ +void updateF(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc) +{ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + glm::vec4 my_pos; + glm::vec3 accel; + + if(index < N) my_pos = pos[index]; + + accel = ACC(N, my_pos, pos); + + if(index < N) acc[index] = accel; +} + +__global__ +void updateS(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc) +{ + int index = threadIdx.x + (blockIdx.x * blockDim.x); +<<<<<<< HEAD + glm::vec4 my_pos = glm::vec4 (0); + if( index < N ) + my_pos = pos[index]; + + // For invalid threads, we still compute dummy acceleration. + // This is because sharedMemAcc will cause a deadlocked kernel if there is a divergence here. 
+ glm::vec3 acc = ACC(N, my_pos, pos); + + if( index < N ) + { + vel[index] += acc * dt; +======= + if( index < N ) + { + vel[index] += acc[index] * dt; +>>>>>>> upstream/master + pos[index].x += vel[index].x * dt; + pos[index].y += vel[index].y * dt; + pos[index].z += vel[index].z * dt; + } +} + +//Update the vertex buffer object +//(The VBO is where OpenGL looks for the positions for the planets) +__global__ +void sendToVBO(int N, glm::vec4 * pos, float * vbo, int width, int height, float s_scale) +{ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + + float c_scale_w = -2.0f / s_scale; + float c_scale_h = -2.0f / s_scale; + + if(index>>>>>> upstream/master + float mag = sqrt(sqrt(acc.x*acc.x + acc.y*acc.y + acc.z*acc.z)); + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = (mag < 1.0f) ? mag : 1.0f; + } +} + +/************************************* + * Wrappers for the __global__ calls * + *************************************/ + +//Initialize memory, update some globals +void initCuda(int N) +{ + numObjects = N; + dim3 fullBlocksPerGrid((int)ceil(float(N)/float(blockSize))); + + cudaMalloc((void**)&dev_pos, N*sizeof(glm::vec4)); + checkCUDAErrorWithLine("Kernel failed!"); + cudaMalloc((void**)&dev_vel, N*sizeof(glm::vec3)); + checkCUDAErrorWithLine("Kernel failed!"); + cudaMalloc((void**)&dev_acc, N*sizeof(glm::vec3)); + checkCUDAErrorWithLine("Kernel failed!"); + + generateRandomPosArray<<>>(1, numObjects, dev_pos, scene_scale, planetMass); + checkCUDAErrorWithLine("Kernel failed!"); + generateCircularVelArray<<>>(2, numObjects, dev_vel, dev_pos); + checkCUDAErrorWithLine("Kernel failed!"); + cudaThreadSynchronize(); +} + +void cudaNBodyUpdateWrapper(float dt) +{ + dim3 fullBlocksPerGrid((int)ceil(float(numObjects)/float(blockSize))); +<<<<<<< HEAD + update<<>>(numObjects, dt, dev_pos, dev_vel); +======= + updateF<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc); + checkCUDAErrorWithLine("Kernel failed!"); + 
updateS<<>>(numObjects, dt, dev_pos, dev_vel, dev_acc); +>>>>>>> upstream/master + checkCUDAErrorWithLine("Kernel failed!"); + cudaThreadSynchronize(); +} + +void cudaUpdateVBO(float * vbodptr, int width, int height) +{ + dim3 fullBlocksPerGrid((int)ceil(float(numObjects)/float(blockSize))); + sendToVBO<<>>(numObjects, dev_pos, vbodptr, width, height, scene_scale); + cudaThreadSynchronize(); +} + +void cudaUpdatePBO(float4 * pbodptr, int width, int height) +{ + dim3 fullBlocksPerGrid((int)ceil(float(width*height)/float(blockSize))); +<<<<<<< HEAD + sendToPBO<<>>(numObjects, dev_pos, pbodptr, width, height, scene_scale); + checkCUDAErrorWithLine("Kernel failed!"); +======= + sendToPBO<<>>(numObjects, dev_pos, pbodptr, width, height, scene_scale); + cudaThreadSynchronize(); +>>>>>>> upstream/master +} + + diff --git a/Part1/src/kernel.h b/Part1/src/kernel.h index 1f8b37a..405552b 100644 --- a/Part1/src/kernel.h +++ b/Part1/src/kernel.h @@ -14,11 +14,27 @@ #define blockSize 128 #define checkCUDAErrorWithLine(msg) checkCUDAError(msg, __LINE__) -#define SHARED 0 +#define SHARED 1 void checkCUDAError(const char *msg, int line); -void cudaNBodyUpdateWrapper(float dt); -void initCuda(int N); +void cudaNBodyUpdateWrapper(float dt, bool customSimulation); +void initCuda(int N, const glm::vec4 &camera_position); void cudaUpdatePBO(float4 * pbodptr, int width, int height); void cudaUpdateVBO(float * vbodptr, int width, int height); +void setDevicePrefetch (bool prefetchEnabled); + +void moveCameraToNextFlock (glm::vec3 &cameraPos); +void setCameraUpdate (bool shouldCameraUpdate); +void resetAttachedToIndex (); + +glm::vec4 getCurrentCameraPosition (); +glm::vec3 getCurrentCameraLookAt (); +void setCurrentCameraPosition (const glm::vec4 &camera_position); + +inline __device__ glm::vec3 safeNormalize (glm::vec3 vectorToBeNormalized); // normalize only if length > 0 +__device__ bool isApproximately (const float &a, const float &b); +__device__ glm::vec3 sharedMemAcc(int N, 
glm::vec4 my_pos, glm::vec4 * their_pos); +__device__ glm::vec3 pfSharedMemAcc(int N, glm::vec4 my_pos, glm::vec4 * their_pos); +__device__ glm::vec3 FlockGlobal (int N, float DT, glm::vec4 my_pos, glm::vec4 *pos, glm::vec3 *vel); +__device__ glm::vec3 naiveAcc(int N, glm::vec4 my_pos, glm::vec4 * their_pos); #endif diff --git a/Part1/src/main.cpp b/Part1/src/main.cpp index d4c9c5b..c2f5fea 100644 --- a/Part1/src/main.cpp +++ b/Part1/src/main.cpp @@ -7,6 +7,10 @@ #define N_FOR_VIS 25 #define DT 0.2 #define VISUALIZE 1 + +bool customSimulation = false; +bool prefetchEnabled = false; +bool cameraToggle = false; //------------------------------- //-------------MAIN-------------- //------------------------------- @@ -22,15 +26,15 @@ int main(int argc, char** argv) cudaGLRegisterBufferObject( planetVBO ); #if VISUALIZE == 1 - initCuda(N_FOR_VIS); + initCuda(N_FOR_VIS, glm::vec4 (cameraPosition, 1)); #else - initCuda(2*128); + initCuda(20*120); #endif - - projection = glm::perspective(fovy, float(width)/float(height), zNear, zFar); +// setDevicePrefetch (prefetchEnabled); + perspMat = glm::perspective(fovy, float(width)/float(height), zNear, zFar); view = glm::lookAt(cameraPosition, glm::vec3(0), glm::vec3(0,0,1)); - projection = projection * view; + projection = perspMat * view; GLuint passthroughProgram; initShaders(program); @@ -53,7 +57,7 @@ int main(int argc, char** argv) //---------RUNTIME STUFF--------- //------------------------------- -void runCuda() +void runCuda(bool customSimulation) { // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). 
When mapped to CUDA, OpenGL should not use this buffer @@ -64,7 +68,7 @@ void runCuda() cudaGLMapBufferObject((void**)&dptrvert, planetVBO); // execute the kernel - cudaNBodyUpdateWrapper(DT); + cudaNBodyUpdateWrapper(DT, customSimulation); #if VISUALIZE == 1 cudaUpdatePBO(dptr, field_width, field_height); cudaUpdateVBO(dptrvert, field_width, field_height); @@ -88,7 +92,7 @@ void display() timebase = time; frame = 0; } - runCuda(); + runCuda(customSimulation); char title[100]; sprintf( title, "565 NBody sim [%0.2f fps]", fps ); @@ -104,6 +108,8 @@ void display() // VAO, shader program, and texture already bound //glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); //glDrawElements(GL_TRIANGLES, 6*field_width*field_height, GL_UNSIGNED_INT, 0); + if (cameraToggle) + updateCameraTransformation (); glUseProgram(program[HEIGHT_FIELD]); @@ -152,9 +158,52 @@ void keyboard(unsigned char key, int x, int y) case(27): exit(1); break; + case 'C': + cameraToggle = !cameraToggle; + if (!cameraToggle) + { + setCameraUpdate (false); + cameraPosition = glm::vec3 (originalCamPosition); + setCurrentCameraPosition (glm::vec4 (cameraPosition, 1.0)); + view = glm::lookAt(cameraPosition, glm::vec3 (0), glm::vec3(0,0,1)); + resetAttachedToIndex (); + } + case 'N': + if (cameraToggle) + { + setCameraUpdate (true); + moveCameraToNextFlock (cameraPosition); + glm::vec4 temp_cp = getCurrentCameraPosition (); + cameraPosition.x = temp_cp.x; cameraPosition.y = temp_cp.y; cameraPosition.z = temp_cp.z; +// cameraPosition = glm::vec3 (1.0, 1.0, 1.2); + view = glm::lookAt(cameraPosition, getCurrentCameraLookAt (), + glm::vec3(0,0,1)); + } + + projection = perspMat * view; + glUseProgram(program[0]); + glUniformMatrix4fv (glGetUniformLocation(program[0], "u_projMatrix"), 1, GL_FALSE, &projection [0][0]); + glUseProgram(program[1]); + glUniformMatrix4fv (glGetUniformLocation(program[1], "u_projMatrix"), 1, GL_FALSE, &projection [0][0]); + glUniform3fv (glGetUniformLocation(program[1], "u_cameraPos"), 1, 
&cameraPosition [0]); + glUseProgram (program[0]); + break; } } +void updateCameraTransformation () +{ + glm::vec4 temp_cp = getCurrentCameraPosition (); + cameraPosition.x = temp_cp.x; cameraPosition.y = temp_cp.y; cameraPosition.z = temp_cp.z; + view = glm::lookAt(cameraPosition, /*glm::vec3 (cameraPosition.x, cameraPosition.y +0.7f, cameraPosition.z-0.5f)*/getCurrentCameraLookAt (), + glm::vec3(0,0,1)); + projection = perspMat * view; + glUseProgram(program[0]); + glUniformMatrix4fv (glGetUniformLocation(program[0], "u_projMatrix"), 1, GL_FALSE, &projection [0][0]); + glUseProgram(program[1]); + glUniformMatrix4fv (glGetUniformLocation(program[1], "u_projMatrix"), 1, GL_FALSE, &projection [0][0]); + glUniform3fv (glGetUniformLocation(program[1], "u_cameraPos"), 1, &cameraPosition [0]); +} //------------------------------- //----------SETUP STUFF---------- @@ -178,6 +227,15 @@ void init(int argc, char* argv[]) exit (1); } + if (argc > 1) + { + if (!strcmp (argv [1], "true")) + customSimulation = true; + if (argc > 2) + if (!strcmp (argv [2], "prefetch")) + prefetchEnabled = true; + } + initVAO(); initTextures(); } diff --git a/Part1/src/main.h b/Part1/src/main.h index 2b818bf..2f84b5a 100644 --- a/Part1/src/main.h +++ b/Part1/src/main.h @@ -2,7 +2,7 @@ #define MAIN_H #include -#include +#include #include #include @@ -48,21 +48,23 @@ GLuint program[2]; const unsigned int HEIGHT_FIELD = 0; const unsigned int PASS_THROUGH = 1; -const int field_width = 800; -const int field_height = 800; +const int field_width = 296; +const int field_height = 296; float fovy = 60.0f; float zNear = 0.10; float zFar = 5.0; +glm::mat4 perspMat; glm::mat4 projection; glm::mat4 view; -glm::vec3 cameraPosition(1.75,1.75,1.35); +glm::vec3 originalCamPosition(1.75,1.75,1.35); +glm::vec3 cameraPosition (originalCamPosition); //------------------------------- //----------CUDA STUFF----------- //------------------------------- -int width=1000; int height=1000; +int width=1024; int height=768; 
//------------------------------- //-------------MAIN-------------- @@ -74,10 +76,11 @@ int main(int argc, char** argv); //---------RUNTIME STUFF--------- //------------------------------- -void runCuda(); +void runCuda(bool customSimulation); void display(); void keyboard(unsigned char key, int x, int y); +void updateCameraTransformation (); //------------------------------- //----------SETUP STUFF---------- diff --git a/README.md b/README.md index e3122aa..6ec272a 100644 --- a/README.md +++ b/README.md @@ -1,373 +1,161 @@ CIS565: Project 3: CUDA Simulation and GLSL Visualization === -Fall 2013 ---- -Due Sunday, 10/20/2013 by 11:59:59 pm ---- ---- -NOTE: ---- -This project requires an NVIDIA graphics card with CUDA capability! Any card -after the Geforce 8xxx series will work. If you do not have an NVIDIA graphics -card in the machine you are working on, feel free to use any machine in the SIG -Lab or in Moore100 labs. All machines in the SIG Lab and Moore100 are equipped -with CUDA capable NVIDIA graphics cards. If this too proves to be a problem, -please contact Patrick or Liam as soon as possible. +For this project, I wrote code to implement an N-Body simulation in CUDA, visualized using GLSL. The N-Body simulator +is like a gravity sim resembling a solar system where planets would orbit around a star. This assignment was an exercise +on the use of shared memory and how efficient it is for programs, since the last two (the Raytracer and Pathtracer) +didn't explicitly focus on performance and efficiency. As always, a framework starter code was provided by our TA, +Liam Boone. + +The code I've written could be optimized a lot further, since it contains quite a lot of uncoalesced global memory +accesses and shared memory access bank conflicts (where the threads loop through each element in shared/global memory). 
+One way that this could be done is by launching a block of threads for every object, in which each thread calculates only +the force/acceleration on that object due to a single other object in the scene. A parallel reduction could then be +performed to find the total force/acceleration on that body. However, it is impossible to predict what the effect will be +on performance without profiling the code using NSight, which I do not have access to. + +Nevertheless, using this code, I was able to witness a HUGE speedup when using shared memory as opposed to global memory (243 fps vs. 53 fps). + +SCREENSHOTS +----------- +Flocking:
+
+Gravity Simulation:
+
+ +DETAILS +------- +In this project, the positions, velocities and accelerations of all objects are stored in global memory locations +dev_pos, +dev_vel +and dev_acc respectively. I was required to write device functions to calculate the accelerations for every object using +both the global memory and shared memory. As an added bonus, I was able to implement prefetching for shared memory (where +instead of directly loading a value from global memory into shared, we pre-load into a register ahead of the current +iteration and then load that into shared during the current iteration) and two other integration schemes: Verlet and +Leapfrog (a Symplectic Euler integrator was provided by default). + +In addition to the above, I was also required to do my own simulation. I implemented dynamic flocking, where planets +dynamically drop in and out of flocks. Such flocks are created on the fly as planets move around. This dynamic flocking +runs when the first command-line argument is 'true'. ---- -INTRODUCTION: ---- -In this project you will be creating a 3D visualization of an N-Body system -simulated using CUDA and OpenGL shaders. You will also be creating your own -simulation of choice. -This project is divided into two parts. Part one will consist mostly of a -tutorial style walkthrough of creating the N-Body sim. Part two is an open -ended assignment to create your own simulation. This simulation can be virtually -anything you choose, but will require approval for ideas not listed in this -readme. - -You are also free to do as many extra simulations as you like! - ---- -CONTENTS: ---- -The Project3 root directory contains the following subdirectories: - - * Part1/ - * resources/ the screenshots used in this readme. - * src/ contains the provided code. 
__NOTE:__ Shader code will be located in the PROJ3_XYZ folders - * PROJ_WIN/ contains a Visual Studio 2010 project file with different configurations - * Debug (v4.0) - * Release (v4.0) - * Debug (v5.5) - * Release (v5.5) - * PROJ_NIX/ contains a Linux makefile for building and running on Ubuntu - 12.04 LTS. Note that you will need to set the following environment - variables (you may set these any way that you like. I added them to my .bashrc): - * PATH=$PATH:/usr/local/cuda-5.5/bin - * LD_LIBRARY_PATH=/usr/local/cuda-5.5/lib64:/lib - * Part2/ you will fill this with your own simulation code. - -__NOTE:__ Since I do not use Apple products regularly enough to know what I'm doing I did not create a Mac friendly version of the project. I will award a +5 point bounty to the first person to open a pull request containing an OSX compatible version of the starter code. All runners up will receive +100 awesome points. - -PART 1: CUDA NBody Simulation -=== - ---- -REQUIREMENTS: ---- -In this project, you are given code for: - * Initialization - * Rendering to the screen - * Some helpful math functions - * CUDA/OpenGL inter-op - -You will need to implement the following features: - * Calculating forces between all interacting bodies - * The same, but with shared memory - * Vertex shader code to render a height field - * Fragment shader code to light that height field - * Geometry shader code to create screen facing billboards from rendered points - * Fragment shader code to render those billboards like spheres with simple diffuse shading - -You are NOT required to implement any of the following features: - * Prefetching (__NOTE:__ to receive +5 for this feature it must be discussed in your performance section) - * Tessellation shader code to refine the heightfield mesh in regions of interest - * Render the height map as a quad and use parallax occlusion mapping in the fragment shader to simulate the height field - * More interesting rendering of the scene (making some 
planets light sources would be cool, or perhaps adding orbit trails) - * Textures for the planets and/or unique looking planets - * Replace geometry shader billboarding with adding in simple models (e.g. a pyramid pointing in the direction of velocity) - * Collisions - * Runge Kutta integration (or anything better than the Euler integration provided) - -Since we had some problems going live on time with this project you can give yourself a +5 point boost for including up to two of the above extra features. For example, adding collisions and textured planets along with completing all other required components can get you a 110% score. - ---- -WALKTHROUGH ---- -You can choose to complete all of the TODO: tags in the kernel.cu file either before or after sprucing up the graphics, but it will be easier to see some of our improvements if you finish them before. - -For the graphics, you'll see something that looks like this: - -![boring](Part1/resources/000.png) - -Pretty underwhelming if I do say so. Lets add some height and coloring in the height field so we can see what the potential field looks like. - -Since the starter code saves a very high resolution force field to texture memory we will use that to perturb the Z components of the height field. In addition we also multiply by the camera matrix to get everything in the right place. Add the following code to your heightVS.glsl file: - -```glsl -uniform mat4 u_projMatrix; -uniform sampler2D u_height; - -attribute vec4 Position; -attribute vec2 Texcoords; - -varying vec2 v_Texcoords; -varying float f_height; - -void main(void) -{ - v_Texcoords = Texcoords; - vec4 pos = Position; - f_height = texture2D(u_height, Texcoords).w; - pos.z = -0.01-clamp(f_height,0.0,2.0); - pos = u_projMatrix * pos; - gl_Position = pos; -} -``` - -You can run the code as is right now, but you'll likely see very little difference in most cases. 
In order to really get the feel we want, without the added complexity of doing real lighting we'll just darken the fragment color based on the height map. Add the following to your heightFS.glsl file: - -```glsl -varying float f_height; - -void main(void) -{ - float shade = (1.0-2.0*sqrt(f_height)); - vec4 color = vec4(0.05,0.15,0.3,1.0); - gl_FragColor = shade*color; -} -``` - -Now your height field should look closer to this: - -![less boring](Part1/resources/001.png) - -Okay, that's a lot better, but now our planets need some attention. For this step we'll be using the geometry shader to create screen facing quads from the points that are currently being rendered. Essentially, what we want is to create a geometry shader that takes in points and emits triangle strips, so replace the version of planetGS.glsl with this: - -```glsl -#version 330 - -uniform mat4 u_projMatrix; -uniform vec3 u_cameraPos; - -layout (points) in; -layout (triangle_strip) out; -layout (max_vertices = 4) out; - -out vec3 WorldCoord; -out vec3 ToCam; -out vec3 Up; -out vec3 Right; -out vec2 TexCoord; - -const float scale = 0.03; -``` - -Before we can produce the vertices for our quad we need to figure out where they go. This code takes the vector from the point to the camera and crosses it with the up vector (usually I conform to convention and use +Y, but here I used +Z and never got around to fixing it) to produce the right vector. Next we cross the right vector and the camera vector to produce a corrected up vector. 
- -```glsl -void main() -{ - vec3 Position = gl_in[0].gl_Position.xyz; - WorldCoord = Position; - - ToCam = normalize(u_cameraPos - Position); - Up = vec3(0.0, 0.0, 1.0); - Right = cross(ToCam, Up); - Up = cross(Right, ToCam); -``` - -Now that we have the correct up and right vectors, we can emit our vertices and produce our screen facing quads: - -```glsl - vec3 Pos = Position + scale*Right - scale*Up; - gl_Position = u_projMatrix * vec4(Pos, 1.0); - TexCoord = vec2(0.0, 0.0); - EmitVertex(); - - Pos = Position + scale*Right + scale*Up; - gl_Position = u_projMatrix * vec4(Pos, 1.0); - TexCoord = vec2(0.0, 1.0); - EmitVertex(); - - Pos = Position - scale*Right - scale*Up; - gl_Position = u_projMatrix * vec4(Pos, 1.0); - TexCoord = vec2(1.0, 0.0); - EmitVertex(); - - Pos = Position - scale*Right + scale*Up; - gl_Position = u_projMatrix * vec4(Pos, 1.0); - TexCoord = vec2(1.0, 1.0); - EmitVertex(); - - EndPrimitive(); -} -``` - -![cool](Part1/resources/002.png) - -__NOTE:__ You'll notice here that the quads are not aligned to the screen, they merely face it. This is okay for our purposes because we are using them to render spheres. - -With our quads we can do some very fancy things in the fragment shader. Use the following snippets to replace the existing planetFS.glsl file: - -```glsl -#version 330 - -in vec3 WorldCoord; -in vec3 ToCam; -in vec3 Up; -in vec3 Right; -in vec2 TexCoord; -out vec4 FragColor; - -void main() -{ -``` - -This section takes the "texture" coordinates produces in the GS and uses them to decide where in the quad this fragment is. We discard any fragments outside of our desired radius in order to simulate the edge of the sphere. - -```glsl - vec2 coord = 2.01 * (TexCoord - vec2(0.5)); - float r = length(coord); - if (r >= 1.0) { discard; } -``` - -Since I designed this project with the center object being a star I execute an early out here to simply color it white. 
- -```glsl - float dist = length(WorldCoord); - if(dist <= 0.01) - { - FragColor = vec4(1.0); - return; - } -``` - -This last segment takes care of calculating the fake intersection point and its lighting. I am using a simple diffuse + constant ambient with exponential attenuation. - -```glsl - vec3 N = Right*-coord.x + Up*coord.y + ToCam*sqrt(1-r*r); - vec3 L = normalize(-WorldCoord); - float light = 0.1 + 0.9*clamp(dot(N,L),0.0, 1.0)*exp(-dist); - vec3 color = vec3(0.4, 0.1, 0.6); - FragColor = vec4(color*light,1.0); -} -``` - -![almost there](Part1/resources/003.png) - -The last thing we add is a little bit of procedural coloring to give a nice grid effect. Replace the boring color code in heightFS.glsl with this: - -```glsl -float alpha = float(mod(v_Texcoords.x+0.025, 0.05) > 0.046 || - mod(v_Texcoords.y+0.025, 0.05) > 0.046); -vec4 color = mix(vec4(0.05,0.15,0.3,1.0), vec4(0.05, 0.3, 0.4, 1.0), alpha); -``` - -![awesome](Part1/resources/004.png) - -Now we have a beautiful looking (if simple) gravity sim! - - -PART 2: Your CUDA Simulation -=== - -To complete this part of the assignment you must implement your own simulation. This can be anything within reason, but two examples that would be well suited are: - -* Flocking -* Mass spring cloth/jello - -Feel free to code your own unique simulation here, just ask on the Google group if your topic is acceptable and we'll probably say yes. - ---- -NOTES ON GLM: ---- -This project uses GLM, the GL Math library, for linear algebra. You need to -know two important points on how GLM is used in this project: - -* In this project, indices in GLM vectors (such as vec3, vec4), are accessed - via swizzling. So, instead of v[0], v.x is used, and instead of v[1], v.y is - used, and so on and so forth. -* GLM Matrix operations work fine on NVIDIA Fermi cards and later, but - pre-Fermi cards do not play nice with GLM matrices. 
As such, in this project, - GLM matrices are replaced with a custom matrix struct, called a cudaMat4, found - in cudaMat4.h. A custom function for multiplying glm::vec4s and cudaMat4s is - provided as multiplyMV() in intersections.h. - ---- -README ---- -All students must replace the contents of this Readme.md in a clear manner with -the following: - -* A brief description of the project and the specific features you implemented. -* At least one screenshot of your project running. -* A 30 second or longer video of your project running. To create the video you - can use http://www.microsoft.com/expression/products/Encoder4_Overview.aspx -* A performance evaluation (described in detail below). - ---- PERFORMANCE EVALUATION ---- -The performance evaluation is where you will investigate how to make your CUDA -programs more efficient using the skills you've learned in class. You must -perform at least one experiment on your code to investigate the positive or -negative effects on performance. - -For this Project, one of these experiments should be a comparison between the -global and shared memory versions of the acceleration calculation function at -varying block sizes. - -A good metric to track would be number of frames per second, -or number of objects displayable at 60fps. - -We encourage you to get creative with your tweaks. Consider places in your code -that could be considered bottlenecks and try to improve them. - -Each student should provide no more than a one page summary of their -optimizations along with tables and or graphs to visually explain any -performance differences. - ---- -THIRD PARTY CODE POLICY ---- -* Use of any third-party code must be approved by asking on our Google group. - If it is approved, all students are welcome to use it. Generally, we approve - use of third-party code that is not a core part of the project. 
For example, - for the ray tracer, we would approve using a third-party library for loading - models, but would not approve copying and pasting a CUDA function for doing - refraction. -* Third-party code must be credited in README.md. -* Using third-party code without its approval, including using another - student's code, is an academic integrity violation, and will result in you - receiving an F for the semester. - ---- -SELF-GRADING ---- -* On the submission date, email your grade, on a scale of 0 to 100, to Liam, - liamboone+cis565@gmail.com, with a one paragraph explanation. Be concise and - realistic. Recall that we reserve 30 points as a sanity check to adjust your - grade. Your actual grade will be (0.7 * your grade) + (0.3 * our grade). We - hope to only use this in extreme cases when your grade does not realistically - reflect your work - it is either too high or too low. In most cases, we plan - to give you the exact grade you suggest. -* For late assignments there will be a 50% penaly per week. -* Projects are not weighted evenly, e.g., Project 0 doesn't count as much as - the path tracer. We will determine the weighting at the end of the semester - based on the size of each project. - ---- -SUBMISSION ---- -As with the previous project, you should fork this project and work inside of -your fork. Upon completion, commit your finished project back to your fork, and -make a pull request to the master repository. You should include a README.md -file in the root directory detailing the following - -* A brief description of the project and specific features you implemented -* At least one screenshot of your project running. -* A link to a video of your raytracer running. -* Instructions for building and running your project if they differ from the - base code. -* A performance writeup as detailed above. -* A list of all third-party code used. -* This Readme file edited as described above in the README section. 
- ---- -ACKNOWLEDGEMENTS ---- -I adapted the geometry shader code from [this excellent tutorial on the subject](http://ogldev.atspace.co.uk/www/tutorial27/tutorial27.html) +---------------------- +Performance of the program was compared for different numbers of planets/objects being simulated, using global memory, shared +memory and a prefetched version of shared memory. Here are the results:
+
+With visualization on: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Memory type | Number of objects | Framerate | Number of objects | Framerate
Global | 2500 | 1.75
Shared | 2500 | 12 | 5000 | 6.77
Shared (Prefetched) | 2500 | 12 | 5000 | 6.77
+ +5000 objects were not simulated in global memory since the framerate was close to 0.
+
+With visualization off: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Memory type | Number of objects | Framerate (avg.) | Number of objects | Framerate (avg.)
Global | 1,500,000 | 53 | 3,000,000 | 53
Shared | 1,500,000 | 615 | 3,000,000 | 620
Shared (Prefetched) | 1,500,000 | 630 | 3,000,000 | 630
Global | 5,000,000 | 53 | 10,000,000 | 53
Shared | 5,000,000 | 630 | 10,000,000 | 615
Shared (Prefetched) | 5,000,000 | 630 | 10,000,000 | 615
Global | 20,000,000 | 53 | 50,000,000 | 53
Shared | 20,000,000 | 620 | 50,000,000 | 620
Shared (Prefetched) | 20,000,000 | 615 | 50,000,000 | 630
+ +These results show that shared memory is WAY better than global memory. As I mentioned above, if the bank conflicts +resulting from threads accessing multiple shared memory locations were to be corrected, the program would run much faster. + +The results also show no great advantage from using prefetching. I believe this is because there are not many independent +instructions to hide the latency involved in accessing global memory. diff --git a/screenshots/flock.png b/screenshots/flock.png new file mode 100644 index 0000000..282dd55 Binary files /dev/null and b/screenshots/flock.png differ diff --git a/screenshots/gravsim.png b/screenshots/gravsim.png new file mode 100644 index 0000000..06ea9d2 Binary files /dev/null and b/screenshots/gravsim.png differ