@@ -45,6 +45,17 @@ if(NOT CUDA_FOUND)
4545 message (FATAL_ERROR "Could not find CUDA >= 7.0" )
4646endif ()
4747
#
# Default CUDA compute capabilities (CCs) to compile for.
# Every CUDA version handled here gets 30, 35, 50 and 52; a CUDA major
# version above 7 additionally gets 60, 61 and 62.
# The result only seeds the PopSift_CUDA_CC_LIST cache entry, so a value
# already present in the cache is kept. A shorter list saves a lot of
# compile time.
#
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52)
if(CUDA_VERSION_MAJOR GREATER 7)
  list(APPEND PopSift_CUDA_CC_LIST_BASIC 60 61 62)
endif()
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
58+
# Tell the user at configure time when PopSift_USE_NVTX_PROFILING is switched on.
if(PopSift_USE_NVTX_PROFILING)
  message(STATUS "PROFILING CPU CODE: NVTX is in use")
endif()
@@ -77,18 +88,21 @@ if(PopSift_USE_POSITION_INDEPENDENT_CODE)
7788 set (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ;-Xcompiler;-fPIC" )
7889endif ()
7990
#
# Add all requested CUDA CCs to the command line for offline compilation.
#
# PopSift_CUDA_CC_LIST drives the -gencode options; each entry must be a
# bare compute-capability number such as 52 or 61. An empty list or a
# non-numeric entry stops the configure step with an explicit message
# instead of producing an obscure list() error or a broken nvcc command.
#
if("${PopSift_CUDA_CC_LIST}" STREQUAL "")
  message(FATAL_ERROR "PopSift_CUDA_CC_LIST is empty: request at least one CUDA CC, e.g. 52")
endif()

# Each CC needs exactly one -gencode pair, so drop repeated entries.
list(REMOVE_DUPLICATES PopSift_CUDA_CC_LIST)
# Sorting only tidies the order of the flags and of the list when it is
# printed. list(SORT) compares strings, so the highest CC is tracked
# numerically in the loop below rather than taken from the last element.
list(SORT PopSift_CUDA_CC_LIST)

set(PopSift_CUDA_CC_LIST_LAST 0)
foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST})
  if(NOT "${PopSift_CC_VERSION}" MATCHES "^[0-9]+$")
    message(FATAL_ERROR "Invalid entry '${PopSift_CC_VERSION}' in PopSift_CUDA_CC_LIST: expected a number such as 52")
  endif()

  # One list element per nvcc argument; no whitespace may leak into them.
  list(APPEND CUDA_NVCC_FLAGS
       "-gencode"
       "arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}")

  if(PopSift_CC_VERSION GREATER PopSift_CUDA_CC_LIST_LAST)
    set(PopSift_CUDA_CC_LIST_LAST ${PopSift_CC_VERSION})
  endif()
endforeach()

#
# Use the highest requested CUDA CC for CUDA JIT compilation
#
list(APPEND CUDA_NVCC_FLAGS
     "-gencode"
     "arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}")
92106
93107# default stream legacy implies that the 0 stream synchronizes all streams
94108# default stream per-thread implies that each host thread has one non-synchronizing 0-stream
@@ -97,6 +111,7 @@ set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy")
97111# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread")
98112
# Log the CUDA version held in CUDA_VERSION and the CC list held in
# PopSift_CUDA_CC_LIST.
message(STATUS "CUDA Version is ${CUDA_VERSION} ")
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST} ")
100115if (CUDA_VERSION>=7.5)
101116 set (CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE} ;-Xptxas;-warn-lmem-usage" )
102117 set (CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE} ;-Xptxas;-warn-spills" )
0 commit comments