Skip to content

Commit d8700a1

Browse files
authored
Merge pull request #32 from alicevision/multigauss
Multigauss - various enhancements
2 parents 2178992 + e658ced commit d8700a1

40 files changed

+1976
-883
lines changed

CMakeLists.txt

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ cmake_minimum_required(VERSION 3.4)
44
project(PopSift VERSION 1.0.0)
55

66
OPTION(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON)
7-
OPTION(USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF)
7+
OPTION(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF)
8+
OPTION(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF)
89
OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
10+
OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)
911

1012
if(PopSift_USE_POSITION_INDEPENDENT_CODE)
1113
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -43,9 +45,25 @@ if(NOT CUDA_FOUND)
4345
message(FATAL_ERROR "Could not find CUDA >= 7.0")
4446
endif()
4547

46-
if(USE_NVTX_PROFILING)
48+
#
49+
# Default setting of the CUDA CC versions to compile.
50+
# Shortening the lists saves a lot of compile time.
51+
#
52+
if(CUDA_VERSION_MAJOR GREATER 7)
53+
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61 62)
54+
else(CUDA_VERSION_MAJOR GREATER 7)
55+
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 )
56+
endif(CUDA_VERSION_MAJOR GREATER 7)
57+
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
58+
59+
if(PopSift_USE_NVTX_PROFILING)
4760
message(STATUS "PROFILING CPU CODE: NVTX is in use")
48-
endif(USE_NVTX_PROFILING)
61+
endif(PopSift_USE_NVTX_PROFILING)
62+
63+
if(PopSift_ERRCHK_AFTER_KERNEL)
64+
message(STATUS "Synchronizing and checking errors after every kernel call")
65+
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL")
66+
endif(PopSift_ERRCHK_AFTER_KERNEL)
4967

5068
set(CUDA_SEPARABLE_COMPILATION ON)
5169

@@ -70,11 +88,21 @@ if(PopSift_USE_POSITION_INDEPENDENT_CODE)
7088
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC")
7189
endif()
7290

73-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_30,code=sm_30")
74-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_35,code=sm_35")
75-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_50,code=sm_50")
76-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_52,code=sm_52")
77-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_52,code=compute_52")
91+
#
92+
# Add all requested CUDA CCs to the command line for offline compilation
93+
#
94+
list(SORT PopSift_CUDA_CC_LIST)
95+
foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST})
96+
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}")
97+
endforeach(PopSift_CC_VERSION)
98+
99+
#
100+
# Use the highest request CUDA CC for CUDA JIT compilation
101+
#
102+
list(LENGTH PopSift_CUDA_CC_LIST PopSift_CC_LIST_LEN)
103+
MATH(EXPR PopSift_CC_LIST_LEN "${PopSift_CC_LIST_LEN}-1")
104+
list(GET PopSift_CUDA_CC_LIST ${PopSift_CC_LIST_LEN} PopSift_CUDA_CC_LIST_LAST)
105+
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}")
78106

79107
# default stream legacy implies that the 0 stream synchronizes all streams
80108
# default stream per-thread implies that each host thread has one non-synchronizing 0-stream
@@ -83,6 +111,7 @@ set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy")
83111
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread")
84112

85113
message(STATUS "CUDA Version is ${CUDA_VERSION}")
114+
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
86115
if(CUDA_VERSION>=7.5)
87116
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage")
88117
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills")
@@ -94,11 +123,16 @@ endif(CUDA_VERSION>=7.5)
94123
# library required for CUDA dynamic parallelism, forgotten by CMake 3.4
95124
cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" library")
96125

97-
if(USE_NVTX_PROFILING)
126+
if(PopSift_USE_NVTX_PROFILING)
98127
# library required for NVTX profiling of the CPU
99128
cuda_find_library_local_first(CUDA_NVTX_LIBRARY nvToolsExt "NVTX library")
100129
add_definitions(-DUSE_NVTX)
101-
endif(USE_NVTX_PROFILING)
130+
endif(PopSift_USE_NVTX_PROFILING)
131+
132+
if(NOT PopSift_USE_GRID_FILTER)
133+
message(STATUS "Disabling grid filter compilation")
134+
add_definitions(-DDISABLE_GRID_FILTER)
135+
endif(NOT PopSift_USE_GRID_FILTER)
102136

103137
add_subdirectory(src)
104138

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ CUDA_ADD_LIBRARY(popsift STATIC
1919
popsift/sift_extremum.h
2020
popsift/sift_extremum.cu popsift/s_extrema.cu
2121
popsift/s_orientation.cu
22+
popsift/s_filtergrid.cu
2223
popsift/sift_desc.cu
2324
popsift/s_desc_loop.cu popsift/s_desc_loop.h
2425
popsift/s_desc_iloop.cu popsift/s_desc_iloop.h

src/application/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ else()
2828
set(PD_COMPILE_OPTIONS "" )
2929
endif()
3030

31-
if(USE_NVTX_PROFILING)
31+
if(PopSift_USE_NVTX_PROFILING)
3232
list(APPEND PD_LINK_LIBS ${CUDA_NVTX_LIBRARY})
33-
endif(USE_NVTX_PROFILING)
33+
endif(PopSift_USE_NVTX_PROFILING)
3434

3535
#############################################################
3636
# popsift-demo

src/application/main.cpp

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ static bool print_time_info = false;
4343
static bool write_as_uchar = false;
4444
static bool dont_write = false;
4545
static bool pgmread_loading = false;
46+
static bool float_mode = false;
4647

4748
static void parseargs(int argc, char** argv, popsift::Config& config, string& inputFile) {
4849
using namespace boost::program_options;
@@ -73,9 +74,10 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
7374
options_description modes("Modes");
7475
{
7576
modes.add_options()
76-
("gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
77-
"Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
78-
"Options are: vlfeat, relative, opencv, fixed9, fixed15")
77+
( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
78+
popsift::Config::getGaussModeUsage() )
79+
// "Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
80+
// "Options are: vlfeat, relative, relative-all, opencv, fixed9, fixed15"
7981
("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
8082
"Choice of descriptor extraction modes:\n"
8183
"loop, iloop, grid, igrid, notile\n"
@@ -97,9 +99,11 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
9799
"Computed filter width are lower than VLFeat/PopSift")
98100
("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
99101
"Direct each octave from upscaled orig instead of blurred level.")
100-
("root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setUseRootSift(true); }),
101-
"Use the L1-based norm for OpenMVG rather than L2-based as in OpenCV")
102102
("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
103+
( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
104+
popsift::Config::getNormModeUsage() )
105+
( "root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setNormMode(popsift::Config::RootSift); }),
106+
popsift::Config::getNormModeUsage() )
103107
("filter-max-extrema", value<int>()->notifier([&](int f) {config.setFilterMaxExtrema(f); }), "Approximate max number of extrema.")
104108
("filter-grid", value<int>()->notifier([&](int f) {config.setFilterGridSize(f); }), "Grid edge length for extrema filtering (ie. value 4 leads to a 4x4 grid)")
105109
("filter-sort", value<std::string>()->notifier([&](const std::string& s) {config.setFilterSorting(s); }), "Sort extrema in each cell by scale, either random (default), up or down");
@@ -115,6 +119,7 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
115119
"Scaling to sensible ranges is not automatic, should be combined with --norm-multi=9 or similar")
116120
("dont-write", bool_switch(&dont_write)->default_value(false), "Suppress descriptor output")
117121
("pgmread-loading", bool_switch(&pgmread_loading)->default_value(false), "Use the old image loader instead of LibDevIL")
122+
("float-mode", bool_switch(&float_mode)->default_value(false), "Upload image to GPU as float instead of byte")
118123
;
119124

120125
//("test-direct-scaling")
@@ -164,12 +169,18 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )
164169
{
165170
int w;
166171
int h;
167-
unsigned char* image_data;
168172
SiftJob* job;
173+
unsigned char* image_data;
169174

170175
#ifdef USE_DEVIL
171176
if( not pgmread_loading )
172177
{
178+
if( float_mode )
179+
{
180+
cerr << "Cannot combine float-mode test with DevIL image reader" << endl;
181+
exit( -1 );
182+
}
183+
173184
nvtxRangePushA( "load and convert image - devil" );
174185

175186
ilImage img;
@@ -184,11 +195,11 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )
184195
w = img.Width();
185196
h = img.Height();
186197
cout << "Loading " << w << " x " << h << " image " << inputFile << endl;
198+
187199
image_data = img.GetData();
188200

189201
nvtxRangePop( ); // "load and convert image - devil"
190202

191-
// PopSift.init( w, h );
192203
job = PopSift.enqueue( w, h, image_data );
193204

194205
img.Clear();
@@ -205,10 +216,25 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )
205216

206217
nvtxRangePop( ); // "load and convert image - pgmread"
207218

208-
// PopSift.init( w, h );
209-
job = PopSift.enqueue( w, h, image_data );
219+
if( not float_mode )
220+
{
221+
// PopSift.init( w, h );
222+
job = PopSift.enqueue( w, h, image_data );
210223

211-
delete [] image_data;
224+
delete [] image_data;
225+
}
226+
else
227+
{
228+
float* f_image_data = new float [w * h];
229+
for( int i=0; i<w*h; i++ )
230+
{
231+
f_image_data[i] = float( image_data[i] ) / 256.0f;
232+
}
233+
job = PopSift.enqueue( w, h, f_image_data );
234+
235+
delete [] image_data;
236+
delete [] f_image_data;
237+
}
212238
}
213239

214240
return job;
@@ -272,7 +298,9 @@ int main(int argc, char **argv)
272298
deviceInfo.set( 0, print_dev_info );
273299
if( print_dev_info ) deviceInfo.print( );
274300

275-
PopSift PopSift( config );
301+
PopSift PopSift( config,
302+
popsift::Config::ExtractingMode,
303+
float_mode ? PopSift::FloatImages : PopSift::ByteImages );
276304

277305
std::queue<SiftJob*> jobs;
278306
for( auto it = inputFiles.begin(); it!=inputFiles.end(); it++ ) {

src/application/match.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,8 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
7474
options_description modes("Modes");
7575
{
7676
modes.add_options()
77-
("gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
78-
"Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
79-
"Options are: vlfeat, relative, opencv, fixed9, fixed15")
77+
( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
78+
popsift::Config::getGaussModeUsage() )
8079
("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
8180
"Choice of descriptor extraction modes:\n"
8281
"loop, iloop, grid, igrid, notile\n"
@@ -98,9 +97,11 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
9897
"Computed filter width are lower than VLFeat/PopSift")
9998
("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
10099
"Direct each octave from upscaled orig instead of blurred level.")
101-
("root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setUseRootSift(true); }),
102-
"Use the L1-based norm for OpenMVG rather than L2-based as in OpenCV")
103100
("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
101+
( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
102+
popsift::Config::getNormModeUsage() )
103+
( "root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setNormMode(popsift::Config::RootSift); }),
104+
popsift::Config::getNormModeUsage() )
104105
("filter-max-extrema", value<int>()->notifier([&](int f) {config.setFilterMaxExtrema(f); }), "Approximate max number of extrema.")
105106
("filter-grid", value<int>()->notifier([&](int f) {config.setFilterGridSize(f); }), "Grid edge length for extrema filtering (ie. value 4 leads to a 4x4 grid)")
106107
("filter-sort", value<std::string>()->notifier([&](const std::string& s) {config.setFilterSorting(s); }), "Sort extrema in each cell by scale, either random (default), up or down");

src/application/pgmread.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,11 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h )
6262

6363
int type;
6464
if( pgmtype.substr(0,2) == "P2" ) type = 2;
65+
else if( pgmtype.substr(0,2) == "P3" ) type = 3;
6566
else if( pgmtype.substr(0,2) == "P5" ) type = 5;
6667
else if( pgmtype.substr(0,2) == "P6" ) type = 6;
6768
else {
68-
cerr << "File " << input_file << " can only contain P2, P5 or P6 PGM images" << endl;
69+
cerr << "File " << input_file << " can only contain P2, P3, P5 or P6 PGM images" << endl;
6970
return 0;
7071
}
7172

@@ -139,6 +140,42 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h )
139140
}
140141
}
141142
break;
143+
case 3 :
144+
{
145+
unsigned char* i2 = new unsigned char[ w * h * 3 ];
146+
unsigned char* src = i2;
147+
for( int i=0; i<w*h*3; i++ ) {
148+
int input;
149+
pgmfile >> input;
150+
if( maxval == 255 ) {
151+
i2[i] = input;
152+
} else {
153+
i2[i] = (unsigned char)(input * 255.0 / maxval );
154+
}
155+
if( pgmfile.fail() ) {
156+
cerr << "File " << input_file << " file too short" << endl;
157+
delete [] i2;
158+
delete [] input_data;
159+
return 0;
160+
}
161+
}
162+
for( int i=0; i<w*h; i++ ) {
163+
#ifdef RGB2GRAY_IN_INT
164+
unsigned int r = *src; src++;
165+
unsigned int g = *src; src++;
166+
unsigned int b = *src; src++;
167+
unsigned int res = ( ( R_RATE*r+G_RATE*g+B_RATE*b ) >> RATE_SHIFT );
168+
input_data[i] = (unsigned char)res;
169+
#else // RGB2GRAY_IN_INT
170+
float r = *src; src++;
171+
float g = *src; src++;
172+
float b = *src; src++;
173+
input_data[i] = (unsigned char)( R_RATE*r+G_RATE*g+B_RATE*b );
174+
#endif // RGB2GRAY_IN_INT
175+
}
176+
delete [] i2;
177+
}
178+
break;
142179
case 5 :
143180
if( maxval < 256 ) {
144181
pgmfile.read( (char*)input_data, w*h );

src/popsift/common/debug_macros.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111

1212
using namespace std;
1313

14+
void pop_sync_check_last_error( const char* file, size_t line )
15+
{
16+
cudaDeviceSynchronize();
17+
pop_check_last_error( file, line );
18+
}
19+
1420
void pop_check_last_error( const char* file, size_t line )
1521
{
1622
cudaError_t err = cudaGetLastError( );

src/popsift/common/debug_macros.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,20 @@
1414
#include <assert.h>
1515
#include <cuda_runtime.h>
1616

17-
void pop_check_last_error( const char* file,
18-
size_t line );
17+
// synchronize device and check for an error
18+
void pop_sync_check_last_error( const char* file, size_t line );
19+
20+
// check for an error without synchronizing first
21+
void pop_check_last_error( const char* file, size_t line );
22+
1923
#define POP_CHK pop_check_last_error( __FILE__, __LINE__ )
2024

25+
#ifdef ERRCHK_AFTER_KERNEL
26+
#define POP_SYNC_CHK pop_sync_check_last_error( __FILE__, __LINE__ )
27+
#else
28+
#define POP_SYNC_CHK
29+
#endif
30+
2131
namespace popsift {
2232
namespace cuda {
2333
void malloc_dev( void** ptr, int sz,

src/popsift/features.cu

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,8 @@ void FeaturesDev::match( FeaturesDev* other )
293293
<<<grid,block>>>
294294
( match_matrix, getDescriptors(), l_len, other->getDescriptors(), r_len );
295295

296+
POP_SYNC_CHK;
297+
296298
show_distance
297299
<<<1,32>>>
298300
( match_matrix,
@@ -305,6 +307,7 @@ void FeaturesDev::match( FeaturesDev* other )
305307
other->getReverseMap(),
306308
r_len );
307309

310+
POP_SYNC_CHK;
308311

309312
cudaFree( match_matrix );
310313
}

0 commit comments

Comments
 (0)