Skip to content

Commit

Permalink
Merge branch 'master' into release
Browse files Browse the repository at this point in the history
  • Loading branch information
lkrcal committed Jul 12, 2017
2 parents 190912a + d5c6008 commit d3ec9de
Show file tree
Hide file tree
Showing 175 changed files with 3,584 additions and 4,237 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,6 @@ Doxyfile
#vim swap files
*.swp

src/GPUGenie/configure.h
src/genie/configure.h
*.inv
*.cinv
50 changes: 36 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.8)

set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release")
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug")
project("GPUGenie"
project("genie"
VERSION 0.1
LANGUAGES C CXX CUDA)
enable_testing()
Expand Down Expand Up @@ -35,6 +35,13 @@ set(CMAKE_C_FLAGS "${genie_warnings}")
set(CMAKE_CXX_FLAGS "${genie_warnings}")

set(CMAKE_CUDA_FLAGS "--std c++11 --generate-code arch=compute_35,code=sm_35 --generate-code arch=compute_52,code=sm_52 --relocatable-device-code true --Werror cross-execution-space-call")
# Disable cudafe warning: "type qualifier is meaningless on cast type" occuring in boost::archive
# https://stackoverflow.com/questions/14831051/how-to-disable-compiler-warnings-with-nvcc
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
-Xcudafe --diag_suppress=cast_to_qualified_type,--diag_suppress=integer_sign_change,--diag_suppress=set_but_not_used")
# Boost 1.64 has a bug with Cuda 7.5 and 8.0, where compilation fails on static assert from g++ standard library
# https://svn.boost.org/trac10/ticket/13049
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DBOOST_NO_CXX11_NOEXCEPT")
set(CMAKE_CUDA_FLAGS_DEBUG "--debug --device-debug --generate-line-info")
set(CMAKE_CUDA_FLAGS_RELEASE "--optimize 3 --generate-line-info -DNDEBUG")
# set(CUDA_NVCC_FLAGS_RELWITHDEBUGINFO "--optimize 2 --generate-line-info")
Expand All @@ -45,19 +52,22 @@ set(CUDA_SEPARABLE_COMPILATION ON)

## DEPENDENCIES

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/")

### Boost

set(Boost_USE_STATIC_LIBS ON)
if (GENIE_COMPR)
set(Boost_COMPONETNS serialization program_options)
else()
set(Boost_COMPONETNS serialization)
endif()
set(Boost_COMPONETNS iostreams serialization program_options)
# WARNING: CMake may issue a warning: Imported targets not available for Boost version 106400
# This happens when CMake doesn't recognize too new version of Boost, for example CMake 3.7 doesn't recognize Boost
# 1.64, but CMake 3.8 already does. The warning causes missing Boost components dependencies, but may be ignored.
find_package(Boost 1.63.0 REQUIRED COMPONENTS ${Boost_COMPONETNS})

### Thrust

find_package(Thrust REQUIRED)
include_directories(${THRUST_INCLUDE_DIR})

### MPI & OpenMP

if (GENIE_DISTRIBUTED)
Expand All @@ -71,18 +81,24 @@ if (GENIE_DISTRIBUTED)
endforeach()

find_package(MPI REQUIRED) # Use MPI_HOME or env(MPI_HOME) to define root directory of MPI installation
unset(MPI_HOME CACHE)
message(STATUS "Found mpiexec: ${MPIEXEC}")

# Check for correct OpenMPI and version
if (NOT MPIEXEC OR NOT MPI_C_FOUND OR NOT MPI_CXX_FOUND)
message(FATAL_ERROR "OpenMPI not found!")
endif()
execute_process(COMMAND ${MPI_C_COMPILER} --showme:version OUTPUT_VARIABLE MPI_VERSION)
if (NOT ${MPI_VERSION} MATCHES ".*Open MPI.*" OR
NOT ${MPI_VERSION} MATCHES ".* 1\\.[7-9]\\..*| .*1\\.10\\..*| .* 2\\.[0-9]+\\..*")
message(FATAL_ERROR "MPI not OpenMPI, or OpenMPI has unknown or too low version! Required OpenMPI 1.7 or higher.")
execute_process(COMMAND ${MPI_C_COMPILER} "--showme:version" OUTPUT_VARIABLE MPI_VERSION ERROR_VARIABLE MPI_VERSION_ERR)
if (MPI_VERSION STREQUAL "")
set(MPI_VERSION ${MPI_VERSION_ERR})
endif()
if (NOT ${MPI_VERSION} MATCHES ".*Open MPI.*")
message(FATAL_ERROR "MPI not OpenMPI!")
endif()
if(NOT ${MPI_VERSION} MATCHES ".* 1\\.[7-9]\\..*| .*1\\.10\\..*| .* 2\\.[0-9]+\\..*")
message(FATAL_ERROR "OpenMPI has unknown or too low version! Required OpenMPI 1.7 or higher.")
endif()



find_package(OpenMP REQUIRED)

Expand All @@ -91,6 +107,9 @@ if (GENIE_DISTRIBUTED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()

## Bzip2

find_package(BZip2 REQUIRED)

## COMPRESSION MODULE

Expand Down Expand Up @@ -140,7 +159,7 @@ link_directories(${PROJECT_BINARY_DIR}/lib ${Boost_LIBRARY_DIR})
function(genie_add_simple_executable TARGET)
add_executable(${TARGET} ${ARGN})
add_dependencies(${TARGET} ${PROJECT_NAME})
set_property(TARGET ${TARGET} PROPERTY LINK_DEPENDS ${PROJECT_BINARY_DIR}/lib/libGPUGenie.a)
set_property(TARGET ${TARGET} PROPERTY LINK_DEPENDS ${PROJECT_BINARY_DIR}/lib/libgenie.a)
target_link_libraries(${TARGET}
${PROJECT_NAME}
${CUDA_cudadevrt_LIBRARY})
Expand All @@ -163,9 +182,12 @@ if (GENIE_EXAMPLES)
endif()
add_subdirectory(src)
if (GENIE_DISTRIBUTED)
add_subdirectory(src/DistGenie)
add_subdirectory(src/dist_genie)
endif()
add_subdirectory(src/genie_cli)
if (GENIE_COMPR)
add_subdirectory(src/perf_toolkit)
endif()
add_subdirectory(src/perftoolkit)
add_subdirectory(static)
add_subdirectory(test)
add_subdirectory(utility)
17 changes: 17 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Contributing to GENIE

This document describes how GENIE is developed and released.

## Development

The `master` branch is the main development branch. For new features, create
a separate branch. When merging the features, first rebase the branch from
`master`, then submit a pull request for peer reviews.

Make sure the commits are meaningful, you could adjust your commits with rebase.

## Release

The `release` branch is for releasing GENIE publicly. Once the features are
ready in the `master` branch, it could be merged to the `release` branch.
The `release` branch is then pushed to the `SeSaMe-NUS/genie` repo.
36 changes: 21 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
# GENIE

GENIE is a Generic Inverted Index on the GPU. It builds the database from a csv file or a vector of instances. Then
GENIE will consturct the inverted list table and transfer it to the device. GENIE provides a simple way to
perform the similarity queries. User may define queries and their matching ranges, then directly call the matching
funtion. The library will parallel process all queries and save the matching result into a device_vector. A top k
search can also be simply perfromed. GENIE uses parallel searching to determine the top k values in a vector. It is
much faster than the CPU searching algorithm. All device methods are wrapped in host methods. Developers are not
required to configure the device function call. Please refer to the following documents:
GENIE is a Generic Inverted Index on GPU. It builds a database (inverted index) from high dimensional data, commonly
preprocessed by either Locality Sensitive Hashing or Shotgun and Assembly schemes. GENIE provides a simple way to
perform top-k similarity queries on top of such inverted index. The user may define queries as dimension and value
pairs, and optionally value ranges and weights. GENIE processes all queries in parallel on GPU using a Match Count
similarity model (number of dimensions with matching values in a query). For each query, top-k similar results and
their corresponding counts are returned. GENIE is much faster than other CPU searching algorithms due to extensive
parallelism on two levels: parallel query processing and multiple queries processed in parallel.

Please refer to the following technical report:

```
Generic Inverted Index on the GPU, Technical Report (TR 11/15), School of Computing, NUS.
Generic Inverted Index on the GPU, CoRR arXiv:1603.08390 at www.comp.nus.edu.sg/~atung/publication/gpugenie.pdf
Generic Inverted Index on the GPU, Technical Report (TR 11/15), School of Computing, NUS. <br>
CoRR arXiv:1603.08390 at www.comp.nus.edu.sg/~atung/publication/gpugenie.pdf
```


Expand All @@ -21,7 +24,7 @@ You are required to install G++, CMake, CUDA, OpenMPI and Boost. The minimum req
- CMake 3.8
- CUDA 7.0
- OpenMPI 1.7 (for `GENIE_DISTRIBUTED` only)
- Boost 1.63: serialization (always required), program_options (for `GENIE_COMPR` only)
- Boost 1.63: serialization, iostreams, program_options (for `GENIE_COMPR` only)

To create an "out-of-source" build of GENIE containing both the GENIE library, tests and tools, you can use the
standard CMake procedure:
Expand All @@ -32,7 +35,8 @@ $ cd build
$ cmake ..
$ make -j8
```
Use target `$ make test` to run GENIE tests, `$ make doc` to build html code documentation, `$ make install` to install GENIE.
Use target `$ make test` to run GENIE tests, `$ make doc` to build html code documentation, `$ make install` to
install GENIE.

`CMake` build parameters can be further configured using the following options:
- `CMAKE_BUILD_TYPE:STRING` -- build type, one of `Release`, `Debug` (default `Release`)
Expand All @@ -53,10 +57,10 @@ $ cmake -DGENIE_SIMDCAI=ON -DCMAKE_BUILD_TYPE=Release -DGENIE_DISTRIBUTED=ON -DG

## Running GENIE

There are several main parts of GENIE project. The core is a library `/lib/libGPUGenie.a` with the main functionality.
To see how to use the library, you can check source code in either `/example` or `/test`. Tests are the simplest
applications built on top of GENIE library. Other utilities include a compression performance toolkit in `/perf` and
miscellaneous utilities in `/utility`. All of these tools are compiled into `/bin` directory.
There are several main parts of the GENIE project. The core is a library `/lib/libgenie.a` with the main functionality.
To see how to use the library, you can check the source code in either `/example` or `/test` directories. Tests are
the simplest applications built on top of GENIE library. Other utilities include a compression performance toolkit
in `/perf` and miscellaneous utilities in `/utility`. All of these tools are compiled into `/bin` directory.


### Compression performance toolkit
Expand Down Expand Up @@ -210,5 +214,7 @@ $ pid=$(pgrep odgenie | sed -n 2p); gdb -q --pid "${pid}"

## Documentation

The documentation is available online at http://sesame-nus.github.io/genie.

Code documentation for GENIE can be generated with `cmake` and `make`. After you configure CMake following steps in
[Compilation and Development](#compilation-and-development), just run `$ make doc`.
82 changes: 82 additions & 0 deletions cmake/modules/FindThrust.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2014 Sandia Corporation.
## Copyright 2014 UT-Battelle, LLC.
## Copyright 2014 Los Alamos National Security.
##
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================

#
# FindThrust
#
# This module finds the Thrust header files and extrats their version. It
# sets the following variables.
#
# THRUST_INCLUDE_DIR - Include directory for thrust header files. (All header
# files will actually be in the thrust subdirectory.)
# THRUST_VERSION - Version of thrust in the form "major.minor.patch".
#

# Locate the directory that contains thrust/version.h and store it in the
# THRUST_INCLUDE_DIR cache entry. A few common install locations and the
# CUDA_* variables (presumably populated by a CUDA find module; they simply
# expand to nothing when unset) are offered as search hints.
find_path(THRUST_INCLUDE_DIR
  NAMES thrust/version.h
  HINTS
    /usr/include/cuda
    /usr/local/include
    /usr/local/cuda/include
    ${CUDA_INCLUDE_DIRS}
    ${CUDA_TOOLKIT_ROOT_DIR}
    ${CUDA_SDK_ROOT_DIR}
  DOC "Thrust headers"
)

# Drop repeated entries from the result when the search succeeded.
if(THRUST_INCLUDE_DIR)
  list(REMOVE_DUPLICATES THRUST_INCLUDE_DIR)
endif()

# Find thrust version
#
# thrust/version.h stores the version as a single decimal integer:
#   THRUST_VERSION = major * 100000 + minor * 100 + patch
# (for example 100803 means 1.8.3). The number is decoded arithmetically so
# that multi-digit minor or patch components (e.g. 100910 -> 1.9.10, or
# 101000 -> 1.10.0) are reported correctly; stripping digits with regular
# expressions only works while every component is a single digit.
#
# Sets THRUST_VERSION ("major.minor.patch"), THRUST_MAJOR_VERSION and
# THRUST_MINOR_VERSION. They are left untouched when the header does not
# contain a numeric THRUST_VERSION definition.
if(THRUST_INCLUDE_DIR)
  # Quote the path so that directories containing spaces or semicolons work.
  file(STRINGS "${THRUST_INCLUDE_DIR}/thrust/version.h"
    version
    REGEX "#define THRUST_VERSION[ \t]+([0-9x]+)"
  )

  # Keep only the run of digits following the macro name; anything after it
  # on the same line (such as a trailing comment) is ignored.
  if("${version}" MATCHES "#define THRUST_VERSION[ \t]+([0-9]+)")
    set(version "${CMAKE_MATCH_1}")

    math(EXPR major "${version} / 100000")
    math(EXPR minor "(${version} / 100) % 1000")
    # From here on `version` holds the patch component only.
    math(EXPR version "${version} % 100")

    set(THRUST_VERSION "${major}.${minor}.${version}")
    set(THRUST_MAJOR_VERSION "${major}")
    set(THRUST_MINOR_VERSION "${minor}")
  endif()
endif()

# Report the outcome through the standard find-module helper: Thrust_FOUND is
# set according to whether THRUST_INCLUDE_DIR was found, and THRUST_VERSION is
# the value checked against any version passed to find_package(Thrust ...).
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Thrust
  FOUND_VAR     Thrust_FOUND
  REQUIRED_VARS THRUST_INCLUDE_DIR
  VERSION_VAR   THRUST_VERSION
)

# Provide the plural variable as well, but only for a successful search.
if(Thrust_FOUND)
  set(THRUST_INCLUDE_DIRS ${THRUST_INCLUDE_DIR})
endif()

# Keep the cache entry out of the default (non-advanced) cache views.
mark_as_advanced(THRUST_INCLUDE_DIR)

2 changes: 1 addition & 1 deletion doc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ if(DOXYGEN_FOUND)
VERBATIM)

# Trailing / in ${CMAKE_CURRENT_BINARY_DIR}/html/ gets rid of the extra html dir in install/doc
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/ DESTINATION doc)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/ DESTINATION doc OPTIONAL)
endif()
12 changes: 6 additions & 6 deletions doc/doxy.in
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.

PROJECT_NAME = "GPUGenie"
PROJECT_NAME = "GENIE"

# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
Expand Down Expand Up @@ -771,7 +771,7 @@ WARN_LOGFILE =
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.

INPUT = "@PROJECT_SOURCE_DIR@/src/GPUGenie"
INPUT = @PROJECT_SOURCE_DIR@/src/genie @PROJECT_SOURCE_DIR@/doc

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
Expand All @@ -796,13 +796,13 @@ INPUT_ENCODING = UTF-8
# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f, *.for, *.tcl,
# *.vhd, *.vhdl, *.ucf, *.qsf, *.as and *.js.

FILE_PATTERNS = *.cc *.cu *.h
FILE_PATTERNS = *.cc *.cu *.h *.dox

# The RECURSIVE tag can be used to specify whether or not subdirectories should
# be searched for input files as well.
# The default value is: NO.

RECURSIVE = NO
RECURSIVE = YES

# The EXCLUDE tag can be used to specify files and/or directories that should be
# excluded from the INPUT source files. This way you can easily exclude a
Expand Down Expand Up @@ -1111,7 +1111,7 @@ HTML_STYLESHEET =
# list). For an example see the documentation.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_EXTRA_STYLESHEET =
HTML_EXTRA_STYLESHEET = @PROJECT_SOURCE_DIR@/doc/style/custom.css

# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
# other source files which should be copied to the HTML output directory. Note
Expand Down Expand Up @@ -1397,7 +1397,7 @@ DISABLE_INDEX = NO
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_TREEVIEW = NO
GENERATE_TREEVIEW = YES

# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
# doxygen will group on one line in the generated HTML documentation.
Expand Down
Loading

0 comments on commit d3ec9de

Please sign in to comment.