Skip to content

Commit d980729

Browse files
committed
More forgiving build defaults (no cpu_features, git allowed to fail)
Update README build steps Cleanup CMakeLists Add missing clangd format file Don't read input decks when listing devices
1 parent cce7a33 commit d980729

12 files changed

+141
-165
lines changed

.clang-format

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
AllowShortIfStatementsOnASingleLine: Always
3+
AllowShortCaseLabelsOnASingleLine: true
4+
AllowShortFunctionsOnASingleLine: All
5+
IndentCaseLabels: true
6+
ColumnLimit: 120
7+
CompactNamespaces: true
8+
FixNamespaceComments: true
9+
IndentPPDirectives: BeforeHash
10+
...

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,6 @@ build/
1818
cmake-build-*/
1919
.idea/
2020
.directory
21-
log.txt
21+
log.txt
22+
23+
heatmap.csv

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Changelog
22
All notable changes to this project will be documented in this file.
33

4-
## [v2.0] - 2022-02-??
4+
## [v2.0] - 2022-??-??
55

66
### Added
77
- CI via GitHub Actions

CMakeLists.txt

+15-113
Original file line numberDiff line numberDiff line change
@@ -3,112 +3,20 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
33
project(miniBUDE VERSION 2.0 LANGUAGES CXX)
44
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
55

6-
76
set(CMAKE_VERBOSE_MAKEFILE ON)
87

98
# some nicer defaults for standard C++
109
set(CMAKE_CXX_EXTENSIONS OFF)
1110
set(CMAKE_CXX_STANDARD_REQUIRED ON)
1211

1312

14-
#set(USE_CPU_FEATURES ON)
15-
#set(MODEL acc)
16-
17-
18-
if (NOT MODEL)
19-
# set(MODEL raja)
20-
# set(RAJA_IN_TREE /home/tom/Downloads/RAJA-v0.14.1/)
21-
# set(ENABLE_CUDA ON)
22-
# set(CUDA_ARCH sm_61)
23-
# set(CUDA_TOOLKIT_ROOT_DIR /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/11.4)
24-
25-
26-
27-
# set(MODEL cuda)
28-
# set(CUDA_ARCH sm_61)
29-
# set(CMAKE_CUDA_COMPILER /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/11.4/bin/nvcc)
30-
31-
32-
33-
#set(MODEL kokkos)
34-
#set(KOKKOS_IN_TREE /home/tom/Downloads/kokkos-3.5.00)
35-
#set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "")
36-
#set(Kokkos_ENABLE_CUDA ON CACHE BOOL "")
37-
#set(Kokkos_ARCH_PASCAL61 ON CACHE BOOL "")
38-
#set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "")
39-
40-
set(MODEL thrust)
41-
set(THRUST_IMPL CUDA)
42-
set(SDK_DIR /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/include)
43-
set(CMAKE_CUDA_COMPILER /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/11.4/bin/nvcc)
44-
set(CUDA_ARCH sm_61)
45-
46-
13+
## Flags for debugging only, enable for development (ASan only works on a few models)
14+
set(SANITIZE OFF)
15+
if (SANITIZE)
16+
set(DEBUG_FLAGS ${DEBUG_FLAGS} -fsanitize=address)
17+
set(CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address)
4718
endif ()
4819

49-
50-
#set(MODEL tbb)
51-
#set(OFFLOAD NVIDIA:sm_35)
52-
53-
54-
#set(MODEL sycl)
55-
#set(SYCL_COMPILER HIPSYCL)
56-
#set(SYCL_COMPILER_DIR /opt/hipsycl/68fb6d2026b07b0895ff468b58d40858ac1ae7d5)
57-
58-
59-
#set(MODEL sycl)
60-
#set(SYCL_COMPILER ONEAPI-DPCPP)
61-
62-
63-
64-
#set(CMAKE_CXX_COMPILER "${KOKKOS_IN_TREE}/bin/nvcc_wrapper")
65-
66-
#set(ENV{CUDA_ROOT} /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/11.4/)
67-
#set(CUDA_ROOT /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/11.4/)
68-
69-
#set(CMAKE_CUDA_COMPILER /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/compilers/bin/nvcc)
70-
#set(CUDA_ROOT /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/11.4/)
71-
#set(CUDAToolkit_ROOT /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/11.4/)
72-
73-
74-
75-
#
76-
#set(MODEL std20)
77-
#set(CXX_EXTRA_LIBRARIES tbb)
78-
79-
#set(MODEL std)
80-
#set(CMAKE_CXX_COMPILER /opt/nvidia/hpc_sdk/Linux_x86_64/21.5/compilers/bin/nvc++)
81-
82-
83-
#set(MODEL cuda)
84-
#set(CMAKE_CUDA_COMPILER /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/compilers/bin/nvcc)
85-
#set(CUDA_EXTRA_FLAGS "-gencode arch=compute_61,code=sm_61")
86-
#set(CUDA_ARCH sm_61)
87-
88-
89-
#set(MODEL ocl)
90-
#set(OpenCL_LIBRARY /opt/rocm-4.3.0/opencl/lib/libOpenCL.so.1.2)
91-
92-
#set(MODEL hip)
93-
#set(CMAKE_CXX_COMPILER /opt/rocm-4.3.0/bin/hipcc)
94-
#set(CXX_EXTRA_FLAGS -nogpuinc -nogpulib)
95-
#set(CUDA_EXTRA_FLAGS "-gencode arch=compute_35,code=sm_35")
96-
#set(CUDA_ARCH sm_35)
97-
98-
99-
#set(SYCL_COMPILER_DIR /opt/hipsycl/68fb6d2026b07b0895ff468b58d40858ac1ae7d5)
100-
101-
#set(MODEL std)
102-
##set(CXX_EXTRA_LIBRARIES tbb)
103-
#set(CMAKE_CXX_COMPILER /opt/nvidia/hpc_sdk/Linux_x86_64/21.9/compilers/bin/nvc++)
104-
#set(NVHPC_OFFLOAD cc61)
105-
106-
107-
#set(SYCL_COMPILER COMPUTECPP)
108-
#set(SYCL_COMPILER_DIR /home/tom/Downloads/ComputeCpp-CE-2.6.0-x86_64-linux-gnu/)
109-
#set(OpenCL_LIBRARY /opt/intel/oneapi/compiler/2021.4.0/linux/lib/libOpenCL.so.1.2)
110-
111-
11220
# the final executable name
11321
set(EXE_NAME bude)
11422

@@ -157,7 +65,7 @@ if ((DEFINED CXX_EXTRA_FLAGS) AND (NOT DEFINED CXX_EXTRA_LINK_FLAGS))
15765
set(CXX_EXTRA_LINK_FLAGS ${CXX_EXTRA_FLAGS})
15866
endif ()
15967

160-
option(USE_CPU_FEATURES "Enable the cpu_feature library for host CPU detection" ON)
68+
option(USE_CPU_FEATURES "Enable the cpu_feature library for host CPU detection" OFF)
16169

16270
if (USE_CPU_FEATURES)
16371
include(FetchContent)
@@ -177,8 +85,8 @@ include(cmake/register_models.cmake)
17785
# register our models <model_name> <preprocessor_def_name> <source files...>
17886
register_model(omp OMP fasten.hpp)
17987
register_model(ocl OCL fasten.hpp)
180-
register_model(std STD fasten.hpp)
181-
register_model(std20 STD20 fasten.hpp) # TODO
88+
register_model(std-indices STD_INDICES fasten.hpp)
89+
register_model(std-ranges STD_RANGES fasten.hpp) # TODO
18290
register_model(hip HIP fasten.hpp)
18391
register_model(cuda CUDA fasten.hpp)
18492
register_model(kokkos KOKKOS fasten.hpp)
@@ -190,7 +98,6 @@ register_model(tbb TBB fasten.hpp)
19098
register_model(thrust THRUST fasten.hpp) # TODO
19199

192100

193-
194101
set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model")
195102

196103
message(STATUS "Available models: ${REGISTERED_MODELS}")
@@ -226,12 +133,6 @@ endif ()
226133

227134
message(STATUS "Default ${CMAKE_BUILD_TYPE} flags are `${DEFAULT_${BUILD_TYPE}_FLAGS}`, set ${BUILD_TYPE}_FLAGS to override (CXX_EXTRA_* flags are not affected)")
228135

229-
set(SANATIZE OFF)
230-
231-
if (SANATIZE)
232-
set(DEBUG_FLAGS ${DEBUG_FLAGS} -fsanitize=address)
233-
set(CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address)
234-
endif ()
235136

236137
# setup common build flag defaults if there are no overrides
237138
if (NOT DEFINED ${BUILD_TYPE}_FLAGS)
@@ -243,9 +144,9 @@ endif ()
243144
set(DEFAULT_PPWI "1,2,4,8,16,32,64,128")
244145

245146
if (NOT PPWI)
246-
message(STATUS PPWI not set, defaulting to ${DEFAULT_PPWI})
147+
message(STATUS "PPWI not set, defaulting to ${DEFAULT_PPWI}")
247148
set(PPWI ${DEFAULT_PPWI})
248-
endif()
149+
endif ()
249150

250151

251152
message(STATUS "CXX vendor : ${CMAKE_CXX_COMPILER_ID} (${CMAKE_CXX_COMPILER})")
@@ -285,9 +186,10 @@ target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS})
285186
# setup git_watcher...
286187
set(PRE_CONFIGURE_FILE "${CMAKE_SOURCE_DIR}/src/meta_vcs.h.in")
287188
set(POST_CONFIGURE_FILE "${CMAKE_BINARY_DIR}/generated/meta_vcs.h")
288-
#include("${CMAKE_SOURCE_DIR}/cmake/git_watcher.cmake")
289-
#set(GIT_FAIL_IF_NONZERO_EXIT FALSE)
290-
#add_dependencies(${EXE_NAME} check_git)
189+
190+
set(GIT_FAIL_IF_NONZERO_EXIT FALSE) # Don't fail the build because of VCS; use FALSE here because git_watcher says so
191+
include("${CMAKE_SOURCE_DIR}/cmake/git_watcher.cmake")
192+
add_dependencies(${EXE_NAME} check_git)
291193

292194

293195
# some models require the target to be already specified so they can finish their setup here
@@ -312,6 +214,6 @@ else ()
312214
set(COMPILE_COMMANDS COMPILE_FLAGS)
313215
endif ()
314216

315-
#set_target_properties(${EXE_NAME} PROPERTIES OUTPUT_NAME "${BIN_NAME}")
217+
set_target_properties(${EXE_NAME} PROPERTIES OUTPUT_NAME "${BIN_NAME}")
316218

317219
install(TARGETS ${EXE_NAME} DESTINATION bin)

README.md

+75-26
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,84 @@ Increasing the iteration count has similar performance effects to docking multip
88

99
The top-level `data` directory contains the input common to implementations.
1010
The top-level `makedeck` directory contains an input deck generation program and a set of mol2/bhff input files.
11-
Each other subdirectory contains a separate C/C++ implementation:
12-
13-
- [OpenMP](openmp/) for CPUs
14-
- [OpenMP target](openmp-target/) for GPUs
15-
- [CUDA](cuda/) for GPUs
16-
- [OpenCL](opencl/) for GPUs
17-
- [OpenACC](openacc/) for GPUs
18-
- [SYCL](sycl/) for CPUs and GPUs
19-
- [Kokkos](kokkos/) for CPUs and GPUs
20-
21-
We also include implementations in emerging programming languages as direct ports of miniBUDE:
22-
23-
- [Julia](miniBUDE.jl) for CPUs (@threads) and GPUs ([CUDA.jl](https://juliagpu.gitlab.io/CUDA.jl/), [AMDGPU.jl](https://amdgpu.juliagpu.org/stable/), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl), etc)
24-
11+
Each other subdirectory in `src` contains a separate C/C++ implementation.
2512

2613
## Building
2714

28-
To build with the default options, type `make` in an implementation directory.
29-
There are options to choose the compiler used and the architecture targeted.
30-
31-
Refer to each implementation's README for further build instructions.
32-
33-
## Running
34-
35-
To run with the default options, run the binary without any flags.
36-
To adjust the run time, use `-i` to set the number of iterations.
37-
For very short runs, e.g. for simulation, use `-n 1024` to reduce the number of poses.
38-
39-
Refer to each implementation's README for further run instructions.
15+
Drivers, compilers, and software applicable to whichever implementation you would like to build against are required.
16+
17+
### CMake
18+
19+
The project supports building with CMake >= 3.14.0, which can be installed without root via the [official script](https://cmake.org/download/).
20+
21+
Each miniBUDE implementation (programming model) is built as follows:
22+
23+
```shell
24+
$ cd miniBUDE
25+
26+
# configure the build, build type defaults to Release
27+
# The -DMODEL flag is required
28+
$ cmake -Bbuild -H. -DMODEL=<model> <model specific flags prefixed with -D...>
29+
30+
# compile
31+
$ cmake --build build
32+
33+
# run executables in ./build
34+
$ ./build/<model>-bude
35+
```
36+
37+
The `MODEL` option selects one implementation of miniBUDE to build.
38+
The source for each model's implementation is located in `./src/<model>`.
39+
40+
Currently available models are:
41+
```
42+
omp;ocl;std-indices;std-ranges;hip;cuda;kokkos;sycl;acc;raja;tbb;thrust
43+
```
44+
45+
#### Overriding default flags
46+
By default, we have defined a set of optimal flags for known HPC compilers.
47+
These are assigned to `RELEASE_FLAGS`, and you can override them if required.
48+
49+
To find out what flag each model supports or requires, simply configure while only specifying the model.
50+
For example:
51+
```shell
52+
> cd miniBUDE
53+
> cmake -Bbuild -H. -DMODEL=omp
54+
No CMAKE_BUILD_TYPE specified, defaulting to 'Release'
55+
-- CXX_EXTRA_FLAGS:
56+
Appends to common compile flags. These will be appended at link phase as well.
57+
To use separate flags at link phase, set `CXX_EXTRA_LINK_FLAGS`
58+
-- CXX_EXTRA_LINK_FLAGS:
59+
Appends to link flags which appear *before* the objects.
60+
Do not use this for linking libraries, as the link line is order-dependent
61+
-- CXX_EXTRA_LIBRARIES:
62+
Append to link flags which appear *after* the objects.
63+
Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)
64+
-- CXX_EXTRA_LINKER_FLAGS:
65+
Append to linker flags (i.e GCC's `-Wl` or equivalent)
66+
-- Available models: omp;ocl;std-indices;std-ranges;hip;cuda;kokkos;sycl;acc;raja;tbb;thrust
67+
-- Selected model : omp
68+
-- Supported flags:
69+
70+
CMAKE_CXX_COMPILER (optional, default=c++): Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)
71+
ARCH (optional, default=): This overrides CMake's CMAKE_SYSTEM_PROCESSOR detection which uses (uname -p), this is mainly for use with
72+
specialised accelerators only and not to be confused with offload which is is mutually exclusive with this.
73+
Supported values are:
74+
- NEC
75+
OFFLOAD (optional, default=OFF): Whether to use OpenMP offload, the format is <VENDOR:ARCH?>|ON|OFF.
76+
We support a small set of known offload flags for clang, gcc, and icpx.
77+
However, as offload support is rapidly evolving, we recommend you directly supply them via OFFLOAD_FLAGS.
78+
For example:
79+
* OFFLOAD=NVIDIA:sm_60
80+
* OFFLOAD=AMD:gfx906
81+
* OFFLOAD=INTEL
82+
* OFFLOAD=ON OFFLOAD_FLAGS=...
83+
OFFLOAD_FLAGS (optional, default=): If OFFLOAD is enabled, this *overrides* the default offload flags
84+
OFFLOAD_APPEND_LINK_FLAG (optional, default=ON): If enabled, this appends all resolved offload flags (OFFLOAD=<vendor:arch> or directly from OFFLOAD_FLAGS) to the link flags.
85+
This is required for most offload implementations so that offload libraries can linked correctly.
86+
87+
88+
```
4089
4190
### Benchmarks
4291

heatmap.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
import pandas as pd
33
import seaborn as sns
44
import matplotlib.pyplot as plt
5+
from matplotlib.colors import LogNorm
6+
7+
import copy
8+
import matplotlib
59

610

711
def linear_scale(old_min, old_max, new_min, new_max, old_value):
@@ -15,14 +19,17 @@ def linear_scale(old_min, old_max, new_min, new_max, old_value):
1519
normalised = data.copy()
1620

1721
normalised["sum_ms"] = normalised["sum_ms"].apply(
18-
lambda x: linear_scale(normalised["sum_ms"].min(), normalised["sum_ms"].max(), 1, 0, x))
22+
lambda x: linear_scale(normalised["sum_ms"].min(), normalised["sum_ms"].max(), 0, 100, x) )
1923

2024
out = normalised.pivot(index="ppwi", columns="wgsize", values="sum_ms")
2125
out.sort_index(level=0, ascending=False, inplace=True)
2226

2327
# data = np.genfromtxt('heatmap.csv', delimiter=',')
2428
print(out)
2529

26-
sns.heatmap(out, annot=True)
30+
my_cmap = copy.copy(matplotlib.cm.get_cmap('rocket')) # copy the default cmap
31+
my_cmap.set_bad((0,0,0))
32+
33+
sns.heatmap(out, annot=True, norm=LogNorm(), cmap=my_cmap)
2734

2835
plt.show()

0 commit comments

Comments
 (0)