Skip to content

Commit 36dedc9

Browse files
author
zuoyanzhang
committed
update paper
1 parent 72dd9f0 commit 36dedc9

File tree

1 file changed

+16
-13
lines changed

1 file changed

+16
-13
lines changed
Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
1-
@inproceedings{swgemm,
2-
location = {Bordeaux, France},
3-
address = {New York, NY, USA},
4-
author = {Tao, Xiaohan and Zhu, Yu and Wang, Boyang and Xu, Jinlong and Pang, Jianmin and Zhao, Jie},
5-
booktitle = {Proceedings of the 51st International Conference on Parallel Processing},
6-
isbn = {9781450397339},
7-
numpages = {12},
8-
pages = {23:1-23:12},
9-
publisher = {Association for Computing Machinery},
10-
series = {ICPP'22},
11-
title = {Automatically Generating High-performance Matrix Multiplication Kernels on the Latest Sunway Processor},
12-
doi = {https://doi.org/10.1145/3545008.3545031},
13-
year = {2022}
1+
@inproceedings{10.1145/3330345.3331059,
2+
author = {Sun, Huihui and Fey, Florian and Zhao, Jie and Gorlatch, Sergei},
3+
title = {{WCCV}: improving the vectorization of {IF}-statements with warp-coherent conditions},
4+
year = {2019},
5+
isbn = {9781450360791},
6+
publisher = {Association for Computing Machinery},
7+
address = {New York, NY, USA},
8+
url = {https://doi.org/10.1145/3330345.3331059},
9+
doi = {10.1145/3330345.3331059},
10+
abstract = {When vectorizing programs for modern processors with SIMD extensions, IF-statements pose a challenge: existing vectorization approaches often introduce redundant computations or they resort to inefficient masked instructions. In this paper, we introduce a new notion of warp-coherence for conditions that exhibit coherent run-time behavior on different lanes of a vector register. We demonstrate that warp-coherent conditions appear frequently in practice. We present Warp-Coherent Condition Vectorization (WCCV) - an approach to detecting and optimizing IF-statements with warp-coherent conditions - to efficiently vectorize programs with IF-statements while avoiding the overhead of existing methods. WCCV detects warp-coherent conditions via the affine analysis of conditional boolean expressions and branch predication of IF-statements; the runtime code generated by WCCV avoids redundant computations and masked instructions. We employ auto-tuning to find the optimal benefit-overhead ratio for WCCV. We implement WCCV on top of Region Vectorizer (RV) - an LLVM-based vectorizing compiler, and we conduct experiments on the Rodinia benchmark suite, achieving a mean speedup of 1.14\texttimes{} over the original vectorized and optimized code, and speedup between 0.98\texttimes{} and 7.02\texttimes{} over the scalar code on Skylake with AVX512.},
11+
booktitle = {Proceedings of the ACM International Conference on Supercomputing},
12+
pages = {319--329},
13+
numpages = {11},
14+
keywords = {IF-statements, SPMD-on-SIMD, compiler optimization, vectorization, warp-coherence},
15+
location = {Phoenix, Arizona},
16+
series = {ICS '19}
1417
}

0 commit comments

Comments
 (0)