Skip to content

Commit 7ccadea

Browse files
iii-ifneddy
andcommitted
s390x: vectorize crc32
Use vector extensions when compiling for s390x and binutils knows about them. At runtime, check whether kernel supports vector extensions (it has to be not just the CPU, but also the kernel) and choose between the regular and the vectorized implementations. Co-authored-by: Eduard Stefes <[email protected]>
1 parent 5a82f71 commit 7ccadea

File tree

12 files changed

+559
-75
lines changed

12 files changed

+559
-75
lines changed

CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ if(NOT ZLIB_CONF_WRITTEN)
7676
mark_as_advanced(ZLIB_CONF_WRITTEN)
7777
endif(NOT ZLIB_CONF_WRITTEN)
7878

79+
#
80+
# Add contrib code
81+
#
82+
add_subdirectory(contrib/s390x)
7983
#
8084
# Check to see if we have large file support
8185
#
@@ -201,6 +205,8 @@ if(ZLIB_BUILD_SHARED)
201205
UNIX
202206
AND NOT APPLE
203207
AND NOT (CMAKE_SYSTEM_NAME STREQUAL AIX))
208+
target_link_libraries(zlib PRIVATE $<TARGET_NAME_IF_EXISTS:zlib_s390x_functable>)
209+
target_link_libraries(zlib PRIVATE $<TARGET_NAME_IF_EXISTS:zlib_crc32_vx>)
204210
endif(ZLIB_BUILD_SHARED)
205211

206212
if(ZLIB_BUILD_STATIC)
@@ -223,6 +229,8 @@ if(ZLIB_BUILD_STATIC)
223229
set_target_properties(
224230
zlibstatic PROPERTIES EXPORT_NAME ZLIBSTATIC OUTPUT_NAME
225231
z${zlib_static_suffix})
232+
target_link_libraries(zlibstatic PRIVATE $<TARGET_NAME_IF_EXISTS:zlib_s390x_functable>)
233+
target_link_libraries(zlibstatic PRIVATE $<TARGET_NAME_IF_EXISTS:zlib_crc32_vx>)
226234
endif(ZLIB_BUILD_STATIC)
227235

228236
if(ZLIB_INSTALL)

Makefile.in

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ LDFLAGS=
2727
TEST_LIBS=-L. libz.a
2828
LDSHARED=$(CC)
2929
CPP=$(CC) -E
30+
VGFMAFLAG=
3031

3132
STATICLIB=libz.a
3233
SHAREDLIB=libz.so
@@ -164,6 +165,12 @@ adler32.o: $(SRCDIR)adler32.c
164165
crc32.o: $(SRCDIR)crc32.c
165166
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
166167

168+
crc32-vx.o: $(SRCDIR)contrib/s390x/crc32-vx.c
169+
$(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390x/crc32-vx.c
170+
171+
s390x-functable.o: $(SRCDIR)contrib/s390x/s390x-functable.c
172+
$(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390x/s390x-functable.c
173+
167174
deflate.o: $(SRCDIR)deflate.c
168175
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
169176

@@ -214,6 +221,16 @@ crc32.lo: $(SRCDIR)crc32.c
214221
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
215222
-@mv objs/crc32.o $@
216223

224+
crc32-vx.lo: $(SRCDIR)contrib/s390x/crc32-vx.c
225+
-@mkdir objs 2>/dev/null || test -d objs
226+
$(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32-vx.o $(SRCDIR)contrib/s390x/crc32-vx.c
227+
-@mv objs/crc32-vx.o $@
228+
229+
s390x-functable.lo: $(SRCDIR)contrib/s390x/s390x-functable.c
230+
-@mkdir objs 2>/dev/null || test -d objs
231+
$(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/s390x-functable.o $(SRCDIR)contrib/s390x/s390x-functable.c
232+
-@mv objs/s390x-functable.o $@
233+
217234
deflate.lo: $(SRCDIR)deflate.c
218235
-@mkdir objs 2>/dev/null || test -d objs
219236
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c

configure

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ debug=0
9393
address=0
9494
memory=0
9595
unknown=0
96+
enable_crcvx=1
9697
old_cc="$CC"
9798
old_cflags="$CFLAGS"
9899
OBJC='$(OBJZ) $(OBJG)'
@@ -120,6 +121,7 @@ case "$1" in
120121
echo ' configure [--const] [--zprefix] [--prefix=PREFIX] [--eprefix=EXPREFIX]' | tee -a configure.log
121122
echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log
122123
echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log
124+
echo ' [--disable-crcvx]' | tee -a configure.log
123125
exit 0 ;;
124126
-p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;;
125127
-e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;;
@@ -146,6 +148,7 @@ case "$1" in
146148
--sanitize) address=1; shift ;;
147149
--address) address=1; shift ;;
148150
--memory) memory=1; shift ;;
151+
--disable-crcvx) enable_crcvx=0; shift ;;
149152
*) unknown=1; echo "unknown option ignored: $1" | tee -a configure.log; shift;;
150153
esac
151154
done
@@ -870,6 +873,70 @@ EOF
870873
fi
871874
fi
872875

876+
# check for ibm s390x build
877+
HAVE_S390X=0
878+
cat > $test.c << EOF
879+
#ifndef __s390x__
880+
#error
881+
#endif
882+
EOF
883+
if try $CC -c $CFLAGS $test.c; then
884+
echo "Checking for s390x build ... Yes." | tee -a configure.log
885+
HAVE_S390X=1
886+
else
887+
echo "Checking for s390x build ... No." | tee -a configure.log
888+
fi
889+
890+
# check for ibm s390x vx vector extensions
891+
HAVE_S390X_VX=0
892+
if test $HAVE_S390X -eq 1 && test $enable_crcvx -eq 1 ; then
893+
# preset the compiler specific flags
894+
if test $clang -eq 1; then
895+
VGFMAFLAG=-fzvector
896+
else
897+
VGFMAFLAG=-mzarch
898+
fi
899+
900+
cat > $test.c <<EOF
901+
#ifndef __s390x__
902+
#error
903+
#endif
904+
#include <vecintrin.h>
905+
int main(void) {
906+
unsigned long long a __attribute__((vector_size(16))) = { 0 };
907+
unsigned long long b __attribute__((vector_size(16))) = { 0 };
908+
unsigned char c __attribute__((vector_size(16))) = { 0 };
909+
c = vec_gfmsum_accum_128(a, b, c);
910+
return c[0];
911+
}
912+
EOF
913+
914+
# cflags already contains a valid march
915+
if try $CC -c $CFLAGS $VGFMAFLAG $test.c; then
916+
echo "Checking for s390x vx vector extension ... Yes." | tee -a configure.log
917+
HAVE_S390X_VX=1
918+
# or set march for our compile units
919+
elif try $CC -c $CFLAGS $VGFMAFLAG -march=z13 $test.c; then
920+
echo "Checking for s390x vx vector extension (march=z13) ... Yes." | tee -a configure.log
921+
HAVE_S390X_VX=1
922+
VGFMAFLAG="$VGFMAFLAG -march=z13"
923+
# else we are not on s390x
924+
else
925+
echo "Checking for s390x vx vector extension ... No." | tee -a configure.log
926+
fi
927+
928+
# prepare compiling for s390x
929+
if test $HAVE_S390X_VX -eq 1; then
930+
CFLAGS="$CFLAGS -DHAVE_S390X_VX"
931+
SFLAGS="$SFLAGS -DHAVE_S390X_VX"
932+
OBJC="$OBJC crc32-vx.o s390x-functable.o"
933+
PIC_OBJC="$PIC_OBJC crc32-vx.lo s390x-functable.lo"
934+
else
935+
# target has no vx extension
936+
VGFMAFLAG=""
937+
fi
938+
fi
939+
873940
# show the results in the log
874941
echo >> configure.log
875942
echo ALL = $ALL >> configure.log
@@ -901,6 +968,9 @@ echo mandir = $mandir >> configure.log
901968
echo prefix = $prefix >> configure.log
902969
echo sharedlibdir = $sharedlibdir >> configure.log
903970
echo uname = $uname >> configure.log
971+
echo HAVE_S390X = $HAVE_S390X >> configure.log
972+
echo HAVE_S390X_VX = $HAVE_S390X_VX >> configure.log
973+
echo VGFMAFLAG = $VGFMAFLAG >> configure.log
904974

905975
# update Makefile with the configure results
906976
sed < ${SRCDIR}Makefile.in "
@@ -912,6 +982,7 @@ sed < ${SRCDIR}Makefile.in "
912982
/^LDFLAGS *=/s#=.*#=$LDFLAGS#
913983
/^LDSHARED *=/s#=.*#=$LDSHARED#
914984
/^CPP *=/s#=.*#=$CPP#
985+
/^VGFMAFLAG *=/s#=.*#=$VGFMAFLAG#
915986
/^STATICLIB *=/s#=.*#=$STATICLIB#
916987
/^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
917988
/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#

contrib/README.contrib

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ puff/ by Mark Adler <[email protected]>
4646
Small, low memory usage inflate. Also serves to provide an
4747
unambiguous description of the deflate format.
4848

49+
s390x/ by Ilya Leoshkevich <[email protected]>
50+
Hardware-accelerated CRC32 on IBM Z with Z13 VX extension.
51+
4952
testzlib/ by Gilles Vollant <[email protected]>
5053
Example of the use of zlib
5154

contrib/functable/README

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
To exchange a zlib internal function on demand with architecture specific
2+
implementations use functable.h.
3+
4+
The architecture specific code has to provide three symbols:
5+
- arch_functable
6+
- arch_init
7+
- arch_init_done
8+
9+
`arch_functable` is a struct containing function pointers for all functions
10+
that may be replaced by architecture specific code.
11+
12+
`arch_init` is a function thats responsibility is to fill
13+
`arch_functable` with pointers to the functions that shall be used during the
14+
run.
15+
16+
`arch_init_done` is an atomic flag that will be set after
17+
`arch_init` is executed.

contrib/functable/functable.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/* functable.h -- declarations for swapping zlib internal functions with
 * architecture specific implementations.
 */
#ifndef Z_FUNCTABLE_H__
2+
#define Z_FUNCTABLE_H__
3+
4+
#include "../../zutil.h"
5+
#include "../../zonce.h"
6+
7+
/* Table of function pointers, one member per zlib function that may be
 * replaced by architecture specific code.
 */
struct zfunctable_s {
8+
    /* implementation to use for crc32_z */
unsigned long (*crc32_z)(unsigned long crc, const unsigned char FAR *buf,
9+
z_size_t len);
10+
};
11+
12+
/* The three symbols below are to be implemented by the architecture specific
 * code:
 *   arch_functable  - the table of function pointers declared above
 *   arch_init_done  - once_t flag that is set after arch_init has executed
 *   arch_init       - fills arch_functable with the functions to use
 */
13+
extern struct zfunctable_s ZLIB_INTERNAL arch_functable;
14+
extern once_t ZLIB_INTERNAL arch_init_done;
15+
void ZLIB_INTERNAL arch_init(void);
16+
17+
#endif

contrib/s390x/CMakeLists.txt

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#
# Build rules for the IBM Z (s390x) contribution:
#   zlib_s390x_functable - object library with the s390x function table,
#                          created whenever the target is s390x
#   zlib_crc32_vx        - object library with the vectorized crc32, created
#                          only if ZLIB_CRC32VX is ON and the compiler can
#                          build code using the vector extension
# The parent CMakeLists.txt links both into zlib/zlibstatic if they exist.
#
option(ZLIB_CRC32VX "Enable building S390-CRC32VX implementation" ON)

# Do not rely on the parent directory having loaded the check macro.
include(CheckCSourceCompiles)

# The probes below only need to compile, not to link an executable.
set(CMAKE_TRY_COMPILE_TARGET_TYPE "STATIC_LIBRARY")

#
# check if we compile for IBM s390x
#
check_c_source_compiles("
#ifndef __s390x__
#error
#endif
" HAS_S390X_SUPPORT)

# NOTE(review): the configure/Makefile build compiles s390x-functable.c only
# when the vx extension is usable, and then with -DHAVE_S390X_VX and
# $(VGFMAFLAG). Here it is built for every s390x target and receives neither.
# Confirm that s390x-functable.c does not depend on them.
if(HAS_S390X_SUPPORT)
    add_library(zlib_s390x_functable OBJECT
        s390x-functable.c
        ../functable/functable.h)
endif()

#
# Check for IBM S390X - VX extensions
#
if(ZLIB_CRC32VX AND HAS_S390X_SUPPORT)
    # preset the compiler specific flags
    if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
        set(VGFMAFLAG "-fzvector")
    else()
        set(VGFMAFLAG "-mzarch")
    endif()

    # Minimal program that only compiles if the vector intrinsics are usable.
    set(S390X_VX_TEST "
#ifndef __s390x__
#error
#endif
#include <vecintrin.h>
int main(void) {
    unsigned long long a __attribute__((vector_size(16))) = { 0 };
    unsigned long long b __attribute__((vector_size(16))) = { 0 };
    unsigned char c __attribute__((vector_size(16))) = { 0 };
    c = vec_gfmsum_accum_128(a, b, c);
    return c[0];
}
")

    # first attempt: the C flags already contain a suitable -march
    set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG}")
    check_c_source_compiles("${S390X_VX_TEST}" HAS_S390X_VX_SUPPORT)
    unset(CMAKE_REQUIRED_FLAGS)

    # second attempt: request z13 explicitly for our compile units
    if(NOT HAS_S390X_VX_SUPPORT)
        set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG} -march=z13")
        check_c_source_compiles("${S390X_VX_TEST}" HAS_Z13_S390X_VX_SUPPORT)
        unset(CMAKE_REQUIRED_FLAGS)
        # Only record -march=z13 if the probe using it actually succeeded.
        if(HAS_Z13_S390X_VX_SUPPORT)
            list(APPEND VGFMAFLAG "-march=z13")
        endif()
    endif()

    # prepare compiling for s390x
    if(HAS_S390X_VX_SUPPORT OR HAS_Z13_S390X_VX_SUPPORT)
        add_library(zlib_crc32_vx OBJECT
            crc32-vx.c
            ../functable/functable.h)
        target_link_libraries(zlib_crc32_vx PRIVATE zlib_s390x_functable)
        # The vector flags apply to this target's sources only.
        target_compile_options(zlib_crc32_vx PRIVATE ${VGFMAFLAG})
        # PUBLIC so that targets linking zlib_crc32_vx see HAVE_S390X_VX too.
        target_compile_definitions(zlib_crc32_vx PUBLIC HAVE_S390X_VX=1)
    endif()
endif()

contrib/s390x/README

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
IBM Z mainframes starting from version z13 provide vector instructions, which
2+
allow vectorization of crc32. This extension is built by default when targeting
3+
IBM s390x. However, this extension can be disabled if desired:
4+
5+
# for configure build
6+
$ ./configure --disable-crcvx
7+
8+
# for cmake build
9+
$ cmake .. -DZLIB_CRC32VX=off

0 commit comments

Comments
 (0)