Skip to content

Commit

Permalink
8320500: [vectorapi] RISC-V: Optimize vector math operations with SLEEF
Browse files Browse the repository at this point in the history
Reviewed-by: luhenry, ihse, erikj, fyang, rehn
  • Loading branch information
Hamlin Li committed Oct 8, 2024
1 parent 4a12f5b commit 580eb62
Show file tree
Hide file tree
Showing 14 changed files with 290 additions and 48 deletions.
18 changes: 18 additions & 0 deletions make/modules/jdk.incubator.vector/Lib.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,21 @@ ifeq ($(call isTargetOs, linux windows)+$(call isTargetCpu, x86_64)+$(INCLUDE_CO

TARGETS += $(BUILD_LIBJSVML)
endif

################################################################################
## Build libsleef
################################################################################

ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2), true+true+true)
$(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \
NAME := sleef, \
OPTIMIZATION := HIGH, \
SRC := libsleef/lib, \
EXTRA_SRC := libsleef/generated, \
DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \
DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \
CFLAGS := -march=rv64gcv, \
))

TARGETS += $(BUILD_LIBSLEEF)
endif
6 changes: 4 additions & 2 deletions src/hotspot/cpu/riscv/assembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@
class Argument {
public:
enum {
n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
// check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
n_vector_register_parameters_c = 16, // v8, v9, ... v23

n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...)
Expand Down
27 changes: 24 additions & 3 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1972,12 +1972,16 @@ const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length)

// Vector calling convention not yet implemented.
bool Matcher::supports_vector_calling_convention(void) {
return false;
return EnableVectorSupport && UseVectorStubs;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
Unimplemented();
return OptoRegPair(0, 0);
assert(EnableVectorSupport && UseVectorStubs, "sanity");
assert(ideal_reg == Op_VecA, "sanity");
// check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
int lo = V8_num;
int hi = V8_K_num;
return OptoRegPair(hi, lo);
}

// Is this branch offset short enough that a short branch can be used?
Expand Down Expand Up @@ -10075,6 +10079,23 @@ instruct CallLeafDirect(method meth, rFlagsReg cr)
ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction without safepoint and with vector arguments

instruct CallLeafDirectVector(method meth, rFlagsReg cr)
%{
match(CallLeafVector);

effect(USE meth, KILL cr);

ins_cost(BRANCH_COST);

format %{ "CALL, runtime leaf vector $meth" %}

ins_encode(riscv_enc_java_to_runtime(meth));

ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
Expand Down
15 changes: 14 additions & 1 deletion src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,20 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
uint num_bits,
uint total_args_passed) {
Unimplemented();
assert(total_args_passed <= Argument::n_vector_register_parameters_c, "unsupported");
assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");

// check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
static const VectorRegister VEC_ArgReg[Argument::n_vector_register_parameters_c] = {
v8, v9, v10, v11, v12, v13, v14, v15,
v16, v17, v18, v19, v20, v21, v22, v23
};

const int next_reg_val = 3;
for (uint i = 0; i < total_args_passed; i++) {
VMReg vmreg = VEC_ArgReg[i]->as_VMReg();
regs[i].set_pair(vmreg->next(next_reg_val), vmreg);
}
return 0;
}

Expand Down
54 changes: 54 additions & 0 deletions src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6071,6 +6071,58 @@ static const int64_t right_3_bits = right_n_bits(3);
return start;
}

void generate_vector_math_stubs() {
if (!UseRVV) {
log_info(library)("vector is not supported, skip loading vector math (sleef) library!");
return;
}

// Get native vector math stub routine addresses
void* libsleef = nullptr;
char ebuf[1024];
char dll_name[JVM_MAXPATHLEN];
if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
}
if (libsleef == nullptr) {
log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf);
return;
}

// Method naming convention
// All the methods are named as <OP><T>_<U><suffix>
//
// Where:
// <OP> is the operation name, e.g. sin, cos
// <T> is to indicate float/double
// "fx/dx" for vector float/double operation
// <U> is the precision level
// "u10/u05" represents 1.0/0.5 ULP error bounds
// We use "u10" for all operations by default
// But for those functions do not have u10 support, we use "u05" instead
// <suffix> rvv, indicates riscv vector extension
//
// e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions
//
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));

for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
int vop = VectorSupport::VECTOR_OP_MATH_START + op;
if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression
continue;
}

// The native library does not support u10 level of "hypot".
const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";

snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);

snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
}
}

#endif // COMPILER2

/**
Expand Down Expand Up @@ -6291,6 +6343,8 @@ static const int64_t right_3_bits = right_n_bits(3);

generate_string_indexof_stubs();

generate_vector_math_stubs();

#endif // COMPILER2
}

Expand Down
24 changes: 12 additions & 12 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4184,41 +4184,41 @@ void StubGenerator::generate_compiler_stubs() {

log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
if (UseAVX > 2) {
for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
int vop = VectorSupport::VECTOR_OP_SVML_START + op;
for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
int vop = VectorSupport::VECTOR_OP_MATH_START + op;
if ((!VM_Version::supports_avx512dq()) &&
(vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
continue;
}
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]);
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::mathname[op]);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);

snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]);
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::mathname[op]);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
}
}
const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
int vop = VectorSupport::VECTOR_OP_SVML_START + op;
for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
int vop = VectorSupport::VECTOR_OP_MATH_START + op;
if (vop == VectorSupport::VECTOR_OP_POW) {
continue;
}
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);

snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);

snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);

snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);

snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);

snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/callnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {

if (Opcode() == Op_CallLeafVector) {
// If the return is in vector, compute appropriate regmask taking into account the whole range
if(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ) {
if(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ) {
if(OptoReg::is_valid(regs.second())) {
for (OptoReg::Name r = regs.first(); r <= regs.second(); r = OptoReg::add(r, 1)) {
rm.Insert(r);
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ class LibraryCallKit : public GraphKit {
bool inline_index_vector();
bool inline_index_partially_in_upper_range();

Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
Node* gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);

enum VectorMaskUseType {
VecMaskUseLoad = 1 << 0,
Expand Down
45 changes: 29 additions & 16 deletions src/hotspot/share/opto/vectorIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,11 +468,11 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
Node* operation = nullptr;
if (opc == Op_CallLeafVector) {
assert(UseVectorStubs, "sanity");
operation = gen_call_to_svml(opr->get_con(), elem_bt, num_elem, opd1, opd2);
operation = gen_call_to_vector_math(opr->get_con(), elem_bt, num_elem, opd1, opd2);
if (operation == nullptr) {
log_if_needed(" ** svml call failed for %s_%s_%d",
(elem_bt == T_FLOAT)?"float":"double",
VectorSupport::svmlname[opr->get_con() - VectorSupport::VECTOR_OP_SVML_START],
log_if_needed(" ** Vector math call failed for %s_%s_%d",
(elem_bt == T_FLOAT) ? "float" : "double",
VectorSupport::mathname[opr->get_con() - VectorSupport::VECTOR_OP_MATH_START],
num_elem * type2aelembytes(elem_bt));
return false;
}
Expand Down Expand Up @@ -2071,34 +2071,47 @@ bool LibraryCallKit::inline_vector_rearrange() {
return true;
}

static address get_svml_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) {
static address get_vector_math_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) {
address addr = nullptr;
assert(UseVectorStubs, "sanity");
assert(name_ptr != nullptr, "unexpected");
assert((vop >= VectorSupport::VECTOR_OP_SVML_START) && (vop <= VectorSupport::VECTOR_OP_SVML_END), "unexpected");
int op = vop - VectorSupport::VECTOR_OP_SVML_START;
assert((vop >= VectorSupport::VECTOR_OP_MATH_START) && (vop <= VectorSupport::VECTOR_OP_MATH_END), "unexpected");
int op = vop - VectorSupport::VECTOR_OP_MATH_START;

switch(bits) {
case 64: //fallthough
case 128: //fallthough
case 256: //fallthough
case 512:
if (bt == T_FLOAT) {
snprintf(name_ptr, name_len, "vector_%s_float%d", VectorSupport::svmlname[op], bits);
snprintf(name_ptr, name_len, "vector_%s_float_%dbits_fixed", VectorSupport::mathname[op], bits);
addr = StubRoutines::_vector_f_math[exact_log2(bits/64)][op];
} else {
assert(bt == T_DOUBLE, "must be FP type only");
snprintf(name_ptr, name_len, "vector_%s_double%d", VectorSupport::svmlname[op], bits);
snprintf(name_ptr, name_len, "vector_%s_double_%dbits_fixed", VectorSupport::mathname[op], bits);
addr = StubRoutines::_vector_d_math[exact_log2(bits/64)][op];
}
break;
default:
snprintf(name_ptr, name_len, "invalid");
addr = nullptr;
Unimplemented();
if (!Matcher::supports_scalable_vector() || !Matcher::vector_size_supported(bt, bits/type2aelembytes(bt)) ) {
snprintf(name_ptr, name_len, "invalid");
addr = nullptr;
Unimplemented();
}
break;
}

if (addr == nullptr && Matcher::supports_scalable_vector()) {
if (bt == T_FLOAT) {
snprintf(name_ptr, name_len, "vector_%s_float_%dbits_scalable", VectorSupport::mathname[op], bits);
addr = StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op];
} else {
assert(bt == T_DOUBLE, "must be FP type only");
snprintf(name_ptr, name_len, "vector_%s_double_%dbits_scalable", VectorSupport::mathname[op], bits);
addr = StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op];
}
}

return addr;
}

Expand Down Expand Up @@ -2246,16 +2259,16 @@ bool LibraryCallKit::inline_vector_select_from() {
return true;
}

Node* LibraryCallKit::gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) {
Node* LibraryCallKit::gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) {
assert(UseVectorStubs, "sanity");
assert(vector_api_op_id >= VectorSupport::VECTOR_OP_SVML_START && vector_api_op_id <= VectorSupport::VECTOR_OP_SVML_END, "need valid op id");
assert(vector_api_op_id >= VectorSupport::VECTOR_OP_MATH_START && vector_api_op_id <= VectorSupport::VECTOR_OP_MATH_END, "need valid op id");
assert(opd1 != nullptr, "must not be null");
const TypeVect* vt = TypeVect::make(bt, num_elem);
const TypeFunc* call_type = OptoRuntime::Math_Vector_Vector_Type(opd2 != nullptr ? 2 : 1, vt, vt);
char name[100] = "";

// Get address for svml method.
address addr = get_svml_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100);
// Get address for vector math method.
address addr = get_vector_math_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100);

if (addr == nullptr) {
return nullptr;
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/share/prims/vectorSupport.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -43,7 +43,7 @@
#endif // COMPILER2

#ifdef COMPILER2
const char* VectorSupport::svmlname[VectorSupport::NUM_SVML_OP] = {
const char* VectorSupport::mathname[VectorSupport::NUM_VECTOR_OP_MATH] = {
"tan",
"tanh",
"sin",
Expand Down
13 changes: 7 additions & 6 deletions src/hotspot/share/prims/vectorSupport.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -121,25 +121,26 @@ class VectorSupport : AllStatic {
VECTOR_OP_EXPM1 = 117,
VECTOR_OP_HYPOT = 118,

VECTOR_OP_SVML_START = VECTOR_OP_TAN,
VECTOR_OP_SVML_END = VECTOR_OP_HYPOT,
NUM_SVML_OP = VECTOR_OP_SVML_END - VECTOR_OP_SVML_START + 1
VECTOR_OP_MATH_START = VECTOR_OP_TAN,
VECTOR_OP_MATH_END = VECTOR_OP_HYPOT,
NUM_VECTOR_OP_MATH = VECTOR_OP_MATH_END - VECTOR_OP_MATH_START + 1
};

enum {
VEC_SIZE_64 = 0,
VEC_SIZE_128 = 1,
VEC_SIZE_256 = 2,
VEC_SIZE_512 = 3,
NUM_VEC_SIZES = 4
VEC_SIZE_SCALABLE = 4,
NUM_VEC_SIZES = 5
};

enum {
MODE_BROADCAST = 0,
MODE_BITS_COERCED_LONG_TO_MASK = 1
};

static const char* svmlname[VectorSupport::NUM_SVML_OP];
static const char* mathname[VectorSupport::NUM_VECTOR_OP_MATH];

static int vop2ideal(jint vop, BasicType bt);

Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/share/runtime/stubRoutines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ address StubRoutines::_dtanh = nullptr;
address StubRoutines::_f2hf = nullptr;
address StubRoutines::_hf2f = nullptr;

address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}};
address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}};
address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}};
address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}};

address StubRoutines::_method_entry_barrier = nullptr;
address StubRoutines::_array_sort = nullptr;
Expand Down
Loading

0 comments on commit 580eb62

Please sign in to comment.