Skip to content

Commit

Permalink
[GPU] Fix fc_tiled kernel for eltwise fusion and allow fusion for dyn…
Browse files Browse the repository at this point in the history
…amic shape (openvinotoolkit#26502)

### Details:
 - Fixed a bug in the fc_tiled_opt kernel when it is used with eltwise fusion
- Allow fc (fp precision) + eltwise fusion for the dynamic-shape case, and unfuse
if an unsupported kernel is selected

### Tickets:
 - CVS-151537
  • Loading branch information
yeonbok authored Sep 12, 2024
1 parent e8f1167 commit cbe2edf
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
return true;
} else {
auto in_dt = node.get_input_layout(0).data_type;
return data_type_traits::is_i8_u8(in_dt);
return node.is_dynamic() || data_type_traits::is_i8_u8(in_dt);
}
};

Expand Down
10 changes: 10 additions & 0 deletions src/plugins/intel_gpu/src/graph/primitive_inst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2382,6 +2382,16 @@ bool primitive_inst::is_valid_fusion() const {
if (fused_eltwise_prims.empty())
return true;

if (_node->is_type<fully_connected>() && _node->get_preferred_impl_type() == impl_types::ocl) {
// TODO: Only fc_bf_tiled_kernel & ref kernel are verified for fused eltwise. Verify and enable more fc kernels for eltwise fusion.
if (!_node->get_selected_impl())
return false;
if ((_node->get_selected_impl()->get_kernel_name().find("fully_connected_gpu_bf_tiled") == std::string::npos)
&& (_node->get_selected_impl()->get_kernel_name().find("fully_connected_gpu_bfyx_ref") == std::string::npos)) {
return false;
}
}

const auto& out_pshape = _impl_params->get_output_layout().get_partial_shape();
for (auto& fd : fused_eltwise_prims) {
auto outer_dep_idx = fd.outer_dep_start_idx;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -642,10 +642,10 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para

if (!params.fused_ops.empty()) {
std::vector<std::string> idx_order_scalar = { "(out_b + bi)", "(out_f + sglid)", "0", "0" };
std::vector<std::string> idx_order_vec = { "(out_b + bi)", "(out_f + fi + sglid)", "0", "0" };
std::vector<std::string> idx_order_vec = { "(out_b + bi)", "(out_f + sglid + fi * SIMD)", "0", "0" };
if (params.outputs[0].GetLayout() == DataLayout::bfyx) {
idx_order_scalar = { "(out_b + bi) / OUTPUT_FEATURE_NUM", "(out_b + bi) % OUTPUT_FEATURE_NUM", "sglid", "0" };
idx_order_vec = { "(out_b + bi) / OUTPUT_FEATURE_NUM", "(out_b + bi) % OUTPUT_FEATURE_NUM", "sglid", "0" };
idx_order_scalar = { "(out_b + bi) / OUTPUT_FEATURE_NUM", "(out_b + bi) % OUTPUT_FEATURE_NUM", "(out_f + sglid)", "0" };
idx_order_vec = { "(out_b + bi) / OUTPUT_FEATURE_NUM", "(out_b + bi) % OUTPUT_FEATURE_NUM", "(out_f + sglid + fi * SIMD)", "0" };
}

// Simplify fused ops configuration to prevent mixed layout exception in jitter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ class FullyConnectedFusingTestOneDNN : public BaseFusingTest<fully_connected_tes
#define CASE_FC_FP16_3D_1 { 2, 32, 3 }, { 2, 32, 16 }, { 16, 3, 1 }, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_FP16_3D_2 { 1, 1, 3 }, { 1, 1, 32 }, { 32, 3, 1 }, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f32, format::bfyx

// Parameter sets consumed by the fc_fp16_eltwise_add_ocl_dynamic suite below.
// Each macro expands to the leading fields of fully_connected_test_params: three shapes followed by
// data type / format pairs. Unlike CASE_FC_FP16_3D_* above, the third shape here is 2D.
// NOTE(review): the three shapes are presumably input, output and weights shapes - confirm against
// the fully_connected_test_params declaration.
#define DYN_CASE_FC_FP16_5 { 1, 128, 76 }, { 1, 128, 768 }, { 768, 76 }, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f32, format::bfyx
#define DYN_CASE_FC_FP16_6 { 2, 1, 76 }, { 2, 1, 768 }, { 768, 76 }, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f32, format::bfyx
#define DYN_CASE_FC_FP16_7 { 2, 128, 76 }, { 2, 128, 768 }, { 768, 76 }, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f32, format::bfyx
#define DYN_CASE_FC_FP16_3D_1 { 2, 32, 3 }, { 2, 32, 16 }, { 16, 3 }, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f32, format::bfyx
#define DYN_CASE_FC_FP16_3D_2 { 1, 1, 3 }, { 1, 1, 32 }, { 32, 3 }, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f32, format::bfyx



#define CASE_FC_FP16_INT4_COMP_1 { 1, 128 }, { 1, 128 }, { 128, 128 }, data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx

/* ----------------------------------------------------------------------------------------------------- */
Expand Down Expand Up @@ -810,3 +818,44 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_fp32_activation_relu, ::testing::Values
fully_connected_test_params{ CASE_FC_FP32_1, 2, 3 }
}));
#endif

class fc_fp16_eltwise_add_ocl_dynamic : public FullyConnectedFusingTest {
public:
void run_test() {
auto p = GetParam();
auto test_input_layout = get_input_layout(p);
auto dynamic_input_layout = layout{ov::PartialShape::dynamic(test_input_layout.get_partial_shape().size()), test_input_layout.data_type, test_input_layout.format};
auto eltwise_data_shape = p.out_shape.size() == 3 ? ov::PartialShape{1, 1, p.out_shape[2]} : ov::PartialShape{1, p.out_shape[1]};
auto eltwise_data_layout = layout{eltwise_data_shape, p.default_type, p.default_format};
create_topologies(
input_layout("input", dynamic_input_layout),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("eltwise_data", get_mem(eltwise_data_layout, 1, 9)),
fully_connected("fc_prim", input_info("input"), "weights", "bias", get_output_dim_size(p)),
eltwise("eltwise", { input_info("fc_prim"), input_info("eltwise_data") }, eltwise_mode::sum),
reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32)
);

tolerance = 1e-2f;
execute(p, true);
}
};

// Runs the dynamic fc + eltwise fusing scenario only on devices that do not report immad
// support; on devices that do, the test body is a no-op.
TEST_P(fc_fp16_eltwise_add_ocl_dynamic, basic) {
    const bool device_has_immad = engine.get_device_info().supports_immad;
    if (!device_has_immad) {
        run_test();
    }
}

// Instantiates fc_fp16_eltwise_add_ocl_dynamic over the FP16 parameter sets: CASE_FC_FP16_1..4
// (defined outside this excerpt) plus the DYN_CASE_FC_FP16_* sets declared earlier in this file.
// NOTE(review): the trailing "2, 3" values are presumably the expected primitive counts for the
// fused and non-fused networks - confirm against fully_connected_test_params.
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_fp16_eltwise_add_ocl_dynamic, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
fully_connected_test_params{ CASE_FC_FP16_1, 2, 3 },
fully_connected_test_params{ CASE_FC_FP16_2, 2, 3 },
fully_connected_test_params{ CASE_FC_FP16_3, 2, 3 },
fully_connected_test_params{ CASE_FC_FP16_4, 2, 3 },
fully_connected_test_params{ DYN_CASE_FC_FP16_5, 2, 3 },
fully_connected_test_params{ DYN_CASE_FC_FP16_6, 2, 3 },
fully_connected_test_params{ DYN_CASE_FC_FP16_7, 2, 3 },
fully_connected_test_params{ DYN_CASE_FC_FP16_3D_1, 2, 3 },
fully_connected_test_params{ DYN_CASE_FC_FP16_3D_2, 2, 3 },
}));

0 comments on commit cbe2edf

Please sign in to comment.