diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 6b739e1e9f4b6b..8e7cdd0337874c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -496,7 +496,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { return true; } else { auto in_dt = node.get_input_layout(0).data_type; - return data_type_traits::is_i8_u8(in_dt); + return node.is_dynamic() || data_type_traits::is_i8_u8(in_dt); } }; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index ad1541177b7dd6..2ebe53620161f8 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -2382,6 +2382,16 @@ bool primitive_inst::is_valid_fusion() const { if (fused_eltwise_prims.empty()) return true; + if (_node->is_type() && _node->get_preferred_impl_type() == impl_types::ocl) { + // TODO: Only fc_bf_tiled_kernel & ref kernel are verified for fused eltwise. 
Need to support more fc kernels for eltwise fusion. + if (!_node->get_selected_impl()) + return false; + if ((_node->get_selected_impl()->get_kernel_name().find("fully_connected_gpu_bf_tiled") == std::string::npos) + && (_node->get_selected_impl()->get_kernel_name().find("fully_connected_gpu_bfyx_ref") == std::string::npos)) { + return false; + } + } + const auto& out_pshape = _impl_params->get_output_layout().get_partial_shape(); for (auto& fd : fused_eltwise_prims) { auto outer_dep_idx = fd.outer_dep_start_idx; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 7a3ea70f37d366..c64b7419725611 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -642,10 +642,10 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para if (!params.fused_ops.empty()) { std::vector idx_order_scalar = { "(out_b + bi)", "(out_f + sglid)", "0", "0" }; - std::vector idx_order_vec = { "(out_b + bi)", "(out_f + fi + sglid)", "0", "0" }; + std::vector idx_order_vec = { "(out_b + bi)", "(out_f + sglid + fi * SIMD)", "0", "0" }; if (params.outputs[0].GetLayout() == DataLayout::bfyx) { - idx_order_scalar = { "(out_b + bi) / OUTPUT_FEATURE_NUM", "(out_b + bi) % OUTPUT_FEATURE_NUM", "sglid", "0" }; - idx_order_vec = { "(out_b + bi) / OUTPUT_FEATURE_NUM", "(out_b + bi) % OUTPUT_FEATURE_NUM", "sglid", "0" }; + idx_order_scalar = { "(out_b + bi) / OUTPUT_FEATURE_NUM", "(out_b + bi) % OUTPUT_FEATURE_NUM", "(out_f + sglid)", "0" }; + idx_order_vec = { "(out_b + bi) / OUTPUT_FEATURE_NUM", "(out_b + bi) % OUTPUT_FEATURE_NUM", "(out_f + sglid + fi * SIMD)", "0" }; } // Simplify fused ops configuration to prevent mixed layout exception in jitter 
diff --git a/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp index ee482ed5543d56..5e1b381328301b 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp @@ -188,6 +188,14 @@ class FullyConnectedFusingTestOneDNN : public BaseFusingTest{ + fully_connected_test_params{ CASE_FC_FP16_1, 2, 3 }, + fully_connected_test_params{ CASE_FC_FP16_2, 2, 3 }, + fully_connected_test_params{ CASE_FC_FP16_3, 2, 3 }, + fully_connected_test_params{ CASE_FC_FP16_4, 2, 3 }, + fully_connected_test_params{ DYN_CASE_FC_FP16_5, 2, 3 }, + fully_connected_test_params{ DYN_CASE_FC_FP16_6, 2, 3 }, + fully_connected_test_params{ DYN_CASE_FC_FP16_7, 2, 3 }, + fully_connected_test_params{ DYN_CASE_FC_FP16_3D_1, 2, 3 }, + fully_connected_test_params{ DYN_CASE_FC_FP16_3D_2, 2, 3 }, +}));