diff --git a/paddle/phi/kernels/legacy/gpu/moe_combine_grad_kernel.cu b/paddle/phi/kernels/legacy/gpu/moe_combine_grad_kernel.cu index fed4a94b0531ce..229d3480c416fa 100644 --- a/paddle/phi/kernels/legacy/gpu/moe_combine_grad_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/moe_combine_grad_kernel.cu @@ -149,13 +149,11 @@ void MoeCombineGradKernel(const Context& dev_ctx, DenseTensor* grad_combine_weights_helper) { dev_ctx.template Alloc(grad_x); dev_ctx.template Alloc(grad_combine_weights_helper); - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(grad_x->dims())), 0, grad_x); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(grad_combine_weights_helper->dims())), - 0, - grad_combine_weights_helper); + Full(dev_ctx, grad_x->dims(), 0, grad_x); + Full(dev_ctx, + grad_combine_weights_helper->dims(), + 0, + grad_combine_weights_helper); auto x_shape = x.dims(); auto combine_weights_shape = combine_weights.dims(); moe_combine_bwd(dev_ctx, @@ -182,18 +180,13 @@ void MoeCombineAutoGradKernel(const Context& dev_ctx, dev_ctx.template Alloc(grad_combine_weights_helper); dev_ctx.template Alloc(grad_scatter_index); - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(grad_x->dims())), 0, grad_x); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(grad_combine_weights_helper->dims())), - 0, - grad_combine_weights_helper); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(grad_scatter_index->dims())), - 0, - grad_scatter_index); + Full(dev_ctx, grad_x->dims(), 0, grad_x); + Full(dev_ctx, + grad_combine_weights_helper->dims(), + 0, + grad_combine_weights_helper); + Full( + dev_ctx, grad_scatter_index->dims(), 0, grad_scatter_index); // TODO(nieyuntao): Temporarily use 'grad_combine_weight_intermediate' to // bypass the grad_combine_weights_helper's shape mismatch to kernel shape @@ -207,11 +200,10 @@ void MoeCombineAutoGradKernel(const Context& dev_ctx, x.dims()[1]})); grad_combine_weight_intermediate_meta.set_dtype(combine_weights.dtype()); dev_ctx.template Alloc(grad_combine_weight_intermediate); - phi::Full(dev_ctx, - phi::IntArray(common::vectorize( - grad_combine_weight_intermediate->dims())), - 0, - grad_combine_weight_intermediate); + Full(dev_ctx, + grad_combine_weight_intermediate->dims(), + 0, + grad_combine_weight_intermediate); auto x_shape = x.dims(); auto combine_weights_shape = combine_weights.dims(); diff --git a/paddle/phi/kernels/legacy/gpu/moe_combine_kernel.cu b/paddle/phi/kernels/legacy/gpu/moe_combine_kernel.cu index d35e61af2b025d..e831536310e932 100644 --- a/paddle/phi/kernels/legacy/gpu/moe_combine_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/moe_combine_kernel.cu @@ -109,8 +109,7 @@ void MoeCombineKernel(const Context& dev_ctx, DenseTensor* y) { dev_ctx.template Alloc(y); // T cannot support phi::dtype::float8 very // well, maybe replaced with x.dtype(); - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(y->dims())), 0, y); + Full(dev_ctx, y->dims(), 0, y); auto combine_weights_shape = combine_weights.dims(); auto x_shape = x.dims(); moe_combine_fwd(dev_ctx, diff --git a/paddle/phi/kernels/legacy/gpu/moe_combine_no_weight_grad_kernel.cu b/paddle/phi/kernels/legacy/gpu/moe_combine_no_weight_grad_kernel.cu index 32b60e0a007509..d00e5c3868ba89 100644 --- a/paddle/phi/kernels/legacy/gpu/moe_combine_no_weight_grad_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/moe_combine_no_weight_grad_kernel.cu @@ -105,8 +105,7 @@ void MoeCombineNoWeightGradKernel(const Context& dev_ctx, const int64_t k = scatter_index_shape[1]; dev_ctx.template Alloc(grad_x); - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(grad_x->dims())), 0, grad_x); + Full(dev_ctx, grad_x->dims(), 0, grad_x); moe_combine_no_weight_bwd(combine_weights.data(), scatter_index.data(), diff --git a/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_and_quant_kernel.cu b/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_and_quant_kernel.cu index bfa8289b434110..8f14edb32ba99e 100644 --- a/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_and_quant_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_and_quant_kernel.cu @@ -367,8 +367,7 @@ void MoeDispatchAndQuantKernel(const Context &dev_ctx, sizeof(phi::float8_e4m3fn) * out_fp8->numel(), dev_ctx.stream()); - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(scale->dims())), 1, scale); + Full(dev_ctx, scale->dims(), 1, scale); const auto &x_shape = x.dims(); const auto &gate_logits_shape = gate_logits.dims(); diff --git a/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_kernel.cu b/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_kernel.cu index 48c82689ba6ee7..011164d087e1f6 100644 --- a/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_kernel.cu @@ -129,8 +129,7 @@ void MoeGateDispatchKernel(const Context &dev_ctx, dev_ctx.template Alloc(combine_weights); dev_ctx.template Alloc(y); - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(y->dims())), 0, y); + Full(dev_ctx, y->dims(), 0, y); auto x_dims = x.dims(); auto gate_logits_dims = gate_logits.dims(); diff --git a/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_permute_kernel.cu b/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_permute_kernel.cu index 0d553be787b242..b28044e805da05 100644 --- a/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_permute_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_permute_kernel.cu @@ -134,8 +134,7 @@ void MoEDispatchPermuteKernel(const Context &dev_ctx, dev_ctx.template Alloc(scatter_index); dev_ctx.template Alloc(combine_weights); dev_ctx.template Alloc(y); - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(y->dims())), 0, y); + Full(dev_ctx, y->dims(), 0, y); const auto &x_shape = x.dims(); const auto &gate_logits_shape = gate_logits.dims(); int64_t num_rows = x_shape[0]; diff --git a/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_grad_kernel.cu b/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_grad_kernel.cu index 65e19913b05cef..dba5d55dcf5bf3 100644 --- a/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_grad_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_grad_kernel.cu @@ -114,11 +114,8 @@ void MoeGateDispatchPartialNoSoftMaxTopkGradKernel( DenseTensor* combine_weights_grad) { dev_ctx.template Alloc(x_grad); dev_ctx.template Alloc(combine_weights_grad); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(combine_weights_grad->dims())), - 0, - combine_weights_grad); + Full( + dev_ctx, combine_weights_grad->dims(), 0, combine_weights_grad); DenseTensor t_scatter_index; phi::Transpose( dev_ctx, scatter_index, {1, 0}, &t_scatter_index); diff --git a/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_kernel.cu b/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_kernel.cu index c4b729656ab1c4..e5457ea07aa79e 100644 --- a/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_kernel.cu @@ -439,8 +439,7 @@ void apply_moe_dispatch_fwd( y->Resize({expert_offset_host.back(), x.dims()[1]}); dev_ctx.template Alloc(y); } - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(y->dims())), 0, y); + Full(dev_ctx, y->dims(), 0, y); copy_unpermuted_to_permuted_kernelLauncher( x.data(), y->data(), // out @@ -526,31 +525,14 @@ void MoeGateDispatchPartialNoSoftMaxTopkKernel( dev_ctx.template Alloc(expert_offset); dev_ctx.template Alloc(expert_nums_local); dev_ctx.template Alloc(combine_weights_out); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(scatter_index->dims())), - 0, - scatter_index); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(scatter_index_rev->dims())), - 0, - scatter_index_rev); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(expert_offset->dims())), - 0, - expert_offset); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(expert_nums_local->dims())), - 0, - expert_nums_local); - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(combine_weights_out->dims())), - 0, - combine_weights_out); + Full(dev_ctx, scatter_index->dims(), 0, scatter_index); + Full( + dev_ctx, scatter_index_rev->dims(), 0, scatter_index_rev); + Full(dev_ctx, expert_offset->dims(), 0, expert_offset); + Full( + dev_ctx, expert_nums_local->dims(), 0, expert_nums_local); + Full( + dev_ctx, combine_weights_out->dims(), 0, combine_weights_out); phi::Copy( dev_ctx, combine_weights, dev_ctx.GetPlace(), false, combine_weights_out); const auto &x_shape = x.dims(); diff --git a/paddle/phi/kernels/stride/indexing_kernel.cu b/paddle/phi/kernels/stride/indexing_kernel.cu index 6ae80653531d99..0be66896c02151 100644 --- a/paddle/phi/kernels/stride/indexing_kernel.cu +++ b/paddle/phi/kernels/stride/indexing_kernel.cu @@ -332,11 +332,7 @@ void IndexPutGradKernel_V2(const Context& dev_ctx, dev_ctx.template Alloc(x_grad); // Fill value_grad with 0. if (value_grad) { - phi::Full( - dev_ctx, - phi::IntArray(common::vectorize(value_grad->dims())), - 0, - value_grad); + phi::Full(dev_ctx, value_grad->dims(), 0, value_grad); } return; } @@ -390,10 +386,7 @@ void IndexPutGradKernel_V2(const Context& dev_ctx, x_grad->ShareInplaceVersionCounterWith(out_grad); } else { DenseTensor value_zero; - phi::Full(dev_ctx, - phi::IntArray(common::vectorize(value.dims())), - 0, - &value_zero); + phi::Full(dev_ctx, value.dims(), 0, &value_zero); if (funcs::IsInUint32Range(x_grad->numel(), value.numel())) { LaunchIndexPutKernel_V2( dev_ctx, out_grad, indices, value_zero, false, x_grad); diff --git a/paddle/phi/kernels/stride/reduce_stride_kernel.cu b/paddle/phi/kernels/stride/reduce_stride_kernel.cu index 39536232d02741..b9f069986626dd 100644 --- a/paddle/phi/kernels/stride/reduce_stride_kernel.cu +++ b/paddle/phi/kernels/stride/reduce_stride_kernel.cu @@ -320,8 +320,7 @@ void ProdStrideKernel(const Context& dev_ctx, if (x_.numel() == 0) { // fill with 1. - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(out->dims())), 1, out); + phi::Full(dev_ctx, out->dims(), 1, out); return; } @@ -647,8 +646,7 @@ void MeanStrideKernel(const Context& dev_ctx, } if (x_.numel() == 0) { - phi::Full( - dev_ctx, phi::IntArray(common::vectorize(out->dims())), NAN, out); + phi::Full(dev_ctx, out->dims(), NAN, out); return; }