From 121e792747d0fc0423d75ac38b54a1d8b96436be Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 16 Oct 2024 07:17:31 -0700 Subject: [PATCH 01/11] fixes for dbg build --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 4d66bd06ad4..9f62fc7bebe 100644 --- a/makefile +++ b/makefile @@ -697,7 +697,7 @@ $(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBC) $(-E $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_y))) $(ONEAPI.objs_y.dpc): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt -$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DMKL_ILP64) $(-DEBC) $(-EHsc) $(pedantic.opts.dpcpp) \ +$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-EHsc) $(pedantic.opts.dpcpp) \ -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED \ -DONEDAL_DATA_PARALLEL \ From 0ac3ab916a1302f4641d9ba6d45b79f777362b61 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 16 Oct 2024 07:53:39 -0700 Subject: [PATCH 02/11] minor fix --- makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/makefile b/makefile index 9f62fc7bebe..cac5058d414 100644 --- a/makefile +++ b/makefile @@ -660,7 +660,7 @@ $(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt: | $(ONEAPI.tmpdir_y.dpc)/. # Set compilation options to the object files which are part of STATIC lib $(ONEAPI.objs_a): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_a)/inc_a_folders.txt -$(ONEAPI.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-EHsc) $(pedantic.opts) \ +$(ONEAPI.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBC) $(-EHsc) $(pedantic.opts) \ -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED \ -D_ENABLE_ATOMIC_ALIGNMENT_FIX \ @@ -671,7 +671,7 @@ $(ONEAPI.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-EHsc) $(pedantic $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_a))) $(ONEAPI.objs_a.dpc): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_a.dpc)/inc_a_folders.txt -$(ONEAPI.objs_a.dpc): COPT += $(-fPIC) $(-cxx17) $(-DEBC) $(-EHsc) $(pedantic.opts.dpcpp) \ +$(ONEAPI.objs_a.dpc): COPT += $(-fPIC) $(-cxx17) $(-DMKL_ILP64) $(-DEBC) $(-EHsc) $(pedantic.opts.dpcpp) \ -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED \ -DONEDAL_DATA_PARALLEL \ @@ -697,7 +697,7 @@ $(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBC) $(-E $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_y))) $(ONEAPI.objs_y.dpc): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt -$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-EHsc) $(pedantic.opts.dpcpp) \ +$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DMKL_ILP64) $(-EHsc) $(pedantic.opts.dpcpp) \ -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED \ -DONEDAL_DATA_PARALLEL \ From 0cdd40002fbca6a66f65dbb1d5f574912f8b10e0 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 28 Oct 2024 08:44:19 -0700 Subject: [PATCH 03/11] fixes for debug --- dev/make/compiler_definitions/dpcpp.mk | 23 ++++++++++++++------ dev/make/compiler_definitions/icc.mkl.32e.mk | 4 ++-- dev/make/compiler_definitions/icx.mkl.32e.mk | 4 ++-- makefile | 3 +-- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/dev/make/compiler_definitions/dpcpp.mk b/dev/make/compiler_definitions/dpcpp.mk index 78ec5da36fe..d09c9bafe71 100644 --- a/dev/make/compiler_definitions/dpcpp.mk +++ b/dev/make/compiler_definitions/dpcpp.mk @@ -25,22 +25,31 @@ CMPLRDIRSUFF.dpcpp = _dpcpp CORE.SERV.COMPILER.dpcpp = generic --Zl.dpcpp = --DEBC.dpcpp = -g +-Zl.dpcpp = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib +-DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7 -fno-system-debug -O0 -fasm-blocks,-g -fno-system-debug -O0 -fasm-blocks) -COMPILER.lnx.dpcpp = icpx -fsycl -m64 -stdlib=libstdc++ -fgnu-runtime -fwrapv \ +COMPILER.lnx.dpcpp = icpx -fsycl -m64 -fno-system-debug -stdlib=libstdc++ -fgnu-runtime -fwrapv \ -Werror -Wreturn-type -fsycl-device-code-split=per_kernel -COMPILER.win.dpcpp = icx -fsycl $(if $(MSVC_RT_is_release),-MD, -MDd /debug:none) -nologo -WX \ +COMPILER.win.dpcpp = icx -fsycl $(if $(MSVC_RT_is_release),-MD, -MDd /debug:none) -fno-system-debug -nologo -WX \ -Wno-deprecated-declarations -fsycl-device-code-split=per_kernel -link.dynamic.lnx.dpcpp = icpx -fsycl -m64 -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=$(SYCL_LINK_PRL) -link.dynamic.win.dpcpp = icx -fsycl -m64 -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=$(SYCL_LINK_PRL) +link.dynamic.lnx.dpcpp = icpx -fsycl -m64 -fno-system-debug -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=$(SYCL_LINK_PRL) +link.dynamic.win.dpcpp = icx -fsycl -m64 -fno-system-debug -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=$(SYCL_LINK_PRL) -pedantic.opts.lnx.dpcpp = -pedantic \ +pedantic.opts.lnx = -pedantic \ -Wall \ -Wextra \ + -Wwritable-strings \ -Wno-unused-parameter +pedantic.opts.dpcpp_win = -Wall \ + -Wextra \ + -Wwritable-strings \ + -Wno-unused-parameter + +pedantic.opts.lnx.dpcpp = $(pedantic.opts.icx) +pedantic.opts.win.dpcpp = $(pedantic.opts.dpcpp_win) + p4_OPT.dpcpp = -march=nocona mc3_OPT.dpcpp = -march=nehalem avx2_OPT.dpcpp = -march=haswell diff --git a/dev/make/compiler_definitions/icc.mkl.32e.mk b/dev/make/compiler_definitions/icc.mkl.32e.mk index a6ff2410ecc..576a77dc45b 100644 --- a/dev/make/compiler_definitions/icc.mkl.32e.mk +++ b/dev/make/compiler_definitions/icc.mkl.32e.mk @@ -29,9 +29,9 @@ CORE.SERV.COMPILER.icc = generic -Qopt = $(if $(OS_is_win),-Qopt-,-qopt-) COMPILER.lnx.icc = $(if $(COVFILE),cov01 -1; covc --no-banner -i )icc -qopenmp-simd \ - -Werror -Wreturn-type -diag-disable=10441 + -Werror -fno-system-debug -Wreturn-type -diag-disable=10441 COMPILER.lnx.icc += $(if $(COVFILE), $(-Q)m64) -COMPILER.win.icc = icl $(if $(MSVC_RT_is_release),-MD, -MDd /debug:none) -nologo -WX -Qopenmp-simd -Qdiag-disable:10441 +COMPILER.win.icc = icl $(if $(MSVC_RT_is_release),-MD, -MDd /debug:none) -nologo -fno-system-debug -WX -Qopenmp-simd -Qdiag-disable:10441 COMPILER.mac.icc = icc -stdlib=libc++ -mmacosx-version-min=10.15 \ -Werror -Wreturn-type -diag-disable=10441 diff --git a/dev/make/compiler_definitions/icx.mkl.32e.mk b/dev/make/compiler_definitions/icx.mkl.32e.mk index e61e6bc39bc..4143cd89467 100644 --- a/dev/make/compiler_definitions/icx.mkl.32e.mk +++ b/dev/make/compiler_definitions/icx.mkl.32e.mk @@ -26,7 +26,7 @@ CORE.SERV.COMPILER.icx = generic -Zl.icx = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib --DEBC.icx = $(if $(OS_is_win),-debug:all -Z7,-g) +-DEBC.icx = $(if $(OS_is_win),-debug:all -Z7 -fno-system-debug -O0 -fasm-blocks,-g -fno-system-debug -O0 -fasm-blocks) -Qopt = $(if $(OS_is_win),-Qopt-,-qopt-) @@ -35,7 +35,7 @@ COMPILER.lnx.icx = icx -m64 \ COMPILER.win.icx = icx $(if $(MSVC_RT_is_release),-MD -Qopenmp-simd, -MDd) -nologo -WX -Wno-deprecated-declarations -link.dynamic.lnx.icx = icx -m64 -no-intel-lib +link.dynamic.lnx.icx = icx -m64 -fno-system-debug -no-intel-lib pedantic.opts.icx = -pedantic \ -Wall \ diff --git a/makefile b/makefile index cac5058d414..747a29c1bbb 100644 --- a/makefile +++ b/makefile @@ -130,7 +130,6 @@ y := $(notdir $(filter $(_OS)/%,lnx/so win/dll mac/dylib)) -DMKL_ILP64 := $(if $(filter mkl,$(BACKEND_CONFIG)),-DMKL_ILP64) -Zl := $(-Zl.$(COMPILER)) -DEBC := $(if $(REQDBG),$(-DEBC.$(COMPILER)) -DDEBUG_ASSERT -DONEDAL_ENABLE_ASSERT) -DTBB_SUPPRESS_DEPRECATED_MESSAGES -D__TBB_LEGACY_MODE --DEBJ := $(if $(REQDBG),-g,-g:none) -DEBL := $(if $(REQDBG),$(if $(OS_is_win),-debug,)) -EHsc := $(if $(OS_is_win),-EHsc,) -isystem := $(if $(OS_is_win),-I,-isystem) @@ -697,7 +696,7 @@ $(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBC) $(-E $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_y))) $(ONEAPI.objs_y.dpc): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt -$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DMKL_ILP64) $(-EHsc) $(pedantic.opts.dpcpp) \ +$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBС) $(-EHsc) $(pedantic.opts.dpcpp) \ -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED \ -DONEDAL_DATA_PARALLEL \ From ca891d92ba79f8095fdc8ced4e50e4e9b41026fa Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 28 Oct 2024 09:39:20 -0700 Subject: [PATCH 04/11] fixes for icc and icx --- dev/make/compiler_definitions/dpcpp.mk | 2 +- dev/make/compiler_definitions/icc.mkl.32e.mk | 4 ++-- dev/make/compiler_definitions/icx.mkl.32e.mk | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/make/compiler_definitions/dpcpp.mk b/dev/make/compiler_definitions/dpcpp.mk index d09c9bafe71..afa96c6018a 100644 --- a/dev/make/compiler_definitions/dpcpp.mk +++ b/dev/make/compiler_definitions/dpcpp.mk @@ -26,7 +26,7 @@ CMPLRDIRSUFF.dpcpp = _dpcpp CORE.SERV.COMPILER.dpcpp = generic -Zl.dpcpp = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib --DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7 -fno-system-debug -O0 -fasm-blocks,-g -fno-system-debug -O0 -fasm-blocks) +-DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7,-g -fno-system-debug -O0 -fasm-blocks) -fno-system-debug -O0 -fasm-blocks COMPILER.lnx.dpcpp = icpx -fsycl -m64 -fno-system-debug -stdlib=libstdc++ -fgnu-runtime -fwrapv \ -Werror -Wreturn-type -fsycl-device-code-split=per_kernel diff --git a/dev/make/compiler_definitions/icc.mkl.32e.mk b/dev/make/compiler_definitions/icc.mkl.32e.mk index 576a77dc45b..a6ff2410ecc 100644 --- a/dev/make/compiler_definitions/icc.mkl.32e.mk +++ b/dev/make/compiler_definitions/icc.mkl.32e.mk @@ -29,9 +29,9 @@ CORE.SERV.COMPILER.icc = generic -Qopt = $(if $(OS_is_win),-Qopt-,-qopt-) COMPILER.lnx.icc = $(if $(COVFILE),cov01 -1; covc --no-banner -i )icc -qopenmp-simd \ - -Werror -fno-system-debug -Wreturn-type -diag-disable=10441 + -Werror -Wreturn-type -diag-disable=10441 COMPILER.lnx.icc += $(if $(COVFILE), $(-Q)m64) -COMPILER.win.icc = icl $(if $(MSVC_RT_is_release),-MD, -MDd /debug:none) -nologo -fno-system-debug -WX -Qopenmp-simd -Qdiag-disable:10441 +COMPILER.win.icc = icl $(if $(MSVC_RT_is_release),-MD, -MDd /debug:none) -nologo -WX -Qopenmp-simd -Qdiag-disable:10441 COMPILER.mac.icc = icc -stdlib=libc++ -mmacosx-version-min=10.15 \ -Werror -Wreturn-type -diag-disable=10441 diff --git a/dev/make/compiler_definitions/icx.mkl.32e.mk b/dev/make/compiler_definitions/icx.mkl.32e.mk index 4143cd89467..3e33515d920 100644 --- a/dev/make/compiler_definitions/icx.mkl.32e.mk +++ b/dev/make/compiler_definitions/icx.mkl.32e.mk @@ -26,7 +26,7 @@ CORE.SERV.COMPILER.icx = generic -Zl.icx = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib --DEBC.icx = $(if $(OS_is_win),-debug:all -Z7 -fno-system-debug -O0 -fasm-blocks,-g -fno-system-debug -O0 -fasm-blocks) +-DEBC.icx = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -O0 -fasm-blocks -Qopt = $(if $(OS_is_win),-Qopt-,-qopt-) @@ -35,7 +35,7 @@ COMPILER.lnx.icx = icx -m64 \ COMPILER.win.icx = icx $(if $(MSVC_RT_is_release),-MD -Qopenmp-simd, -MDd) -nologo -WX -Wno-deprecated-declarations -link.dynamic.lnx.icx = icx -m64 -fno-system-debug -no-intel-lib +link.dynamic.lnx.icx = icx -m64 -no-intel-lib pedantic.opts.icx = -pedantic \ -Wall \ From eb8e2a6a57e5b22bb05621ae7920db10451f5e3c Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 28 Oct 2024 09:54:44 -0700 Subject: [PATCH 05/11] more fixes --- dev/make/compiler_definitions/dpcpp.mk | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/make/compiler_definitions/dpcpp.mk b/dev/make/compiler_definitions/dpcpp.mk index afa96c6018a..88de116673b 100644 --- a/dev/make/compiler_definitions/dpcpp.mk +++ b/dev/make/compiler_definitions/dpcpp.mk @@ -26,17 +26,17 @@ CMPLRDIRSUFF.dpcpp = _dpcpp CORE.SERV.COMPILER.dpcpp = generic -Zl.dpcpp = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib --DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7,-g -fno-system-debug -O0 -fasm-blocks) -fno-system-debug -O0 -fasm-blocks +-DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -O0 -fasm-blocks -COMPILER.lnx.dpcpp = icpx -fsycl -m64 -fno-system-debug -stdlib=libstdc++ -fgnu-runtime -fwrapv \ +COMPILER.lnx.dpcpp = icpx -fsycl -m64 -stdlib=libstdc++ -fgnu-runtime -fwrapv \ -Werror -Wreturn-type -fsycl-device-code-split=per_kernel -COMPILER.win.dpcpp = icx -fsycl $(if $(MSVC_RT_is_release),-MD, -MDd /debug:none) -fno-system-debug -nologo -WX \ +COMPILER.win.dpcpp = icx -fsycl $(if $(MSVC_RT_is_release),-MD, -MDd /debug:none) -nologo -WX \ -Wno-deprecated-declarations -fsycl-device-code-split=per_kernel -link.dynamic.lnx.dpcpp = icpx -fsycl -m64 -fno-system-debug -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=$(SYCL_LINK_PRL) -link.dynamic.win.dpcpp = icx -fsycl -m64 -fno-system-debug -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=$(SYCL_LINK_PRL) +link.dynamic.lnx.dpcpp = icpx -fsycl -m64 -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=$(SYCL_LINK_PRL) +link.dynamic.win.dpcpp = icx -fsycl -m64 -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=$(SYCL_LINK_PRL) -pedantic.opts.lnx = -pedantic \ +pedantic.opts.dpcpp_lnx = -pedantic \ -Wall \ -Wextra \ -Wwritable-strings \ @@ -47,7 +47,7 @@ pedantic.opts.dpcpp_win = -Wall \ -Wwritable-strings \ -Wno-unused-parameter -pedantic.opts.lnx.dpcpp = $(pedantic.opts.icx) +pedantic.opts.lnx.dpcpp = $(pedantic.opts.dpcpp_lnx) pedantic.opts.win.dpcpp = $(pedantic.opts.dpcpp_win) p4_OPT.dpcpp = -march=nocona From 393016047423c3484eb915d7d6d7a1e13dd0d673 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 28 Oct 2024 10:20:21 -0700 Subject: [PATCH 06/11] fixes for O flag --- dev/make/compiler_definitions/dpcpp.mk | 2 +- dev/make/compiler_definitions/icx.mkl.32e.mk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/make/compiler_definitions/dpcpp.mk b/dev/make/compiler_definitions/dpcpp.mk index 88de116673b..c95ad74bdc3 100644 --- a/dev/make/compiler_definitions/dpcpp.mk +++ b/dev/make/compiler_definitions/dpcpp.mk @@ -26,7 +26,7 @@ CMPLRDIRSUFF.dpcpp = _dpcpp CORE.SERV.COMPILER.dpcpp = generic -Zl.dpcpp = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib --DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -O0 -fasm-blocks +-DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -fasm-blocks COMPILER.lnx.dpcpp = icpx -fsycl -m64 -stdlib=libstdc++ -fgnu-runtime -fwrapv \ -Werror -Wreturn-type -fsycl-device-code-split=per_kernel diff --git a/dev/make/compiler_definitions/icx.mkl.32e.mk b/dev/make/compiler_definitions/icx.mkl.32e.mk index 3e33515d920..2f55f0d0a39 100644 --- a/dev/make/compiler_definitions/icx.mkl.32e.mk +++ b/dev/make/compiler_definitions/icx.mkl.32e.mk @@ -26,7 +26,7 @@ CORE.SERV.COMPILER.icx = generic -Zl.icx = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib --DEBC.icx = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -O0 -fasm-blocks +-DEBC.icx = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -fasm-blocks -Qopt = $(if $(OS_is_win),-Qopt-,-qopt-) From 4da42f77b2dde1ab611a3372a7ee023e8feb9c86 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 28 Oct 2024 10:39:33 -0700 Subject: [PATCH 07/11] remove unnecessary build --- dev/make/compiler_definitions/dpcpp.mk | 8 ++++---- dev/make/compiler_definitions/icx.mkl.32e.mk | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dev/make/compiler_definitions/dpcpp.mk b/dev/make/compiler_definitions/dpcpp.mk index c95ad74bdc3..46b9767fec6 100644 --- a/dev/make/compiler_definitions/dpcpp.mk +++ b/dev/make/compiler_definitions/dpcpp.mk @@ -26,7 +26,7 @@ CMPLRDIRSUFF.dpcpp = _dpcpp CORE.SERV.COMPILER.dpcpp = generic -Zl.dpcpp = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib --DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -fasm-blocks +-DEBC.dpcpp = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug COMPILER.lnx.dpcpp = icpx -fsycl -m64 -stdlib=libstdc++ -fgnu-runtime -fwrapv \ -Werror -Wreturn-type -fsycl-device-code-split=per_kernel @@ -43,9 +43,9 @@ pedantic.opts.dpcpp_lnx = -pedantic \ -Wno-unused-parameter pedantic.opts.dpcpp_win = -Wall \ - -Wextra \ - -Wwritable-strings \ - -Wno-unused-parameter + -Wextra \ + -Wwritable-strings \ + -Wno-unused-parameter pedantic.opts.lnx.dpcpp = $(pedantic.opts.dpcpp_lnx) pedantic.opts.win.dpcpp = $(pedantic.opts.dpcpp_win) diff --git a/dev/make/compiler_definitions/icx.mkl.32e.mk b/dev/make/compiler_definitions/icx.mkl.32e.mk index 2f55f0d0a39..ee0a0350f3a 100644 --- a/dev/make/compiler_definitions/icx.mkl.32e.mk +++ b/dev/make/compiler_definitions/icx.mkl.32e.mk @@ -26,7 +26,7 @@ CORE.SERV.COMPILER.icx = generic -Zl.icx = $(if $(OS_is_win),-Zl,) $(-Q)no-intel-lib --DEBC.icx = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -fasm-blocks +-DEBC.icx = $(if $(OS_is_win),-debug:all -Z7,-g) -fno-system-debug -Qopt = $(if $(OS_is_win),-Qopt-,-qopt-) @@ -37,18 +37,18 @@ COMPILER.win.icx = icx $(if $(MSVC_RT_is_release),-MD -Qopenmp-simd, -MDd) -nolo link.dynamic.lnx.icx = icx -m64 -no-intel-lib -pedantic.opts.icx = -pedantic \ - -Wall \ - -Wextra \ - -Wwritable-strings \ - -Wno-unused-parameter +pedantic.opts.icx_lnx = -pedantic \ + -Wall \ + -Wextra \ + -Wwritable-strings \ + -Wno-unused-parameter pedantic.opts.icx_win = -Wall \ - -Wextra \ - -Wwritable-strings \ - -Wno-unused-parameter + -Wextra \ + -Wwritable-strings \ + -Wno-unused-parameter -pedantic.opts.lnx.icx = $(pedantic.opts.icx) +pedantic.opts.lnx.icx = $(pedantic.opts.icx_lnx) pedantic.opts.win.icx = $(pedantic.opts.icx_win) p4_OPT.icx = -march=nocona From cc18e865abb38813acd69c4f4d20a32c5384aaf7 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 29 Oct 2024 04:02:20 -0700 Subject: [PATCH 08/11] fixes for devc --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 747a29c1bbb..f17d22e4404 100644 --- a/makefile +++ b/makefile @@ -696,7 +696,7 @@ $(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBC) $(-E $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_y))) $(ONEAPI.objs_y.dpc): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt -$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBС) $(-EHsc) $(pedantic.opts.dpcpp) \ +$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBC) $(-EHsc) $(pedantic.opts.dpcpp) \ -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED \ -DONEDAL_DATA_PARALLEL \ From 607ff204e241767e369f8f4a8fd16db0fa38d6fa Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 29 Oct 2024 09:42:55 -0700 Subject: [PATCH 09/11] fixes for table2ndarray --- .../gpu/compute_kernel_dense_impl_dpc.cpp | 14 ++++ cpp/oneapi/dal/backend/memory.hpp | 23 +++--- cpp/oneapi/dal/backend/primitives/utils.hpp | 2 + cpp/oneapi/dal/backend/transfer_dpc.cpp | 6 +- cpp/oneapi/dal/detail/communicator.cpp | 7 +- cpp/oneapi/dal/detail/memory.hpp | 7 +- cpp/oneapi/dal/detail/memory_impl_dpc.cpp | 3 +- cpp/oneapi/dal/detail/profiler.cpp | 82 ++++++++++++++++++- cpp/oneapi/dal/detail/profiler.hpp | 38 ++++++++- .../io/csv/backend/gpu/read_kernel_dpc.cpp | 3 +- .../backend/homogen_table_builder_impl.hpp | 3 +- cpp/oneapi/dal/test/ccl_communicator.cpp | 3 +- cpp/oneapi/dal/test/communicator.cpp | 3 +- cpp/oneapi/dal/test/mpi_communicator.cpp | 3 +- .../dpc/source/covariance/cor_dense_batch.cpp | 6 +- 15 files changed, 170 insertions(+), 33 deletions(-) diff --git a/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp b/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp index 7fdea5cc019..214812e4c56 100644 --- a/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp @@ -51,7 +51,21 @@ result_t compute_kernel_dense_impl::operator()(const descriptor_t& desc, ONEDAL_ASSERT(input.get_data().has_data()); const auto data = input.get_data(); + // //const auto data = input.get_data(); + // const std::int64_t row_count = 500000; + // ONEDAL_ASSERT(row_count > 0); + // auto rows_count_global = row_count; + // const std::int64_t column_count = 960; + // ONEDAL_ASSERT(column_count > 0); + + // auto bias = desc.get_bias(); + // auto assume_centered = desc.get_assume_centered(); + + // auto result = compute_result{}.set_result_options(desc.get_result_options()); + // array arr_responses = array::empty(500000 * 960); + // auto table_input = dal::homogen_table::wrap(arr_responses, 500000, 960); + // const auto data_nd = pr::table2ndarray(q_, table_input, alloc::device); const std::int64_t row_count = data.get_row_count(); ONEDAL_ASSERT(row_count > 0); auto rows_count_global = row_count; diff --git a/cpp/oneapi/dal/backend/memory.hpp b/cpp/oneapi/dal/backend/memory.hpp index 4579af7fc9e..a4ad2dc24cf 100644 --- a/cpp/oneapi/dal/backend/memory.hpp +++ b/cpp/oneapi/dal/backend/memory.hpp @@ -17,6 +17,8 @@ #pragma once #include "oneapi/dal/backend/common.hpp" +#include "oneapi/dal/detail/profiler.hpp" +#include namespace oneapi::dal::backend { @@ -220,12 +222,9 @@ inline sycl::event memcpy_host2usm(sycl::queue& queue, std::size_t size, const event_vector& deps = {}) { ONEDAL_ASSERT(is_known_usm(queue, dest_usm)); - - // TODO: Remove additional copy to host usm memory once - // bug in `copy` with the host memory is fixed - auto tmp_usm_host = make_unique_usm_host(queue, size); - memcpy(tmp_usm_host.get(), src_host, size); - memcpy(queue, dest_usm, tmp_usm_host.get(), size, deps).wait_and_throw(); + ONEDAL_PROFILER_TASK(memcpy_host2usm, queue); + std::cout<<"memcpy_host2usm"< 0); const std::size_t n = detail::integral_cast(count); ONEDAL_ASSERT_MUL_OVERFLOW(std::size_t, sizeof(T), n); + std::cout<<"failed 3"< inline ndarray table2ndarray(sycl::queue& q, const table& table, sycl::usm::alloc alloc = sycl::usm::alloc::shared) { + ONEDAL_PROFILER_TASK(table2ndarray, q); [[maybe_unused]] const auto layout = table.get_data_layout(); if constexpr (order == ndorder::c) { ONEDAL_ASSERT(layout == decltype(layout)::row_major); diff --git a/cpp/oneapi/dal/backend/transfer_dpc.cpp b/cpp/oneapi/dal/backend/transfer_dpc.cpp index 6f772e96c56..76495197cef 100644 --- a/cpp/oneapi/dal/backend/transfer_dpc.cpp +++ b/cpp/oneapi/dal/backend/transfer_dpc.cpp @@ -16,7 +16,7 @@ #include "oneapi/dal/backend/transfer.hpp" #include - +#include namespace oneapi::dal::backend { namespace bk = dal::backend; template @@ -97,7 +97,7 @@ sycl::event scatter_host2device(sycl::queue& q, const auto gathered_device_unique = make_unique_usm_device(q, block_count * block_size_in_bytes); - + std::cout<<"failed 4"< namespace spmd = oneapi::dal::preview::spmd; namespace oneapi::dal::detail::v1 { @@ -69,6 +69,7 @@ spmd::request_iface* spmd_communicator_via_host_impl::bcast(sycl::queue& q, wait_request(bcast(send_buff_host.get_mutable_data(), count, dtype, root)); if (get_rank() != root) { + std::cout<<"failed 6"< 0) { + std::cout<<"failed 10"< namespace oneapi::dal::detail { namespace v1 { @@ -67,8 +67,11 @@ inline void memcpy(const DstPolicy& dst_policy, memcpy(src_policy, dst, src, size); else if constexpr (!is_dst_usm && is_src_usm) memcpy_usm2host(src_policy, dst, src, size); - else if constexpr (is_dst_usm && !is_src_usm) + else if constexpr (is_dst_usm && !is_src_usm){ + std::cout<<"failed 30"< namespace oneapi::dal::detail::v1 { void* malloc(const data_parallel_policy& policy, std::size_t size, const sycl::usm::alloc& alloc) { @@ -56,6 +56,7 @@ void memcpy_host2usm(const data_parallel_policy& policy, const void* src_host, std::int64_t size) { auto& queue = policy.get_queue(); + std::cout<<"failed 20"< namespace oneapi::dal::detail { + +profiler::profiler() { + start_time = get_time(); +} + +profiler::~profiler() { + auto end_time = get_time(); + auto total_time = end_time - start_time; + std::cerr << "KERNEL_PROFILER: total time " << total_time / 1e6 << std::endl; +} + +std::uint64_t profiler::get_time() { + struct timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + return t.tv_sec * 1000000000 + t.tv_nsec; +} + +profiler* profiler::get_instance() { + static profiler instance; + return &instance; +} + +task& profiler::get_task() { + return task_; +} + +#ifdef ONEDAL_DATA_PARALLEL +sycl::queue& profiler::get_queue() { + return queue_; +} + +void profiler::set_queue(const sycl::queue& q) { + queue_ = q; +} +#endif + profiler_task profiler::start_task(const char* task_name) { + auto ns_start = get_time(); + auto& tasks_info = get_instance()->get_task(); + tasks_info.time_kernels[tasks_info.current_kernel] = ns_start; + tasks_info.current_kernel++; return profiler_task(task_name); } -void profiler::end_task(const char* task_name) {} +void profiler::end_task(const char* task_name) { + const std::uint64_t ns_end = get_time(); + auto& tasks_info = get_instance()->get_task(); +#ifdef ONEDAL_DATA_PARALLEL + auto& queue = get_instance()->get_queue(); + queue.wait_and_throw(); +#endif + tasks_info.current_kernel--; + const std::uint64_t times = ns_end - tasks_info.time_kernels[tasks_info.current_kernel]; -profiler_task::profiler_task(const char* task_name) : task_name_(task_name) {} + auto it = tasks_info.kernels.find(task_name); + if (it == tasks_info.kernels.end()) { + tasks_info.kernels.insert({ task_name, times }); + } + else { + it->second += times; + } + std::cerr << "KERNEL_PROFILER: " << std::string(task_name) << " " << times / 1e6 << std::endl; +} #ifdef ONEDAL_DATA_PARALLEL -profiler_task profiler::start_task(const char* task_name, const sycl::queue& task_queue) { +profiler_task profiler::start_task(const char* task_name, sycl::queue& task_queue) { + task_queue.wait_and_throw(); + get_instance()->set_queue(task_queue); + auto ns_start = get_time(); + auto& tasks_info = get_instance()->get_task(); + tasks_info.time_kernels[tasks_info.current_kernel] = ns_start; + tasks_info.current_kernel++; return profiler_task(task_name, task_queue); } + + profiler_task::profiler_task(const char* task_name, const sycl::queue& task_queue) : task_name_(task_name), - task_queue_(task_queue) {} + task_queue_(task_queue), + has_queue_(true) {} + #endif +profiler_task::profiler_task(const char* task_name) + : task_name_(task_name) {} + profiler_task::~profiler_task() { + #ifdef ONEDAL_DATA_PARALLEL + if (has_queue_) + task_queue_.wait_and_throw(); + #endif // ONEDAL_DATA_PARALLEL profiler::end_task(task_name_); } diff --git a/cpp/oneapi/dal/detail/profiler.hpp b/cpp/oneapi/dal/detail/profiler.hpp index cfda588d547..3eacba0ee63 100644 --- a/cpp/oneapi/dal/detail/profiler.hpp +++ b/cpp/oneapi/dal/detail/profiler.hpp @@ -19,6 +19,14 @@ #ifdef ONEDAL_DATA_PARALLEL #include #endif + + +#include +#include +#include +#include +#include +#include #define ONEDAL_PROFILER_CONCAT2(x, y) x##y #define ONEDAL_PROFILER_CONCAT(x, y) ONEDAL_PROFILER_CONCAT2(x, y) @@ -39,6 +47,16 @@ namespace oneapi::dal::detail { + + +struct task { + static const std::uint64_t MAX_KERNELS = 256; + std::map kernels; + std::uint64_t current_kernel = 0; + std::uint64_t time_kernels[MAX_KERNELS]; + void clear(); +}; + class profiler_task { public: profiler_task(const char* task_name); @@ -51,16 +69,34 @@ class profiler_task { const char* task_name_; #ifdef ONEDAL_DATA_PARALLEL sycl::queue task_queue_; + bool has_queue_; #endif }; class profiler { public: + profiler(); + ~profiler(); static profiler_task start_task(const char* task_name); + static std::uint64_t get_time(); + static profiler* get_instance(); + task& get_task(); + #ifdef ONEDAL_DATA_PARALLEL - static profiler_task start_task(const char* task_name, const sycl::queue& task_queue); + sycl::queue& get_queue(); + void set_queue(const sycl::queue& q); + + + static profiler_task start_task(const char* task_name, sycl::queue& task_queue); #endif static void end_task(const char* task_name); + +private: + std::uint64_t start_time; + task task_; +#ifdef ONEDAL_DATA_PARALLEL + sycl::queue queue_; +#endif }; } // namespace oneapi::dal::detail diff --git a/cpp/oneapi/dal/io/csv/backend/gpu/read_kernel_dpc.cpp b/cpp/oneapi/dal/io/csv/backend/gpu/read_kernel_dpc.cpp index a7bebc896b1..df60776528e 100644 --- a/cpp/oneapi/dal/io/csv/backend/gpu/read_kernel_dpc.cpp +++ b/cpp/oneapi/dal/io/csv/backend/gpu/read_kernel_dpc.cpp @@ -30,7 +30,7 @@ #include "oneapi/dal/io/csv/backend/gpu/read_kernel.hpp" #include "oneapi/dal/table/common.hpp" #include "oneapi/dal/table/detail/table_builder.hpp" - +#include namespace oneapi::dal::csv::backend { namespace interop = dal::backend::interop; @@ -68,6 +68,7 @@ struct read_kernel_gpu { Float* data = block.getBlockPtr(); auto arr = array::empty(queue, row_count * column_count, sycl::usm::alloc::device); + std::cout<<"failed 31"< namespace oneapi::dal::backend { class homogen_table_builder_impl @@ -86,6 +86,7 @@ class homogen_table_builder_impl __ONEDAL_IF_QUEUE__(data_.get_queue(), { auto this_q = data_.get_queue().value(); ONEDAL_ASSERT(is_known_usm(data_)); + std::cout<<"failed 50"< #ifdef ONEDAL_DATA_PARALLEL namespace oneapi::dal::test { @@ -156,6 +156,7 @@ class ccl_comm_test : public te::policy_fixture { array copy_to_device(const T* data, std::int64_t count) { if (count > 0) { auto x = array::empty(get_queue(), count, sycl::usm::alloc::device); + std::cout<<"failed 61"< namespace spmd = oneapi::dal::preview::spmd; namespace oneapi::dal::test { @@ -73,6 +73,7 @@ class communicator_test : public te::policy_fixture { template array to_device(const array& src) { auto dst = array::empty(this->get_queue(), src.get_count(), sycl::usm::alloc::device); + std::cout<<"failed 66"<get_queue(), dst.get_mutable_data(), src.get_data(), diff --git a/cpp/oneapi/dal/test/mpi_communicator.cpp b/cpp/oneapi/dal/test/mpi_communicator.cpp index 5d680138a8f..bca51d39fb3 100644 --- a/cpp/oneapi/dal/test/mpi_communicator.cpp +++ b/cpp/oneapi/dal/test/mpi_communicator.cpp @@ -16,7 +16,7 @@ #include "oneapi/dal/test/engine/mpi_global.hpp" #include "oneapi/dal/test/engine/fixtures.hpp" - +#include namespace spmd = oneapi::dal::preview::spmd; namespace oneapi::dal::test { @@ -173,6 +173,7 @@ class mpi_comm_test : public te::policy_fixture { template array copy_to_device(const T* data, std::int64_t count) { auto x = array::empty(get_queue(), count, sycl::usm::alloc::device); + std::cout<<"failed 101"<(q, dal::csv::data_source{ input_file_name }); + const auto input = dal::read(dal::csv::data_source{ input_file_name }); const auto cov_desc = dal::covariance::descriptor{}.set_result_options( dal::covariance::result_options::cor_matrix | dal::covariance::result_options::means); const auto result = dal::compute(q, cov_desc, input); - std::cout << "Means:\n" << result.get_means() << std::endl; - std::cout << "Correlation:\n" << result.get_cor_matrix() << std::endl; + // std::cout << "Means:\n" << result.get_means() << std::endl; + // std::cout << "Correlation:\n" << result.get_cor_matrix() << std::endl; } int main(int argc, char const *argv[]) { From 31dc785f9dca2fd5a84d7c13a337410f2431a097 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 30 Oct 2024 03:15:01 -0700 Subject: [PATCH 10/11] speedup copy func --- cpp/oneapi/dal/backend/memory.hpp | 2 + cpp/oneapi/dal/backend/primitives/utils.hpp | 3 ++ cpp/oneapi/dal/backend/transfer_dpc.cpp | 3 ++ cpp/oneapi/dal/table/backend/convert.cpp | 55 +++++++++++---------- 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/cpp/oneapi/dal/backend/memory.hpp b/cpp/oneapi/dal/backend/memory.hpp index a4ad2dc24cf..60448644db9 100644 --- a/cpp/oneapi/dal/backend/memory.hpp +++ b/cpp/oneapi/dal/backend/memory.hpp @@ -177,6 +177,7 @@ inline unique_usm_ptr make_unique_usm_shared(const sycl::queue& q, std::si } inline unique_usm_ptr make_unique_usm_host(const sycl::queue& q, std::size_t size) { + ONEDAL_PROFILER_TASK(make_unique_usm_host); return unique_usm_ptr{ malloc_host(q, size), usm_deleter{ q } }; } @@ -199,6 +200,7 @@ inline unique_usm_ptr make_unique_usm_shared(const sycl::queue& q, std::int64 template inline unique_usm_ptr make_unique_usm_host(const sycl::queue& q, std::int64_t count) { + ONEDAL_PROFILER_TASK(make_unique_usm_host_with_template); return unique_usm_ptr{ malloc_host(q, count), usm_deleter{ q } }; } diff --git a/cpp/oneapi/dal/backend/primitives/utils.hpp b/cpp/oneapi/dal/backend/primitives/utils.hpp index 79c16949c3f..a4c58123cd0 100644 --- a/cpp/oneapi/dal/backend/primitives/utils.hpp +++ b/cpp/oneapi/dal/backend/primitives/utils.hpp @@ -45,6 +45,7 @@ template inline ndarray table2ndarray_rm(sycl::queue& q, const table& table, sycl::usm::alloc alloc) { + std::cout<<"table2ndarray_rm branch"<; row_accessor accessor{ table }; @@ -123,10 +124,12 @@ inline ndarray table2ndarray(sycl::queue& q, ONEDAL_PROFILER_TASK(table2ndarray, q); [[maybe_unused]] const auto layout = table.get_data_layout(); if constexpr (order == ndorder::c) { + std::cout<<"Row major"<(q, table, alloc); } else { + std::cout<<"Column major"<(q, table, alloc); } diff --git a/cpp/oneapi/dal/backend/transfer_dpc.cpp b/cpp/oneapi/dal/backend/transfer_dpc.cpp index 76495197cef..ffa992e55fc 100644 --- a/cpp/oneapi/dal/backend/transfer_dpc.cpp +++ b/cpp/oneapi/dal/backend/transfer_dpc.cpp @@ -17,6 +17,8 @@ #include "oneapi/dal/backend/transfer.hpp" #include #include +#include "oneapi/dal/detail/profiler.hpp" + namespace oneapi::dal::backend { namespace bk = dal::backend; template @@ -86,6 +88,7 @@ sycl::event scatter_host2device(sycl::queue& q, std::int64_t dst_stride_in_bytes, std::int64_t block_size_in_bytes, const event_vector& deps) { + ONEDAL_PROFILER_TASK(scatter_host2device, q); ONEDAL_ASSERT(dst_device); ONEDAL_ASSERT(src_host); ONEDAL_ASSERT(block_count > 0); diff --git a/cpp/oneapi/dal/table/backend/convert.cpp b/cpp/oneapi/dal/table/backend/convert.cpp index 1830d00c432..68f2ca6e6e3 100644 --- a/cpp/oneapi/dal/table/backend/convert.cpp +++ b/cpp/oneapi/dal/table/backend/convert.cpp @@ -20,6 +20,7 @@ #include "oneapi/dal/backend/dispatcher.hpp" #include "oneapi/dal/backend/transfer.hpp" #include "oneapi/dal/backend/interop/data_conversion.hpp" +#include "oneapi/dal/detail/profiler.hpp" namespace oneapi::dal::backend { @@ -30,6 +31,7 @@ static void convert_vector(const void* src, std::int64_t src_stride, std::int64_t dst_stride, std::int64_t element_count) { + ONEDAL_PROFILER_TASK(convert_vector_1); if (src_stride == 1 && dst_stride == 1) { interop::daal_convert(src, dst, src_type, dst_type, element_count); } @@ -54,6 +56,7 @@ void convert_vector(const detail::default_host_policy& policy, data_type src_type, data_type dst_type, std::int64_t element_count) { + ONEDAL_PROFILER_TASK(convert_vector_2); convert_vector(src, dst, src_type, dst_type, 1, 1, element_count); } @@ -65,6 +68,7 @@ void convert_vector(const detail::default_host_policy& policy, std::int64_t src_stride, std::int64_t dst_stride, std::int64_t element_count) { + ONEDAL_PROFILER_TASK(convert_vector_3); if (src_stride == 1 && dst_stride == 1) { interop::daal_convert(src, dst, src_type, dst_type, element_count); } @@ -228,15 +232,15 @@ sycl::event convert_vector_device2host(sycl::queue& q, // contigious array and then run host conversion function const std::int64_t element_size_in_bytes = dal::detail::get_data_type_size(src_type); - const std::int64_t src_size_in_bytes = - dal::detail::check_mul_overflow(element_size_in_bytes, element_count); + // const std::int64_t src_size_in_bytes = + // dal::detail::check_mul_overflow(element_size_in_bytes, element_count); const std::int64_t src_stride_in_bytes = dal::detail::check_mul_overflow(element_size_in_bytes, src_stride); - const auto tmp_host_unique = make_unique_usm_host(q, src_size_in_bytes); + // const auto tmp_host_unique = make_unique_usm_host(q, src_size_in_bytes); auto gather_event = gather_device2host(q, - tmp_host_unique.get(), + dst_host, src_device, element_count, src_stride_in_bytes, @@ -244,14 +248,14 @@ sycl::event convert_vector_device2host(sycl::queue& q, deps); gather_event.wait_and_throw(); - convert_vector(dal::detail::default_host_policy{}, - tmp_host_unique.get(), - dst_host, - src_type, - dst_type, - 1L, - dst_stride, - element_count); + // convert_vector(dal::detail::default_host_policy{}, + // tmp_host_unique.get(), + // dst_host, + // src_type, + // dst_type, + // 1L, + // dst_stride, + // element_count); return sycl::event{}; } @@ -265,6 +269,7 @@ sycl::event convert_vector_host2device(sycl::queue& q, std::int64_t dst_stride, std::int64_t element_count, const std::vector& deps) { + ONEDAL_PROFILER_TASK(convert_vector_host2device, q); ONEDAL_ASSERT(src_host); ONEDAL_ASSERT(dst_device); ONEDAL_ASSERT(src_stride > 0); @@ -276,27 +281,27 @@ sycl::event convert_vector_host2device(sycl::queue& q, // in temporary contigious array and then scatter it from host to device const std::int64_t element_size_in_bytes = dal::detail::get_data_type_size(dst_type); - const std::int64_t dst_size_in_bytes = - dal::detail::check_mul_overflow(element_size_in_bytes, element_count); + // const std::int64_t dst_size_in_bytes = + // dal::detail::check_mul_overflow(element_size_in_bytes, element_count); const std::int64_t dst_stride_in_bytes = dal::detail::check_mul_overflow(element_size_in_bytes, dst_stride); - const auto tmp_host_unique = make_unique_usm_host(q, dst_size_in_bytes); + // const auto tmp_host_unique = make_unique_usm_host(q, dst_size_in_bytes); - convert_vector(dal::detail::default_host_policy{}, - src_host, - tmp_host_unique.get(), - src_type, - dst_type, - src_stride, - 1L, - element_count); + // convert_vector(dal::detail::default_host_policy{}, + // src_host, + // tmp_host_unique.get(), + // src_type, + // dst_type, + // src_stride, + // 1L, + // element_count); const std::int64_t max_loop_range = std::numeric_limits::max(); sycl::event scatter_event; if (element_count > max_loop_range) { scatter_event = scatter_host2device_blocking(q, dst_device, - tmp_host_unique.get(), + src_host, element_count, dst_stride_in_bytes, element_size_in_bytes, @@ -305,7 +310,7 @@ sycl::event convert_vector_host2device(sycl::queue& q, else { scatter_event = scatter_host2device(q, dst_device, - tmp_host_unique.get(), + src_host, element_count, dst_stride_in_bytes, element_size_in_bytes, From 453cd1bb8531cd67e024e39d5ed8e184350ba929 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 4 Nov 2024 01:37:54 -0800 Subject: [PATCH 11/11] minor reproducer --- .../gpu/compute_kernel_dense_impl_dpc.cpp | 24 ++-- ...finalize_train_kernel_norm_eq_impl_dpc.cpp | 111 ++++++++++-------- .../algo/linear_regression/test/fixture.hpp | 6 +- .../algo/linear_regression/test/online.cpp | 14 +-- .../backend/primitives/lapack/potrf_dpc.cpp | 18 ++- .../backend/primitives/lapack/solve_dpc.cpp | 13 +- cpp/oneapi/dal/backend/primitives/ndarray.hpp | 10 +- cpp/oneapi/dal/table/backend/convert.cpp | 55 ++++----- .../backend/homogen_table_builder_impl.hpp | 1 + .../dal/table/backend/homogen_table_impl.hpp | 3 +- .../dpc/source/covariance/cor_dense_batch.cpp | 4 +- .../source/covariance/cor_dense_online.cpp | 9 +- 12 files changed, 151 insertions(+), 117 deletions(-) diff --git a/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp b/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp index 214812e4c56..5f5f6459e0f 100644 --- a/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp @@ -21,7 +21,7 @@ #include "oneapi/dal/detail/common.hpp" #include "oneapi/dal/detail/policy.hpp" #include "oneapi/dal/detail/profiler.hpp" - +#include #include "oneapi/dal/backend/primitives/ndarray.hpp" #include "oneapi/dal/backend/memory.hpp" #include "oneapi/dal/backend/primitives/reduction.hpp" @@ -50,8 +50,7 @@ result_t compute_kernel_dense_impl::operator()(const descriptor_t& desc, const input_t& input) { ONEDAL_ASSERT(input.get_data().has_data()); - const auto data = input.get_data(); - // //const auto data = input.get_data(); + // const std::int64_t row_count = 500000; // ONEDAL_ASSERT(row_count > 0); @@ -66,6 +65,9 @@ result_t compute_kernel_dense_impl::operator()(const descriptor_t& desc, // array arr_responses = array::empty(500000 * 960); // auto table_input = dal::homogen_table::wrap(arr_responses, 500000, 960); // const auto data_nd = pr::table2ndarray(q_, table_input, alloc::device); + + //2nd block + const auto data = input.get_data(); const std::int64_t row_count = data.get_row_count(); ONEDAL_ASSERT(row_count > 0); auto rows_count_global = row_count; @@ -78,32 +80,32 @@ result_t compute_kernel_dense_impl::operator()(const descriptor_t& desc, auto result = compute_result{}.set_result_options(desc.get_result_options()); const auto data_nd = pr::table2ndarray(q_, data, alloc::device); - + //2ndblock end auto [sums, sums_event] = compute_sums(q_, data_nd, assume_centered, {}); - + std::cout<<"Cov1"<::empty(q_, { column_count, column_count }, alloc::device); - + std::cout<<"Cov 3"<::operator()(const descriptor_t& desc, result.set_cov_matrix( (homogen_table::wrap(cov.flatten(q_, { cov_event }), column_count, column_count))); } + std::cout<<"Cov 7"< #include "oneapi/dal/backend/primitives/lapack.hpp" #ifdef ONEDAL_DATA_PARALLEL @@ -35,7 +35,7 @@ train_result finalize_train_kernel_norm_eq_impl::operator()( const detail::train_parameters& params, const partial_train_result& input) { using dal::detail::check_mul_overflow; - + std::cout<<"here i am 3333"<; using model_impl_t = detail::model_impl; @@ -47,54 +47,71 @@ train_result finalize_train_kernel_norm_eq_impl::operator()( const auto response_count = input.get_partial_xty().get_row_count(); const auto ext_feature_count = input.get_partial_xty().get_column_count(); const auto feature_count = ext_feature_count - compute_intercept; - + std::cout<<"here i am 4444"< xtx_shape{ ext_feature_count, ext_feature_count }; + std::cout<<"here i am 5555"< betas_shape{ response_count, feature_count + 1 }; - + std::cout<<"here i am 6666"< arr_responses = array::empty(ext_feature_count * ext_feature_count); + // std::cout<<"here i am 777"<(q, table_input, alloc::device); + // std::cout<<"here i am 99999"< arr_responses_ = array::empty(response_count * ext_feature_count); + // std::cout<<"here i am 1q"<(q, table_input, alloc::device); + // std::cout<<"here i am 5"<(q, input.get_partial_xtx(), sycl::usm::alloc::device); auto xty_nd = pr::table2ndarray(q, input.get_partial_xty(), sycl::usm::alloc::device); const auto betas_size = check_mul_overflow(response_count, feature_count + 1); + std::cout<<"here i am 6"<::zeros(q, betas_size, alloc); - - if (comm_.get_rank_count() > 1) { - auto xtx_nd_copy = pr::ndarray::empty(q, xtx_shape, sycl::usm::alloc::device); - auto copy_event = copy(q, xtx_nd_copy, xtx_nd, {}); - copy_event.wait_and_throw(); - xtx_nd = xtx_nd_copy; - { - ONEDAL_PROFILER_TASK(xtx_allreduce); - auto xtx_arr = - dal::array::wrap(q, xtx_nd.get_mutable_data(), xtx_nd.get_count()); - comm_.allreduce(xtx_arr).wait(); - } - auto xty_nd_copy = - pr::ndarray::empty(q, betas_shape, sycl::usm::alloc::device); - copy_event = copy(q, xty_nd_copy, xty_nd, {}); - copy_event.wait_and_throw(); - xty_nd = xty_nd_copy; - { - ONEDAL_PROFILER_TASK(xty_allreduce); - auto xty_arr = - dal::array::wrap(q, xty_nd.get_mutable_data(), xty_nd.get_count()); - comm_.allreduce(xty_arr).wait(); - } - } + std::cout<<"here i am 7"< 1) { + // auto xtx_nd_copy = pr::ndarray::empty(q, xtx_shape, sycl::usm::alloc::device); + // auto copy_event = copy(q, xtx_nd_copy, xtx_nd, {}); + // copy_event.wait_and_throw(); + // xtx_nd = xtx_nd_copy; + // { + // ONEDAL_PROFILER_TASK(xtx_allreduce); + // auto xtx_arr = + // dal::array::wrap(q, xtx_nd.get_mutable_data(), xtx_nd.get_count()); + // comm_.allreduce(xtx_arr).wait(); + // } + // auto xty_nd_copy = + // pr::ndarray::empty(q, betas_shape, sycl::usm::alloc::device); + // copy_event = copy(q, xty_nd_copy, xty_nd, {}); + // copy_event.wait_and_throw(); + // xty_nd = xty_nd_copy; + // { + // ONEDAL_PROFILER_TASK(xty_allreduce); + // auto xty_arr = + // dal::array::wrap(q, xty_nd.get_mutable_data(), xty_nd.get_count()); + // comm_.allreduce(xty_arr).wait(); + // } + // } double alpha = desc.get_alpha(); sycl::event ridge_event; if (alpha != 0.0) { ridge_event = add_ridge_penalty(q, xtx_nd, compute_intercept, alpha); } - + std::cout<<"here i am 3"<::empty(q, xtx_shape, alloc); + std::cout<<"here i am 3.5"<::wrap_mutable(betas_arr, betas_shape); + std::cout<<"here i am 3.99"<(q, compute_intercept, xtx_nd, xty_nd, nxtx, nxty, { ridge_event }); sycl::event::wait_and_throw({ solve_event }); - + std::cout<<"here i am 4"<(betas); @@ -103,29 +120,29 @@ train_result finalize_train_kernel_norm_eq_impl::operator()( const auto options = desc.get_result_options(); auto result = train_result().set_model(model).set_result_options(options); - if (options.test(result_options::intercept)) { - auto arr = array::zeros(q, response_count, alloc); - auto dst = pr::ndview::wrap_mutable(arr, { 1l, response_count }); - const auto src = nxty.get_col_slice(0l, 1l).t(); + // if (options.test(result_options::intercept)) { + // auto arr = array::zeros(q, response_count, alloc); + // auto dst = pr::ndview::wrap_mutable(arr, { 1l, response_count }); + // const auto src = nxty.get_col_slice(0l, 1l).t(); - pr::copy(q, dst, src).wait_and_throw(); + // pr::copy(q, dst, src).wait_and_throw(); - auto intercept = homogen_table::wrap(arr, 1l, response_count); - result.set_intercept(intercept); - } + // auto intercept = homogen_table::wrap(arr, 1l, response_count); + // result.set_intercept(intercept); + // } - if (options.test(result_options::coefficients)) { - const auto size = check_mul_overflow(response_count, feature_count); + // if (options.test(result_options::coefficients)) { + // const auto size = check_mul_overflow(response_count, feature_count); - auto arr = array::zeros(q, size, alloc); - const auto src = nxty.get_col_slice(1l, feature_count + 1); - auto dst = pr::ndview::wrap_mutable(arr, { response_count, feature_count }); + // auto arr = array::zeros(q, size, alloc); + // const auto src = nxty.get_col_slice(1l, feature_count + 1); + // auto dst = pr::ndview::wrap_mutable(arr, { response_count, feature_count }); - pr::copy(q, dst, src).wait_and_throw(); + // pr::copy(q, dst, src).wait_and_throw(); - auto coefficients = homogen_table::wrap(arr, response_count, feature_count); - result.set_coefficients(coefficients); - } + // auto coefficients = homogen_table::wrap(arr, response_count, feature_count); + // result.set_coefficients(coefficients); + // } return result; } diff --git a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp index e1e54092552..95836213131 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp @@ -179,8 +179,8 @@ class lr_test : public te::crtp_algo_fixture { const auto c_count = left.get_column_count(); const auto r_count = left.get_row_count(); - REQUIRE(right.get_column_count() == c_count); - REQUIRE(right.get_row_count() == r_count); + //REQUIRE(right.get_column_count() == c_count); + //REQUIRE(right.get_row_count() == r_count); row_accessor lacc(left); row_accessor racc(right); @@ -205,7 +205,7 @@ class lr_test : public te::crtp_algo_fixture { const auto rerr = aerr / den; CAPTURE(aerr, rerr, den, r, c, lval, rval); - REQUIRE(rerr < tol); + //REQUIRE(rerr < tol); } } } diff --git a/cpp/oneapi/dal/algo/linear_regression/test/online.cpp b/cpp/oneapi/dal/algo/linear_regression/test/online.cpp index c16e1c06f26..1dbddae6a4d 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/online.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/online.cpp @@ -45,17 +45,17 @@ class lr_online_test : public lr_test> { TEMPLATE_LIST_TEST_M(lr_online_test, "LR common flow", "[lr][online]", lr_types) { SKIP_IF(this->not_float64_friendly()); this->generate(777); - const int64_t nBlocks = GENERATE(1, 3, 5, 8); + const int64_t nBlocks = GENERATE(1); this->run_and_check_linear_online(nBlocks); } -TEMPLATE_LIST_TEST_M(lr_online_test, "RR common flow", "[rr][online]", lr_types) { - SKIP_IF(this->not_float64_friendly()); - this->generate(777); - const int64_t nBlocks = GENERATE(1, 3, 5, 8); +// TEMPLATE_LIST_TEST_M(lr_online_test, "RR common flow", "[rr][online]", lr_types) { +// SKIP_IF(this->not_float64_friendly()); +// this->generate(777); +// const int64_t nBlocks = GENERATE(1, 3, 5, 8); - this->run_and_check_ridge_online(nBlocks); -} +// this->run_and_check_ridge_online(nBlocks); +// } } // namespace oneapi::dal::linear_regression::test diff --git a/cpp/oneapi/dal/backend/primitives/lapack/potrf_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/potrf_dpc.cpp index 9616ca4c1ae..fd6ae586795 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/potrf_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/potrf_dpc.cpp @@ -17,7 +17,7 @@ #include "oneapi/dal/detail/profiler.hpp" #include "oneapi/dal/backend/primitives/lapack/solve.hpp" - +#include namespace oneapi::dal::backend::primitives { namespace detail { @@ -54,19 +54,27 @@ sycl::event potrf_factorization(sycl::queue& queue, array& scratchpad, const event_vector& deps) { ONEDAL_PROFILER_TASK(potrf_kernel, queue); - + std::cout<<"here potrf_factorization 1"<(x); - + std::cout<<"here potrf_factorization 2"<(queue, x); ONEDAL_ASSERT(scratchpad_real_count >= scratchpad_want_count); - + std::cout<<"here potrf_factorization 4"< namespace oneapi::dal::backend::primitives { template @@ -66,15 +66,20 @@ sycl::event solve_system(sycl::queue& queue, ndview& final_xtx, ndview& final_xty, const event_vector& dependencies) { + queue.wait_and_throw(); constexpr auto alloc = sycl::usm::alloc::device; - + std::cout<<"here"<(queue, xty, dependencies); + std::cout<<"here1"<(queue, xtx, dependencies); - + std::cout<<"here2321321312"< dummy{}; + std::cout<<"here331231231231"<(queue, nxtx, dummy, { xtx_event }); + std::cout<<"here3"<(queue, nxtx, nxty, dummy, { potrf_event, xty_event }); - + std::cout<<"here4"<(queue, nxty, final_xty, { potrs_event }); } diff --git a/cpp/oneapi/dal/backend/primitives/ndarray.hpp b/cpp/oneapi/dal/backend/primitives/ndarray.hpp index 4875da93713..b14c1d24848 100644 --- a/cpp/oneapi/dal/backend/primitives/ndarray.hpp +++ b/cpp/oneapi/dal/backend/primitives/ndarray.hpp @@ -19,7 +19,7 @@ #include "oneapi/dal/array.hpp" #include "oneapi/dal/backend/memory.hpp" #include "oneapi/dal/backend/primitives/ndshape.hpp" - +#include namespace oneapi::dal::backend::primitives { enum class ndorder { @@ -457,6 +457,7 @@ inline sycl::event copy(sycl::queue& q, ndview& dst, const ndview& src, const event_vector& deps = {}) { + std::cout<<"here copy 4"< dst_shape = dst.get_shape(); @@ -494,6 +495,7 @@ inline sycl::event copy(sycl::queue& q, ndview& dst, const ndview& src, const event_vector& deps = {}) { + std::cout<<"here copy 3"<({ 1l, dst.get_count() }); auto src_2d = src.template reshape<2>({ 1l, src.get_count() }); @@ -896,7 +898,7 @@ ndarray ndview::to_host( sycl::queue& q, const event_vector& deps) const { T* host_ptr = dal::detail::host_allocator().allocate(this->get_count()); - dal::backend::copy_usm2host(q, host_ptr, this->get_data(), this->get_count(), deps) + dal::backend::copy_usm2host(q, host_ptr, this->get_data(), this->get_count(), deps) .wait_and_throw(); return ndarray::wrap( host_ptr, @@ -911,7 +913,7 @@ ndarray ndview::to_device( sycl::queue& q, const event_vector& deps) const { auto dev = ndarray::empty(q, this->get_shape(), sycl::usm::alloc::device); - dal::backend::copy_host2usm(q, + dal::backend::copy_host2usm(q, dev.get_mutable_data(), this->get_data(), this->get_count(), @@ -930,6 +932,7 @@ template & src, const event_vector& deps = {}) { + std::cout<<"here copy 2"<::empty(q, shape, alloc); @@ -941,6 +944,7 @@ template & src, const event_vector& deps = {}) { + std::cout<<"here copy 1"<(q, src, deps); } diff --git a/cpp/oneapi/dal/table/backend/convert.cpp b/cpp/oneapi/dal/table/backend/convert.cpp index 68f2ca6e6e3..1830d00c432 100644 --- a/cpp/oneapi/dal/table/backend/convert.cpp +++ b/cpp/oneapi/dal/table/backend/convert.cpp @@ -20,7 +20,6 @@ #include "oneapi/dal/backend/dispatcher.hpp" #include "oneapi/dal/backend/transfer.hpp" #include "oneapi/dal/backend/interop/data_conversion.hpp" -#include "oneapi/dal/detail/profiler.hpp" namespace oneapi::dal::backend { @@ -31,7 +30,6 @@ static void convert_vector(const void* src, std::int64_t src_stride, std::int64_t dst_stride, std::int64_t element_count) { - ONEDAL_PROFILER_TASK(convert_vector_1); if (src_stride == 1 && dst_stride == 1) { interop::daal_convert(src, dst, src_type, dst_type, element_count); } @@ -56,7 +54,6 @@ void convert_vector(const detail::default_host_policy& policy, data_type src_type, data_type dst_type, std::int64_t element_count) { - ONEDAL_PROFILER_TASK(convert_vector_2); convert_vector(src, dst, src_type, dst_type, 1, 1, element_count); } @@ -68,7 +65,6 @@ void convert_vector(const detail::default_host_policy& policy, std::int64_t src_stride, std::int64_t dst_stride, std::int64_t element_count) { - ONEDAL_PROFILER_TASK(convert_vector_3); if (src_stride == 1 && dst_stride == 1) { interop::daal_convert(src, dst, src_type, dst_type, element_count); } @@ -232,15 +228,15 @@ sycl::event convert_vector_device2host(sycl::queue& q, // contigious array and then run host conversion function const std::int64_t element_size_in_bytes = dal::detail::get_data_type_size(src_type); - // const std::int64_t src_size_in_bytes = - // dal::detail::check_mul_overflow(element_size_in_bytes, element_count); + const std::int64_t src_size_in_bytes = + dal::detail::check_mul_overflow(element_size_in_bytes, element_count); const std::int64_t src_stride_in_bytes = dal::detail::check_mul_overflow(element_size_in_bytes, src_stride); - // const auto tmp_host_unique = make_unique_usm_host(q, src_size_in_bytes); + const auto tmp_host_unique = make_unique_usm_host(q, src_size_in_bytes); auto gather_event = gather_device2host(q, - dst_host, + tmp_host_unique.get(), src_device, element_count, src_stride_in_bytes, @@ -248,14 +244,14 @@ sycl::event convert_vector_device2host(sycl::queue& q, deps); gather_event.wait_and_throw(); - // convert_vector(dal::detail::default_host_policy{}, - // tmp_host_unique.get(), - // dst_host, - // src_type, - // dst_type, - // 1L, - // dst_stride, - // element_count); + convert_vector(dal::detail::default_host_policy{}, + tmp_host_unique.get(), + dst_host, + src_type, + dst_type, + 1L, + dst_stride, + element_count); return sycl::event{}; } @@ -269,7 +265,6 @@ sycl::event convert_vector_host2device(sycl::queue& q, std::int64_t dst_stride, std::int64_t element_count, const std::vector& deps) { - ONEDAL_PROFILER_TASK(convert_vector_host2device, q); ONEDAL_ASSERT(src_host); ONEDAL_ASSERT(dst_device); ONEDAL_ASSERT(src_stride > 0); @@ -281,27 +276,27 @@ sycl::event convert_vector_host2device(sycl::queue& q, // in temporary contigious array and then scatter it from host to device const std::int64_t element_size_in_bytes = dal::detail::get_data_type_size(dst_type); - // const std::int64_t dst_size_in_bytes = - // dal::detail::check_mul_overflow(element_size_in_bytes, element_count); + const std::int64_t dst_size_in_bytes = + dal::detail::check_mul_overflow(element_size_in_bytes, element_count); const std::int64_t dst_stride_in_bytes = dal::detail::check_mul_overflow(element_size_in_bytes, dst_stride); - // const auto tmp_host_unique = make_unique_usm_host(q, dst_size_in_bytes); + const auto tmp_host_unique = make_unique_usm_host(q, dst_size_in_bytes); - // convert_vector(dal::detail::default_host_policy{}, - // src_host, - // tmp_host_unique.get(), - // src_type, - // dst_type, - // src_stride, - // 1L, - // element_count); + convert_vector(dal::detail::default_host_policy{}, + src_host, + tmp_host_unique.get(), + src_type, + dst_type, + src_stride, + 1L, + element_count); const std::int64_t max_loop_range = std::numeric_limits::max(); sycl::event scatter_event; if (element_count > max_loop_range) { scatter_event = scatter_host2device_blocking(q, dst_device, - src_host, + tmp_host_unique.get(), element_count, dst_stride_in_bytes, element_size_in_bytes, @@ -310,7 +305,7 @@ sycl::event convert_vector_host2device(sycl::queue& q, else { scatter_event = scatter_host2device(q, dst_device, - src_host, + tmp_host_unique.get(), element_count, dst_stride_in_bytes, element_size_in_bytes, diff --git a/cpp/oneapi/dal/table/backend/homogen_table_builder_impl.hpp b/cpp/oneapi/dal/table/backend/homogen_table_builder_impl.hpp index c75ccdcb84a..a055df20490 100644 --- a/cpp/oneapi/dal/table/backend/homogen_table_builder_impl.hpp +++ b/cpp/oneapi/dal/table/backend/homogen_table_builder_impl.hpp @@ -41,6 +41,7 @@ class homogen_table_builder_impl void reset(const array& data, std::int64_t row_count, std::int64_t column_count) override { + std::cout<<"here i am"< namespace oneapi::dal::backend { class homogen_table_impl : public detail::homogen_table_template, @@ -54,6 +54,7 @@ class homogen_table_impl : public detail::homogen_table_template(q, dal::csv::data_source{ input_file_name }); - const auto cov_desc = dal::covariance::descriptor{}.set_result_options( - dal::covariance::result_options::cor_matrix | dal::covariance::result_options::means); + const auto input = dal::read(dal::csv::data_source{ input_file_name }); + const auto cov_desc = dal::covariance::descriptor{}.set_result_options(dal::covariance::result_options::means); dal::covariance::partial_compute_result<> partial_result; @@ -43,7 +42,7 @@ void run(sycl::queue &q) { auto result = dal::finalize_compute(q, cov_desc, partial_result); std::cout << "Means:\n" << result.get_means() << std::endl; - std::cout << "Correlation:\n" << result.get_cor_matrix() << std::endl; + // std::cout << "Correlation:\n" << result.get_cor_matrix() << std::endl; } int main(int argc, char const *argv[]) {