diff --git a/sources/prolong/MEX_EXTI_Prol/EXTI_prolongation.cpp b/sources/prolong/MEX_EXTI_Prol/EXTI_prolongation.cpp index b6ef5ec..2764971 100644 --- a/sources/prolong/MEX_EXTI_Prol/EXTI_prolongation.cpp +++ b/sources/prolong/MEX_EXTI_Prol/EXTI_prolongation.cpp @@ -5,7 +5,8 @@ #include "omp.h" #include "MIS.h" -#include "TWOLP_ProlStripe_EXTI.h" +#include "ProlStripe_EXTI.h" +#include "ONELP_ProlStripe_EXTI.h" int EXTI_prolongation(const int level, const int nthreads, const int *vecstart, const int nn_A, const int nt_A, @@ -16,22 +17,17 @@ int EXTI_prolongation(const int level, const int nthreads, const int nr_I, const int nc_I, int &nt_I, int *&iat_I, int *&ja_I, double *&coef_I){ - int *ridv_i; int *ridvn_i; - int ierr = 0; - ridv_i = new int [nthreads+1](); if (ridv_i == NULL) exit(ierr += 1); ridvn_i = new int [nthreads+1](); if (ridvn_i == NULL) exit(ierr += 1); int nn_I; - #pragma omp parallel num_threads(nthreads) - { std::chrono::time_point START_OLD,END_OLD; std::chrono::time_point START_ONE,END_ONE; @@ -64,13 +60,39 @@ int EXTI_prolongation(const int level, const int nthreads, coef_scr = new double [nt_Imax](); if (coef_scr == NULL) {ierr_L = 1; goto exit_pragma;} - START_TWO = std::chrono::high_resolution_clock::now(); - ierr_L = TWOLP_ProlStripe_EXTI(firstrow,lastrow,nn_loc,nn_A,nt_A,nt_Imax, - iat_A,ja_A,coef_A,coef_S,iat_C,ja_C,coef_C, - fcnodes,nn_I_loc,nt_I_loc,iat_scr,ja_scr, - coef_scr); - END_TWO = std::chrono::high_resolution_clock::now(); - time_TWO = std::chrono::duration(END_TWO-START_TWO); + +#if 1 + START_ONE = std::chrono::high_resolution_clock::now(); + ierr_L = ONELP_ProlStripe_EXTI(firstrow,lastrow,nn_loc,nn_A,nt_A,nt_Imax, + iat_A,ja_A,coef_A,coef_S,iat_C,ja_C,coef_C, + fcnodes,nn_I_loc,nt_I_loc,iat_scr,ja_scr, + coef_scr); + END_ONE = std::chrono::high_resolution_clock::now(); + time_ONE = std::chrono::duration(END_ONE-START_ONE); + //printf("TIME ONE [s] = %15.6e\n",time_ONE.count()); + //printf("\n\n\n\n------------\n\n\n\n"); +#endif +#if 0 + START_TWO = std::chrono::high_resolution_clock::now(); + ierr_L = TWOLP_ProlStripe_EXTI(firstrow,lastrow,nn_loc,nn_A,nt_A,nt_Imax, + iat_A,ja_A,coef_A,coef_S,iat_C,ja_C,coef_C, + fcnodes,nn_I_loc,nt_I_loc,iat_scr,ja_scr, + coef_scr); + END_TWO = std::chrono::high_resolution_clock::now(); + time_TWO = std::chrono::duration(END_TWO-START_TWO); + printf("TIME TWO [s] = %15.6e\n",time_TWO.count()); + printf("\n\n\n\n------------\n\n\n\n"); +#endif +#if 0 + START_OLD = std::chrono::high_resolution_clock::now(); + ierr_L = ProlStripe_EXTI(firstrow,lastrow,nn_loc,nn_A,nt_A,nt_Imax, + iat_A,ja_A,coef_A,coef_S,fcnodes,nn_I_loc,nt_I_loc, + iat_scr,ja_scr,coef_scr); + END_OLD = std::chrono::high_resolution_clock::now(); + time_OLD = std::chrono::duration(END_OLD-START_OLD); + printf("TIME OLD [s] = %15.6e\n",time_OLD.count()); + printf("\n\n\n\n------------\n\n\n\n"); +#endif #pragma omp atomic update ierr += ierr_L; @@ -127,13 +149,10 @@ int EXTI_prolongation(const int level, const int nthreads, delete [] ja_scr; delete [] coef_scr; - exit_pragma: ; - #pragma omp atomic update ierr += ierr_L; - } delete [] ridv_i; diff --git a/sources/prolong/MEX_EXTI_Prol/TWOLP_ProlStripe_EXTI.cpp b/sources/prolong/MEX_EXTI_Prol/ONELP_ProlStripe_EXTI.cpp similarity index 59% rename from sources/prolong/MEX_EXTI_Prol/TWOLP_ProlStripe_EXTI.cpp rename to sources/prolong/MEX_EXTI_Prol/ONELP_ProlStripe_EXTI.cpp index 74ccb0b..b23c7eb 100644 --- a/sources/prolong/MEX_EXTI_Prol/TWOLP_ProlStripe_EXTI.cpp +++ b/sources/prolong/MEX_EXTI_Prol/ONELP_ProlStripe_EXTI.cpp @@ -1,24 +1,20 @@ - #include #include -#define DBNODE 000 -#define ENDNODE 200 - - +#define DBNODE 7 +#define ENDNODE 7 #include #include #include #include -using namespace std; #include "ir_heapsort.h" const double ONE = 1.0; const double ZERO = 0.0; -const double EPS = numeric_limits::epsilon(); +const double EPS = std::numeric_limits::epsilon(); -int TWOLP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_loc, +int ONELP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_loc, const int nn_A, const int nt_A, const int ntmax_P, const int *const iat_A, const int *const ja_A, const double *const coef_A, const int *const coef_S, @@ -27,12 +23,10 @@ int TWOLP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_lo int &nn_P, int &nt_P, int *iat_P, int *ja_P, double *coef_P){ - - double avg_nnz = 20.0*static_cast(nt_A) / static_cast(lastrow-firstrow); int size_scr = static_cast(avg_nnz); int *WI = new int [nn_A](); if (WI == NULL) return 1; - fill_n(WI,nn_A,0); + std::fill_n(WI,nn_A,0); int *ja_FS = new int [size_scr](); if (ja_FS == NULL) return 1; double *coef_FS = new double [size_scr](); if (coef_FS == NULL) return 1; int *list_weak = new int [size_scr](); if (list_weak == NULL) return 1; @@ -48,11 +42,6 @@ int TWOLP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_lo int inod_coarse = fcnodes[inod]; - - - - - if (inod_coarse >= 0){ nn_P++; @@ -64,56 +53,30 @@ int TWOLP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_lo } else if (inod_coarse == -1) { nn_P++; - - - - - int n_int = 0; int n_weak = 0; double denom = ZERO; + double a_ii; for (int j = iat_A[inod]; j < iat_A[inod+1]; j++){ int jcol = ja_A[j]; if (jcol == inod){ - double a_ii = coef_A[j]; + a_ii = coef_A[j]; denom += a_ii; } else { if (fcnodes[jcol] >= 0){ if (coef_S[j] > 0){ + ja_P[ind_P+n_int] = jcol; + coef_P[ind_P+n_int] = coef_A[j]; + WI[jcol] = n_int+1; + n_int++; - if (WI[jcol] <= 0){ - - ja_P[ind_P+n_int] = jcol; - coef_P[ind_P+n_int] = coef_A[j]; - WI[jcol] = n_int+1; - - - - - - n_int++; - } } else { - - - - - - - denom += coef_A[j]; - - - - - - - - WI[jcol] = -1; + WI[jcol] = -j; list_weak[n_weak] = jcol; n_weak++; } @@ -122,140 +85,78 @@ int TWOLP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_lo if (coef_S[j] < 0){ denom += coef_A[j]; - - - - - } } } } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (int j = iat_C[inod]; j < iat_C[inod+1]; j++){ int jcol = ja_C[j]; if (jcol != inod){ if (fcnodes[jcol] < 0){ double a_ik = coef_C[j]; - double a_ik_bar = min(ZERO,a_ik); - double ext_sum = a_ik_bar; - - - - - + double a_ki_bar; + double ext_sum = 0.0; int n_FS = 0; for (int k = iat_C[jcol]; k < iat_C[jcol+1]; k++){ int kcol = ja_C[k]; + if (kcol == inod){ + a_ki_bar = std::min(ZERO,coef_C[k]); + ext_sum += a_ki_bar; + } else { + if (fcnodes[kcol] >= 0){ + double a_kj_bar = std::min(ZERO,coef_C[k]); + ja_FS[n_FS] = kcol; + coef_FS[n_FS] = a_kj_bar; + n_FS++; + ext_sum += a_kj_bar; + if ( WI[kcol] <= 0 ){ + double value = ZERO; + if (WI[kcol] < 0){ - if (fcnodes[kcol] >= 0){ - - double a_kj_bar = min(ZERO,coef_C[k]); - ja_FS[n_FS] = kcol; - coef_FS[n_FS] = a_kj_bar; - n_FS++; - ext_sum += a_kj_bar; + value = coef_A[-WI[kcol]]; + denom -= value; + } - - - - - - if ( WI[kcol] <= 0 ){ - - - - - - - - - ja_P[ind_P+n_int] = kcol; - coef_P[ind_P+n_int] = ZERO; - WI[kcol] = n_int+1; - n_int++; + ja_P[ind_P+n_int] = kcol; + coef_P[ind_P+n_int] = value; + WI[kcol] = n_int+1; + n_int++; + } } } } + if ( std::abs(ext_sum) > EPS*a_ii ){ + for (int k = 0; k < n_FS; k++){ + int pos = WI[ja_FS[k]]-1; + coef_P[ind_P+pos] += (a_ik*coef_FS[k]) / ext_sum; + } + denom += (a_ik * a_ki_bar) / ext_sum; + } else { + double fac = ONE / (static_cast(n_int+1)); + denom += a_ik*fac; + for (int k = 0; k < n_FS; k++){ + int pos = WI[ja_FS[k]]-1; + coef_P[ind_P+pos] += a_ik*fac; + } - - - - - - - - - - for (int k = 0; k < n_FS; k++){ - int pos = WI[ja_FS[k]]-1; - coef_P[ind_P+pos] += (a_ik*coef_FS[k]) / ext_sum; } - - denom += (a_ik * a_ik_bar) / ext_sum; } } } - - - - - - - - - - - - - - - - for (int i = ind_P; i < ind_P+n_int; i++){ WI[ja_P[i]] = 0; @@ -264,51 +165,15 @@ int TWOLP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_lo } ir_heapsort(&(ja_P[ind_P]),&(coef_P[ind_P]),n_int); - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ind_P += n_int; - - for (int i = 0; i < n_weak; i++) WI[list_weak[i]] = 0; - } - iat_P[nn_P] = ind_P; - } - nt_P = ind_P; - delete [] WI; delete [] ja_FS; delete [] coef_FS; diff --git a/sources/prolong/MEX_EXTI_Prol/ProlStripe_EXTI.cpp b/sources/prolong/MEX_EXTI_Prol/ProlStripe_EXTI.cpp new file mode 100644 index 0000000..0e93684 --- /dev/null +++ b/sources/prolong/MEX_EXTI_Prol/ProlStripe_EXTI.cpp @@ -0,0 +1,229 @@ + +#include +#include +#define DBNODE 000 +#define ENDNODE 200 + +#include +#include +#include +#include + +#include "ir_heapsort.h" + +const double ONE = 1.0; +const double ZERO = 0.0; +const double EPS = numeric_limits::epsilon(); + +int ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_loc, + const int nn_A, const int nt_A, const int ntmax_P, + const int *const iat_A, const int *const ja_A, + const double *const coef_A, const int *const coef_S, + const int *const fcnodes, int &nn_P, int &nt_P, int *iat_P, int *ja_P, + double *coef_P){ + + double avg_nnz = 1.2*static_cast(nt_A) / static_cast(lastrow-firstrow); + int size_scr = static_cast(avg_nnz)*static_cast(avg_nnz); + int *WI = new int [nn_A](); if (WI == NULL) return 1; + fill_n(WI,nn_A,0); + int *ja_CC = new int [size_scr](); if (ja_CC == NULL) return 1; + int *ja_FS = new int [size_scr](); if (ja_FS == NULL) return 1; + int *pos_kj = new int [size_scr](); if (pos_kj == NULL) return 1; + double *coef_CC = new double [size_scr](); if (coef_CC == NULL) return 1; + double *coef_FS = new double [size_scr](); if (coef_FS == NULL) return 1; + double *a_kj = new double [size_scr](); if (a_kj == NULL) return 1; + + nn_P=0; + int ind_P = 0; + iat_P[0] = ind_P; + + + int shift = firstrow - 1; + for (int inod = firstrow; inod < lastrow ; inod ++){ + + int inod_coarse = fcnodes[inod]; + + if (inod_coarse >= 0){ + nn_P++; + + ja_P[ind_P] = inod_coarse; + coef_P[ind_P] = ONE; + ind_P++; + + } else if (inod_coarse == -1) { + nn_P++; + + int n_FS = 0; + int n_CC = 0; + double a_ii; + double denom = ZERO; + for (int j = iat_A[inod]; j < iat_A[inod+1]; j++){ + int jcol = ja_A[j]; + if (jcol == inod){ + + a_ii = coef_A[j]; + denom += a_ii; + } else { + if (coef_S[j] > 0){ + + int jnod = fcnodes[jcol]; + if (jnod >= 0){ + + if (WI[jcol] > 0){ + + + int pos = WI[jcol]-1; + coef_CC[pos] = coef_A[j]; + } else if (WI[jcol] == 0){ + + ja_CC[n_CC] = jcol; + coef_CC[n_CC] = coef_A[j]; + WI[jcol] = n_CC+1; + n_CC++; + } else { + + + WI[jcol] = -WI[jcol]; + } + } else { + + + ja_FS[n_FS] = jcol; + coef_FS[n_FS] = coef_A[j]; + n_FS++; + + for (int k = iat_A[jcol]; k < iat_A[jcol+1]; k++){ + if (coef_S[k] > 0){ + int kcol = ja_A[k]; + if (fcnodes[kcol] >= 0){ + + if (WI[kcol] == 0){ + + ja_CC[n_CC] = kcol; + coef_CC[n_CC] = 0.0; + + WI[kcol] = n_CC+1; + n_CC++; + } else if (WI[kcol] < 0){ + + int pos = -(WI[kcol]+1); + + denom -= coef_CC[pos]; + WI[kcol] = -WI[kcol]; + } + } + } + } + } + } else { + + + if (WI[jcol] == 0){ + denom += coef_A[j]; + + if (fcnodes[jcol] >= 0){ + + ja_CC[n_CC] = jcol; + + coef_CC[n_CC] = coef_A[j]; + WI[jcol] = -(n_CC+1); + n_CC++; + } + } else { + + int pos = WI[jcol] - 1; + coef_CC[pos] = coef_A[j]; + } + } + } + } + + int n_int = 0; + for (int i = 0; i < n_CC; i++){ + int jcol = ja_CC[i]; + if (WI[jcol] > 0){ + + ja_P[ind_P+n_int] = jcol; + coef_P[ind_P+n_int] = coef_CC[i]; + WI[jcol] = n_int+1; + n_int++; + + } else { + + WI[jcol] = 0; + } + } + + for (int k = 0; k < n_FS; k++){ + int knod = ja_FS[k]; + double a_ik = coef_FS[k]; + + double ext_sum = min(ZERO,a_ik); + + + + + int ind = 0; + for (int l = iat_A[knod]; l < iat_A[knod+1]; l++){ + int lcol = ja_A[l]; + if ( WI[lcol] > 0){ + + double a_kl = min(ZERO,coef_A[l]); + + ext_sum += a_kl; + a_kj[ind] = a_kl; + pos_kj[ind] = WI[lcol] - 1; + ind++; + } + } + + if ( abs(ext_sum) > EPS*a_ii ){ + + denom += a_ik*min(ZERO,a_ik) / ext_sum; + + for (int jj = 0; jj < ind; jj++){ + int jpos = pos_kj[jj]; + coef_P[ind_P+jpos] += a_ik*a_kj[jj] / ext_sum; + } + + } else { + + double fac = ONE / (static_cast(n_int+1)); + + denom += a_ik*fac; + + for (int jj = 0; jj < ind; jj++){ + int jpos = pos_kj[jj]; + coef_P[ind_P+jpos] += a_ik*fac; + } + } + } + + for (int i = ind_P; i < ind_P+n_int; i++){ + + WI[ja_P[i]] = 0; + ja_P[i] = fcnodes[ja_P[i]]; + coef_P[i] = -coef_P[i] / denom; + } + ir_heapsort(&(ja_P[ind_P]),&(coef_P[ind_P]),n_int); + + ind_P += n_int; + + } + + iat_P[nn_P] = ind_P; + + } + + nt_P = ind_P; + + delete [] WI; + delete [] ja_CC; + delete [] ja_FS; + delete [] pos_kj; + delete [] coef_CC; + delete [] coef_FS; + delete [] a_kj; + + return 0; +} diff --git a/sources/prolong/MEX_EXTI_Prol/compile.m b/sources/prolong/MEX_EXTI_Prol/compile.m index e96e3a1..39167de 100644 --- a/sources/prolong/MEX_EXTI_Prol/compile.m +++ b/sources/prolong/MEX_EXTI_Prol/compile.m @@ -1,4 +1,4 @@ mex -O cpt_Prolongation_EXTI.cpp EXTI_prolongation.cpp ir_heapsort.cpp... - TWOLP_ProlStripe_EXTI.cpp... + ONELP_ProlStripe_EXTI.cpp... CFLAGS="-I./include/ -std=c++11 -O2 -fopenmp -fPIC" ... LDFLAGS="-std=c++11 -fopenmp -O2" diff --git a/sources/prolong/MEX_EXTI_Prol/include/TWOLP_ProlStripe_EXTI.h b/sources/prolong/MEX_EXTI_Prol/include/ONELP_ProlStripe_EXTI.h similarity index 86% rename from sources/prolong/MEX_EXTI_Prol/include/TWOLP_ProlStripe_EXTI.h rename to sources/prolong/MEX_EXTI_Prol/include/ONELP_ProlStripe_EXTI.h index 63e6872..c18e37a 100644 --- a/sources/prolong/MEX_EXTI_Prol/include/TWOLP_ProlStripe_EXTI.h +++ b/sources/prolong/MEX_EXTI_Prol/include/ONELP_ProlStripe_EXTI.h @@ -1,8 +1,8 @@ -int TWOLP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_loc, +int ONELP_ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_loc, const int nn_A, const int nt_A, const int ntmax_P, const int *const iat_A, const int *const ja_A, const double *const coef_A, const int *const coef_S, const int *const iat_C, const int *const ja_C, - const double *const coef_C, const int *const fcnodes, + const double *const coef_C, const int *const fcnodes, int &nn_P, int &nt_P, int *iat_P, int *ja_P, double *coef_P); diff --git a/sources/prolong/MEX_EXTI_Prol/include/ProlStripe_EXTI.h b/sources/prolong/MEX_EXTI_Prol/include/ProlStripe_EXTI.h new file mode 100644 index 0000000..832edf4 --- /dev/null +++ b/sources/prolong/MEX_EXTI_Prol/include/ProlStripe_EXTI.h @@ -0,0 +1,6 @@ +int ProlStripe_EXTI(const int firstrow, const int lastrow, const int nn_loc, + const int nn_A, const int nt_A, const int ntmax_P, + const int *const iat_A, const int *const ja_A, + const double *const coef_A,const int *const coef_S, + const int *const fcnodes,int &nn_P, int &nt_P, int *iat_P, int *ja_P, + double *coef_P);