-
Notifications
You must be signed in to change notification settings - Fork 6
/
mem.c
139 lines (100 loc) · 4.23 KB
/
mem.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include <fftw3-mpi.h>
#include "fof.h"
#include "mem.h"
#include "msg.h"
#include "comm.h"
/*
 * Allocate a Particles container together with its particle and force arrays.
 *
 *   nc               particles per dimension; np_total is set to nc^3
 *   nx               capacity is sized for nc*nc*(nx+1) particles
 *                    (presumably nx is the number of local x-slabs -- confirm
 *                    against the caller)
 *   np_alloc_factor  multiplier applied to nc*nc*(nx+1) to get the capacity
 *
 * Returns the newly allocated container. Any allocation failure is reported
 * through msg_abort().
 */
Particles* allocate_particles(const int nc, const int nx, const double np_alloc_factor)
{
  Particles* particles= malloc(sizeof *particles);
  // The container itself must be checked before any field is written.
  if(particles == NULL)
    msg_abort(0010, "Error: Failed to allocate memory for particles\n");

  const int np_alloc= (int)(np_alloc_factor*nc*nc*(nx+1));

  particles->p= malloc(sizeof(Particle)*np_alloc);
  if(particles->p == NULL)
    msg_abort(0010, "Error: Failed to allocate memory for particles\n");

  // Three float force components per particle.
  particles->force= malloc(sizeof(float)*3*np_alloc);
  if(particles->force == NULL)
    msg_abort(0010, "Error: Failed to allocate memory for particle forces\n");

  // The byte count is a size_t; cast it so the argument matches "%d".
  msg_printf(info, "%d Mbytes allocated for %d particles (alloc_factor= %.2lf)\n",
             (int)((sizeof(Particle)+3*sizeof(float))*np_alloc/(1024*1024)),
             np_alloc, np_alloc_factor);

  particles->np_allocated= np_alloc;
  // Cast first so the cube is evaluated in long long, not int.
  particles->np_total= (long long) nc*nc*nc;

  const int nnode= comm_nnode();
  particles->np_average= (float)(pow((double) nc, 3) / nnode);

  return particles;
}
Snapshot* allocate_snapshot(const int nc, const int nx, const int np_alloc, void* const mem, const size_t mem_size)
{
Snapshot* snapshot= malloc(sizeof(Snapshot));
snapshot->np_allocated= np_alloc;
long long nc_long= nc;
snapshot->np_total= nc_long*nc_long*nc_long;
snapshot->p= mem; assert(mem_size >= sizeof(ParticleMinimum)*np_alloc);
snapshot->nc= nc;
snapshot->a= 0.0f;
return snapshot;
}
/*
 * Allocate the two work buffers stored in *mem.
 *
 *   mem1  sized as the maximum of: 12 LPT grids, one PM grid, and the
 *         amount reported by fof_calc_memory() for the FoF halo finder.
 *   mem2  sized as the maximum of: the PM delta_k mesh and a snapshot of
 *         np_alloc ParticleMinimum entries.
 *
 *   nc               particles per dimension
 *   nc_factor        PM mesh is nc_factor*nc cells per dimension
 *   np_alloc_factor  multiplier applied to nc*nc*(local_nx+1) for the
 *                    particle capacity used in the FoF / snapshot sizing
 *   mem              receives mem1/size1 and mem2/size2 (sizes in bytes)
 *
 * Allocation failures are reported through msg_abort().
 */
void allocate_shared_memory(const int nc, const int nc_factor, const double np_alloc_factor, Memory* const mem)
{
  // Allocate shared memory

  // mem1
  // for 2LPT grids / PM density grid / FoF kdtree

  // Memory for 2LPT: 12 grids of the local FFTW slab size
  ptrdiff_t local_nx, local_x_start;
  ptrdiff_t size_lpt_one=
    fftwf_mpi_local_size_3d(nc, nc, nc/2+1, MPI_COMM_WORLD,
                            &local_nx, &local_x_start);
  ptrdiff_t ncomplex_lpt= 12*size_lpt_one;

  // Must be computed here: local_nx is overwritten by the PM query below.
  const int np_alloc= (int)(np_alloc_factor*nc*nc*(local_nx+1));

  msg_printf(verbose, "%d Mbytes requested for LPT\n",
             (int)(ncomplex_lpt*sizeof(fftwf_complex)/(1024*1024)));

  // Memory for PM (nc_factor^3 * np_local each)
  const int Ngrid= nc_factor*nc;
  ptrdiff_t local_ny, local_y_start;
  ptrdiff_t size_pm_one=
    fftwf_mpi_local_size_3d_transposed(Ngrid, Ngrid, Ngrid/2+1, MPI_COMM_WORLD,
                                       &local_nx, &local_x_start, &local_ny, &local_y_start);
  ptrdiff_t ncomplex_pm= size_pm_one;

  msg_printf(verbose, "%d Mbytes requested for one PM grid\n",
             (int)(ncomplex_pm*sizeof(fftwf_complex)/(1024*1024)));

  // These are ptrdiff_t values and may exceed int; print them as long long.
  msg_printf(verbose, "PM size %lld %lld %lld\n",
             (long long)size_pm_one,
             (long long)(local_nx*Ngrid*Ngrid),
             (long long)local_nx);

  ptrdiff_t ncomplex1= ncomplex_lpt > ncomplex_pm ? ncomplex_lpt : ncomplex_pm;
  size_t size1= sizeof(fftwf_complex)*ncomplex1;

  // Memory for FoF halo finder
  size_t size_fof= fof_calc_memory(np_alloc, nc);
  msg_printf(verbose, "%d Mbytes requested for FoF\n",
             (int)(size_fof/(1024*1024)));

  if(size_fof > size1) {
    // Round up to a whole number of complex elements.
    ncomplex1= size_fof/sizeof(fftwf_complex) + 1;
    size1= size_fof;
  }

  mem->mem1= fftwf_alloc_complex(ncomplex1);
  mem->size1= sizeof(fftwf_complex)*ncomplex1;

  // Note: the leading zero makes the error code an octal literal.
  if(mem->mem1 == NULL)
    msg_abort(0050, "Error: Unable to allocate %d Mbytes for mem1\n",
              (int)(mem->size1/(1024*1024)));

  // mem2
  // for PM density_k mesh and snapshot
  // Widen to size_t before multiplying so the product is not formed in int.
  size_t ncomplex2= (size_t)(Ngrid/2+1)*Ngrid*local_ny; //ncomplex_pm;
  size_t size2= sizeof(fftwf_complex)*ncomplex2;

  // local_ny is a ptrdiff_t; cast it so the argument matches "%d".
  msg_printf(verbose, "%d Mbytes requested for delta_k mesh (mem2). ny=%d\n",
             (int)(size2/(1024*1024)), (int)local_ny);

  size_t size_snapshot= sizeof(ParticleMinimum)*np_alloc;
  if(size_snapshot > size2) {
    msg_printf(verbose, "%d Mbytes requested for snapshot in mem2\n",
               (int)(size_snapshot/(1024*1024)));
    size2= size_snapshot;
    // Round up to a whole number of complex elements.
    ncomplex2= size_snapshot/sizeof(fftwf_complex) + 1;
  }

  mem->mem2= fftwf_alloc_complex(ncomplex2);
  mem->size2= sizeof(fftwf_complex)*ncomplex2;

  if(mem->mem2 == NULL)
    msg_abort(0060, "Error: Unable to allocate %d + %d Mbytes for mem1&2.\n",
              (int)(mem->size1/(1024*1024)), (int)(mem->size2/(1024*1024)));

  msg_printf(info, "%d Mbytes allocated for mem1.\n",
             (int)(mem->size1/(1024*1024)));
  msg_printf(info, "%d Mbytes allocated for mem2.\n",
             (int)(mem->size2/(1024*1024)));
}