HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
Initialize.c
/*! @file Initialize.c
    @brief Initialization: domain partitioning and memory allocations. */

#include <stdio.h>
#include <stdlib.h>
#include <basic.h>
#if defined(HAVE_CUDA)
#include <arrayfunctions_gpu.h>
#else
#include <arrayfunctions.h>
#endif
#include <mpivars.h>
#include <simulation_object.h>
/*! Initialization function: partitions the simulation domain(s) among the
    MPI ranks and allocates the solution, work, and communication arrays
    for each simulation object. Returns 0 on success, nonzero on error. */
int Initialize( void *s,     /*!< Array of simulation objects of type SimulationObject */
                int   nsims  /*!< Number of simulation objects */
              )
{
  SimulationObject* simobj = (SimulationObject*) s;
  int i, d, n;

  if (nsims == 0) {
    return 1;
  }

#if defined(HAVE_CUDA)
  if (simobj[0].solver.use_gpu && (simobj[0].solver.gpu_device_no >= 0)) {
    gpuSetDevice(simobj[0].solver.gpu_device_no);
  }
#endif

  if (!simobj[0].mpi.rank) printf("Partitioning domain and allocating data arrays.\n");

  for (n = 0; n < nsims; n++) {

    /* this is a full initialization, not a barebones one */
    simobj[n].is_barebones = 0;

    /* allocations for arrays indexed by spatial dimension */
    simobj[n].mpi.ip            = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
    simobj[n].mpi.is            = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
    simobj[n].mpi.ie            = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
    simobj[n].mpi.bcperiodic    = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
    simobj[n].solver.dim_local  = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
    simobj[n].solver.isPeriodic = (int*) calloc(simobj[n].solver.ndims, sizeof(int));

#if defined(HAVE_CUDA)
    simobj[n].mpi.wctime = 0;
    simobj[n].mpi.wctime_total = 0;
#endif

#ifndef serial
    _DECLARE_IERR_;

    /* Domain partitioning */
    int total_proc = 1;
    for (i = 0; i < simobj[n].solver.ndims; i++) total_proc *= simobj[n].mpi.iproc[i];
    if (simobj[n].mpi.nproc != total_proc) {
      fprintf(stderr, "Error on rank %d: the total number of MPI processes is not consistent ",
              simobj[n].mpi.rank);
      fprintf(stderr, "with the number of processes along each dimension");
      if (nsims > 1) fprintf(stderr, " for domain %d", n);
      fprintf(stderr, ": mpiexec was called with %d processes, ", simobj[n].mpi.nproc);
      fprintf(stderr, "while the total specified in \"solver.inp\" is %d.\n", total_proc);
      return 1;
    }
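    /* Consistency sketch with assumed (not HyPar-supplied) numbers: a 2D run
       whose "solver.inp" specifies iproc = {4, 2} needs total_proc = 4*2 = 8
       ranks, i.e. it must be launched as "mpiexec -n 8 <executable>"; any
       other rank count triggers the error above. */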

    /* calculate the ndims-D rank of each process (ip[]) from its rank in MPI_COMM_WORLD */
    IERR MPIRanknD( simobj[n].solver.ndims,
                    simobj[n].mpi.rank,
                    simobj[n].mpi.iproc,
                    simobj[n].mpi.ip ); CHECKERR(ierr);

    /* calculate local domain sizes along each dimension */
    for (i = 0; i < simobj[n].solver.ndims; i++) {
      simobj[n].solver.dim_local[i] = MPIPartition1D( simobj[n].solver.dim_global[i],
                                                      simobj[n].mpi.iproc[i],
                                                      simobj[n].mpi.ip[i] );
    }
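    /* Partitioning sketch with assumed numbers: dim_global[i] = 10 points
       split across iproc[i] = 3 ranks yields local sizes of roughly 10/3
       each, e.g. {4, 3, 3}, with the exact placement of the remainder
       point decided inside MPIPartition1D. */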

    /* calculate local domain limits in terms of the global domain */
    IERR MPILocalDomainLimits( simobj[n].solver.ndims,
                               simobj[n].mpi.rank,
                               &(simobj[n].mpi),
                               simobj[n].solver.dim_global,
                               simobj[n].mpi.is,
                               simobj[n].mpi.ie );
    CHECKERR(ierr);

    /* create sub-communicators for parallel computations along grid lines in each dimension */
    IERR MPICreateCommunicators(simobj[n].solver.ndims, &(simobj[n].mpi)); CHECKERR(ierr);

    /* initialize periodic BC flags to zero */
    for (i = 0; i < simobj[n].solver.ndims; i++) simobj[n].mpi.bcperiodic[i] = 0;

    /* create communication groups for file I/O */
    IERR MPICreateIOGroups(&(simobj[n].mpi)); CHECKERR(ierr);

#else

    /* serial build: a single process owns the entire domain */
    for (i = 0; i < simobj[n].solver.ndims; i++) {
      simobj[n].mpi.ip[i]           = 0;
      simobj[n].solver.dim_local[i] = simobj[n].solver.dim_global[i];
      simobj[n].mpi.iproc[i]        = 1;
      simobj[n].mpi.is[i]           = 0;
      simobj[n].mpi.ie[i]           = simobj[n].solver.dim_local[i];
      simobj[n].mpi.bcperiodic[i]   = 0;
    }

#endif

    /* count grid points: global, local, and local including ghosts */
    simobj[n].solver.npoints_global
      = simobj[n].solver.npoints_local
      = simobj[n].solver.npoints_local_wghosts
      = 1;
    for (i = 0; i < simobj[n].solver.ndims; i++) {
      simobj[n].solver.npoints_global        *= simobj[n].solver.dim_global[i];
      simobj[n].solver.npoints_local         *= simobj[n].solver.dim_local[i];
      simobj[n].solver.npoints_local_wghosts *= (simobj[n].solver.dim_local[i]+2*simobj[n].solver.ghosts);
    }
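    /* Point-count sketch with assumed numbers: a 2D local block of 100x50
       points with ghosts = 3 gives npoints_local = 5000 and
       npoints_local_wghosts = (100+6)*(50+6) = 5936. */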

    /* allocate index and stride arrays */
    simobj[n].solver.index                 = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
    simobj[n].solver.stride_with_ghosts    = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
    simobj[n].solver.stride_without_ghosts = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
    int accu1 = 1, accu2 = 1;
    for (i = 0; i < simobj[n].solver.ndims; i++) {
      simobj[n].solver.stride_with_ghosts[i]    = accu1;
      simobj[n].solver.stride_without_ghosts[i] = accu2;
      accu1 *= (simobj[n].solver.dim_local[i]+2*simobj[n].solver.ghosts);
      accu2 *= simobj[n].solver.dim_local[i];
    }
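    /* Indexing sketch (dimension 0 varies fastest): for assumed
       dim_local = {nx, ny} and g ghost points per side,
       stride_with_ghosts = {1, nx+2*g}, so interior point (i, j) lives at
       1D offset (i+g) + (j+g)*(nx+2*g) in arrays allocated with ghosts. */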

#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      gpuMalloc((void**)&simobj[n].solver.gpu_dim_local, simobj[n].solver.ndims*sizeof(int));
      gpuMemcpy( simobj[n].solver.gpu_dim_local,
                 simobj[n].solver.dim_local,
                 simobj[n].solver.ndims*sizeof(int),
                 gpuMemcpyHostToDevice );
    }
#endif

    /* state variable array (nvars unknowns at each point, including ghosts) */
    int size = 1;
    for (i = 0; i < simobj[n].solver.ndims; i++) {
      size *= (simobj[n].solver.dim_local[i]+2*simobj[n].solver.ghosts);
    }
    simobj[n].solver.ndof_cells_wghosts = simobj[n].solver.nvars*size;
    simobj[n].solver.u = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      gpuMalloc((void**)&simobj[n].solver.gpu_u, simobj[n].solver.nvars*size*sizeof(double));
      gpuMemset(simobj[n].solver.gpu_u, 0, simobj[n].solver.nvars*size*sizeof(double));
    }
#endif
#ifdef with_petsc
    if (simobj[n].solver.use_petscTS) {
      simobj[n].solver.u0     = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
      simobj[n].solver.uref   = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
      simobj[n].solver.rhsref = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
      simobj[n].solver.rhs    = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
    } else {
      simobj[n].solver.u0 = simobj[n].solver.uref = simobj[n].solver.rhsref = simobj[n].solver.rhs = NULL;
    }
#endif
#ifdef with_librom
    simobj[n].solver.u_rom_predicted = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
#endif

    /* arrays for the hyperbolic, parabolic, and source terms */
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      gpuMalloc((void**)&simobj[n].solver.hyp,    simobj[n].solver.nvars*size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.par,    simobj[n].solver.nvars*size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.source, simobj[n].solver.nvars*size*sizeof(double));
      gpuMemset(simobj[n].solver.hyp,    0, simobj[n].solver.nvars*size*sizeof(double));
      gpuMemset(simobj[n].solver.par,    0, simobj[n].solver.nvars*size*sizeof(double));
      gpuMemset(simobj[n].solver.source, 0, simobj[n].solver.nvars*size*sizeof(double));
    } else {
#endif
      simobj[n].solver.hyp    = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
      simobj[n].solver.par    = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
      simobj[n].solver.source = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
#if defined(HAVE_CUDA)
    }
#endif

    /* blanking array (set to 1 at all grid points below) */
    simobj[n].solver.iblank = (double*) calloc(size, sizeof(double));
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      gpuMalloc((void**)&simobj[n].solver.gpu_iblank, size*sizeof(double));
      gpuMemset(simobj[n].solver.gpu_iblank, 0, size*sizeof(double));
    }
#endif

    /* grid coordinate and inverse-spacing arrays */
    size = 0;
    for (i = 0; i < simobj[n].solver.ndims; i++) {
      size += (simobj[n].solver.dim_local[i]+2*simobj[n].solver.ghosts);
    }
    simobj[n].solver.x      = (double*) calloc(size, sizeof(double));
    simobj[n].solver.dxinv  = (double*) calloc(size, sizeof(double));
    simobj[n].solver.size_x = size;
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      gpuMalloc((void**)&simobj[n].solver.gpu_x,     size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.gpu_dxinv, size*sizeof(double));
      gpuMemset(simobj[n].solver.gpu_x,     0, size*sizeof(double));
      gpuMemset(simobj[n].solver.gpu_dxinv, 0, size*sizeof(double));
    }
#endif
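    /* Note the sum rather than a product: the grid is Cartesian, so x holds
       the 1D coordinate arrays of all dimensions concatenated. With the
       assumed dim_local = {100, 50} and ghosts = 3 from above,
       size_x = 106 + 56 = 162. */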

    /* cell-centered arrays needed to compute fluxes */
    size = 1;
    for (i = 0; i < simobj[n].solver.ndims; i++) {
      size *= (simobj[n].solver.dim_local[i]+2*simobj[n].solver.ghosts);
    }
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      gpuMalloc((void**)&simobj[n].solver.fluxC,  simobj[n].solver.nvars*size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.uC,     simobj[n].solver.nvars*size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.Deriv1, simobj[n].solver.nvars*size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.Deriv2, simobj[n].solver.nvars*size*sizeof(double));
      gpuMemset(simobj[n].solver.fluxC,  0, simobj[n].solver.nvars*size*sizeof(double));
      gpuMemset(simobj[n].solver.uC,     0, simobj[n].solver.nvars*size*sizeof(double));
      gpuMemset(simobj[n].solver.Deriv1, 0, simobj[n].solver.nvars*size*sizeof(double));
      gpuMemset(simobj[n].solver.Deriv2, 0, simobj[n].solver.nvars*size*sizeof(double));
    } else {
#endif
      simobj[n].solver.uC     = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
      simobj[n].solver.fluxC  = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
      simobj[n].solver.Deriv1 = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
      simobj[n].solver.Deriv2 = (double*) calloc(simobj[n].solver.nvars*size, sizeof(double));
#if defined(HAVE_CUDA)
    }
#endif

    /* node-centered (interface) arrays needed to compute fluxes */
    size = 1; for (i = 0; i < simobj[n].solver.ndims; i++) size *= (simobj[n].solver.dim_local[i]+1);
    size *= simobj[n].solver.nvars;
    simobj[n].solver.ndof_nodes = size;
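    /* Interface-count sketch: along a dimension with nx local points there
       are nx+1 interfaces (hence the dim_local[i]+1 factors); no ghost
       interfaces are stored, and nvars unknowns sit at each interface. */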
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      gpuMalloc((void**)&simobj[n].solver.fluxI, size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.uL,    size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.uR,    size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.fL,    size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.fR,    size*sizeof(double));
      gpuMemset(simobj[n].solver.fluxI, 0, size*sizeof(double));
      gpuMemset(simobj[n].solver.uL,    0, size*sizeof(double));
      gpuMemset(simobj[n].solver.uR,    0, size*sizeof(double));
      gpuMemset(simobj[n].solver.fL,    0, size*sizeof(double));
      gpuMemset(simobj[n].solver.fR,    0, size*sizeof(double));
    } else {
#endif
      simobj[n].solver.fluxI = (double*) calloc(size, sizeof(double));
      simobj[n].solver.uL    = (double*) calloc(size, sizeof(double));
      simobj[n].solver.uR    = (double*) calloc(size, sizeof(double));
      simobj[n].solver.fL    = (double*) calloc(size, sizeof(double));
      simobj[n].solver.fR    = (double*) calloc(size, sizeof(double));
#if defined(HAVE_CUDA)
    }
#endif

    /* allocate MPI send/receive buffer arrays */
    int bufdim[simobj[n].solver.ndims], maxbuf = 0;
    for (d = 0; d < simobj[n].solver.ndims; d++) {
      bufdim[d] = 1;
      for (i = 0; i < simobj[n].solver.ndims; i++) {
        if (i == d) bufdim[d] *= simobj[n].solver.ghosts;
        else        bufdim[d] *= simobj[n].solver.dim_local[i];
      }
      if (bufdim[d] > maxbuf) maxbuf = bufdim[d];
    }
    maxbuf *= (simobj[n].solver.nvars*simobj[n].solver.ndims);
    simobj[n].mpi.maxbuf  = maxbuf;
    simobj[n].mpi.sendbuf = (double*) calloc(2*simobj[n].solver.ndims*maxbuf, sizeof(double));
    simobj[n].mpi.recvbuf = (double*) calloc(2*simobj[n].solver.ndims*maxbuf, sizeof(double));
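    /* Buffer-size sketch with the assumed 2D block of 100x50 points and
       ghosts = 3: bufdim = {3*50, 100*3} = {150, 300}, so maxbuf =
       300*nvars*2, and the send/receive buffers each provide one slot of
       maxbuf doubles per face (2*ndims = 4 faces). */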
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      simobj[n].mpi.cpu_dim = (int*) calloc(simobj[n].solver.ndims, sizeof(int));
      _ArrayCopy1D_(simobj[n].solver.dim_local, simobj[n].mpi.cpu_dim, simobj[n].solver.ndims);
      gpuMalloc((void**)&simobj[n].mpi.gpu_sendbuf, 2*simobj[n].solver.ndims*simobj[n].mpi.maxbuf*sizeof(double));
      gpuMalloc((void**)&simobj[n].mpi.gpu_recvbuf, 2*simobj[n].solver.ndims*simobj[n].mpi.maxbuf*sizeof(double));
      gpuMemset(simobj[n].mpi.gpu_sendbuf, 0, 2*simobj[n].solver.ndims*simobj[n].mpi.maxbuf*sizeof(double));
      gpuMemset(simobj[n].mpi.gpu_recvbuf, 0, 2*simobj[n].solver.ndims*simobj[n].mpi.maxbuf*sizeof(double));
    }
#endif

    /* allocate the volume and boundary integral arrays */
    simobj[n].solver.VolumeIntegral        = (double*) calloc(simobj[n].solver.nvars, sizeof(double));
    simobj[n].solver.VolumeIntegralInitial = (double*) calloc(simobj[n].solver.nvars, sizeof(double));
    simobj[n].solver.TotalBoundaryIntegral = (double*) calloc(simobj[n].solver.nvars, sizeof(double));
    simobj[n].solver.ConservationError     = (double*) calloc(simobj[n].solver.nvars, sizeof(double));
    for (i = 0; i < simobj[n].solver.nvars; i++) simobj[n].solver.ConservationError[i] = -1;
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      int total_offset = 0;
      for (d = 0; d < simobj[n].solver.ndims; d++) {
        simobj[n].solver.gpu_npoints_boundary_offset[d] = total_offset;
        simobj[n].solver.gpu_npoints_boundary[d] = 1;
        for (i = 0; i < simobj[n].solver.ndims; i++) {
          if (i != d) simobj[n].solver.gpu_npoints_boundary[d] *= simobj[n].solver.dim_local[i];
        }
        total_offset += 2*simobj[n].solver.gpu_npoints_boundary[d];
      }
      simobj[n].solver.StageBoundaryBuffer_size = (total_offset*simobj[n].solver.nvars);
      gpuMalloc((void**)&simobj[n].solver.StageBoundaryBuffer, simobj[n].solver.StageBoundaryBuffer_size*sizeof(double));
      gpuMemset(simobj[n].solver.StageBoundaryBuffer, 0, simobj[n].solver.StageBoundaryBuffer_size*sizeof(double));

      size = 2*simobj[n].solver.ndims*simobj[n].solver.nvars;
      gpuMalloc((void**)&simobj[n].solver.StageBoundaryIntegral, size*sizeof(double));
      gpuMalloc((void**)&simobj[n].solver.StepBoundaryIntegral,  size*sizeof(double));
      gpuMemset(simobj[n].solver.StageBoundaryIntegral, 0, size*sizeof(double));
      gpuMemset(simobj[n].solver.StepBoundaryIntegral,  0, size*sizeof(double));
    } else {
#endif
      simobj[n].solver.StageBoundaryIntegral = (double*) calloc(2*simobj[n].solver.ndims*simobj[n].solver.nvars, sizeof(double));
      simobj[n].solver.StepBoundaryIntegral  = (double*) calloc(2*simobj[n].solver.ndims*simobj[n].solver.nvars, sizeof(double));
#if defined(HAVE_CUDA)
    }
#endif
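    /* Face-count sketch for the same assumed 2D block of 100x50 points:
       gpu_npoints_boundary = {50, 100} (points on a face normal to each
       dimension), and total_offset accumulates both faces per dimension:
       2*50 + 2*100 = 300. */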

    /* initialize function call counts to zero */
    simobj[n].solver.count_hyp
      = simobj[n].solver.count_par
      = simobj[n].solver.count_sou
      = 0;
#ifdef with_petsc
    simobj[n].solver.count_RHSFunction
      = simobj[n].solver.count_IFunction
      = simobj[n].solver.count_IJacobian
      = simobj[n].solver.count_IJacFunction
      = 0;
#endif

    /* initialize iblank to 1 (no grid points blanked out by default) */
    _ArraySetValue_(simobj[n].solver.iblank, simobj[n].solver.npoints_local_wghosts, 1);
#if defined(HAVE_CUDA)
    if (simobj[n].solver.use_gpu) {
      gpuArraySetValue(simobj[n].solver.gpu_iblank, simobj[n].solver.npoints_local_wghosts, 1.0);
    }
#endif

  }

  return 0;
}
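/* Usage sketch (hypothetical driver code, not part of this file):
   Initialize() is called once per run, after the SimulationObject array has
   been populated from the input files and MPI has been set up, e.g.:

     SimulationObject* sim;   // allocated and read in elsewhere
     int               nsims; // number of simulation domains

     if (Initialize((void*)sim, nsims)) {
       // a nonzero return indicates an error, e.g. an inconsistent
       // process count in "solver.inp"
     }
*/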
Definition: hypar.h:399