/*! @file HyperbolicFunction_GPU.cu
    @brief Compute the hyperbolic term of the governing equations
*/
#include <basic_gpu.h>
#include <arrayfunctions_gpu.h>
#include <mpivars.h>
#include <hypar.h>

static int ReconstructHyperbolic (double*,double*,double*,double*,int,void*,void*,double,int,
                                  int(*)(double*,double*,double*,double*,
                                         double*,double*,int,void*,double));
static int DefaultUpwinding      (double*,double*,double*,double*,
                                  double*,double*,int,void*,double);
/*! Kernel for stage boundary integral calculation */
template <int block_size>
__global__
void StageBoundaryIntegral_kernel(
  int npoints,
  int nvars,
  int sign,
  const double * __restrict__ FluxI,
  double * __restrict__ StageBoundaryIntegral
)
{
  extern __shared__ double smem[];

  int tid       = threadIdx.x;
  int grid_size = block_size * gridDim.x;
  int i         = blockIdx.x * block_size + threadIdx.x;
  int j;
  double tid_sum[GPU_MAX_NVARS] = { 0 };

  /* grid-stride accumulation of per-thread partial sums */
  while (i < npoints) {
    for (j=0; j<nvars; j++) tid_sum[j] += FluxI[i*nvars+j];
    i += grid_size;
  }
  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j];
  __syncthreads();

  /* shared-memory tree reduction within the thread block */
  if (block_size >= 512 && tid < 256) {
    for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+256)*nvars+j];
  }
  __syncthreads();
  if (block_size >= 256 && tid < 128) {
    for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+128)*nvars+j];
  }
  __syncthreads();
  if (block_size >= 128 && tid < 64) {
    for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+64)*nvars+j];
  }
  __syncthreads();

  /* final warp-level reduction using shuffle instructions */
  if (tid < 32) {
    if (block_size >= 64) {
      for (j=0; j<nvars; j++) tid_sum[j] += smem[(tid+32)*nvars+j];
    }
    for (int offset=16; offset>0; offset/=2) {
      for (j=0; j<nvars; j++) {
        tid_sum[j] += __shfl_down_sync(0xffffffff, tid_sum[j], offset);
      }
    }
  }

  /* thread 0 writes out the block-wide sum with the requested sign */
  if (tid == 0) {
    for (j=0; j<nvars; j++) StageBoundaryIntegral[j] = (sign == 1) ? tid_sum[j] : -tid_sum[j];
  }
  return;
}
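
/* Illustrative note (a sketch, not part of the computation): the kernel above is expected to be
   launched with a single thread block and block_size*nvars doubles of dynamic shared memory for
   the per-thread partial sums, exactly as done in gpuHyperbolicFunction() below, e.g.

     StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK>
       <<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>
       (npoints_boundary, nvars, sign, FluxI_boundary, StageBoundaryIntegral);

   where npoints_boundary, sign, FluxI_boundary, and StageBoundaryIntegral stand for the actual
   arguments used at the call sites. */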

#ifdef CUDA_VAR_ORDERDING_AOS

/*! 3D Kernel for gpuHyperbolicFunction() */
__global__
void HyperbolicFunction_dim3_kernel(
  int npoints_grid,
  int d,
  int nvars,
  int ghosts,
  int offset,
  const int * __restrict__ dim,
  const double * __restrict__ FluxI,
  const double * __restrict__ dxinv,
  double * __restrict__ hyp,
  double * __restrict__ FluxI_p1,
  double * __restrict__ FluxI_p2
)
{
  int tx = threadIdx.x + (blockDim.x * blockIdx.x);
  if (tx < npoints_grid) {
    int p, p1, p2, b, v;
    int dim_interface[3];
    int index[3], index1[3], index2[3];

    _ArrayIndexnD_(3,tx,dim,index,0);
    _ArrayCopy1D_(dim,dim_interface,3);
    dim_interface[d] += 1;

    _ArrayCopy1D_(index,index1,3);
    _ArrayCopy1D_(index,index2,3); index2[d]++;
    _ArrayIndex1D_(3,dim          ,index ,ghosts,p);
    _ArrayIndex1D_(3,dim_interface,index1,0     ,p1);
    _ArrayIndex1D_(3,dim_interface,index2,0     ,p2);
    for (v=0; v<nvars; v++) hyp[nvars*p+v] += dxinv[offset+ghosts+index[d]]
                                              * (FluxI[nvars*p2+v]-FluxI[nvars*p1+v]);
    /* at the first/last interior point along d, save the interface flux for the stage boundary integral */
    if (index[d] == 0 || index[d] == dim[d]-1) {
      if (d == 0) {
        b = index[1] + index[2]*dim[1];
      } else if (d == 1) {
        b = index[0] + index[2]*dim[0];
      } else {
        b = index[0] + index[1]*dim[0];
      }
      if (index[d] == 0) {
        for (v=0; v<nvars; v++) FluxI_p1[b*nvars+v] = FluxI[nvars*p1+v];
      } else {
        for (v=0; v<nvars; v++) FluxI_p2[b*nvars+v] = FluxI[nvars*p2+v];
      }
    }
  }
  return;
}

/*! 2D Kernel for gpuHyperbolicFunction() */
__global__
void HyperbolicFunction_dim2_kernel(
  int npoints_grid,
  int d,
  int nvars,
  int ghosts,
  int offset,
  const int * __restrict__ dim,
  const double * __restrict__ FluxI,
  const double * __restrict__ dxinv,
  double * __restrict__ hyp,
  double * __restrict__ FluxI_p1,
  double * __restrict__ FluxI_p2
)
{
  int tx = threadIdx.x + (blockDim.x * blockIdx.x);
  if (tx < npoints_grid) {
    int p, p1, p2, b, v;
    int dim_interface[2];
    int index[2], index1[2], index2[2];

    _ArrayIndexnD_(2,tx,dim,index,0);
    _ArrayCopy1D_(dim,dim_interface,2);
    dim_interface[d] += 1;

    _ArrayCopy1D_(index,index1,2);
    _ArrayCopy1D_(index,index2,2); index2[d]++;
    _ArrayIndex1D_(2,dim          ,index ,ghosts,p);
    _ArrayIndex1D_(2,dim_interface,index1,0     ,p1);
    _ArrayIndex1D_(2,dim_interface,index2,0     ,p2);
    for (v=0; v<nvars; v++) hyp[nvars*p+v] += dxinv[offset+ghosts+index[d]]
                                              * (FluxI[nvars*p2+v]-FluxI[nvars*p1+v]);
    /* at the first/last interior point along d, save the interface flux for the stage boundary integral */
    if (index[d] == 0 || index[d] == dim[d]-1) {
      b = (d == 0) ? index[1] : index[0];
      if (index[d] == 0) {
        for (v=0; v<nvars; v++) FluxI_p1[b*nvars+v] = FluxI[nvars*p1+v];
      } else {
        for (v=0; v<nvars; v++) FluxI_p2[b*nvars+v] = FluxI[nvars*p2+v];
      }
    }
  }
  return;
}

/*! This function computes the hyperbolic term of the governing equations, expressed as follows:
    \f{equation}{
      {\bf F} \left({\bf u}\right) = \sum_{d=0}^{D-1} \frac {\partial {\bf f}_d\left({\bf u}\right)} {\partial x_d}
    \f}
    using a conservative finite-difference discretization in space:
    \f{equation}{
      \hat{\bf F} \left({\bf u}\right) = \sum_{d=0}^{D-1} \frac {1} {\Delta x_d} \left[ \hat{\bf f}_{d,j+1/2} - \hat{\bf f}_{d,j-1/2} \right]
    \f}
    where \f$d\f$ denotes the spatial dimension, \f$D\f$ denotes the total number of spatial dimensions, the hat denotes
    the discretized quantity, and \f$j\f$ is the grid coordinate along dimension \f$d\f$.
    The approximations to the flux function \f${\bf f}_d\f$ at the interfaces \f$j\pm1/2\f$, denoted by \f$\hat{\bf f}_{d,j\pm 1/2}\f$,
    are computed using ReconstructHyperbolic().
*/
extern "C" int gpuHyperbolicFunction(
  double *hyp,      /*!< Array to hold the computed hyperbolic term (same layout as \a u) */
  double *gpu_u,    /*!< Solution array */
  void   *s,        /*!< Solver object of type #HyPar */
  void   *m,        /*!< MPI object of type #MPIVariables */
  double  t,        /*!< Current simulation time */
  int     LimFlag,  /*!< Flag to indicate if the nonlinear coefficients for solution-dependent
                         interpolation method should be recomputed (see ReconstructHyperbolic() for
                         an explanation on why this is needed) */
  /*! Function pointer to the flux function for the hyperbolic term */
  int(*FluxFunction)(double*,double*,int,void*,double),
  /*! Function pointer to the upwinding function for the hyperbolic term */
  int(*UpwindFunction)(double*,double*,double*,double*,double*,double*,int,void*,double)
)
{
  HyPar        *solver = (HyPar*) s;
  MPIVariables *mpi    = (MPIVariables*) m;

  double *gpu_FluxI = solver->fluxI; /* interface flux     */
  double *gpu_FluxC = solver->fluxC; /* cell-centered flux */

  int     ndims  = solver->ndims;
  int     nvars  = solver->nvars;
  int     ghosts = solver->ghosts;
  int    *dim    = solver->dim_local;
  int     size   = solver->npoints_local_wghosts;
  double *gpu_x     = solver->gpu_x;
  double *gpu_dxinv = solver->gpu_dxinv;
  double *gpu_FluxI_p1, *gpu_FluxI_p2;
  int     d, offset = 0;

  LimFlag = (LimFlag && solver->flag_nonlinearinterp && solver->SetInterpLimiterVar);

  gpuMemset(hyp, 0, size*nvars*sizeof(double));
  gpuMemset(solver->StageBoundaryBuffer, 0, solver->StageBoundaryBuffer_size*sizeof(double));
  gpuMemset(solver->StageBoundaryIntegral, 0, 2*ndims*nvars*sizeof(double));

  if (!FluxFunction) return(0); /* zero hyperbolic term */
  /*solver->count_hyp++;*/

  int npoints_grid = 1; for (d = 0; d < ndims; d++) npoints_grid *= dim[d];
  int nblocks = (npoints_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
#if defined(GPU_STAT)
  cudaEvent_t start, stop;
  float milliseconds = 0;

  checkCuda( cudaEventCreate(&start) );
  checkCuda( cudaEventCreate(&stop) );
#endif
  for (d = 0; d < ndims; d++) {
    /* evaluate cell-centered flux */
    FluxFunction(gpu_FluxC,gpu_u,d,solver,t);
    /* compute interface fluxes */
    ReconstructHyperbolic(gpu_FluxI,gpu_FluxC,gpu_u,gpu_x+offset,d,solver,mpi,t,LimFlag,UpwindFunction);

    gpu_FluxI_p1 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]*nvars);
    gpu_FluxI_p2 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]+solver->gpu_npoints_boundary[d])*nvars;

#if defined(GPU_STAT)
    checkCuda( cudaEventRecord(start, 0) );
#endif
    if (ndims == 3) {
      HyperbolicFunction_dim3_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
        npoints_grid, d, nvars, ghosts, offset,
        solver->gpu_dim_local, gpu_FluxI, gpu_dxinv,
        hyp, gpu_FluxI_p1, gpu_FluxI_p2
      );
    } else if (ndims == 2) {
      HyperbolicFunction_dim2_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
        npoints_grid, d, nvars, ghosts, offset,
        solver->gpu_dim_local, gpu_FluxI, gpu_dxinv,
        hyp, gpu_FluxI_p1, gpu_FluxI_p2
      );
    } else {
      fprintf(stderr,"gpuHyperbolicFunction(): Not implemented for ndims = %d!\n", ndims);
      return(1);
    }
    cudaDeviceSynchronize();
#if defined(GPU_STAT)
    checkCuda( cudaEventRecord(stop, 0) );
    checkCuda( cudaEventSynchronize(stop) );
    checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
    printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
           "HyperbolicFunction", milliseconds*1e-3, d, (1e-6*2*npoints_grid*nvars*sizeof(double))/milliseconds);

    checkCuda( cudaEventRecord(start, 0) );
#endif
    StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
      solver->gpu_npoints_boundary[d], nvars, -1, gpu_FluxI_p1,
      solver->StageBoundaryIntegral + 2*d*nvars
    );
    StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
      solver->gpu_npoints_boundary[d], nvars, 1, gpu_FluxI_p2,
      solver->StageBoundaryIntegral + (2*d+1)*nvars
    );
    cudaDeviceSynchronize();
#if defined(GPU_STAT)
    checkCuda( cudaEventRecord(stop, 0) );
    checkCuda( cudaEventSynchronize(stop) );
    checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
    printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
           "StageBoundaryIntegral", milliseconds*1e-3, d, (1e-6*2*solver->gpu_npoints_boundary[d]*nvars*sizeof(double))/milliseconds);
#endif

    offset += dim[d] + 2*ghosts;
  }

  if (solver->flag_ib) gpuArrayBlockMultiply(hyp, solver->gpu_iblank, size, nvars);
#if defined(GPU_STAT)
  checkCuda( cudaEventDestroy(start) );
  checkCuda( cudaEventDestroy(stop) );
#endif

  return(0);
}
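
/* Illustrative usage sketch (not from this file): a time integrator is expected to call
   gpuHyperbolicFunction() once per stage with the physics-provided function pointers, e.g.
   (assuming the members solver->FFunction and solver->Upwind are set by the physical model,
   and hyp_term/u_stage are device arrays with the solver's layout):

     gpuHyperbolicFunction(hyp_term, u_stage, solver, mpi, t, 1,
                           solver->FFunction, solver->Upwind);

   Passing LimFlag = 1 recomputes any solution-dependent interpolation weights; see
   ReconstructHyperbolic() below. */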

/*! This function computes the numerical flux \f$\hat{\bf f}_{j+1/2}\f$ at the interface from the cell-centered
    flux function \f${\bf f}_j\f$. This happens in two steps:

    \b Interpolation: High-order accurate approximations to the flux at the interface \f$j+1/2\f$ are computed from
    the cell-centered flux with left- and right-biased interpolation methods. This is done by the
    #HyPar::InterpolateInterfacesHyp function. This can be expressed as follows:
    \f{eqnarray}{
      \hat{\bf f}^L_{j+1/2} &= \mathcal{I}\left({\bf f}_j,+1\right), \\
      \hat{\bf f}^R_{j+1/2} &= \mathcal{I}\left({\bf f}_j,-1\right),
    \f}
    where the \f$\pm 1\f$ indicates the interpolation bias, and \f$\mathcal{I}\f$ is the interpolation operator
    pointed to by #HyPar::InterpolateInterfacesHyp (see \b src/InterpolationFunctions for all the available operators).
    The interface values of the solution are similarly computed:
    \f{eqnarray}{
      \hat{\bf u}^L_{j+1/2} &= \mathcal{I}\left({\bf u}_j,+1\right), \\
      \hat{\bf u}^R_{j+1/2} &= \mathcal{I}\left({\bf u}_j,-1\right).
    \f}
    The specific choice of \f$\mathcal{I}\f$ is set based on #HyPar::spatial_scheme_hyp.

    \b Upwinding: The final flux at the interface is computed as
    \f{equation}{
      \hat{\bf f}_{j+1/2} = \mathcal{U}\left( \hat{\bf f}^L_{j+1/2}, \hat{\bf f}^R_{j+1/2}, \hat{\bf u}^L_{j+1/2}, \hat{\bf u}^R_{j+1/2} \right),
    \f}
    where \f$\mathcal{U}\f$ denotes the upwinding function UpwindFunction() passed as an argument (if NULL,
    DefaultUpwinding() is used). The upwinding function is specified by the physical model.

    Solution-dependent, nonlinear interpolation methods (such as WENO and CRWENO) are implemented in a way that
    separates the calculation of the nonlinear interpolation weights (based on, say, the smoothness of the flux
    function) and the actual evaluation of the interpolant into different functions. This allows the flexibility
    to choose if and when the nonlinear coefficients are evaluated (or previously computed values are reused).
    Some possible scenarios are:
    + For explicit time integration, they are computed every time the hyperbolic flux term is computed.
    + For implicit time integration, consistency or linearization may require that they be computed and "frozen"
      at the beginning of a time step or stage.

    The argument \b LimFlag controls this behavior:
    + LimFlag = 1 means recompute the nonlinear coefficients.
    + LimFlag = 0 means reuse the previously computed coefficients.
*/
int ReconstructHyperbolic(
  double *gpu_fluxI, /*!< Array to hold the computed interface fluxes. This array does not
                          have ghost points. The dimensions are the same as those of u without
                          ghost points in all dimensions, except along dir, where it is one more */
  double *gpu_fluxC, /*!< Array of the flux function computed at the cell centers
                          (same layout as u) */
  double *gpu_u,     /*!< Solution array */
  double *gpu_x,     /*!< Array of spatial coordinates */
  int     dir,       /*!< Spatial dimension along which to reconstruct the interface fluxes */
  void   *s,         /*!< Solver object of type #HyPar */
  void   *m,         /*!< MPI object of type #MPIVariables */
  double  t,         /*!< Current solution time */
  int     LimFlag,   /*!< Flag to indicate if the nonlinear coefficients for solution-dependent
                          interpolation method should be recomputed */
  /*! Function pointer to the upwinding function for the interface flux computation. If NULL,
      DefaultUpwinding() will be used. */
  int(*UpwindFunction)(double*,double*,double*,double*,
                       double*,double*,int,void*,double)
)
{
  HyPar        *solver = (HyPar*) s;
  MPIVariables *mpi    = (MPIVariables*) m;

  double *gpu_uC    = NULL;
  double *gpu_uL    = solver->uL;
  double *gpu_uR    = solver->uR;
  double *gpu_fluxL = solver->fL;
  double *gpu_fluxR = solver->fR;

  /* precalculate the non-linear interpolation coefficients if required,
     else reuse the weights previously calculated */
  if (LimFlag) solver->SetInterpLimiterVar(gpu_fluxC,gpu_u,gpu_x,dir,solver,mpi);

  /* if defined, calculate the modified u-function to be used for upwinding
     e.g.: used in well-balanced schemes for Euler/Navier-Stokes with gravity
     otherwise, just copy u to uC */
  if (solver->UFunction) {
    gpu_uC = solver->uC;
    solver->UFunction(gpu_uC,gpu_u,dir,solver,mpi,t);
  } else gpu_uC = gpu_u;

  /* Interpolation -> to calculate left- and right-biased interface flux and state variable */
  solver->InterpolateInterfacesHyp(gpu_uL   ,gpu_uC   ,gpu_u,gpu_x, 1,dir,solver,mpi,1);
  solver->InterpolateInterfacesHyp(gpu_uR   ,gpu_uC   ,gpu_u,gpu_x,-1,dir,solver,mpi,1);
  solver->InterpolateInterfacesHyp(gpu_fluxL,gpu_fluxC,gpu_u,gpu_x, 1,dir,solver,mpi,0);
  solver->InterpolateInterfacesHyp(gpu_fluxR,gpu_fluxC,gpu_u,gpu_x,-1,dir,solver,mpi,0);

  /* Upwind -> to calculate the final interface flux */
  if (UpwindFunction) { UpwindFunction   (gpu_fluxI,gpu_fluxL,gpu_fluxR,gpu_uL,gpu_uR,gpu_u,dir,solver,t); }
  else                { DefaultUpwinding (gpu_fluxI,gpu_fluxL,gpu_fluxR,NULL  ,NULL  ,NULL ,dir,solver,t); }

  return(0);
}
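
/* Sketch of the LimFlag usage described above (illustrative, not from this file): an explicit
   time integrator can recompute the nonlinear interpolation weights on every evaluation, while
   an implicit one may compute them once per stage and then reuse the frozen values, e.g.

     ReconstructHyperbolic(fluxI, fluxC, u, x, dir, solver, mpi, t, 1, Upwind);   (recompute weights)
     ReconstructHyperbolic(fluxI, fluxC, u, x, dir, solver, mpi, t, 0, Upwind);   (reuse frozen weights)
*/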

#else

/*! 3D Kernel for gpuHyperbolicFunction() */
__global__
void HyperbolicFunction_dim3_kernel(
  int npoints_grid,
  int npoints_local_wghosts,
  int npoints_dim_interface,
  int d,
  int nvars,
  int ghosts,
  int offset,
  const int * __restrict__ dim,
  const double * __restrict__ FluxI,
  const double * __restrict__ dxinv,
  double * __restrict__ hyp,
  double * __restrict__ FluxI_p1,
  double * __restrict__ FluxI_p2
)
{
  int tx = threadIdx.x + (blockDim.x * blockIdx.x);
  if (tx < npoints_grid) {
    int p, p1, p2, b, v;
    int dim_interface[3];
    int index[3], index1[3], index2[3];

    _ArrayIndexnD_(3,tx,dim,index,0);
    _ArrayCopy1D_(dim,dim_interface,3);
    dim_interface[d] += 1;

    _ArrayCopy1D_(index,index1,3);
    _ArrayCopy1D_(index,index2,3); index2[d]++;
    _ArrayIndex1D_(3,dim          ,index ,ghosts,p);
    _ArrayIndex1D_(3,dim_interface,index1,0     ,p1);
    _ArrayIndex1D_(3,dim_interface,index2,0     ,p2);
    for (v=0; v<nvars; v++) {
      hyp[p+v*npoints_local_wghosts] += dxinv[offset+ghosts+index[d]]
                                        * (FluxI[p2+v*npoints_dim_interface]-FluxI[p1+v*npoints_dim_interface]);
    }
    /* at the first/last interior point along d, save the interface flux for the stage boundary integral */
    if (index[d] == 0 || index[d] == dim[d]-1) {
      if (d == 0) {
        b = index[1] + index[2]*dim[1];
      } else if (d == 1) {
        b = index[0] + index[2]*dim[0];
      } else {
        b = index[0] + index[1]*dim[0];
      }
      if (index[d] == 0) {
        for (v=0; v<nvars; v++) FluxI_p1[b*nvars+v] = FluxI[p1+v*npoints_dim_interface];
      } else {
        for (v=0; v<nvars; v++) FluxI_p2[b*nvars+v] = FluxI[p2+v*npoints_dim_interface];
      }
    }
  }
  return;
}
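
/* Indexing note (illustrative): with CUDA_VAR_ORDERDING_AOS defined, arrays are stored as
   array-of-structs, i.e. component v of grid point p lives at [nvars*p+v]; in this branch the
   storage is struct-of-arrays, so the same entry lives at [p + v*npoints], e.g.

     AoS:  hyp[nvars*p+v]                      FluxI[nvars*p1+v]
     SoA:  hyp[p + v*npoints_local_wghosts]    FluxI[p1 + v*npoints_dim_interface]

   so that consecutive threads (consecutive p) touch consecutive memory locations for each
   component v. */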

/*! This function computes the hyperbolic term of the governing equations, expressed as follows:
    \f{equation}{
      {\bf F} \left({\bf u}\right) = \sum_{d=0}^{D-1} \frac {\partial {\bf f}_d\left({\bf u}\right)} {\partial x_d}
    \f}
    using a conservative finite-difference discretization in space:
    \f{equation}{
      \hat{\bf F} \left({\bf u}\right) = \sum_{d=0}^{D-1} \frac {1} {\Delta x_d} \left[ \hat{\bf f}_{d,j+1/2} - \hat{\bf f}_{d,j-1/2} \right]
    \f}
    where \f$d\f$ denotes the spatial dimension, \f$D\f$ denotes the total number of spatial dimensions, the hat denotes
    the discretized quantity, and \f$j\f$ is the grid coordinate along dimension \f$d\f$.
    The approximations to the flux function \f${\bf f}_d\f$ at the interfaces \f$j\pm1/2\f$, denoted by \f$\hat{\bf f}_{d,j\pm 1/2}\f$,
    are computed using ReconstructHyperbolic().
*/
extern "C" int gpuHyperbolicFunction(
  double *hyp,      /*!< Array to hold the computed hyperbolic term (same layout as \a u) */
  double *u,        /*!< Solution array */
  void   *s,        /*!< Solver object of type #HyPar */
  void   *m,        /*!< MPI object of type #MPIVariables */
  double  t,        /*!< Current simulation time */
  int     LimFlag,  /*!< Flag to indicate if the nonlinear coefficients for solution-dependent
                         interpolation method should be recomputed (see ReconstructHyperbolic() for
                         an explanation on why this is needed) */
  /*! Function pointer to the flux function for the hyperbolic term */
  int(*FluxFunction)(double*,double*,int,void*,double),
  /*! Function pointer to the upwinding function for the hyperbolic term */
  int(*UpwindFunction)(double*,double*,double*,double*,
                       double*,double*,int,void*,double)
)
{
  HyPar        *solver = (HyPar*) s;
  MPIVariables *mpi    = (MPIVariables*) m;

  double *FluxI = solver->fluxI; /* interface flux     */
  double *FluxC = solver->fluxC; /* cell-centered flux */

  int     ndims  = solver->ndims;
  int     nvars  = solver->nvars;
  int     ghosts = solver->ghosts;
  int    *dim    = solver->dim_local;
  int     size   = solver->npoints_local_wghosts;
  double *x     = solver->gpu_x;
  double *dxinv = solver->gpu_dxinv;
  double *FluxI_p1, *FluxI_p2;
  int     d, offset = 0;

  LimFlag = (LimFlag && solver->flag_nonlinearinterp && solver->SetInterpLimiterVar);

  gpuMemset(hyp, 0, size*nvars*sizeof(double));
  gpuMemset(solver->StageBoundaryBuffer, 0, solver->StageBoundaryBuffer_size*sizeof(double));
  gpuMemset(solver->StageBoundaryIntegral, 0, 2*ndims*nvars*sizeof(double));

  if (!FluxFunction) return(0); /* zero hyperbolic term */
  /*solver->count_hyp++;*/

  int npoints_grid = 1; for (d = 0; d < ndims; d++) npoints_grid *= dim[d];
  int nblocks = (npoints_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
#if defined(GPU_STAT)
  cudaEvent_t start, stop;
  float milliseconds = 0;

  checkCuda( cudaEventCreate(&start) );
  checkCuda( cudaEventCreate(&stop) );
#endif
  for (d = 0; d < ndims; d++) {
    int npoints_dim_interface = 1; for (int i=0; i<ndims; i++) npoints_dim_interface *= (i==d) ? (dim[i]+1) : dim[i];
    /* evaluate cell-centered flux */
    FluxFunction(FluxC,u,d,solver,t);
    /* compute interface fluxes */
    ReconstructHyperbolic(FluxI,FluxC,u,x+offset,d,solver,mpi,t,LimFlag,UpwindFunction);

    FluxI_p1 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]*nvars);
    FluxI_p2 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]+solver->gpu_npoints_boundary[d])*nvars;

#if defined(GPU_STAT)
    checkCuda( cudaEventRecord(start, 0) );
#endif
    if (ndims == 3) {
      HyperbolicFunction_dim3_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
        npoints_grid, size, npoints_dim_interface, d, nvars, ghosts, offset,
        solver->gpu_dim_local, FluxI, dxinv,
        hyp, FluxI_p1, FluxI_p2
      );
    } else {
      fprintf(stderr,"gpuHyperbolicFunction(): Not implemented for ndims = %d!\n", ndims);
      return(1);
    }
    cudaDeviceSynchronize();
#if defined(GPU_STAT)
    checkCuda( cudaEventRecord(stop, 0) );
    checkCuda( cudaEventSynchronize(stop) );
    checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
    printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
           "HyperbolicFunction", milliseconds*1e-3, d, (1e-6*2*npoints_grid*nvars*sizeof(double))/milliseconds);

    checkCuda( cudaEventRecord(start, 0) );
#endif
    StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
      solver->gpu_npoints_boundary[d], nvars, -1, FluxI_p1,
      solver->StageBoundaryIntegral + 2*d*nvars
    );
    StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
      solver->gpu_npoints_boundary[d], nvars, 1, FluxI_p2,
      solver->StageBoundaryIntegral + (2*d+1)*nvars
    );
    cudaDeviceSynchronize();
#if defined(GPU_STAT)
    checkCuda( cudaEventRecord(stop, 0) );
    checkCuda( cudaEventSynchronize(stop) );
    checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
    printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
           "StageBoundaryIntegral", milliseconds*1e-3, d, (1e-6*2*solver->gpu_npoints_boundary[d]*nvars*sizeof(double))/milliseconds);
#endif

    offset += dim[d] + 2*ghosts;
  }

  if (solver->flag_ib) gpuArrayBlockMultiply(hyp, solver->gpu_iblank, size, nvars);
#if defined(GPU_STAT)
  checkCuda( cudaEventDestroy(start) );
  checkCuda( cudaEventDestroy(stop) );
#endif

  return(0);
}

/*! This function computes the numerical flux \f$\hat{\bf f}_{j+1/2}\f$ at the interface from the cell-centered
    flux function \f${\bf f}_j\f$. This happens in two steps:

    \b Interpolation: High-order accurate approximations to the flux at the interface \f$j+1/2\f$ are computed from
    the cell-centered flux with left- and right-biased interpolation methods. This is done by the
    #HyPar::InterpolateInterfacesHyp function. This can be expressed as follows:
    \f{eqnarray}{
      \hat{\bf f}^L_{j+1/2} &= \mathcal{I}\left({\bf f}_j,+1\right), \\
      \hat{\bf f}^R_{j+1/2} &= \mathcal{I}\left({\bf f}_j,-1\right),
    \f}
    where the \f$\pm 1\f$ indicates the interpolation bias, and \f$\mathcal{I}\f$ is the interpolation operator
    pointed to by #HyPar::InterpolateInterfacesHyp (see \b src/InterpolationFunctions for all the available operators).
    The interface values of the solution are similarly computed:
    \f{eqnarray}{
      \hat{\bf u}^L_{j+1/2} &= \mathcal{I}\left({\bf u}_j,+1\right), \\
      \hat{\bf u}^R_{j+1/2} &= \mathcal{I}\left({\bf u}_j,-1\right).
    \f}
    The specific choice of \f$\mathcal{I}\f$ is set based on #HyPar::spatial_scheme_hyp.

    \b Upwinding: The final flux at the interface is computed as
    \f{equation}{
      \hat{\bf f}_{j+1/2} = \mathcal{U}\left( \hat{\bf f}^L_{j+1/2}, \hat{\bf f}^R_{j+1/2}, \hat{\bf u}^L_{j+1/2}, \hat{\bf u}^R_{j+1/2} \right),
    \f}
    where \f$\mathcal{U}\f$ denotes the upwinding function UpwindFunction() passed as an argument (if NULL,
    DefaultUpwinding() is used). The upwinding function is specified by the physical model.

    Solution-dependent, nonlinear interpolation methods (such as WENO and CRWENO) are implemented in a way that
    separates the calculation of the nonlinear interpolation weights (based on, say, the smoothness of the flux
    function) and the actual evaluation of the interpolant into different functions. This allows the flexibility
    to choose if and when the nonlinear coefficients are evaluated (or previously computed values are reused).
    Some possible scenarios are:
    + For explicit time integration, they are computed every time the hyperbolic flux term is computed.
    + For implicit time integration, consistency or linearization may require that they be computed and "frozen"
      at the beginning of a time step or stage.

    The argument \b LimFlag controls this behavior:
    + LimFlag = 1 means recompute the nonlinear coefficients.
    + LimFlag = 0 means reuse the previously computed coefficients.
*/
int ReconstructHyperbolic(
  double *fluxI,   /*!< Array to hold the computed interface fluxes. This array does not
                        have ghost points. The dimensions are the same as those of u without
                        ghost points in all dimensions, except along dir, where it is one more */
  double *fluxC,   /*!< Array of the flux function computed at the cell centers
                        (same layout as u) */
  double *u,       /*!< Solution array */
  double *x,       /*!< Array of spatial coordinates */
  int     dir,     /*!< Spatial dimension along which to reconstruct the interface fluxes */
  void   *s,       /*!< Solver object of type #HyPar */
  void   *m,       /*!< MPI object of type #MPIVariables */
  double  t,       /*!< Current solution time */
  int     LimFlag, /*!< Flag to indicate if the nonlinear coefficients for solution-dependent
                        interpolation method should be recomputed */
  /*! Function pointer to the upwinding function for the interface flux computation. If NULL,
      DefaultUpwinding() will be used. */
  int(*UpwindFunction)(double*,double*,double*,double*,
                       double*,double*,int,void*,double)
)
{
  HyPar        *solver = (HyPar*) s;
  MPIVariables *mpi    = (MPIVariables*) m;

  double *uC    = NULL;
  double *uL    = solver->uL;
  double *uR    = solver->uR;
  double *fluxL = solver->fL;
  double *fluxR = solver->fR;

  /* precalculate the non-linear interpolation coefficients if required,
     else reuse the weights previously calculated */
  if (LimFlag) solver->SetInterpLimiterVar(fluxC,u,x,dir,solver,mpi);

  /* if defined, calculate the modified u-function to be used for upwinding
     e.g.: used in well-balanced schemes for Euler/Navier-Stokes with gravity
     otherwise, just copy u to uC */
  if (solver->UFunction) {
    uC = solver->uC;
    solver->UFunction(uC,u,dir,solver,mpi,t);
  } else uC = u;

  /* Interpolation -> to calculate left- and right-biased interface flux and state variable */
  solver->InterpolateInterfacesHyp(uL   ,uC   ,u,x, 1,dir,solver,mpi,1);
  solver->InterpolateInterfacesHyp(uR   ,uC   ,u,x,-1,dir,solver,mpi,1);
  solver->InterpolateInterfacesHyp(fluxL,fluxC,u,x, 1,dir,solver,mpi,0);
  solver->InterpolateInterfacesHyp(fluxR,fluxC,u,x,-1,dir,solver,mpi,0);

  /* Upwind -> to calculate the final interface flux */
  if (UpwindFunction) { UpwindFunction   (fluxI,fluxL,fluxR,uL  ,uR  ,u   ,dir,solver,t); }
  else                { DefaultUpwinding (fluxI,fluxL,fluxR,NULL,NULL,NULL,dir,solver,t); }

  return(0);
}

#endif

/*! If no upwinding scheme is specified, this function defines the "upwind" flux as the
    arithmetic mean of the left- and right-biased fluxes, i.e.,
    \f$\hat{\bf f}_{j+1/2} = \frac{1}{2}\left( \hat{\bf f}^L_{j+1/2} + \hat{\bf f}^R_{j+1/2} \right)\f$. */
int DefaultUpwinding(
  double *fI,  /*!< Computed upwind interface flux */
  double *fL,  /*!< Left-biased reconstructed interface flux */
  double *fR,  /*!< Right-biased reconstructed interface flux */
  double *uL,  /*!< Left-biased reconstructed interface solution */
  double *uR,  /*!< Right-biased reconstructed interface solution */
  double *u,   /*!< Cell-centered solution */
  int     dir, /*!< Spatial dimension */
  void   *s,   /*!< Solver object of type #HyPar */
  double  t    /*!< Current solution time */
)
{
  HyPar *solver = (HyPar*) s;
  int    done;

  int *dim   = solver->dim_local;
  int  ndims = solver->ndims;
  int  nvars = solver->nvars;

  int bounds_outer[ndims], bounds_inter[ndims];
  _ArrayCopy1D_(dim,bounds_outer,ndims); bounds_outer[dir] =  1;
  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;

  done = 0; int index_outer[ndims], index_inter[ndims];
  _ArraySetValue_(index_outer,ndims,0);
  while (!done) {
    _ArrayCopy1D_(index_outer,index_inter,ndims);
    for (index_inter[dir] = 0; index_inter[dir] < bounds_inter[dir]; index_inter[dir]++) {
      int p; _ArrayIndex1D_(ndims,bounds_inter,index_inter,0,p);
      int v; for (v=0; v<nvars; v++) fI[nvars*p+v] = 0.5 * (fL[nvars*p+v]+fR[nvars*p+v]);
    }
    _ArrayIncrementIndex_(ndims,bounds_outer,index_outer,done);
  }

  return(0);
}