HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
HyperbolicFunction_GPU.cu File Reference

Compute the hyperbolic term of the governing equations. More...

#include <arrayfunctions_gpu.h>
#include <basic_gpu.h>
#include <mpivars.h>
#include <hypar.h>

Go to the source code of this file.

Functions

static int ReconstructHyperbolic (double *, double *, double *, double *, int, void *, void *, double, int, int(*)(double *, double *, double *, double *, double *, double *, int, void *, double))
 
static int DefaultUpwinding (double *, double *, double *, double *, double *, double *, int, void *, double)
 
template<int block_size>
__global__ void StageBoundaryIntegral_kernel (int n, int nvars, int sign, const double *__restrict__ FluxI, double *__restrict__ StageBoundaryIntegral)
 
__global__ void HyperbolicFunction_dim3_kernel (int npoints_grid, int npoints_local_wghosts, int npoints_dim_interface, int d, int nvars, int ghosts, int offset, const int *__restrict__ dim, const double *__restrict__ FluxI, const double *__restrict__ dxinv, double *__restrict__ hyp, double *__restrict__ FluxI_p1, double *__restrict__ FluxI_p2)
 
int gpuHyperbolicFunction (double *hyp, double *u, void *s, void *m, double t, int LimFlag, int(*FluxFunction)(double *, double *, int, void *, double), int(*UpwindFunction)(double *, double *, double *, double *, double *, double *, int, void *, double))
 

Detailed Description

Compute the hyperbolic term of the governing equations.

Author
Youngdae Kim

Definition in file HyperbolicFunction_GPU.cu.

Function Documentation

int ReconstructHyperbolic ( double *  fluxI,
double *  fluxC,
double *  u,
double *  x,
int  dir,
void *  s,
void *  m,
double  t,
int  LimFlag,
int(*)(double *, double *, double *, double *, double *, double *, int, void *, double)  UpwindFunction 
)
static

This function computes the numerical flux \(\hat{\bf f}_{j+1/2}\) at the interface from the cell-centered flux function \({\bf f}_j\). This happens in two steps:-

Interpolation: High-order accurate approximations to the flux at the interface \(j+1/2\) are computed from the cell-centered flux with left- and right-biased interpolation methods. This is done by the HyPar::InterpolateInterfacesHyp function. This can be expressed as follows:

\begin{align} \hat{\bf f}^L_{j+1/2} &= \mathcal{I}\left({\bf f}_j,+1\right), \\ \hat{\bf f}^R_{j+1/2} &= \mathcal{I}\left({\bf f}_j,-1\right), \end{align}

where the \(\pm 1\) indicates the interpolation bias, and \(\mathcal{I}\) is the interpolation operator pointed to by HyPar::InterpolateInterfacesHyp (see src/InterpolationFunctions for all the available operators). The interface values of the solution are similarly computed:

\begin{align} \hat{\bf u}^L_{j+1/2} &= \mathcal{I}\left({\bf u}_j,+1\right), \\ \hat{\bf u}^R_{j+1/2} &= \mathcal{I}\left({\bf u}_j,-1\right), \end{align}

The specific choice of \(\mathcal{I}\) is set based on HyPar::spatial_scheme_hyp.

Upwinding: The final flux at the interface is computed as

\begin{equation} \hat{\bf f}_{j+1/2} = \mathcal{U}\left( \hat{\bf f}^L_{j+1/2}, \hat{\bf f}^R_{j+1/2}, \hat{\bf u}^L_{j+1/2}, \hat{\bf u}^R_{j+1/2} \right), \end{equation}

where \(\mathcal{U}\) denotes the upwinding function UpwindFunction() passed as an argument (if NULL, DefaultUpwinding() is used). The upwinding function is specified by the physical model.

Note: Solution-dependent, nonlinear interpolation methods (such as WENO, CRWENO) are implemented in a way that separates the calculation of the nonlinear interpolation weights (based on, say, the smoothness of the flux function), and the actual evaluation of the interpolant, into different functions. This allows the flexibility to choose if and when the nonlinear coefficients are evaluated (or previously computed values are reused). Some possible scenarios are:

  • For explicit time integration, they are computed every time the hyperbolic flux term is being computed.
  • For implicit time integration, consistency or linearization may require that they be computed and "frozen" at the beginning of a time step or stage.

The argument LimFlag controls this behavior:

  • LimFlag = 1 means recompute the nonlinear coefficients.
  • LimFlag = 0 means reuse the the previously computed coefficients.
Parameters
fluxIArray to hold the computed interface fluxes. This array does not have ghost points. The dimensions are the same as those of u without ghost points in all dimensions, except along dir, where it is one more
fluxCArray of the flux function computed at the cell centers (same layout as u)
uSolution array
xArray of spatial coordinates
dirSpatial dimension along which to reconstruct the interface fluxes
sSolver object of type HyPar
mMPI object of type MPIVariables
tCurrent solution time
LimFlagFlag to indicate if the nonlinear coefficients for solution-dependent interpolation method should be recomputed
UpwindFunctionFunction pointer to the upwinding function for the interface flux computation. If NULL, DefaultUpwinding() will be used.

Definition at line 629 of file HyperbolicFunction_GPU.cu.

648 {
649  HyPar *solver = (HyPar*) s;
650  MPIVariables *mpi = (MPIVariables*) m;
651 
652  double *uC = NULL;
653  double *uL = solver->uL;
654  double *uR = solver->uR;
655  double *fluxL = solver->fL;
656  double *fluxR = solver->fR;
657 
658  /*
659  precalculate the non-linear interpolation coefficients if required
660  else reuse the weights previously calculated
661  */
662 
663  if (LimFlag) solver->SetInterpLimiterVar(fluxC,u,x,dir,solver,mpi);
664 
665  /* if defined, calculate the modified u-function to be used for upwinding
666  e.g.: used in well-balanced schemes for Euler/Navier-Stokes with gravity
667  otherwise, just copy u to uC */
668  if (solver->UFunction) {
669  uC = solver->uC;
670  solver->UFunction(uC,u,dir,solver,mpi,t);
671  } else uC = u;
672 
673 
674  /* Interpolation -> to calculate left and right-biased interface flux and state variable*/
675  solver->InterpolateInterfacesHyp(uL ,uC ,u,x, 1,dir,solver,mpi,1);
676  solver->InterpolateInterfacesHyp(uR ,uC ,u,x,-1,dir,solver,mpi,1);
677  solver->InterpolateInterfacesHyp(fluxL,fluxC,u,x, 1,dir,solver,mpi,0);
678  solver->InterpolateInterfacesHyp(fluxR,fluxC,u,x,-1,dir,solver,mpi,0);
679 
680  /* Upwind -> to calculate the final interface flux */
681  if (UpwindFunction) { UpwindFunction (fluxI,fluxL,fluxR,uL,uR,u,dir,solver,t); }
682  else { DefaultUpwinding (fluxI,fluxL,fluxR,NULL,NULL,NULL,dir,solver,t); }
683 
684  return(0);
685 }
double * uR
Definition: hypar.h:139
double * uL
Definition: hypar.h:139
double * fL
Definition: hypar.h:139
int(* SetInterpLimiterVar)(double *, double *, double *, int, void *, void *)
Definition: hypar.h:234
double * uC
Definition: hypar.h:131
int(* InterpolateInterfacesHyp)(double *, double *, double *, double *, int, int, void *, void *, int)
Definition: hypar.h:224
int(* UFunction)(double *, double *, int, void *, void *, double)
Definition: hypar.h:321
static int DefaultUpwinding(double *, double *, double *, double *, double *, double *, int, void *, double)
Structure of MPI-related variables.
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23
double * fR
Definition: hypar.h:139
int DefaultUpwinding ( double *  fI,
double *  fL,
double *  fR,
double *  uL,
double *  uR,
double *  u,
int  dir,
void *  s,
double  t 
)
static

If no upwinding scheme is specified, this function defines the "upwind" flux as the arithmetic mean of the left- and right-biased fluxes.

Parameters
fIComputed upwind interface flux
fLLeft-biased reconstructed interface flux
fRRight-biased reconstructed interface flux
uLLeft-biased reconstructed interface solution
uRRight-biased reconstructed interface solution
uCell-centered solution
dirSpatial dimension
sSolver object of type HyPar
tCurrent solution time

Definition at line 691 of file HyperbolicFunction_GPU.cu.

702 {
703  HyPar *solver = (HyPar*) s;
704  int done;
705 
706  int *dim = solver->dim_local;
707  int ndims = solver->ndims;
708  int nvars = solver->nvars;
709 
710  int bounds_outer[ndims], bounds_inter[ndims];
711  _ArrayCopy1D_(dim,bounds_outer,ndims); bounds_outer[dir] = 1;
712  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
713 
714  done = 0; int index_outer[ndims], index_inter[ndims];
715  _ArraySetValue_(index_outer,ndims,0);
716  while (!done) {
717  _ArrayCopy1D_(index_outer,index_inter,ndims);
718  for (index_inter[dir] = 0; index_inter[dir] < bounds_inter[dir]; index_inter[dir]++) {
719  int p; _ArrayIndex1D_(ndims,bounds_inter,index_inter,0,p);
720  int v; for (v=0; v<nvars; v++) fI[nvars*p+v] = 0.5 * (fL[nvars*p+v]+fR[nvars*p+v]);
721  }
722  _ArrayIncrementIndex_(ndims,bounds_outer,index_outer,done);
723  }
724 
725  return(0);
726 }
#define _ArraySetValue_(x, size, value)
#define _ArrayIncrementIndex_(N, imax, i, done)
int * dim_local
Definition: hypar.h:37
#define _ArrayIndex1D_(N, imax, i, ghost, index)
#define _ArrayCopy1D_(x, y, size)
int nvars
Definition: hypar.h:29
int ndims
Definition: hypar.h:26
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23
__global__ void StageBoundaryIntegral_kernel ( int  n,
int  nvars,
int  sign,
const double *__restrict__  FluxI,
double *__restrict__  StageBoundaryIntegral 
)

Kernel for stage boundary integral calculation

Definition at line 20 of file HyperbolicFunction_GPU.cu.

27 {
28  extern __shared__ double smem[];
29 
30  int tid = threadIdx.x;
31  int grid_size = block_size * gridDim.x;
32  int i = blockIdx.x * block_size + threadIdx.x;
33  int j;
34  double tid_sum[GPU_MAX_NVARS] = { 0 };
35 
36  while (i < n) {
37  for (j=0; j<nvars; j++) tid_sum[j] += FluxI[i*nvars+j];
38  i += grid_size;
39  }
40  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j];
41  __syncthreads();
42 
43 
44  if (block_size >= 512 && tid < 256) {
45  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+256)*nvars+j];
46  }
47  __syncthreads();
48  if (block_size >= 256 && tid < 128) {
49  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+128)*nvars+j];
50  }
51  __syncthreads();
52  if (block_size >= 128 && tid < 64) {
53  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+64)*nvars+j];
54  }
55  __syncthreads();
56 
57  if (tid < 32) {
58  if (block_size >= 64) {
59  for (j=0; j<nvars; j++) tid_sum[j] += smem[(tid+32)*nvars+j];
60  }
61  for (int offset=16; offset>0; offset/=2) {
62  for (j=0; j<nvars; j++) {
63  tid_sum[j] += __shfl_down_sync(0xffffffff, tid_sum[j], offset);
64  }
65  }
66  }
67  __syncthreads();
68 
69  if (tid == 0) {
70  for (j=0; j<nvars; j++) StageBoundaryIntegral[j] = (sign == 1) ? tid_sum[j] : -tid_sum[j];
71  }
72  return;
73 }
#define GPU_MAX_NVARS
Definition: basic_gpu.h:9
#define sign(a)
Definition: math_ops.h:54
__global__ void HyperbolicFunction_dim3_kernel ( int  npoints_grid,
int  npoints_local_wghosts,
int  npoints_dim_interface,
int  d,
int  nvars,
int  ghosts,
int  offset,
const int *__restrict__  dim,
const double *__restrict__  FluxI,
const double *__restrict__  dxinv,
double *__restrict__  hyp,
double *__restrict__  FluxI_p1,
double *__restrict__  FluxI_p2 
)

3D Kernel for gpuHyperbolicFunction()

Definition at line 406 of file HyperbolicFunction_GPU.cu.

421 {
422  int tx = threadIdx.x + (blockDim.x * blockIdx.x);
423  if (tx < npoints_grid) {
424  int v, p, p1, p2, b;
425  int dim_interface[3];
426  int index[3], index1[3], index2[3];
427 
428  _ArrayIndexnD_(3,tx,dim,index,0);
429  _ArrayCopy1D_(dim,dim_interface,3);
430  dim_interface[d] += 1;
431 
432  _ArrayCopy1D_(index,index1,3);
433  _ArrayCopy1D_(index,index2,3); index2[d]++;
434  _ArrayIndex1D_(3,dim ,index ,ghosts,p);
435  _ArrayIndex1D_(3,dim_interface,index1,0 ,p1);
436  _ArrayIndex1D_(3,dim_interface,index2,0 ,p2);
437  for (v=0; v<nvars; v++) {
438  hyp[p+v*npoints_local_wghosts] += dxinv[offset+ghosts+index[d]]
439  * (FluxI[p2+v*npoints_dim_interface]-FluxI[p1+v*npoints_dim_interface]);
440  }
441  if (index[d] == 0 || index[d] == dim[d]-1) {
442  if (d == 0) {
443  b = index[1] + index[2]*dim[1];
444  } else if (d == 1) {
445  b = index[0] + index[2]*dim[0];
446  } else {
447  b = index[0] + index[1]*dim[0];
448  }
449  if (index[d] == 0) {
450  for (v=0; v<nvars; v++) FluxI_p1[b*nvars+v] = FluxI[p1+v*npoints_dim_interface];
451  } else {
452  for (v=0; v<nvars; v++) FluxI_p2[b*nvars+v] = FluxI[p2+v*npoints_dim_interface];
453  }
454  }
455  }
456  return;
457 }
#define _ArrayIndexnD_(N, index, imax, i, ghost)
#define _ArrayIndex1D_(N, imax, i, ghost, index)
#define _ArrayCopy1D_(x, y, size)
int gpuHyperbolicFunction ( double *  hyp,
double *  u,
void *  s,
void *  m,
double  t,
int  LimFlag,
int(*)(double *, double *, int, void *, double)  FluxFunction,
int(*)(double *, double *, double *, double *, double *, double *, int, void *, double)  UpwindFunction 
)

This function computes the hyperbolic term of the governing equations, expressed as follows:-

\begin{equation} {\bf F} \left({\bf u}\right) = \sum_{d=0}^{D-1} \frac {\partial {\bf f}_d\left({\bf u}\right)} {\partial x_d} \end{equation}

using a conservative finite-difference discretization is space:

\begin{equation} \hat{\bf F} \left({\bf u}\right) = \sum_{d=0}^{D-1} \frac {1} {\Delta x_d} \left[ \hat{\bf f}_{d,j+1/2} - \hat{\bf f}_{d,j-1/2} \right] \end{equation}

where \(d\) denotes the spatial dimension, \(D\) denotes the total number of spatial dimensions, the hat denotes the discretized quantity, and \(j\) is the grid coordinate along dimension \(d\). The approximation to the flux function \({\bf f}_d\) at the interfaces \(j\pm1/2\), denoted by \(\hat{\bf f}_{d,j\pm 1/2}\), are computed using the function ReconstructHyperbolic().

Parameters
hypArray to hold the computed hyperbolic term (shares the same layout as u
uSolution array
sSolver object of type HyPar
mMPI object of type MPIVariables
tCurrent simulation time
LimFlagFlag to indicate if the nonlinear coefficients for solution-dependent interpolation method should be recomputed (see ReconstructHyperbolic() for an explanation on why this is needed)
FluxFunctionFunction pointer to the flux function for the hyperbolic term
UpwindFunctionFunction pointer to the upwinding function for the hyperbolic term

Definition at line 473 of file HyperbolicFunction_GPU.cu.

488 {
489  HyPar *solver = (HyPar*) s;
490  MPIVariables *mpi = (MPIVariables*) m;
491  int d;
492  double *FluxI = solver->fluxI; /* interface flux */
493  double *FluxC = solver->fluxC; /* cell centered flux */
494 
495  int ndims = solver->ndims;
496  int nvars = solver->nvars;
497  int ghosts = solver->ghosts;
498  int *dim = solver->dim_local;
499  int size = solver->npoints_local_wghosts;
500  double *x = solver->gpu_x;
501  double *dxinv = solver->gpu_dxinv;
502  double *FluxI_p1, *FluxI_p2;
503 
504  LimFlag = (LimFlag && solver->flag_nonlinearinterp && solver->SetInterpLimiterVar);
505 
506  gpuMemset(hyp, 0, size*nvars*sizeof(double));
507  gpuMemset(solver->StageBoundaryBuffer, 0, solver->StageBoundaryBuffer_size*sizeof(double));
508  gpuMemset(solver->StageBoundaryIntegral, 0, 2*ndims*nvars*sizeof(double));
509 
510  if (!FluxFunction) return(0); /* zero hyperbolic term */
511  /*solver->count_hyp++;*/
512 
513  int npoints_grid = 1; for (d = 0; d < ndims; d++) npoints_grid *= dim[d];
514  int nblocks = (npoints_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
515  int offset = 0;
516 
517 #if defined(GPU_STAT)
518  cudaEvent_t start, stop;
519  float milliseconds = 0;
520 
521  checkCuda( cudaEventCreate(&start) );
522  checkCuda( cudaEventCreate(&stop) );
523 #endif
524 
525  for (d = 0; d < ndims; d++) {
526  int npoints_dim_interface = 1; for (int i=0; i<ndims; i++) npoints_dim_interface *= (i==d) ? (dim[i]+1) : dim[i];
527  /* evaluate cell-centered flux */
528  FluxFunction(FluxC,u,d,solver,t);
529  /* compute interface fluxes */
530  ReconstructHyperbolic(FluxI,FluxC,u,x+offset,d,solver,mpi,t,LimFlag,UpwindFunction);
531 
532  FluxI_p1 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]*nvars);
533  FluxI_p2 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]+solver->gpu_npoints_boundary[d])*nvars;
534 
535 #if defined(GPU_STAT)
536  checkCuda( cudaEventRecord(start, 0) );
537 #endif
538  if (ndims == 3) {
539  HyperbolicFunction_dim3_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
540  npoints_grid, size, npoints_dim_interface, d, nvars, ghosts, offset,
541  solver->gpu_dim_local, FluxI, dxinv,
542  hyp, FluxI_p1, FluxI_p2
543  );
544  } else {
545  fprintf(stderr,"gpuHyperbolicFunction(): Not implemented for ndims = %d!\n", ndims);
546  exit(1);
547  }
548  cudaDeviceSynchronize();
549 
550 #if defined(GPU_STAT)
551  checkCuda( cudaEventRecord(stop, 0) );
552  checkCuda( cudaEventSynchronize(stop) );
553  checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
554  printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
555  "HyperbolicFunction", milliseconds*1e-3, d, (1e-6*2*npoints_grid*nvars*sizeof(double))/milliseconds);
556 
557  checkCuda( cudaEventRecord(start, 0) );
558 #endif
559 
560  StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
561  solver->gpu_npoints_boundary[d], nvars, -1, FluxI_p1,
562  solver->StageBoundaryIntegral + 2*d*nvars
563  );
564  StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
565  solver->gpu_npoints_boundary[d], nvars, 1, FluxI_p2,
566  solver->StageBoundaryIntegral + (2*d+1)*nvars
567  );
568  cudaDeviceSynchronize();
569 
570 #if defined(GPU_STAT)
571  checkCuda( cudaEventRecord(stop, 0) );
572  checkCuda( cudaEventSynchronize(stop) );
573  checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
574  printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
575  "StageBoundaryIntegral", milliseconds*1e-3, d, (1e-6*2*solver->gpu_npoints_boundary[d]*nvars*sizeof(double))/milliseconds);
576 #endif
577  offset += dim[d] + 2*ghosts;
578  }
579 
580  if (solver->flag_ib) gpuArrayBlockMultiply(hyp, solver->gpu_iblank, size, nvars);
581 
582 #if defined(GPU_STAT)
583  checkCuda(cudaEventDestroy(start));
584  checkCuda(cudaEventDestroy(stop));
585 #endif
586 
587  return(0);
588 }
int npoints_local_wghosts
Definition: hypar.h:42
double * StageBoundaryBuffer
Definition: hypar.h:462
void gpuArrayBlockMultiply(double *, const double *, int, int)
int flag_ib
Definition: hypar.h:441
static int ReconstructHyperbolic(double *, double *, double *, double *, int, void *, void *, double, int, int(*)(double *, double *, double *, double *, double *, double *, int, void *, double))
int * dim_local
Definition: hypar.h:37
double * gpu_dxinv
Definition: hypar.h:458
int gpu_npoints_boundary[3]
Definition: hypar.h:453
double * gpu_iblank
Definition: hypar.h:456
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
int ghosts
Definition: hypar.h:52
int(* SetInterpLimiterVar)(double *, double *, double *, int, void *, void *)
Definition: hypar.h:234
double * fluxI
Definition: hypar.h:136
int * gpu_dim_local
Definition: hypar.h:455
double * gpu_x
Definition: hypar.h:457
int gpu_npoints_boundary_offset[3]
Definition: hypar.h:452
double * fluxC
Definition: hypar.h:128
int nvars
Definition: hypar.h:29
void gpuMemset(void *, int, size_t)
double * StageBoundaryIntegral
Definition: hypar.h:382
int ndims
Definition: hypar.h:26
int StageBoundaryBuffer_size
Definition: hypar.h:461
Structure of MPI-related variables.
int flag_nonlinearinterp
Definition: hypar.h:411
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23