HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Interp1PrimFifthOrderWENO_GPU.cu File Reference

WENO5 Scheme (Component-wise application to vectors). More...

#include <basic.h>
#include <basic_gpu.h>
#include <arrayfunctions_gpu.h>
#include <mathfunctions.h>
#include <interpolation.h>
#include <mpivars.h>
#include <hypar.h>

Go to the source code of this file.

Macros

#define _MINIMUM_GHOSTS_   3
 

Functions

__global__ void Interp1PrimFifthOrderWENO_kernel (int npoints_grid, int npoints_local_wghosts, int ndims, int dir, int ghosts, int nvars, int weno_size, int offset, int stride, int upw, int uflag, const int *dim, const double *fC, const double *w1, const double *w2, const double *w3, double *fI)
 
int gpuInterp1PrimFifthOrderWENO (double *fI, double *fC, double *u, double *x, int upw, int dir, void *s, void *m, int uflag)
 5th order WENO reconstruction (component-wise) on a uniform grid More...
 

Detailed Description

WENO5 Scheme (Component-wise application to vectors).

Author
Youngdae Kim

Definition in file Interp1PrimFifthOrderWENO_GPU.cu.

Macro Definition Documentation

#define _MINIMUM_GHOSTS_   3

Minimum number of ghost points required for this interpolation method.

Definition at line 19 of file Interp1PrimFifthOrderWENO_GPU.cu.

Function Documentation

__global__ void Interp1PrimFifthOrderWENO_kernel ( int  npoints_grid,
int  npoints_local_wghosts,
int  ndims,
int  dir,
int  ghosts,
int  nvars,
int  weno_size,
int  offset,
int  stride,
int  upw,
int  uflag,
const int *  dim,
const double *  fC,
const double *  w1,
const double *  w2,
const double *  w3,
double *  fI 
)

Kernel for gpuInterp1PrimFifthOrderWENO()

Definition at line 218 of file Interp1PrimFifthOrderWENO_GPU.cu.

237 {
238  int p = threadIdx.x + (blockDim.x * blockIdx.x);
239  if (p < npoints_grid) {
240  int bounds_inter[GPU_MAX_NDIMS], indexC[GPU_MAX_NDIMS], indexI[GPU_MAX_NDIMS];
241  int qm1,qm2,qm3,qp1,qp2;
242  const double *ww1, *ww2, *ww3;
243 
244  ww1 = w1 + (upw < 0 ? 2*weno_size : 0) + (uflag ? weno_size : 0) + offset;
245  ww2 = w2 + (upw < 0 ? 2*weno_size : 0) + (uflag ? weno_size : 0) + offset;
246  ww3 = w3 + (upw < 0 ? 2*weno_size : 0) + (uflag ? weno_size : 0) + offset;
247 
248  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
249  _ArrayIndexnD_(ndims,p,bounds_inter,indexC,0);
250  _ArrayCopy1D_(indexC,indexI,ndims);
251 
252  if (upw > 0) {
253  indexC[dir] = indexI[dir]-1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1);
254  qm3 = qm1 - 2*stride;
255  qm2 = qm1 - stride;
256  qp1 = qm1 + stride;
257  qp2 = qm1 + 2*stride;
258  } else {
259  indexC[dir] = indexI[dir] ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1);
260  qm3 = qm1 + 2*stride;
261  qm2 = qm1 + stride;
262  qp1 = qm1 - stride;
263  qp2 = qm1 - 2*stride;
264  }
265 
266  /* Defining stencil points */
267  const double *fm3, *fm2, *fm1, *fp1, *fp2;
268  /* Candidate stencils and their optimal weights*/
269  double f1, f2, f3;
270 
271  int l = p;
272  for (int j = 0; j < nvars; j++) {
273  fm3 = (fC+qm3);
274  fm2 = (fC+qm2);
275  fm1 = (fC+qm1);
276  fp1 = (fC+qp1);
277  fp2 = (fC+qp2);
278 
279  f1 = (2*GPU_ONE_SIXTH)*fm3[0] + (-7*GPU_ONE_SIXTH)*fm2[0] + (11*GPU_ONE_SIXTH)*fm1[0];
280  f2 = ( -GPU_ONE_SIXTH)*fm2[0] + ( 5*GPU_ONE_SIXTH)*fm1[0] + ( 2*GPU_ONE_SIXTH)*fp1[0];
281  f3 = (2*GPU_ONE_SIXTH)*fm1[0] + ( 5*GPU_ONE_SIXTH)*fp1[0] + ( -GPU_ONE_SIXTH)*fp2[0];
282 
283  qm3 += npoints_local_wghosts;
284  qm2 += npoints_local_wghosts;
285  qm1 += npoints_local_wghosts;
286  qp1 += npoints_local_wghosts;
287  qp2 += npoints_local_wghosts;
288 
289  /*
290  _ArrayAXBYCZ_(f1,(2*GPU_ONE_SIXTH),fm3,(-7*GPU_ONE_SIXTH) ,fm2,(11*GPU_ONE_SIXTH) ,fm1,nvars);
291  _ArrayAXBYCZ_(f2,(-GPU_ONE_SIXTH) ,fm2,(5*GPU_ONE_SIXTH) ,fm1,(2*GPU_ONE_SIXTH) ,fp1,nvars);
292  _ArrayAXBYCZ_(f3,(2*GPU_ONE_SIXTH),fm1,(5*GPU_ONE_SIXTH) ,fp1,(-GPU_ONE_SIXTH) ,fp2,nvars);
293  */
294 
295  /* calculate WENO weights */
296  fI[l] = (ww1+l)[0]*f1 + (ww2+l)[0]*f2 + (ww3+l)[0]*f3;
297  l += npoints_grid;
298  //_ArrayMultiply3Add1D_((fI+p),cur_w1,f1,cur_w2,f2,cur_w3,f3,1);
299  }
300  }
301 
302  return;
303 }
#define _ArrayIndexnD_(N, index, imax, i, ghost)
#define GPU_MAX_NDIMS
Definition: basic_gpu.h:8
#define _ArrayIndex1D_(N, imax, i, ghost, index)
#define _ArrayCopy1D_(x, y, size)
#define GPU_ONE_SIXTH
Definition: basic_gpu.h:6
int gpuInterp1PrimFifthOrderWENO ( double *  fI,
double *  fC,
double *  u,
double *  x,
int  upw,
int  dir,
void *  s,
void *  m,
int  uflag 
)

5th order WENO reconstruction (component-wise) on a uniform grid

Computes the interpolated values of the first primitive of a function \({\bf f}\left({\bf u}\right)\) at the interfaces from the cell-centered values of the function using the fifth order WENO scheme on a uniform grid. The first primitive is defined as a function \({\bf h}\left({\bf u}\right)\) that satisfies:

\begin{equation} {\bf f}\left({\bf u}\left(x\right)\right) = \frac{1}{\Delta x} \int_{x-\Delta x/2}^{x+\Delta x/2} {\bf h}\left({\bf u}\left(\zeta\right)\right)d\zeta, \end{equation}

where \(x\) is the spatial coordinate along the dimension of the interpolation. This function computes the 5th order WENO numerical approximation \(\hat{\bf f}_{j+1/2} \approx {\bf h}_{j+1/2}\) as the convex combination of three 3rd order methods:

\begin{align} &\ \omega_1\ \times\ \left[ \hat{\bf f}_{j+1/2}^1 = \frac{1}{3} {\bf f}_{j-2} - \frac{7}{6} {\bf f}_{j-1} + \frac{11}{6} {\bf f}_j \right]\\ + &\ \omega_2\ \times\ \left[ \hat{\bf f}_{j+1/2}^2 = -\frac{1}{6} {\bf f}_{j-1} + \frac{5}{6} {\bf f}_j + \frac{1}{3} {\bf f}_{j+1} \right]\\ + &\ \omega_3\ \times\ \left[ \hat{\bf f}_{j+1/2}^3 = \frac{1}{3} {\bf f}_j + \frac{5}{6} {\bf f}_{j+1} - \frac{1}{6} {\bf f}_{j+2} \right]\\ \Rightarrow &\ \hat{\bf f}_{j+1/2} = \frac{\omega_1}{3} {\bf f}_{j-2} - \frac{1}{6}(7\omega_1+\omega_2){\bf f}_{j-1} + \frac{1}{6}(11\omega_1+5\omega_2+2\omega_3){\bf f}_j + \frac{1}{6}(2\omega_2+5\omega_3){\bf f}_{j+1} - \frac{\omega_3}{6}{\bf f}_{j+2}, \end{align}

where \(\omega_k; k=1,2,3\) are the nonlinear WENO weights computed in WENOFifthOrderCalculateWeights() (note that the \(\omega\) are different for each component of the vector \(\hat{\bf f}\)).

Implementation Notes:

  • This method assumes a uniform grid in the spatial dimension corresponding to the interpolation.
  • The method described above corresponds to a left-biased interpolation. The corresponding right-biased interpolation can be obtained by reflecting the equations about interface j+1/2.
  • The scalar interpolation method is applied to the vector function in a component-wise manner.
  • The function computes the interpolant for the entire grid in one call. It loops over all the grid lines along the interpolation direction and carries out the 1D interpolation along these grid lines.
  • Location of cell-centers and cell interfaces along the spatial dimension of the interpolation is shown in the following figure:
    chap1_1Ddomain.png

Function arguments:

Argument Type Explanation
fI double* Array to hold the computed interpolant at the grid interfaces. This array must have the same layout as the solution, but with no ghost points. Its size should be the same as u in all dimensions, except dir (the dimension along which to interpolate) along which it should be larger by 1 (number of interfaces is 1 more than the number of interior cell centers).
fC double* Array with the cell-centered values of the flux function \({\bf f}\left({\bf u}\right)\). This array must have the same layout and size as the solution, with ghost points.
u double* The solution array \({\bf u}\) (with ghost points). If the interpolation is characteristic based, this is needed to compute the eigendecomposition. For a multidimensional problem, the layout is as follows: u is a contiguous 1D array of size (nvars*dim[0]*dim[1]*...*dim[D-1]) corresponding to the multi-dimensional solution, with the following ordering - nvars, dim[0], dim[1], ..., dim[D-1], where nvars is the number of solution components (HyPar::nvars), dim is the local size (HyPar::dim_local), D is the number of spatial dimensions.
x double* The grid array (with ghost points). This is used only by non-uniform-grid interpolation methods. For multidimensional problems, the layout is as follows: x is a contiguous 1D array of size (dim[0]+dim[1]+...+dim[D-1]), with the spatial coordinates along dim[0] stored from 0,...,dim[0]-1, the spatial coordinates along dim[1] stored along dim[0],...,dim[0]+dim[1]-1, and so forth.
upw int Upwinding direction: if positive, a left-biased interpolant will be computed; if negative, a right-biased interpolant will be computed. If the interpolation method is central, then this has no effect.
dir int Spatial dimension along which to interpolate (eg: 0 for 1D; 0 or 1 for 2D; 0,1 or 2 for 3D)
s void* Solver object of type HyPar: the following variables are needed - HyPar::ghosts, HyPar::ndims, HyPar::nvars, HyPar::dim_local.
m void* MPI object of type MPIVariables: this is needed only by compact interpolation method that need to solve a global implicit system across MPI ranks.
uflag int A flag indicating if the function being interpolated \({\bf f}\) is the solution itself \({\bf u}\) (if 1, \({\bf f}\left({\bf u}\right) \equiv {\bf u}\)).

Reference:

Parameters
fIArray of interpolated function values at the interfaces
fCArray of cell-centered values of the function \({\bf f}\left({\bf u}\right)\)
uArray of cell-centered values of the solution \({\bf u}\)
xGrid coordinates
upwUpwind direction (left or right biased)
dirSpatial dimension along which to interpolation
sObject of type HyPar containing solver-related variables
mObject of type MPIVariables containing MPI-related variables
uflagFlag to indicate if \(f(u) \equiv u\), i.e, if the solution is being reconstructed

Definition at line 352 of file Interp1PrimFifthOrderWENO_GPU.cu.

363 {
364  HyPar *solver = (HyPar*) s;
365  WENOParameters *weno = (WENOParameters*) solver->interp;
366 
367  int ghosts = solver->ghosts;
368  int ndims = solver->ndims;
369  int nvars = solver->nvars;
370  int *dim = solver->dim_local;
371  int *stride= solver->stride_with_ghosts;
372 
373  /* calculate dimension offset */
374  int offset = weno->offset[dir];
375  int bounds_inter[ndims];
376  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
377  int npoints_grid; _ArrayProduct1D_(bounds_inter,ndims,npoints_grid);
378  int nblocks = (npoints_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
379 
380 #if defined(GPU_STAT)
381  cudaEvent_t start, stop;
382  float milliseconds = 0;
383  checkCuda( cudaEventCreate(&start) );
384  checkCuda( cudaEventCreate(&stop) );
385 
386 
387  int weno_memory_accessed = 3*npoints_grid*nvars*sizeof(double);
388  int fI_memory_accessed = npoints_grid*nvars*sizeof(double);
389  int fC_memory_accessed = 1;
390  for (int d=0; d<ndims; d++) {
391  if (d == dir) fC_memory_accessed *= (dim[d]+2*ghosts);
392  else fC_memory_accessed *= dim[d];
393  }
394  fC_memory_accessed *= nvars*sizeof(double);
395 
396  checkCuda( cudaEventRecord(start) );
397 #endif
398 
399  Interp1PrimFifthOrderWENO_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
400  npoints_grid, solver->npoints_local_wghosts, ndims, dir, ghosts, nvars, weno->size, offset, stride[dir], upw, uflag,
401  solver->gpu_dim_local, fC, weno->w1, weno->w2, weno->w3, fI
402  );
403 #if defined(GPU_STAT)
404  checkCuda( cudaEventRecord(stop) );
405  checkCuda( cudaEventSynchronize(stop) );
406 #endif
407 
408  cudaDeviceSynchronize();
409 
410 #if defined(GPU_STAT)
411  checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
412 
413  printf("%-50s GPU time (secs) = %.6f dir = %d bandwidth (GB/s) = %6.2f\n",
414  "Interp1PrimFifthOrderWENO2", milliseconds*1e-3, dir,
415  (1e-6*(weno_memory_accessed+fI_memory_accessed+fC_memory_accessed))/milliseconds);
416 
417  checkCuda( cudaEventDestroy(start) );
418  checkCuda( cudaEventDestroy(stop) );
419 #endif
420 
421  return 0;
422 }
void * interp
Definition: hypar.h:362
int npoints_local_wghosts
Definition: hypar.h:42
int * dim_local
Definition: hypar.h:37
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
Structure of variables/parameters needed by the WENO-type scheme.
int ghosts
Definition: hypar.h:52
int * gpu_dim_local
Definition: hypar.h:455
int * stride_with_ghosts
Definition: hypar.h:414
#define _ArrayCopy1D_(x, y, size)
int nvars
Definition: hypar.h:29
int ndims
Definition: hypar.h:26
#define _ArrayProduct1D_(x, size, p)
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23