WENO5 Scheme (Component-wise application to vectors). More...

#include <basic.h>
#include <basic_gpu.h>
#include <arrayfunctions_gpu.h>
#include <mathfunctions.h>
#include <interpolation.h>
#include <mpivars.h>
#include <hypar.h>

Macros
#define	_MINIMUM_GHOSTS_ 3

Functions
__global__ void	Interp1PrimFifthOrderWENO_kernel (int npoints_grid, int npoints_local_wghosts, int ndims, int dir, int ghosts, int nvars, int weno_size, int offset, int stride, int upw, int uflag, const int *dim, const double *fC, const double *w1, const double *w2, const double *w3, double *fI)

int	gpuInterp1PrimFifthOrderWENO (double *fI, double *fC, double *u, double *x, int upw, int dir, void *s, void *m, int uflag)
	5th order WENO reconstruction (component-wise) on a uniform grid More...

Detailed Description

WENO5 Scheme (Component-wise application to vectors).

Author: Youngdae Kim

Definition in file Interp1PrimFifthOrderWENO_GPU.cu.

Macro Definition Documentation

#define _MINIMUM_GHOSTS_ 3

Minimum number of ghost points required for this interpolation method.

Definition at line 19 of file Interp1PrimFifthOrderWENO_GPU.cu.

Function Documentation

__global__ void Interp1PrimFifthOrderWENO_kernel	(	int	npoints_grid,
		int	npoints_local_wghosts,
		int	ndims,
		int	dir,
		int	ghosts,
		int	nvars,
		int	weno_size,
		int	offset,
		int	stride,
		int	upw,
		int	uflag,
		const int *	dim,
		const double *	fC,
		const double *	w1,
		const double *	w2,
		const double *	w3,
		double *	fI
	)

Kernel for gpuInterp1PrimFifthOrderWENO()

Definition at line 218 of file Interp1PrimFifthOrderWENO_GPU.cu.

 {
     int p = threadIdx.x + (blockDim.x * blockIdx.x);
     if (p < npoints_grid) {
       int    bounds_inter[GPU_MAX_NDIMS], indexC[GPU_MAX_NDIMS], indexI[GPU_MAX_NDIMS];
       int qm1,qm2,qm3,qp1,qp2;
       const double *ww1, *ww2, *ww3;
 
       ww1 = w1 + (upw < 0 ? 2*weno_size : 0) + (uflag ? weno_size : 0) + offset;
       ww2 = w2 + (upw < 0 ? 2*weno_size : 0) + (uflag ? weno_size : 0) + offset;
       ww3 = w3 + (upw < 0 ? 2*weno_size : 0) + (uflag ? weno_size : 0) + offset;
 
       _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
       _ArrayIndexnD_(ndims,p,bounds_inter,indexC,0);
       _ArrayCopy1D_(indexC,indexI,ndims);
 
       if (upw > 0) {
           indexC[dir] = indexI[dir]-1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1);
           qm3 = qm1 - 2*stride;
           qm2 = qm1 -   stride;
           qp1 = qm1 +   stride;
           qp2 = qm1 + 2*stride;
       } else {
           indexC[dir] = indexI[dir]  ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1);
           qm3 = qm1 + 2*stride;
           qm2 = qm1 +   stride;
           qp1 = qm1 -   stride;
           qp2 = qm1 - 2*stride;
       }
 
       /* Defining stencil points */
       const double *fm3, *fm2, *fm1, *fp1, *fp2;
       /* Candidate stencils and their optimal weights*/
       double f1, f2, f3;
 
       int l = p;
       for (int j = 0; j < nvars; j++) {
         fm3 = (fC+qm3);
         fm2 = (fC+qm2);
         fm1 = (fC+qm1);
         fp1 = (fC+qp1);
         fp2 = (fC+qp2);
 
         f1 = (2*GPU_ONE_SIXTH)*fm3[0] + (-7*GPU_ONE_SIXTH)*fm2[0] + (11*GPU_ONE_SIXTH)*fm1[0];
         f2 = ( -GPU_ONE_SIXTH)*fm2[0] + ( 5*GPU_ONE_SIXTH)*fm1[0] + ( 2*GPU_ONE_SIXTH)*fp1[0];
         f3 = (2*GPU_ONE_SIXTH)*fm1[0] + ( 5*GPU_ONE_SIXTH)*fp1[0] + (  -GPU_ONE_SIXTH)*fp2[0];
 
         qm3 += npoints_local_wghosts;
         qm2 += npoints_local_wghosts;
         qm1 += npoints_local_wghosts;
         qp1 += npoints_local_wghosts;
         qp2 += npoints_local_wghosts;
 
         /*
         _ArrayAXBYCZ_(f1,(2*GPU_ONE_SIXTH),fm3,(-7*GPU_ONE_SIXTH) ,fm2,(11*GPU_ONE_SIXTH) ,fm1,nvars);
         _ArrayAXBYCZ_(f2,(-GPU_ONE_SIXTH) ,fm2,(5*GPU_ONE_SIXTH)  ,fm1,(2*GPU_ONE_SIXTH)  ,fp1,nvars);
         _ArrayAXBYCZ_(f3,(2*GPU_ONE_SIXTH),fm1,(5*GPU_ONE_SIXTH)  ,fp1,(-GPU_ONE_SIXTH)   ,fp2,nvars);
         */
 
         /* calculate WENO weights */
         fI[l] = (ww1+l)[0]*f1 + (ww2+l)[0]*f2 + (ww3+l)[0]*f3;
         l += npoints_grid;
         //_ArrayMultiply3Add1D_((fI+p),cur_w1,f1,cur_w2,f2,cur_w3,f3,1);
       }
     }
 
     return;
 }

int gpuInterp1PrimFifthOrderWENO	(	double *	fI,
		double *	fC,
		double *	u,
		double *	x,
		int	upw,
		int	dir,
		void *	s,
		void *	m,
		int	uflag
	)

5th order WENO reconstruction (component-wise) on a uniform grid

Computes the interpolated values of the first primitive of a function \({\bf f}\left({\bf u}\right)\) at the interfaces from the cell-centered values of the function using the fifth order WENO scheme on a uniform grid. The first primitive is defined as a function \({\bf h}\left({\bf u}\right)\) that satisfies:

\begin{equation} {\bf f}\left({\bf u}\left(x\right)\right) = \frac{1}{\Delta x} \int_{x-\Delta x/2}^{x+\Delta x/2} {\bf h}\left({\bf u}\left(\zeta\right)\right)d\zeta, \end{equation}

where \(x\) is the spatial coordinate along the dimension of the interpolation. This function computes the 5th order WENO numerical approximation \(\hat{\bf f}_{j+1/2} \approx {\bf h}_{j+1/2}\) as the convex combination of three 3rd order methods:

\begin{align} &\ \omega_1\ \times\ \left[ \hat{\bf f}_{j+1/2}^1 = \frac{1}{3} {\bf f}_{j-2} - \frac{7}{6} {\bf f}_{j-1} + \frac{11}{6} {\bf f}_j \right]\\ + &\ \omega_2\ \times\ \left[ \hat{\bf f}_{j+1/2}^2 = -\frac{1}{6} {\bf f}_{j-1} + \frac{5}{6} {\bf f}_j + \frac{1}{3} {\bf f}_{j+1} \right]\\ + &\ \omega_3\ \times\ \left[ \hat{\bf f}_{j+1/2}^3 = \frac{1}{3} {\bf f}_j + \frac{5}{6} {\bf f}_{j+1} - \frac{1}{6} {\bf f}_{j+2} \right]\\ \Rightarrow &\ \hat{\bf f}_{j+1/2} = \frac{\omega_1}{3} {\bf f}_{j-2} - \frac{1}{6}(7\omega_1+\omega_2){\bf f}_{j-1} + \frac{1}{6}(11\omega_1+5\omega_2+2\omega_3){\bf f}_j + \frac{1}{6}(2\omega_2+5\omega_3){\bf f}_{j+1} - \frac{\omega_3}{6}{\bf f}_{j+2}, \end{align}

where \(\omega_k; k=1,2,3\) are the nonlinear WENO weights computed in WENOFifthOrderCalculateWeights() (note that the \(\omega\) are different for each component of the vector \(\hat{\bf f}\)).

Implementation Notes:

This method assumes a uniform grid in the spatial dimension corresponding to the interpolation.
The method described above corresponds to a left-biased interpolation. The corresponding right-biased interpolation can be obtained by reflecting the equations about interface j+1/2.
The scalar interpolation method is applied to the vector function in a component-wise manner.
The function computes the interpolant for the entire grid in one call. It loops over all the grid lines along the interpolation direction and carries out the 1D interpolation along these grid lines.
Location of cell-centers and cell interfaces along the spatial dimension of the interpolation is shown in the following figure:

Function arguments:

Argument	Type	Explanation
fI	double*	Array to hold the computed interpolant at the grid interfaces. This array must have the same layout as the solution, but with no ghost points. Its size should be the same as u in all dimensions, except dir (the dimension along which to interpolate) along which it should be larger by 1 (number of interfaces is 1 more than the number of interior cell centers).
fC	double*	Array with the cell-centered values of the flux function \({\bf f}\left({\bf u}\right)\). This array must have the same layout and size as the solution, with ghost points.
u	double*	The solution array \({\bf u}\) (with ghost points). If the interpolation is characteristic based, this is needed to compute the eigendecomposition. For a multidimensional problem, the layout is as follows: u is a contiguous 1D array of size (nvars*dim[0]*dim[1]*...*dim[D-1]) corresponding to the multi-dimensional solution, with the following ordering - nvars, dim[0], dim[1], ..., dim[D-1], where nvars is the number of solution components (HyPar::nvars), dim is the local size (HyPar::dim_local), D is the number of spatial dimensions.
x	double*	The grid array (with ghost points). This is used only by non-uniform-grid interpolation methods. For multidimensional problems, the layout is as follows: x is a contiguous 1D array of size (dim[0]+dim[1]+...+dim[D-1]), with the spatial coordinates along dim[0] stored from 0,...,dim[0]-1, the spatial coordinates along dim[1] stored along dim[0],...,dim[0]+dim[1]-1, and so forth.
upw	int	Upwinding direction: if positive, a left-biased interpolant will be computed; if negative, a right-biased interpolant will be computed. If the interpolation method is central, then this has no effect.
dir	int	Spatial dimension along which to interpolate (eg: 0 for 1D; 0 or 1 for 2D; 0,1 or 2 for 3D)
s	void*	Solver object of type HyPar: the following variables are needed - HyPar::ghosts, HyPar::ndims, HyPar::nvars, HyPar::dim_local.
m	void*	MPI object of type MPIVariables: this is needed only by compact interpolation methods that need to solve a global implicit system across MPI ranks.
uflag	int	A flag indicating if the function being interpolated \({\bf f}\) is the solution itself \({\bf u}\) (if 1, \({\bf f}\left({\bf u}\right) \equiv {\bf u}\)).

Reference:

Jiang, G.-S., Shu, C.-W., Efficient Implementation of Weighted ENO Schemes, J. Comput. Phys., 126 (1), 1996, pp. 202-228, http://dx.doi.org/10.1006/jcph.1996.0130

Parameters

fI	Array of interpolated function values at the interfaces
fC	Array of cell-centered values of the function \({\bf f}\left({\bf u}\right)\)
u	Array of cell-centered values of the solution \({\bf u}\)
x	Grid coordinates
upw	Upwind direction (left or right biased)
dir	Spatial dimension along which to interpolate
s	Object of type HyPar containing solver-related variables
m	Object of type MPIVariables containing MPI-related variables
uflag	Flag to indicate if \(f(u) \equiv u\), i.e., if the solution is being reconstructed

Definition at line 352 of file Interp1PrimFifthOrderWENO_GPU.cu.

 {
     HyPar           *solver = (HyPar*)          s;
     WENOParameters  *weno   = (WENOParameters*) solver->interp;
 
     int ghosts = solver->ghosts;
     int ndims  = solver->ndims;
     int nvars  = solver->nvars;
     int *dim   = solver->dim_local;
     int *stride= solver->stride_with_ghosts;
 
     /* calculate dimension offset */
     int offset = weno->offset[dir];
     int bounds_inter[ndims];
     _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] +=  1;
     int npoints_grid; _ArrayProduct1D_(bounds_inter,ndims,npoints_grid);
     int nblocks = (npoints_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
 
 #if defined(GPU_STAT)
     cudaEvent_t start, stop;
     float milliseconds = 0;
     checkCuda( cudaEventCreate(&start) );
     checkCuda( cudaEventCreate(&stop) );
 
 
     int weno_memory_accessed = 3*npoints_grid*nvars*sizeof(double);
     int fI_memory_accessed = npoints_grid*nvars*sizeof(double);
     int fC_memory_accessed = 1;
     for (int d=0; d<ndims; d++) {
       if (d == dir) fC_memory_accessed *= (dim[d]+2*ghosts);
       else          fC_memory_accessed *= dim[d];
     }
     fC_memory_accessed *= nvars*sizeof(double);
 
     checkCuda( cudaEventRecord(start) );
 #endif
 
     Interp1PrimFifthOrderWENO_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
         npoints_grid, solver->npoints_local_wghosts, ndims, dir, ghosts, nvars, weno->size, offset, stride[dir], upw, uflag,
         solver->gpu_dim_local, fC, weno->w1, weno->w2, weno->w3, fI
     );
 #if defined(GPU_STAT)
     checkCuda( cudaEventRecord(stop) );
     checkCuda( cudaEventSynchronize(stop) );
 #endif
 
     cudaDeviceSynchronize();
 
 #if defined(GPU_STAT)
     checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
 
     printf("%-50s GPU time (secs) = %.6f dir = %d bandwidth (GB/s) = %6.2f\n",
             "Interp1PrimFifthOrderWENO2", milliseconds*1e-3, dir,
             (1e-6*(weno_memory_accessed+fI_memory_accessed+fC_memory_accessed))/milliseconds);
 
     checkCuda( cudaEventDestroy(start) );
     checkCuda( cudaEventDestroy(stop) );
 #endif
 
     return 0;
 }

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation