HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
WENOFifthOrderCalculateWeights_GPU.cu File Reference

Functions to compute the nonlinear weights for WENO-type schemes. More...

#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <cuda_profiler_api.h>
#include <basic.h>
#include <basic_gpu.h>
#include <arrayfunctions.h>
#include <matmult_native.h>
#include <mathfunctions.h>
#include <interpolation.h>
#include <mpivars.h>
#include <hypar.h>
#include <arrayfunctions_gpu.h>

Go to the source code of this file.

Functions

static int gpuWENOFifthOrderCalculateWeightsYC (double *, double *, double *, int, void *, void *)
 
static int gpuWENOFifthOrderCalculateWeightsM (double *, double *, double *, int, void *, void *)
 
int gpuWENOFifthOrderCalculateWeights (double *fC, double *uC, double *x, int dir, void *s, void *m)
 
__global__ void WENOFifthOrderCalculateWeightsM_kernel (int npoints_grid, int npoints_local_wghosts, int ndims, int dir, int ghosts, int nvars, int weno_size, int offset, int stride, int is_crweno, int is_mpi_ip_zero, int is_mpi_ip_proc, double weno_eps, const int *dim, const double *fC, const double *uC, double *w1, double *w2, double *w3)
 
int gpuWENOFifthOrderCalculateWeightsM (double *__restrict__ fC, double *__restrict__ uC, double *__restrict__ x, int dir, void *s, void *m)
 
__global__ void WENOFifthOrderCalculateWeightsYC_kernel (int npoints_grid, int npoints_local_wghosts, int ndims, int dir, int ghosts, int nvars, int weno_size, int offset, int stride, int is_crweno, int is_mpi_ip_zero, int is_mpi_ip_proc, double weno_eps, const int *__restrict__ dim, const double *__restrict__ fC, const double *__restrict__ uC, double *__restrict__ w1, double *__restrict__ w2, double *__restrict__ w3)
 
int gpuWENOFifthOrderCalculateWeightsYC (double *__restrict__ fC, double *__restrict__ uC, double *__restrict__ x, int dir, void *s, void *m)
 

Detailed Description

Functions to compute the nonlinear weights for WENO-type schemes.

Author
Youngdae Kim

Definition in file WENOFifthOrderCalculateWeights_GPU.cu.

Function Documentation

static int gpuWENOFifthOrderCalculateWeightsYC ( double *  ,
double *  ,
double *  ,
int  ,
void *  ,
void *   
)
static
static int gpuWENOFifthOrderCalculateWeightsM ( double *  ,
double *  ,
double *  ,
int  ,
void *  ,
void *   
)
static
int gpuWENOFifthOrderCalculateWeights ( double *  fC,
double *  uC,
double *  x,
int  dir,
void *  s,
void *  m 
)

Compute the nonlinear weights for 5th order WENO-type schemes. This function is a wrapper that calls the appropriate function, depending on the type of WENO weights.

Parameters
fCArray of cell-centered values of the function \({\bf f}\left({\bf u}\right)\)
uCArray of cell-centered values of the solution \({\bf u}\)
xGrid coordinates
dirSpatial dimension along which to interpolation
sObject of type HyPar containing solver-related variables
mObject of type MPIVariables containing MPI-related variables

Definition at line 30 of file WENOFifthOrderCalculateWeights_GPU.cu.

38 {
39  HyPar *solver = (HyPar*) s;
40  WENOParameters *weno = (WENOParameters*) solver->interp;
41  MPIVariables *mpi = (MPIVariables*) m;
42 
43  if (weno->yc) return(gpuWENOFifthOrderCalculateWeightsYC (fC,uC,x,dir,solver,mpi));
44  else if (weno->mapped) return(gpuWENOFifthOrderCalculateWeightsM (fC,uC,x,dir,solver,mpi));
45  else {
46  printf("ERROR! WENO functions other than yc or mapped have not been implemented on GPUs.\n");
47  return 1;
48  }
49 }
void * interp
Definition: hypar.h:362
Structure of variables/parameters needed by the WENO-type scheme.
static int gpuWENOFifthOrderCalculateWeightsYC(double *, double *, double *, int, void *, void *)
Structure of MPI-related variables.
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23
static int gpuWENOFifthOrderCalculateWeightsM(double *, double *, double *, int, void *, void *)
__global__ void WENOFifthOrderCalculateWeightsM_kernel ( int  npoints_grid,
int  npoints_local_wghosts,
int  ndims,
int  dir,
int  ghosts,
int  nvars,
int  weno_size,
int  offset,
int  stride,
int  is_crweno,
int  is_mpi_ip_zero,
int  is_mpi_ip_proc,
double  weno_eps,
const int *  dim,
const double *  fC,
const double *  uC,
double *  w1,
double *  w2,
double *  w3 
)

Kernel for gpuWENOFifthOrderCalculateWeightsM()

Definition at line 485 of file WENOFifthOrderCalculateWeights_GPU.cu.

506 {
507  int p = threadIdx.x + (blockDim.x * blockIdx.x);
508  if (p < npoints_grid) {
509  const int max_ndims = 3;
510  const double thirteen_by_twelve = 13.0 / 12.0;
511  const double one_fourth = 1.0 / 4.0;
512 
513  int bounds_inter[max_ndims], indexC[max_ndims], indexI[max_ndims];
514  int qm1L, qm2L, qm3L, qp1L, qp2L, qm1R, qm2R, qm3R, qp1R, qp2R;
515  double *ww1LF, *ww2LF, *ww3LF, *ww1RF, *ww2RF, *ww3RF;
516  double *ww1LU, *ww2LU, *ww3LU, *ww1RU, *ww2RU, *ww3RU;
517 
518  ww1LF = w1 + offset;
519  ww2LF = w2 + offset;
520  ww3LF = w3 + offset;
521  ww1RF = w1 + 2*weno_size + offset;
522  ww2RF = w2 + 2*weno_size + offset;
523  ww3RF = w3 + 2*weno_size + offset;
524  ww1LU = w1 + weno_size + offset;
525  ww2LU = w2 + weno_size + offset;
526  ww3LU = w3 + weno_size + offset;
527  ww1RU = w1 + 2*weno_size + weno_size + offset;
528  ww2RU = w2 + 2*weno_size + weno_size + offset;
529  ww3RU = w3 + 2*weno_size + weno_size + offset;
530 
531  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
532  _ArrayIndexnD_(ndims,p,bounds_inter,indexC,0);
533  _ArrayCopy1D_(indexC,indexI,ndims);
534 
535  indexC[dir] = indexI[dir]-1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1L);
536  qm3L = qm1L - 2*stride;
537  qm2L = qm1L - stride;
538  qp1L = qm1L + stride;
539  qp2L = qm1L + 2*stride;
540 
541  indexC[dir] = indexI[dir] ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1R);
542  qm3R = qm1R + 2*stride;
543  qm2R = qm1R + stride;
544  qp1R = qm1R - stride;
545  qp2R = qm1R - 2*stride;
546 
547  for (int j=0; j<nvars; j++) {
548  /* Defining stencil points */
549  const double *m3LF, *m2LF, *m1LF, *p1LF, *p2LF;
550  m3LF = (fC+qm3L);
551  m2LF = (fC+qm2L);
552  m1LF = (fC+qm1L);
553  p1LF = (fC+qp1L);
554  p2LF = (fC+qp2L);
555  const double *m3RF, *m2RF, *m1RF, *p1RF, *p2RF;
556  m3RF = (fC+qm3R);
557  m2RF = (fC+qm2R);
558  m1RF = (fC+qm1R);
559  p1RF = (fC+qp1R);
560  p2RF = (fC+qp2R);
561  const double *m3LU, *m2LU, *m1LU, *p1LU, *p2LU;
562  m3LU = (uC+qm3L);
563  m2LU = (uC+qm2L);
564  m1LU = (uC+qm1L);
565  p1LU = (uC+qp1L);
566  p2LU = (uC+qp2L);
567  const double *m3RU, *m2RU, *m1RU, *p1RU, *p2RU;
568  m3RU = (uC+qm3R);
569  m2RU = (uC+qm2R);
570  m1RU = (uC+qm1R);
571  p1RU = (uC+qp1R);
572  p2RU = (uC+qp2R);
573 
574  qm3L += npoints_local_wghosts;
575  qm2L += npoints_local_wghosts;
576  qm1L += npoints_local_wghosts;
577  qp1L += npoints_local_wghosts;
578  qp2L += npoints_local_wghosts;
579 
580  qm3R += npoints_local_wghosts;
581  qm2R += npoints_local_wghosts;
582  qm1R += npoints_local_wghosts;
583  qp1R += npoints_local_wghosts;
584  qp2R += npoints_local_wghosts;
585 
586  double c1, c2, c3;
587  if (is_crweno) {
588  if ( (is_mpi_ip_zero && (indexI[dir] == 0 ))
589  || (is_mpi_ip_proc && (indexI[dir] == dim[dir])) ) {
590  /* Use WENO5 at the physical boundaries */
594  } else {
595  /* CRWENO5 at the interior points */
599  }
600  } else {
601  /* WENO5 and HCWENO5 */
605  }
606 
607  /* calculate WENO weights */
608  _WENOWeights_v_M_Scalar_((ww1LF+p),(ww2LF+p),(ww3LF+p),c1,c2,c3,m3LF,m2LF,m1LF,p1LF,p2LF,weno_eps,0);
609  _WENOWeights_v_M_Scalar_((ww1RF+p),(ww2RF+p),(ww3RF+p),c1,c2,c3,m3RF,m2RF,m1RF,p1RF,p2RF,weno_eps,0);
610  _WENOWeights_v_M_Scalar_((ww1LU+p),(ww2LU+p),(ww3LU+p),c1,c2,c3,m3LU,m2LU,m1LU,p1LU,p2LU,weno_eps,0);
611  _WENOWeights_v_M_Scalar_((ww1RU+p),(ww2RU+p),(ww3RU+p),c1,c2,c3,m3RU,m2RU,m1RU,p1RU,p2RU,weno_eps,0);
612  p += npoints_grid;
613  }
614  }
615 
616  return;
617 }
#define _ArrayIndexnD_(N, index, imax, i, ghost)
#define _WENO_OPTIMAL_WEIGHT_2_
#define _ArrayIndex1D_(N, imax, i, ghost, index)
#define _CRWENO_OPTIMAL_WEIGHT_1_
#define _WENO_OPTIMAL_WEIGHT_3_
#define _WENO_OPTIMAL_WEIGHT_1_
#define _WENOWeights_v_M_Scalar_(w1, w2, w3, c1, c2, c3, m3, m2, m1, p1, p2, eps, idx)
#define _ArrayCopy1D_(x, y, size)
#define _CRWENO_OPTIMAL_WEIGHT_2_
#define _CRWENO_OPTIMAL_WEIGHT_3_
int gpuWENOFifthOrderCalculateWeightsM ( double *__restrict__  fC,
double *__restrict__  uC,
double *__restrict__  x,
int  dir,
void *  s,
void *  m 
)

Computes the nonlinear weights for the 5th order component-wise WENO-type schemes using the mapped formulation of Henrick, Aslam & Powers:

\begin{eqnarray} \omega_k &=& \frac {a_k} {\sum_{j=1}^3 a_j },\ a_k = \frac {\tilde{\omega}_k \left( c_k + c_k^2 - 3c_k\tilde{\omega}_k + \tilde{\omega}_k^2\right)} {c_k^2 + \tilde{\omega}_k\left(1-2c_k\right)}, \\ \tilde{\omega}_k &=& \frac {\tilde{a}_k} {\sum_{j=1}^3 \tilde{a}_j },\ \tilde{a}_k = \frac {c_k} {\left(\beta_k+\epsilon\right)^p},\ k = 1,2,3, \end{eqnarray}

where \(c_k\) are the optimal weights, \(p\) is hardcoded to \(2\), and \(\epsilon\) is an input parameter (WENOParameters::eps) (typically \(10^{-6}\)). The smoothness indicators \(\beta_k\) are given by:

\begin{eqnarray} \beta_1 &=& \frac{13}{12} \left(f_{j-2}-2f_{j-1}+f_j\right)^2 + \frac{1}{4}\left(f_{j-2}-4f_{j-1}+3f_j\right)^2 \\ \beta_2 &=& \frac{13}{12} \left(f_{j-1}-2f_j+f_{j+1}\right)^2 + \frac{1}{4}\left(f_{j-1}-f_{j+1}\right)^2 \\ \beta_3 &=& \frac{13}{12} \left(f_j-2f_{j+1}+f_{j+2}\right)^2 + \frac{1}{4}\left(3f_j-4f_{j+1}+f_{j+2}\right)^2 \end{eqnarray}

Notes:

  • This function computes the weights for the entire grid (for interpolation along a given spatial dimension dir ). Thus, it loops over all grid lines along dir.
  • The weights are computed for all components, for both the left- and right-biased interpolations, and for both the flux function \({\bf f}\left({\bf u}\right)\) and the solution \(\bf u\). Thus, this approach of computing and storing the weights is quite memory-intensive.
  • The variable offset denotes the offset in the complete array from where the weights for interpolation along the current interpolation dimension (dir ) are stored.

Reference:

Parameters
fCArray of cell-centered values of the function \({\bf f}\left({\bf u}\right)\)
uCArray of cell-centered values of the solution \({\bf u}\)
xGrid coordinates
dirSpatial dimension along which to interpolation
sObject of type HyPar containing solver-related variables
mObject of type MPIVariables containing MPI-related variables

Definition at line 647 of file WENOFifthOrderCalculateWeights_GPU.cu.

655 {
656  HyPar *solver = (HyPar*) s;
657  WENOParameters *weno = (WENOParameters*) solver->interp;
658  MPIVariables *mpi = (MPIVariables*) m;
659 
660  int ghosts = solver->ghosts;
661  int ndims = solver->ndims;
662  int nvars = solver->nvars;
663  int *dim = solver->dim_local;
664  int *stride= solver->stride_with_ghosts;
665 
666  /* calculate dimension offset */
667  int offset = weno->offset[dir];
668  int bounds_inter[ndims];
669  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
670  int npoints_grid; _ArrayProduct1D_(bounds_inter,ndims,npoints_grid);
671  int nblocks = (npoints_grid-1) / GPU_THREADS_PER_BLOCK + 1;
672 
673  int is_crweno = (strcmp(solver->spatial_scheme_hyp,_FIFTH_ORDER_CRWENO_) == 0) ? 1 : 0;
674  int is_mpi_ip_zero = (mpi->ip[dir] == 0) ? 1 : 0;
675  int is_mpi_ip_proc = (mpi->ip[dir] == mpi->iproc[dir]-1) ? 1 : 0;
676 
677 #if defined(GPU_STAT)
678  cudaEvent_t startEvent, stopEvent;
679  float milliseconds = 0;
680 
681  checkCuda( cudaEventCreate(&startEvent) );
682  checkCuda( cudaEventCreate(&stopEvent) );
683 
684  int weno_memory_accessed = 12*npoints_grid*nvars*sizeof(double);
685  int fCuC_memory_accessed = 1;
686  for (int d=0; d<ndims; d++) {
687  if (d == dir) fCuC_memory_accessed *= (dim[d]+2*ghosts);
688  else fCuC_memory_accessed *= dim[d];
689  }
690  fCuC_memory_accessed *= nvars*sizeof(double);
691 
692  checkCuda( cudaEventRecord(startEvent, 0) );
693 #endif
694 
695  WENOFifthOrderCalculateWeightsM_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
696  npoints_grid, solver->npoints_local_wghosts, ndims, dir, ghosts, nvars, weno->size, offset, stride[dir],
697  is_crweno, is_mpi_ip_zero, is_mpi_ip_proc, weno->eps,
698  solver->gpu_dim_local, fC, uC, weno->w1, weno->w2, weno->w3
699  );
700  cudaDeviceSynchronize();
701 
702 #if defined(GPU_STAT)
703  checkCuda( cudaEventRecord(stopEvent, 0) );
704  checkCuda( cudaEventSynchronize(stopEvent) );
705  checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
706 
707  printf("%-50s GPU time (secs) = %.6f dir = %d bandwidth (GB/s) = %6.2f stride = %d\n",
708  "WENOFifthOrderCalculateWeightsM2", milliseconds*1e-3, dir,
709  (1e-6*(weno_memory_accessed+fCuC_memory_accessed)/milliseconds),
710  stride[dir]);
711 
712  checkCuda( cudaEventDestroy(startEvent) );
713  checkCuda( cudaEventDestroy(stopEvent) );
714 #endif
715 
716  return 0;
717 }
char spatial_scheme_hyp[_MAX_STRING_SIZE_]
Definition: hypar.h:84
void * interp
Definition: hypar.h:362
int npoints_local_wghosts
Definition: hypar.h:42
int * dim_local
Definition: hypar.h:37
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
Structure of variables/parameters needed by the WENO-type scheme.
int ghosts
Definition: hypar.h:52
int * gpu_dim_local
Definition: hypar.h:455
int * stride_with_ghosts
Definition: hypar.h:414
#define _ArrayCopy1D_(x, y, size)
int nvars
Definition: hypar.h:29
int ndims
Definition: hypar.h:26
#define _FIFTH_ORDER_CRWENO_
Definition: interpolation.h:28
#define _ArrayProduct1D_(x, size, p)
Structure of MPI-related variables.
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23
__global__ void WENOFifthOrderCalculateWeightsYC_kernel ( int  npoints_grid,
int  npoints_local_wghosts,
int  ndims,
int  dir,
int  ghosts,
int  nvars,
int  weno_size,
int  offset,
int  stride,
int  is_crweno,
int  is_mpi_ip_zero,
int  is_mpi_ip_proc,
double  weno_eps,
const int *__restrict__  dim,
const double *__restrict__  fC,
const double *__restrict__  uC,
double *__restrict__  w1,
double *__restrict__  w2,
double *__restrict__  w3 
)

Kernel for gpuWENOFifthOrderCalculateWeightsYC()

Definition at line 721 of file WENOFifthOrderCalculateWeights_GPU.cu.

742 {
743  int p = threadIdx.x + (blockDim.x * blockIdx.x);
744  if (p < npoints_grid) {
745  const int max_ndims = 3;
746  const double thirteen_by_twelve = 13.0 / 12.0;
747  const double one_fourth = 1.0 / 4.0;
748 
749  int bounds_inter[max_ndims], indexC[max_ndims], indexI[max_ndims];
750  int qm1L, qm2L, qm3L, qp1L, qp2L, qm1R, qm2R, qm3R, qp1R, qp2R;
751  double *ww1LF, *ww2LF, *ww3LF, *ww1RF, *ww2RF, *ww3RF;
752  double *ww1LU, *ww2LU, *ww3LU, *ww1RU, *ww2RU, *ww3RU;
753 
754  ww1LF = w1 + offset;
755  ww2LF = w2 + offset;
756  ww3LF = w3 + offset;
757  ww1RF = w1 + 2*weno_size + offset;
758  ww2RF = w2 + 2*weno_size + offset;
759  ww3RF = w3 + 2*weno_size + offset;
760  ww1LU = w1 + weno_size + offset;
761  ww2LU = w2 + weno_size + offset;
762  ww3LU = w3 + weno_size + offset;
763  ww1RU = w1 + 2*weno_size + weno_size + offset;
764  ww2RU = w2 + 2*weno_size + weno_size + offset;
765  ww3RU = w3 + 2*weno_size + weno_size + offset;
766 
767  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
768  _ArrayIndexnD_(ndims,p,bounds_inter,indexC,0);
769  _ArrayCopy1D_(indexC,indexI,ndims);
770 
771  indexC[dir] = indexI[dir]-1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1L);
772  qm3L = qm1L - 2*stride;
773  qm2L = qm1L - stride;
774  qp1L = qm1L + stride;
775  qp2L = qm1L + 2*stride;
776 
777  indexC[dir] = indexI[dir] ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1R);
778  qm3R = qm1R + 2*stride;
779  qm2R = qm1R + stride;
780  qp1R = qm1R - stride;
781  qp2R = qm1R - 2*stride;
782 
783  for (int j=0; j <nvars; j++) {
784  /* Defining stencil points */
785  const double *m3LF, *m2LF, *m1LF, *p1LF, *p2LF;
786  m3LF = (fC+qm3L);
787  m2LF = (fC+qm2L);
788  m1LF = (fC+qm1L);
789  p1LF = (fC+qp1L);
790  p2LF = (fC+qp2L);
791  const double *m3RF, *m2RF, *m1RF, *p1RF, *p2RF;
792  m3RF = (fC+qm3R);
793  m2RF = (fC+qm2R);
794  m1RF = (fC+qm1R);
795  p1RF = (fC+qp1R);
796  p2RF = (fC+qp2R);
797  const double *m3LU, *m2LU, *m1LU, *p1LU, *p2LU;
798  m3LU = (uC+qm3L);
799  m2LU = (uC+qm2L);
800  m1LU = (uC+qm1L);
801  p1LU = (uC+qp1L);
802  p2LU = (uC+qp2L);
803  const double *m3RU, *m2RU, *m1RU, *p1RU, *p2RU;
804  m3RU = (uC+qm3R);
805  m2RU = (uC+qm2R);
806  m1RU = (uC+qm1R);
807  p1RU = (uC+qp1R);
808  p2RU = (uC+qp2R);
809 
810  qm3L += npoints_local_wghosts;
811  qm2L += npoints_local_wghosts;
812  qm1L += npoints_local_wghosts;
813  qp1L += npoints_local_wghosts;
814  qp2L += npoints_local_wghosts;
815 
816  qm3R += npoints_local_wghosts;
817  qm2R += npoints_local_wghosts;
818  qm1R += npoints_local_wghosts;
819  qp1R += npoints_local_wghosts;
820  qp2R += npoints_local_wghosts;
821 
822  double c1, c2, c3;
823  if (is_crweno) {
824  if ( (is_mpi_ip_zero && (indexI[dir] == 0 ))
825  || (is_mpi_ip_proc && (indexI[dir] == dim[dir])) ) {
826  /* Use WENO5 at the physical boundaries */
830  } else {
831  /* CRWENO5 at the interior points */
835  }
836  } else {
837  /* WENO5 and HCWENO5 */
841  }
842 
843  /* calculate WENO weights */
844  _WENOWeights_v_YC_Scalar_((ww1LF+p),(ww2LF+p),(ww3LF+p),c1,c2,c3,m3LF,m2LF,m1LF,p1LF,p2LF,weno_eps,0);
845  _WENOWeights_v_YC_Scalar_((ww1RF+p),(ww2RF+p),(ww3RF+p),c1,c2,c3,m3RF,m2RF,m1RF,p1RF,p2RF,weno_eps,0);
846  _WENOWeights_v_YC_Scalar_((ww1LU+p),(ww2LU+p),(ww3LU+p),c1,c2,c3,m3LU,m2LU,m1LU,p1LU,p2LU,weno_eps,0);
847  _WENOWeights_v_YC_Scalar_((ww1RU+p),(ww2RU+p),(ww3RU+p),c1,c2,c3,m3RU,m2RU,m1RU,p1RU,p2RU,weno_eps,0);
848  p += npoints_grid;
849  }
850  }
851 
852  return;
853 }
#define _WENOWeights_v_YC_Scalar_(w1, w2, w3, c1, c2, c3, m3, m2, m1, p1, p2, eps, idx)
#define _ArrayIndexnD_(N, index, imax, i, ghost)
#define _WENO_OPTIMAL_WEIGHT_2_
#define _ArrayIndex1D_(N, imax, i, ghost, index)
#define _CRWENO_OPTIMAL_WEIGHT_1_
#define _WENO_OPTIMAL_WEIGHT_3_
#define _WENO_OPTIMAL_WEIGHT_1_
#define _ArrayCopy1D_(x, y, size)
#define _CRWENO_OPTIMAL_WEIGHT_2_
#define _CRWENO_OPTIMAL_WEIGHT_3_
int gpuWENOFifthOrderCalculateWeightsYC ( double *__restrict__  fC,
double *__restrict__  uC,
double *__restrict__  x,
int  dir,
void *  s,
void *  m 
)

Computes the nonlinear weights for the 5th order component-wise WENO-type schemes using the ESWENO formulation of Yamaleev & Carpenter. Note that only the formulation for the nonlinear weights is adopted and implemented here, not the ESWENO scheme as a whole.

\begin{equation} \omega_k = \frac {a_k} {\sum_{j=1}^3 a_j },\ a_k = c_k \left( 1 + \frac{\tau_5}{\beta_k+\epsilon} \right)^p,\ k = 1,2,3, \end{equation}

where \(c_k\) are the optimal weights, \(p\) is hardcoded to \(2\), and \(\epsilon\) is an input parameter (WENOParameters::eps) (typically \(10^{-6}\)). The smoothness indicators \(\beta_k\) are given by:

\begin{eqnarray} \beta_1 &=& \frac{13}{12} \left(f_{j-2}-2f_{j-1}+f_j\right)^2 + \frac{1}{4}\left(f_{j-2}-4f_{j-1}+3f_j\right)^2 \\ \beta_2 &=& \frac{13}{12} \left(f_{j-1}-2f_j+f_{j+1}\right)^2 + \frac{1}{4}\left(f_{j-1}-f_{j+1}\right)^2 \\ \beta_3 &=& \frac{13}{12} \left(f_j-2f_{j+1}+f_{j+2}\right)^2 + \frac{1}{4}\left(3f_j-4f_{j+1}+f_{j+2}\right)^2, \end{eqnarray}

and \(\tau_5 = \left( f_{j-2}-4f_{j-1}+6f_j-4f_{j+1}+f_{j+2} \right)^2\).

Notes:

  • This function computes the weights for the entire grid (for interpolation along a given spatial dimension dir ). Thus, it loops over all grid lines along dir.
  • The weights are computed for all components, for both the left- and right-biased interpolations, and for both the flux function \({\bf f}\left({\bf u}\right)\) and the solution \(\bf u\). Thus, this approach of computing and storing the weights is quite memory-intensive.
  • The variable offset denotes the offset in the complete array from where the weights for interpolation along the current interpolation dimension (dir ) are stored.

Reference:

Parameters
fCArray of cell-centered values of the function \({\bf f}\left({\bf u}\right)\)
uCArray of cell-centered values of the solution \({\bf u}\)
xGrid coordinates
dirSpatial dimension along which to interpolation
sObject of type HyPar containing solver-related variables
mObject of type MPIVariables containing MPI-related variables

Definition at line 884 of file WENOFifthOrderCalculateWeights_GPU.cu.

892 {
893  HyPar *solver = (HyPar*) s;
894  WENOParameters *weno = (WENOParameters*) solver->interp;
895  MPIVariables *mpi = (MPIVariables*) m;
896 
897  int ghosts = solver->ghosts;
898  int ndims = solver->ndims;
899  int nvars = solver->nvars;
900  int *dim = solver->dim_local;
901  int *stride= solver->stride_with_ghosts;
902 
903  /* calculate dimension offset */
904  int offset = weno->offset[dir];
905  int bounds_inter[ndims];
906  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
907  int npoints_grid; _ArrayProduct1D_(bounds_inter,ndims,npoints_grid);
908  int nblocks = (npoints_grid-1) / GPU_THREADS_PER_BLOCK + 1;
909 
910  int is_crweno = (strcmp(solver->spatial_scheme_hyp,_FIFTH_ORDER_CRWENO_) == 0) ? 1 : 0;
911  int is_mpi_ip_zero = (mpi->ip[dir] == 0) ? 1 : 0;
912  int is_mpi_ip_proc = (mpi->ip[dir] == mpi->iproc[dir]-1) ? 1 : 0;
913 
914 #if defined(GPU_STAT)
915  cudaEvent_t startEvent, stopEvent;
916  float milliseconds = 0;
917 
918  checkCuda( cudaEventCreate(&startEvent) );
919  checkCuda( cudaEventCreate(&stopEvent) );
920 
921  int weno_memory_accessed = 12*npoints_grid*nvars*sizeof(double);
922  int fCuC_memory_accessed = 1;
923  for (int d=0; d<ndims; d++) {
924  if (d == dir) fCuC_memory_accessed *= (dim[d]+2*ghosts);
925  else fCuC_memory_accessed *= dim[d];
926  }
927  fCuC_memory_accessed *= nvars*sizeof(double);
928 
929  checkCuda( cudaEventRecord(startEvent, 0) );
930 #endif
931 
932  WENOFifthOrderCalculateWeightsYC_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
933  npoints_grid, solver->npoints_local_wghosts, ndims, dir, ghosts, nvars, weno->size, offset, stride[dir],
934  is_crweno, is_mpi_ip_zero, is_mpi_ip_proc, weno->eps,
935  solver->gpu_dim_local, fC, uC, weno->w1, weno->w2, weno->w3
936  );
937  cudaDeviceSynchronize();
938 
939 #if defined(GPU_STAT)
940  checkCuda( cudaEventRecord(stopEvent, 0) );
941  checkCuda( cudaEventSynchronize(stopEvent) );
942  checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
943 
944  printf("%-50s GPU time (secs) = %.6f dir = %d bandwidth (GB/s) = %6.2f stride = %d\n",
945  "WENOFifthOrderCalculateWeightsYC2", milliseconds*1e-3, dir,
946  (1e-6*(weno_memory_accessed+fCuC_memory_accessed)/milliseconds),
947  stride[dir]);
948 
949  checkCuda( cudaEventDestroy(startEvent) );
950  checkCuda( cudaEventDestroy(stopEvent) );
951 #endif
952 
953  return (0);
954 }
char spatial_scheme_hyp[_MAX_STRING_SIZE_]
Definition: hypar.h:84
void * interp
Definition: hypar.h:362
int npoints_local_wghosts
Definition: hypar.h:42
int * dim_local
Definition: hypar.h:37
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
Structure of variables/parameters needed by the WENO-type scheme.
int ghosts
Definition: hypar.h:52
int * gpu_dim_local
Definition: hypar.h:455
int * stride_with_ghosts
Definition: hypar.h:414
#define _ArrayCopy1D_(x, y, size)
int nvars
Definition: hypar.h:29
int ndims
Definition: hypar.h:26
#define _FIFTH_ORDER_CRWENO_
Definition: interpolation.h:28
#define _ArrayProduct1D_(x, size, p)
Structure of MPI-related variables.
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23