HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
FirstDerivativeFourthOrder_GPU.cu File Reference

GPU implementation of fourth order finite-difference approximation to the first derivative. More...

#include <basic_gpu.h>
#include <firstderivative.h>
#include <arrayfunctions.h>
#include <mpivars.h>
#include <hypar.h>

Go to the source code of this file.

Typedefs

typedef MPIVariables MPIContext
 
typedef HyPar SolverContext
 

Functions

__global__ void FirstDerivativeFourthOrderCentral_boundary_kernel (int N_outer, int npoints_local_wghosts, int ghosts, int ndims, int nvars, int dir, const int *dim, const double *f, double *Df)
 
__global__ void FirstDerivativeFourthOrderCentral_interior_kernel (int ngrid_points, int npoints_local_wghosts, int ghosts, int ndims, int nvars, int dir, const int *dim, const double *f, double *Df)
 
int gpuFirstDerivativeFourthOrderCentral (double *Df, double *f, int dir, int bias, void *s, void *m)
 

Detailed Description

GPU implementation of fourth order finite-difference approximation to the first derivative.

Author
Youngdae Kim

Definition in file FirstDerivativeFourthOrder_GPU.cu.

Typedef Documentation

Definition at line 11 of file FirstDerivativeFourthOrder_GPU.cu.

Definition at line 12 of file FirstDerivativeFourthOrder_GPU.cu.

Function Documentation

__global__ void FirstDerivativeFourthOrderCentral_boundary_kernel ( int  N_outer,
int  npoints_local_wghosts,
int  ghosts,
int  ndims,
int  nvars,
int  dir,
const int *  dim,
const double *  f,
double *  Df 
)

Kernel for gpuFirstDerivativeFourthOrderCentral()

Definition at line 223 of file FirstDerivativeFourthOrder_GPU.cu.

234 {
235  int j = threadIdx.x + (blockDim.x * blockIdx.x);
236  if (j < N_outer) {
237  double one_twelve = 1.0/12.0;
238 
239  int i, v;
240  int indexC[GPU_MAX_NDIMS], index_outer[GPU_MAX_NDIMS], bounds_outer[GPU_MAX_NDIMS];
241  _ArrayCopy1D_(dim,bounds_outer,ndims); bounds_outer[dir] = 1;
242  _ArrayIndexnD_(ndims,j,bounds_outer,index_outer,0);
243  _ArrayCopy1D_(index_outer,indexC,ndims);
244 
245  /* left boundary */
246  for (i = -ghosts; i < -ghosts+1; i++) {
247  int qC, qp1, qp2, qp3, qp4;
248  indexC[dir] = i ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qC );
249  indexC[dir] = i+1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp1);
250  indexC[dir] = i+2; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp2);
251  indexC[dir] = i+3; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp3);
252  indexC[dir] = i+4; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp4);
253  for (v=0; v<nvars; v++) {
254  Df[qC+v*npoints_local_wghosts] = (-25*f[qC+v*npoints_local_wghosts]+48*f[qp1+v*npoints_local_wghosts]-36*f[qp2+v*npoints_local_wghosts]+16*f[qp3+v*npoints_local_wghosts]-3*f[qp4+v*npoints_local_wghosts])*one_twelve;
255  }
256  }
257  for (i = -ghosts+1; i < -ghosts+2; i++) {
258  int qC, qm1, qp1, qp2, qp3;
259  indexC[dir] = i-1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1);
260  indexC[dir] = i ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qC );
261  indexC[dir] = i+1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp1);
262  indexC[dir] = i+2; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp2);
263  indexC[dir] = i+3; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp3);
264  for (v=0; v<nvars; v++)
265  Df[qC+v*npoints_local_wghosts] = (-3*f[qm1+v*npoints_local_wghosts]-10*f[qC+v*npoints_local_wghosts]+18*f[qp1+v*npoints_local_wghosts]-6*f[qp2+v*npoints_local_wghosts]+f[qp3+v*npoints_local_wghosts])*one_twelve;
266  }
267  /* right boundary */
268  for (i = dim[dir]+ghosts-2; i < dim[dir]+ghosts-1; i++) {
269  int qC, qm3, qm2, qm1, qp1;
270  indexC[dir] = i-3; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm3);
271  indexC[dir] = i-2; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm2);
272  indexC[dir] = i-1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1);
273  indexC[dir] = i ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qC );
274  indexC[dir] = i+1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp1);
275  for (v=0; v<nvars; v++)
276  Df[qC+v*npoints_local_wghosts] = (-f[qm3+v*npoints_local_wghosts]+6*f[qm2+v*npoints_local_wghosts]-18*f[qm1+v*npoints_local_wghosts]+10*f[qC+v*npoints_local_wghosts]+3*f[qp1+v*npoints_local_wghosts])*one_twelve;
277  }
278  for (i = dim[dir]+ghosts-1; i < dim[dir]+ghosts; i++) {
279  int qC, qm4, qm3, qm2, qm1;
280  indexC[dir] = i-4; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm4);
281  indexC[dir] = i-3; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm3);
282  indexC[dir] = i-2; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm2);
283  indexC[dir] = i-1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1);
284  indexC[dir] = i ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qC );
285  for (v=0; v<nvars; v++)
286  Df[qC+v*npoints_local_wghosts] = (3*f[qm4+v*npoints_local_wghosts]-16*f[qm3+v*npoints_local_wghosts]+36*f[qm2+v*npoints_local_wghosts]-48*f[qm1+v*npoints_local_wghosts]+25*f[qC+v*npoints_local_wghosts])*one_twelve;
287  }
288  }
289 
290  return;
291 }
#define _ArrayIndexnD_(N, index, imax, i, ghost)
#define GPU_MAX_NDIMS
Definition: basic_gpu.h:8
#define _ArrayIndex1D_(N, imax, i, ghost, index)
#define _ArrayCopy1D_(x, y, size)
__global__ void FirstDerivativeFourthOrderCentral_interior_kernel ( int  ngrid_points,
int  npoints_local_wghosts,
int  ghosts,
int  ndims,
int  nvars,
int  dir,
const int *  dim,
const double *  f,
double *  Df 
)

Kernel for gpuFirstDerivativeFourthOrderCentral()

Definition at line 295 of file FirstDerivativeFourthOrder_GPU.cu.

306 {
307  int i = threadIdx.x + (blockDim.x * blockIdx.x);
308  if (i < ngrid_points) {
309  /* interior */
310  double one_twelve = 1.0/12.0;
311 
312  int j, v;
313  int qC, qm1, qm2, qp1, qp2;
314  int indexC[GPU_MAX_NDIMS], index_outer[GPU_MAX_NDIMS], bounds_outer[GPU_MAX_NDIMS];
315 
316  j = i/(dim[dir] + 2*ghosts - 4); /* Outer index */
317  _ArrayCopy1D_(dim,bounds_outer,ndims); bounds_outer[dir] = 1;
318  _ArrayIndexnD_(ndims,j,bounds_outer,index_outer,0);
319  _ArrayCopy1D_(index_outer,indexC,ndims);
320 
321  //i += (-ghosts + 2);
322  i = (i % (dim[dir] + 2*ghosts - 4)) + (-ghosts + 2);
323  indexC[dir] = i-2; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm2);
324  indexC[dir] = i-1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qm1);
325  indexC[dir] = i ; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qC );
326  indexC[dir] = i+1; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp1);
327  indexC[dir] = i+2; _ArrayIndex1D_(ndims,dim,indexC,ghosts,qp2);
328  for (v=0; v<nvars; v++)
329  Df[qC+v*npoints_local_wghosts] = (f[qm2+v*npoints_local_wghosts]-8*f[qm1+v*npoints_local_wghosts]+8*f[qp1+v*npoints_local_wghosts]-f[qp2+v*npoints_local_wghosts])*one_twelve;
330  }
331  return;
332 }
#define _ArrayIndexnD_(N, index, imax, i, ghost)
#define GPU_MAX_NDIMS
Definition: basic_gpu.h:8
#define _ArrayIndex1D_(N, imax, i, ghost, index)
#define _ArrayCopy1D_(x, y, size)
int gpuFirstDerivativeFourthOrderCentral ( double *  Df,
double *  f,
int  dir,
int  bias,
void *  s,
void *  m 
)

Computes the fourth-order finite-difference approximation to the first derivative (Note: not divided by the grid spacing):

\begin{equation} \left(\partial f\right)_i = \left\{ \begin{array}{ll} \frac{1}{12}\left(-25f_i+48f_{i+1}-36f_{i+2}+16f_{i+3}-3f_{i+4}\right) & i=-g \\ \frac{1}{12}\left(-3f_{i-1}-10f_i+18f_{i+1}-6f_{i+2}+f_{i+3}\right) & i = -g+1 \\ \frac{1}{2}\left( f_{i-2}-8f_{i-1}+8f_{i+1}-f_{i+2} \right) & -g+2 \leq i \leq N+g-3 \\ \frac{1}{12}\left( -f_{i-3}+6f_{i-2}-18f_{i-1}+10f_i+3f_{i+1}\right) & i = N+g-2 \\ \frac{1}{12}\left( 3f_{i-4}-16f_{i-3}+36f_{i-2}-48f_{i-1}+25f_i \right) & i = N+g-1 \end{array}\right. \end{equation}

where \(i\) is the grid index along the spatial dimension of the derivative, \(g\) is the number of ghost points, and \(N\) is the number of grid points (not including the ghost points) in the spatial dimension of the derivative.

Notes:

  • The first derivative is computed at the grid points or the cell centers.
  • The first derivative is computed at the ghost points too. Thus, biased schemes are used at and near the boundaries.
  • Df and f are 1D arrays containing the function and its computed derivatives on a multi- dimensional grid. The derivative along the specified dimension dir is computed by looping through all grid lines along dir.
See Also
FirstDerivativeFourthOrderCentral(), gpuFirstDerivativeFourthOrderCentral()
Parameters
DfArray to hold the computed first derivative (with ghost points)
fArray containing the grid point function values whose first derivative is to be computed (with ghost points)
dirThe spatial dimension along which the derivative is computed
biasForward or backward differencing for non-central finite-difference schemes (-1: backward, 1: forward)
sSolver object of type SolverContext
mMPI object of type MPIContext

Definition at line 351 of file FirstDerivativeFourthOrder_GPU.cu.

361 {
362  SolverContext *solver = (SolverContext*) s;
363 
364  int ghosts = solver->ghosts;
365  int ndims = solver->ndims;
366  int nvars = solver->nvars;
367  int *dim = solver->dim_local;
368 
369  if ((!Df) || (!f)) {
370  fprintf(stderr, "Error in FirstDerivativeFourthOrder(): input arrays not allocated.\n");
371  return(1);
372  }
373 
374  /* create index and bounds for the outer loop, i.e., to loop over all 1D lines along
375  dimension "dir" */
376  int bounds_outer[ndims];
377  _ArrayCopy1D_(dim,bounds_outer,ndims); bounds_outer[dir] = 1;
378  int N_outer; _ArrayProduct1D_(bounds_outer,ndims,N_outer);
379  int nblocks = (N_outer - 1) / GPU_THREADS_PER_BLOCK + 1;
380 
381 #if defined(GPU_STAT)
382  cudaEvent_t startEvent, stopEvent;
383  float milliseconds;
384 
385  checkCuda( cudaEventCreate(&startEvent) );
386  checkCuda( cudaEventCreate(&stopEvent) );
387 
388  checkCuda( cudaEventRecord(startEvent, 0) );
389 #endif
390 
391  FirstDerivativeFourthOrderCentral_boundary_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
392  N_outer, solver->npoints_local_wghosts, ghosts, ndims, nvars, dir, solver->gpu_dim_local, f, Df
393  );
394 
395 #if defined(GPU_STAT)
396  checkCuda( cudaEventRecord(stopEvent, 0) );
397  checkCuda( cudaEventSynchronize(stopEvent) );
398  checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
399 
400  printf("%-50s GPU time (secs) = %.6f\n",
401  "FirstDerivativeFourthOrderCentral_boundary", milliseconds*1e-3);
402 #endif
403 
404  int npoints_grid = N_outer*(dim[dir] + 2*ghosts - 4);
405  nblocks = (npoints_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
406 
407 #if defined(GPU_STAT)
408  checkCuda( cudaEventRecord(startEvent, 0) );
409 #endif
410 
411  FirstDerivativeFourthOrderCentral_interior_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
412  npoints_grid, solver->npoints_local_wghosts, ghosts, ndims, nvars, dir, solver->gpu_dim_local, f, Df
413  );
414  cudaDeviceSynchronize();
415 
416 #if defined(GPU_STAT)
417  checkCuda( cudaEventRecord(stopEvent, 0) );
418  checkCuda( cudaEventSynchronize(stopEvent) );
419  checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
420 
421  printf("%-50s GPU time (secs) = %.6f\n",
422  "FirstDerivativeFourthOrderCentral_interior", milliseconds*1e-3);
423 #endif
424 
425  return(0);
426 }
int npoints_local_wghosts
Definition: hypar.h:42
int * dim_local
Definition: hypar.h:37
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
int ghosts
Definition: hypar.h:52
int * gpu_dim_local
Definition: hypar.h:455
#define _ArrayCopy1D_(x, y, size)
int nvars
Definition: hypar.h:29
int ndims
Definition: hypar.h:26
#define _ArrayProduct1D_(x, size, p)
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23