HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
NavierStokes2DPreStep_GPU.cu
/*! @file NavierStokes2DPreStep_GPU.cu
    @brief Pre-step function for 2D Navier Stokes equations
    @author Youngdae Kim
*/

#include <time.h>
#include <stdlib.h> /* malloc, free */
#include <math.h>   /* fabs */
#include <basic_gpu.h>
#include <arrayfunctions_gpu.h>
#include <matmult_native.h>
#include <physicalmodels/navierstokes2d.h>
#include <hypar.h>

#ifdef CUDA_VAR_ORDERDING_AOS

/*! Kernel for gpuNavierStokes2DPreStep */
__global__
void NavierStokes2DPreStep_kernel(
  int size,
  double gamma,
  const double *u,
  double *fast_jac
)
{
  const int JacSize = _MODEL_NVARS_*_MODEL_NVARS_;
  int dir, p;
  double *A;
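  /* per-thread 4x4 work arrays: D holds the eigenvalues, L and R the left and
     right eigenvector matrices, and DL the intermediate product D*L */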
  double D[_MODEL_NVARS_*_MODEL_NVARS_], L[_MODEL_NVARS_*_MODEL_NVARS_],
         R[_MODEL_NVARS_*_MODEL_NVARS_], DL[_MODEL_NVARS_*_MODEL_NVARS_];

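  /* one thread per grid point */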
  p = threadIdx.x + blockDim.x * blockIdx.x;
  if (p < size) {
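    /* for each grid point, fast_jac stores two blocks of JacSize entries,
       one per spatial direction */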
    dir = _XDIR_;
    A = (fast_jac + 2*JacSize*p + dir*JacSize);
    /* get the eigenvalues, and left & right eigenvectors */
    _NavierStokes2DEigenvalues_      ((u+_MODEL_NVARS_*p),D,gamma,dir);
    _NavierStokes2DLeftEigenvectors_ ((u+_MODEL_NVARS_*p),L,gamma,dir);
    _NavierStokes2DRightEigenvectors_((u+_MODEL_NVARS_*p),R,gamma,dir);
    /* remove the entropy modes (corresponding to eigenvalues u) */
    D[2*_MODEL_NVARS_+2] = D[3*_MODEL_NVARS_+3] = 0.0;
    /* assemble the Jacobian */
    MatMult4(_MODEL_NVARS_,DL,D,L);
    MatMult4(_MODEL_NVARS_,A,R,DL);

    dir = _YDIR_;
    A = (fast_jac + 2*JacSize*p + dir*JacSize);
    /* get the eigenvalues, and left & right eigenvectors */
    _NavierStokes2DEigenvalues_      ((u+_MODEL_NVARS_*p),D,gamma,dir);
    _NavierStokes2DLeftEigenvectors_ ((u+_MODEL_NVARS_*p),L,gamma,dir);
    _NavierStokes2DRightEigenvectors_((u+_MODEL_NVARS_*p),R,gamma,dir);
    /* remove the entropy modes (corresponding to eigenvalues v) */
    D[1*_MODEL_NVARS_+1] = D[3*_MODEL_NVARS_+3] = 0.0;
    /* assemble the Jacobian */
    MatMult4(_MODEL_NVARS_,DL,D,L);
    MatMult4(_MODEL_NVARS_,A,R,DL);
  }

  return;
}

/*! Pre-step function for the 2D Navier Stokes equations: This function
    is called at the beginning of each time step.
    + The solution at the beginning of the step is copied into #NavierStokes2D::solution
      for the linearized flux partitioning.
    + The linearized "fast" Jacobian (representing the acoustic modes) #NavierStokes2D::fast_jac
      is computed as:
      \f{equation}{
        A_f\left({\bf u}\right) = R\left({\bf u}\right)\Lambda_f\left({\bf u}\right)L\left({\bf u}\right)
      \f}
      where \f$R\f$ and \f$L\f$ are the matrices of right and left eigenvectors, and,
      \f{equation}{
        \Lambda_f = diag\left[0,0,u+c,u-c\right]
      \f}
      with \f$c\f$ being the speed of sound.
    \n\n
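    The y-direction Jacobian is assembled analogously with \f$\Lambda_f = diag\left[0,0,v+c,v-c\right]\f$;
    in both directions, the entropy modes (eigenvalue \f$u\f$ or \f$v\f$) are zeroed so that \f$A_f\f$
    retains only the acoustic characteristics.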

    Reference:
    + Ghosh, D., Constantinescu, E. M., Semi-Implicit Time Integration of Atmospheric Flows
      with Characteristic-Based Flux Partitioning, SIAM Journal on Scientific Computing,
      38 (3), 2016, A1848-A1875, http://dx.doi.org/10.1137/15M1044369.
*/
extern "C" int gpuNavierStokes2DPreStep(
  double *u,   /*!< Solution vector */
  void   *s,   /*!< Solver object of type #HyPar */
  void   *m,   /*!< MPI object of type #MPIVariables */
  double waqt  /*!< Current simulation time */
)
{
  HyPar          *solver = (HyPar*) s;
  NavierStokes2D *param  = (NavierStokes2D*) solver->physics;
  int            *dim    = solver->dim_local;
  int            ghosts  = solver->ghosts;
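
  /* number of grid points including ghost points, and the total size of the
     fast Jacobian array (2 directions, _MODEL_NVARS_^2 entries per point) */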
  static int bounds[_MODEL_NDIMS_];
  _ArrayAddCopy1D_(dim,(2*ghosts),bounds,_MODEL_NDIMS_);
  int N_grid; _ArrayProduct1D_(bounds,_MODEL_NDIMS_,N_grid);
  int size = 2*N_grid*_MODEL_NVARS_*_MODEL_NVARS_;

  double cpu_time = 0.0;
  clock_t cpu_start, cpu_end;

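  /* copy the current solution to the device; gpu_solution keeps the solution at
     the beginning of the step for the linearized flux partitioning */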
  gpuMemcpy(solver->gpu_u, u, N_grid*_MODEL_NVARS_*sizeof(double), gpuMemcpyHostToDevice);
  gpuMemcpy(param->gpu_solution, u, N_grid*_MODEL_NVARS_*sizeof(double), gpuMemcpyHostToDevice);

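  /* launch one thread per grid point and time the kernel on the host with clock() */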
  int nblocks = (N_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
  cpu_start = clock();
  NavierStokes2DPreStep_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(N_grid, param->gamma, solver->gpu_u, param->gpu_fast_jac);
  cudaDeviceSynchronize();
  cpu_end = clock();
  cpu_time += (double)(cpu_end - cpu_start) / CLOCKS_PER_SEC;

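  /* debug check: copy the GPU-computed fast Jacobian back to the host and accumulate
     its discrepancy with the CPU-computed #NavierStokes2D::fast_jac (err is not reported) */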
  double *fast_jac_h = (double *) malloc(size*sizeof(double));
  gpuMemcpy(fast_jac_h, param->gpu_fast_jac, size*sizeof(double), gpuMemcpyDeviceToHost);

  double err = 0;
  for (int i = 0; i < size; i++) {
    if (fabs(fast_jac_h[i] - param->fast_jac[i]) > 1e-8)
      err += fabs(fast_jac_h[i] - param->fast_jac[i]);
  }
  free(fast_jac_h);

  return 0;
}

#endif