HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
NavierStokes3DPreStep_GPU.cu
Go to the documentation of this file.
1 /*! @file NavierStokes3DPreStep_GPU.cu
2  @brief Pre-step function for 3D Navier Stokes equations
3  @author Youngdae Kim
4 */
5 #include <basic_gpu.h>
6 #include <arrayfunctions_gpu.h>
7 #include <mathfunctions.h>
8 #include <matmult_native.h>
9 #include <physicalmodels/navierstokes3d.h>
10 #include <hypar.h>
11 
12 #ifdef CUDA_VAR_ORDERDING_AOS
13 
14 /*! Kernel for gpuNavierStokes3DPreStep() */
15 __global__
16 void gpuNavierStokes3DPreStep_kernel(
17  int npoints_grid,
18  double gamma,
19  const double * __restrict__ u,
20  double * __restrict__ fast_jac
21 )
22 {
23  int p = blockDim.x * blockIdx.x + threadIdx.x;
24 
25  if (p < npoints_grid) {
26  double * __restrict__ A;
27  double D[_MODEL_NVARS_*_MODEL_NVARS_],L[_MODEL_NVARS_*_MODEL_NVARS_],
28  R[_MODEL_NVARS_*_MODEL_NVARS_],DL[_MODEL_NVARS_*_MODEL_NVARS_];
29  int JacSize = _MODEL_NVARS_*_MODEL_NVARS_;
30  int q = _MODEL_NVARS_*p;
31  int dir = _XDIR_;
32 
33  A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
34  /* get the eigenvalues, and left & right eigenvectors */
35  _NavierStokes3DEigenvalues_ ((u+q),_NavierStokes3D_stride_,D,gamma,dir);
36  _NavierStokes3DLeftEigenvectors_ ((u+q),_NavierStokes3D_stride_,L,gamma,dir);
37  _NavierStokes3DRightEigenvectors_((u+q),_NavierStokes3D_stride_,R,gamma,dir);
38  /* remove the entropy modes (corresponding to eigenvalues u) */
39  D[0] = D[12] = D[18] = 0.0;
40  /* assemble the Jacobian */
41  MatMult5(_MODEL_NVARS_,DL,D,L );
42  MatMult5(_MODEL_NVARS_,A ,R,DL);
43 
44  dir = _YDIR_;
45  A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
46  /* get the eigenvalues, and left & right eigenvectors */
47  _NavierStokes3DEigenvalues_ ((u+q),_NavierStokes3D_stride_,D,gamma,dir)
48  _NavierStokes3DLeftEigenvectors_ ((u+q),_NavierStokes3D_stride_,L,gamma,dir);
49  _NavierStokes3DRightEigenvectors_((u+q),_NavierStokes3D_stride_,R,gamma,dir);
50  /* remove the entropy modes (corresponding to eigenvalues v) */
51  D[0] = D[6] = D[18] = 0.0;
52  /* assemble the Jacobian */
53  MatMult5(_MODEL_NVARS_,DL,D,L );
54  MatMult5(_MODEL_NVARS_,A ,R,DL);
55 
56  dir = _ZDIR_;
57  A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
58  /* get the eigenvalues, and left & right eigenvectors */
59  _NavierStokes3DEigenvalues_ ((u+q),_NavierStokes3D_stride_,D,gamma,dir)
60  _NavierStokes3DLeftEigenvectors_ ((u+q),_NavierStokes3D_stride_,L,gamma,dir);
61  _NavierStokes3DRightEigenvectors_((u+q),_NavierStokes3D_stride_,R,gamma,dir);
62  /* remove the entropy modes (corresponding to eigenvalues v) */
63  D[0] = D[6] = D[12] = 0.0;
64  /* assemble the Jacobian */
65  MatMult5(_MODEL_NVARS_,DL,D,L );
66  MatMult5(_MODEL_NVARS_,A ,R,DL);
67  }
68  return;
69 }
70 
71 /*! Pre-step function for the 3D Navier Stokes equations: This function
72  is called at the beginning of each time step.
73  + The solution at the beginning of the step is copied into #NavierStokes3D::solution
74  for the linearized flux partitioning.
75  + The linearized "fast" Jacobian (representing the acoustic modes) #NavierStokes3D::fast_jac
76  is computed as:
77  \f{equation}{
78  A_f\left({\bf u}\right) = R\left({\bf u}\right)\Lambda_f\left({\bf u}\right)L\left({\bf u}\right)
79  \f}
80  where \f$R\f$ and \f$L\f$ are the matrices of right and left eigenvectors, and,
81  \f{equation}{
82  \Lambda_f = diag\left[0,0,0,u+c,u-c \right]
83  \f}
84  with \f$c\f$ being the speed of sound.
85  \n\n
86 
87  Reference:
88  + Ghosh, D., Constantinescu, E. M., Semi-Implicit Time Integration of Atmospheric Flows
89  with Characteristic-Based Flux Partitioning, SIAM Journal on Scientific Computing,
90  38 (3), 2016, A1848-A1875, http://dx.doi.org/10.1137/15M1044369.
91 */
92 extern "C" int gpuNavierStokes3DPreStep(
93  double *u, /*!< Solution vector */
94  void *s, /*!< Solver object of type #HyPar */
95  void *m, /*!< MPI object of type #MPIVariables */
96  double waqt /*!< Current simulation time */
97 )
98 {
99  HyPar *solver = (HyPar*) s;
100  NavierStokes3D *param = (NavierStokes3D*) solver->physics;
101 
102  int nblocks = (solver->npoints_local_wghosts-1)/GPU_THREADS_PER_BLOCK + 1;
103 
104  gpuArrayCopy1D(u,param->gpu_solution,(solver->npoints_local_wghosts)*_MODEL_NVARS_);
105  gpuNavierStokes3DPreStep_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
106  solver->npoints_local_wghosts, param->gamma, u, param->gpu_fast_jac
107  );
108  cudaDeviceSynchronize();
109 
110  return 0;
111 }
112 
113 #else
114 
115 /*! Kernel for gpuNavierStokes3DPreStep() */
116 __global__
117 void gpuNavierStokes3DPreStep_kernel(
118  int npoints_grid,
119  double gamma,
120  const double * __restrict__ u,
121  double * __restrict__ fast_jac
122 )
123 {
124  int p = blockDim.x * blockIdx.x + threadIdx.x;
125 
126  if (p < npoints_grid) {
127  double * __restrict__ A;
128  double D[_MODEL_NVARS_*_MODEL_NVARS_],L[_MODEL_NVARS_*_MODEL_NVARS_],
129  R[_MODEL_NVARS_*_MODEL_NVARS_],DL[_MODEL_NVARS_*_MODEL_NVARS_];
130  int JacSize = _MODEL_NVARS_*_MODEL_NVARS_;
131  int dir = _XDIR_;
132 
133  A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
134  /* get the eigenvalues, and left & right eigenvectors */
135  _NavierStokes3DEigenvalues_ ((u+p),npoints_grid,D,gamma,dir);
136  _NavierStokes3DLeftEigenvectors_ ((u+p),npoints_grid,L,gamma,dir);
137  _NavierStokes3DRightEigenvectors_((u+p),npoints_grid,R,gamma,dir);
138  /* remove the entropy modes (corresponding to eigenvalues u) */
139  D[0] = D[12] = D[18] = 0.0;
140  /* assemble the Jacobian */
141  MatMult5(_MODEL_NVARS_,DL,D,L );
142  MatMult5(_MODEL_NVARS_,A ,R,DL);
143 
144  dir = _YDIR_;
145  A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
146  /* get the eigenvalues, and left & right eigenvectors */
147  _NavierStokes3DEigenvalues_ ((u+p),npoints_grid,D,gamma,dir)
148  _NavierStokes3DLeftEigenvectors_ ((u+p),npoints_grid,L,gamma,dir);
149  _NavierStokes3DRightEigenvectors_((u+p),npoints_grid,R,gamma,dir);
150  /* remove the entropy modes (corresponding to eigenvalues v) */
151  D[0] = D[6] = D[18] = 0.0;
152  /* assemble the Jacobian */
153  MatMult5(_MODEL_NVARS_,DL,D,L );
154  MatMult5(_MODEL_NVARS_,A ,R,DL);
155 
156  dir = _ZDIR_;
157  A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
158  /* get the eigenvalues, and left & right eigenvectors */
159  _NavierStokes3DEigenvalues_ ((u+p),npoints_grid,D,gamma,dir)
160  _NavierStokes3DLeftEigenvectors_ ((u+p),npoints_grid,L,gamma,dir);
161  _NavierStokes3DRightEigenvectors_((u+p),npoints_grid,R,gamma,dir);
162  /* remove the entropy modes (corresponding to eigenvalues v) */
163  D[0] = D[6] = D[12] = 0.0;
164  /* assemble the Jacobian */
165  MatMult5(_MODEL_NVARS_,DL,D,L );
166  MatMult5(_MODEL_NVARS_,A ,R,DL);
167  }
168  return;
169 }
170 
171 /*! Pre-step function for the 3D Navier Stokes equations: This function
172  is called at the beginning of each time step.
173  + The solution at the beginning of the step is copied into #NavierStokes3D::solution
174  for the linearized flux partitioning.
175  + The linearized "fast" Jacobian (representing the acoustic modes) #NavierStokes3D::fast_jac
176  is computed as:
177  \f{equation}{
178  A_f\left({\bf u}\right) = R\left({\bf u}\right)\Lambda_f\left({\bf u}\right)L\left({\bf u}\right)
179  \f}
180  where \f$R\f$ and \f$L\f$ are the matrices of right and left eigenvectors, and,
181  \f{equation}{
182  \Lambda_f = diag\left[0,0,0,u+c,u-c \right]
183  \f}
184  with \f$c\f$ being the speed of sound.
185  \n\n
186 
187  Reference:
188  + Ghosh, D., Constantinescu, E. M., Semi-Implicit Time Integration of Atmospheric Flows
189  with Characteristic-Based Flux Partitioning, SIAM Journal on Scientific Computing,
190  38 (3), 2016, A1848-A1875, http://dx.doi.org/10.1137/15M1044369.
191 */
192 extern "C" int gpuNavierStokes3DPreStep(
193  double *u, /*!< Solution vector */
194  void *s, /*!< Solver object of type #HyPar */
195  void *m, /*!< MPI object of type #MPIVariables */
196  double waqt /*!< Current simulation time */
197 )
198 {
199  HyPar *solver = (HyPar*) s;
200  NavierStokes3D *param = (NavierStokes3D*) solver->physics;
201 
202  int nblocks = (solver->npoints_local_wghosts-1)/GPU_THREADS_PER_BLOCK + 1;
203 
204  gpuArrayCopy1D(u,param->gpu_solution,(solver->npoints_local_wghosts)*_MODEL_NVARS_);
205  gpuNavierStokes3DPreStep_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
206  solver->npoints_local_wghosts, param->gamma, u, param->gpu_fast_jac
207  );
208  cudaDeviceSynchronize();
209 
210  return 0;
211 }
212 
213 #endif