HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
NavierStokes3DSource_GPU.cu
Go to the documentation of this file.
1 /*! @file NavierStokes3DSource_GPU.cu
2  @author Youngdae Kim
3  @brief Compute the gravitational source term for the 3D Navier Stokes system
4 */
5 #include <basic_gpu.h>
6 #include <arrayfunctions_gpu.h>
7 #include <physicalmodels/navierstokes3d.h>
8 #include <mpivars.h>
9 #include <hypar.h>
10 
11 static int gpuNavierStokes3DSourceFunction (double*,double*,double*,void*,void*,double,int);
12 static int gpuNavierStokes3DSourceUpwind (double*,double*,double*,double*,int,void*,double);
13 
14 #ifdef CUDA_VAR_ORDERDING_AOS
15 
16 /*! Kernel function */
17 __global__
18 void gpuNavierStokes3DSource_grav_kernel(
19  int npoints_grid,
20  int ghosts,
21  int dir,
22  double gamma,
23  double RT,
24  const int * __restrict__ dim,
25  const double * __restrict__ dxinv,
26  const double * __restrict__ grav_field_f,
27  const double * __restrict__ SourceI,
28  const double * __restrict__ u,
29  double * __restrict__ source
30 )
31 {
32  int tid = blockDim.x * blockIdx.x + threadIdx.x;
33 
34  if (tid < npoints_grid) {
35  int v,p,p1,p2;
36  int index[GPU_MAX_NDIMS],index1[GPU_MAX_NDIMS],index2[GPU_MAX_NDIMS],dim_interface[GPU_MAX_NDIMS];
37 
38  _ArrayIndexnD_(_MODEL_NDIMS_,tid,dim,index,0);
39  _ArrayCopy1D_(dim,dim_interface,_MODEL_NDIMS_); dim_interface[dir]++;
40  _ArrayCopy1D_(index,index1,_MODEL_NDIMS_);
41  _ArrayCopy1D_(index,index2,_MODEL_NDIMS_); index2[dir]++;
42  _ArrayIndex1D_(_MODEL_NDIMS_,dim ,index ,ghosts,p );
43  _ArrayIndex1D_(_MODEL_NDIMS_,dim_interface,index1,0 ,p1);
44  _ArrayIndex1D_(_MODEL_NDIMS_,dim_interface,index2,0 ,p2);
45 
46  double dx_inverse; _GetCoordinate_(dir,index[dir],dim,ghosts,dxinv,dx_inverse);
47  double rho, vel[_MODEL_NDIMS_], e, P;
48  _NavierStokes3DGetFlowVar_((u+_MODEL_NVARS_*p),_NavierStokes3D_stride_,rho,vel[0],vel[1],vel[2],e,P,gamma);
49  double term[_MODEL_NVARS_] = {0.0, rho*RT*(dir==_XDIR_), rho*RT*(dir==_YDIR_), rho*RT*(dir==_ZDIR_), rho*RT*vel[dir]};
50  for (v=0; v<_MODEL_NVARS_; v++) {
51  source[_MODEL_NVARS_*p+v] += ( (term[v]*grav_field_f[p])
52  * (SourceI[_MODEL_NVARS_*p2+v]-SourceI[_MODEL_NVARS_*p1+v])*dx_inverse );
53  }
54  }
55  return;
56 }
57 
58 /*! Kernel function for gpuNavierStokes3DSourceFunction() */
59 __global__
60 void gpuNavierStokes3DSourceFunction_kernel(
61  int npoints_grid,
62  int dir,
63  const double * __restrict__ grav_field_g,
64  double * __restrict__ f
65 )
66 {
67  int p = blockDim.x * blockIdx.x + threadIdx.x;
68 
69  if (p < npoints_grid) {
70  (f+_MODEL_NVARS_*p)[0] = 0.0;
71  (f+_MODEL_NVARS_*p)[1] = grav_field_g[p] * (dir == _XDIR_);
72  (f+_MODEL_NVARS_*p)[2] = grav_field_g[p] * (dir == _YDIR_);
73  (f+_MODEL_NVARS_*p)[3] = grav_field_g[p] * (dir == _ZDIR_);
74  (f+_MODEL_NVARS_*p)[4] = grav_field_g[p];
75  }
76  return;
77 }
78 
79 /*! Kernel function for gpuNavierStokes3DSourceUpwind() */
80 __global__
81 void gpuNavierStokes3DSourceUpwind_kernel(
82  int npoints_grid,
83  const double * __restrict__ fL,
84  const double * __restrict__ fR,
85  double * __restrict__ fI
86 )
87 {
88  int p = blockDim.x * blockIdx.x + threadIdx.x;
89 
90  if (p < npoints_grid) {
91  for (int k = 0; k < _MODEL_NVARS_; k++)
92  (fI+_MODEL_NVARS_*p)[k] = 0.5 * ((fL+_MODEL_NVARS_*p)[k] + (fR+_MODEL_NVARS_*p)[k]);
93  }
94  return;
95 }
96 
97 /*! Compute the source function in the well-balanced treatment of the source terms. The source
98  function is:
99  \f{equation}{
100  dir = x \rightarrow \left[\begin{array}{c}0 \\ \varphi\left(x,y,z\right) \\ 0 \\ 0 \\ \varphi\left(x,y,z\right) \end{array}\right],
101  \ dir = y \rightarrow \left[\begin{array}{c}0 \\ 0 \\ \varphi\left(x,y,z\right) \\ 0 \\ \varphi\left(x,y,z\right) \end{array}\right],
102  \ dir = z \rightarrow \left[\begin{array}{c}0 \\ 0 \\ 0 \\ \varphi\left(x,y,z\right) \\ \varphi\left(x,y,z\right) \end{array}\right]
103  \f}
104  where \f$\varphi\f$ (#NavierStokes3D::grav_field_g) is computed in NavierStokes3D::GravityField().
105  \n\n
106  References:
107  + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
108  Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
109  7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
110  http://dx.doi.org/10.2514/6.2015-2889
111  + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
112  for Atmospheric Flows, AIAA Journal, http://dx.doi.org/10.2514/1.J054580.
113 */
114 int gpuNavierStokes3DSourceFunction(
115  double *f, /*!< Array to hold the computed source function */
116  double *u, /*!< Solution vector array */
117  double *x, /*!< Array of spatial coordinates (grid) */
118  void *s, /*!< Solver object of type #HyPar */
119  void *m, /*!< MPI object of type #MPIVariables */
120  double t, /*!< Current simulation time */
121  int dir /*!< Spatial dimension (x, y, or z) */
122 )
123 {
124  HyPar *solver = (HyPar* ) s;
125  NavierStokes3D *param = (NavierStokes3D*) solver->physics;
126  int nblocks = (solver->npoints_local_wghosts-1)/GPU_THREADS_PER_BLOCK + 1;
127 
128  gpuNavierStokes3DSourceFunction_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
129  solver->npoints_local_wghosts, dir, param->gpu_grav_field_g, f
130  );
131  cudaDeviceSynchronize();
132 
133  return 0;
134 }
135 
136 /*! Compute the "upwind" source function value at the interface: the upwinding is just the
137  arithmetic average of the left and right biased interpolated values of the source function
138  at the interface.
139  \n\n
140  References:
141  + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
142  Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
143  7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
144  http://dx.doi.org/10.2514/6.2015-2889
145  + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
146  for Atmospheric Flows, Submitted
147 */
148 int gpuNavierStokes3DSourceUpwind(
149  double *fI, /*!< Array to hold the computed "upwind" interface source function */
150  double *fL, /*!< Interface source function value computed using left-biased interpolation */
151  double *fR, /*!< Interface source function value computed using right-biased interpolation */
152  double *u, /*!< Solution vector array */
153  int dir, /*!< Spatial dimension (x,y, or z) */
154  void *s, /*!< Solver object of type #HyPar */
155  double t /*!< Current simulation time */
156 )
157 {
158  HyPar *solver = (HyPar*) s;
159  int *dim = solver->dim_local;
160  _DECLARE_IERR_;
161 
162  int bounds_inter[_MODEL_NDIMS_];
163  _ArrayCopy1D_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
164  int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
165  int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;
166 
167  gpuNavierStokes3DSourceUpwind_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
168  npoints_grid, fL, fR, fI
169  );
170  cudaDeviceSynchronize();
171 
172  return 0;
173 }
174 
175 /*! Computes the gravitational source term using a well-balanced formulation: the source term
176  is rewritten as follows:
177  \f{equation}{
178  \left[\begin{array}{c} 0 \\ -\rho {\bf g}\cdot{\bf \hat{i}} \\ -\rho {\bf g}\cdot{\bf \hat{j}} \\ -\rho {\bf g}\cdot{\bf \hat{k}} \\ -\rho u {\bf g}\cdot{\bf \hat{i}} - \rho v {\bf g}\cdot{\bf \hat{j}} - \rho w {\bf g}\cdot{\bf \hat{k}} \end{array}\right]
179  =
180  \left[\begin{array}{ccccc} 0 & p_0 \varrho^{-1} & 0 & 0 & p_0 u \varrho^{-1} \end{array}\right] \cdot \frac{\partial}{\partial x}\left[\begin{array}{c} 0 \\ \varphi \\ 0 \\ 0 \\ \varphi \end{array}\right]
181  +
182  \left[\begin{array}{ccccc} 0 & 0 & p_0 \varrho^{-1} & 0 & p_0 v \varrho^{-1} \end{array}\right] \cdot \frac{\partial}{\partial y}\left[\begin{array}{c} 0 \\ 0 \\ \varphi \\ 0 \\ \varphi \end{array}\right]
183  +
184  \left[\begin{array}{ccccc} 0 & 0 & 0 & p_0 \varrho^{-1} & p_0 w \varrho^{-1} \end{array}\right] \cdot \frac{\partial}{\partial y}\left[\begin{array}{c} 0 \\ 0 \\ 0 \\ \varphi \\ \varphi \end{array}\right]
185  \f}
186  where \f$\varphi = \varphi\left(x,y\right)\f$ and \f$\varrho = \varrho\left(x,y\right)\f$ are computed in
187  NavierStokes3DGravityField(). The derivatives are computed in an exactly identical manner as the hyperbolic
188  flux (i.e., using a conservative finite-difference formulation) (see HyperbolicFunction()).
189  \n\n
190  References:
191  + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
192  Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
193  7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
194  http://dx.doi.org/10.2514/6.2015-2889
195  + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
196  for Atmospheric Flows, AIAA Journal, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580.
197 */
198 extern "C" int gpuNavierStokes3DSource(
199  double * __restrict__ source, /*!< Array to hold the computed source */
200  double * __restrict__ u, /*!< Solution vector array */
201  void * __restrict__ s, /*!< Solver object of type #HyPar */
202  void * __restrict__ m, /*!< MPI object of type #MPIVariables */
203  double t /*!< Current simulation time */
204 )
205 {
206  HyPar *solver = (HyPar* ) s;
207  MPIVariables *mpi = (MPIVariables*) m;
208  NavierStokes3D *param = (NavierStokes3D*) solver->physics;
209 
210  if ((param->grav_x == 0.0) && (param->grav_y == 0.0) && (param->grav_z == 0.0))
211  return(0); /* no gravitational forces */
212 
213  int dir;
214  double *SourceI = solver->fluxI; /* interace source term */
215  double *SourceC = solver->fluxC; /* cell-centered source term */
216  double *SourceL = solver->fL;
217  double *SourceR = solver->fR;
218 
219  int ghosts = solver->ghosts;
220  int *dim = solver->gpu_dim_local;
221  double *x = solver->gpu_x;
222  double *dxinv = solver->gpu_dxinv;
223  double RT = param->p0 / param->rho0;
224  static double grav[_MODEL_NDIMS_];
225 
226  grav[_XDIR_] = param->grav_x;
227  grav[_YDIR_] = param->grav_y;
228  grav[_ZDIR_] = param->grav_z;
229 
230  int nblocks = (solver->npoints_local-1)/GPU_THREADS_PER_BLOCK + 1;
231  for (dir = 0; dir < _MODEL_NDIMS_; dir++) {
232  if (grav[dir] != 0.0) {
233  /* calculate the split source function exp(-phi/RT) */
234  gpuNavierStokes3DSourceFunction(SourceC,u,x,solver,mpi,t,dir);
235  /* calculate the left and right interface source terms */
236  solver->InterpolateInterfacesHyp(SourceL,SourceC,u,x, 1,dir,solver,mpi,0);
237  solver->InterpolateInterfacesHyp(SourceR,SourceC,u,x,-1,dir,solver,mpi,0);
238  /* calculate the final interface source term */
239  gpuNavierStokes3DSourceUpwind(SourceI,SourceL,SourceR,u,dir,solver,t);
240  /* calculate the final cell-centered source term */
241  gpuNavierStokes3DSource_grav_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
242  solver->npoints_local, ghosts, dir, param->gamma, RT,
243  dim, dxinv, param->gpu_grav_field_f, SourceI, u, source
244  );
245  }
246  }
247 
248  return 0;
249 }
250 
251 #else
252 
253 /*! Kernel function */
254 __global__
255 void gpuNavierStokes3DSource_grav_kernel(
256  int npoints_grid,
257  int npoints_local_wghosts,
258  int npoints_fluxI,
259  int ghosts,
260  int dir,
261  double gamma,
262  double RT,
263  const int * __restrict__ dim,
264  const double * __restrict__ dxinv,
265  const double * __restrict__ grav_field_f,
266  const double * __restrict__ SourceI,
267  const double * __restrict__ u,
268  double * __restrict__ source
269 )
270 {
271  int tid = blockDim.x * blockIdx.x + threadIdx.x;
272 
273  if (tid < npoints_grid) {
274  int v,p,p1,p2;
275  int index[_MODEL_NDIMS_],index1[_MODEL_NDIMS_],index2[_MODEL_NDIMS_],dim_interface[_MODEL_NDIMS_];
276 
277  _ArrayIndexnD_(_MODEL_NDIMS_,tid,dim,index,0);
278  _ArrayCopy1D_(dim,dim_interface,_MODEL_NDIMS_); dim_interface[dir]++;
279  _ArrayCopy1D_(index,index1,_MODEL_NDIMS_);
280  _ArrayCopy1D_(index,index2,_MODEL_NDIMS_); index2[dir]++;
281  _ArrayIndex1D_(_MODEL_NDIMS_,dim ,index ,ghosts,p );
282  _ArrayIndex1D_(_MODEL_NDIMS_,dim_interface,index1,0 ,p1);
283  _ArrayIndex1D_(_MODEL_NDIMS_,dim_interface,index2,0 ,p2);
284 
285  double dx_inverse; _GetCoordinate_(dir,index[dir],dim,ghosts,dxinv,dx_inverse);
286  double rho, vel[_MODEL_NDIMS_], e, P;
287  _NavierStokes3DGetFlowVar_((u+p),npoints_local_wghosts,rho,vel[0],vel[1],vel[2],e,P,gamma);
288  double term[_MODEL_NVARS_] = {0.0, rho*RT*(dir==_XDIR_), rho*RT*(dir==_YDIR_), rho*RT*(dir==_ZDIR_), rho*RT*vel[dir]};
289  for (v=0; v<_MODEL_NVARS_; v++) {
290  source[p+v*npoints_local_wghosts] += ( (term[v]*grav_field_f[p])
291  * (SourceI[p2+v*npoints_fluxI]-SourceI[p1+v*npoints_fluxI])*dx_inverse );
292  }
293  vel[0] = P;
294  }
295  return;
296 }
297 
298 /*! Kernel function for gpuNavierStokes3DSourceFunction() */
299 __global__
300 void gpuNavierStokes3DSourceFunction_kernel(
301  int npoints_grid,
302  int ghosts,
303  int dir,
304  const int * __restrict__ dim,
305  const double * __restrict__ grav_field_g,
306  double * __restrict__ f
307 )
308 {
309  int p = blockDim.x * blockIdx.x + threadIdx.x;
310 
311  if (p < npoints_grid) {
312  (f+p)[0*npoints_grid] = 0.0;
313  (f+p)[1*npoints_grid] = grav_field_g[p] * (dir == _XDIR_);
314  (f+p)[2*npoints_grid] = grav_field_g[p] * (dir == _YDIR_);
315  (f+p)[3*npoints_grid] = grav_field_g[p] * (dir == _ZDIR_);
316  (f+p)[4*npoints_grid] = grav_field_g[p];
317  }
318  return;
319 }
320 
321 /*! Kernel function for gpuNavierStokes3DSourceUpwind() */
322 __global__
323 void gpuNavierStokes3DSourceUpwind_kernel(
324  int npoints_grid,
325  const double * __restrict__ fL,
326  const double * __restrict__ fR,
327  double * __restrict__ fI
328 )
329 {
330  int p = blockDim.x * blockIdx.x + threadIdx.x;
331 
332  if (p < npoints_grid) {
333  for (int k = 0; k < _MODEL_NVARS_; k++)
334  (fI+p)[k*npoints_grid] = 0.5 * ((fL+p)[k*npoints_grid] + (fR+p)[k*npoints_grid]);
335  }
336  return;
337 }
338 
339 /*! Compute the source function in the well-balanced treatment of the source terms. The source
340  function is:
341  \f{equation}{
342  dir = x \rightarrow \left[\begin{array}{c}0 \\ \varphi\left(x,y,z\right) \\ 0 \\ 0 \\ \varphi\left(x,y,z\right) \end{array}\right],
343  \ dir = y \rightarrow \left[\begin{array}{c}0 \\ 0 \\ \varphi\left(x,y,z\right) \\ 0 \\ \varphi\left(x,y,z\right) \end{array}\right],
344  \ dir = z \rightarrow \left[\begin{array}{c}0 \\ 0 \\ 0 \\ \varphi\left(x,y,z\right) \\ \varphi\left(x,y,z\right) \end{array}\right]
345  \f}
346  where \f$\varphi\f$ (#NavierStokes3D::grav_field_g) is computed in NavierStokes3D::GravityField().
347  \n\n
348  References:
349  + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
350  Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
351  7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
352  http://dx.doi.org/10.2514/6.2015-2889
353  + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
354  for Atmospheric Flows, AIAA Journal, http://dx.doi.org/10.2514/1.J054580.
355 */
356 int gpuNavierStokes3DSourceFunction(
357  double *f, /*!< Array to hold the computed source function */
358  double *u, /*!< Solution vector array */
359  double *x, /*!< Array of spatial coordinates (grid) */
360  void *s, /*!< Solver object of type #HyPar */
361  void *m, /*!< MPI object of type #MPIVariables */
362  double t, /*!< Current simulation time */
363  int dir /*!< Spatial dimension (x, y, or z) */
364 )
365 {
366  HyPar *solver = (HyPar* ) s;
367  NavierStokes3D *param = (NavierStokes3D*) solver->physics;
368  int nblocks = (solver->npoints_local_wghosts-1)/GPU_THREADS_PER_BLOCK + 1;
369 
370  gpuNavierStokes3DSourceFunction_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
371  solver->npoints_local_wghosts, solver->ghosts, dir, solver->gpu_dim_local, param->gpu_grav_field_g, f
372  );
373  cudaDeviceSynchronize();
374 
375  return 0;
376 }
377 
378 /*! Compute the "upwind" source function value at the interface: the upwinding is just the
379  arithmetic average of the left and right biased interpolated values of the source function
380  at the interface.
381  \n\n
382  References:
383  + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
384  Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
385  7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
386  http://dx.doi.org/10.2514/6.2015-2889
387  + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
388  for Atmospheric Flows, Submitted
389 */
390 int gpuNavierStokes3DSourceUpwind(
391  double *fI, /*!< Array to hold the computed "upwind" interface source function */
392  double *fL, /*!< Interface source function value computed using left-biased interpolation */
393  double *fR, /*!< Interface source function value computed using right-biased interpolation */
394  double *u, /*!< Solution vector array */
395  int dir, /*!< Spatial dimension (x,y, or z) */
396  void *s, /*!< Solver object of type #HyPar */
397  double t /*!< Current simulation time */
398 )
399 {
400  HyPar *solver = (HyPar*) s;
401  int *dim = solver->dim_local;
402 
403  int bounds_inter[_MODEL_NDIMS_];
404  _ArrayCopy1D_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
405  int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
406  int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;
407 
408  gpuNavierStokes3DSourceUpwind_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
409  npoints_grid, fL, fR, fI
410  );
411  cudaDeviceSynchronize();
412 
413  return 0;
414 }
415 
416 /*! Computes the gravitational source term using a well-balanced formulation: the source term
417  is rewritten as follows:
418  \f{equation}{
419  \left[\begin{array}{c} 0 \\ -\rho {\bf g}\cdot{\bf \hat{i}} \\ -\rho {\bf g}\cdot{\bf \hat{j}} \\ -\rho {\bf g}\cdot{\bf \hat{k}} \\ -\rho u {\bf g}\cdot{\bf \hat{i}} - \rho v {\bf g}\cdot{\bf \hat{j}} - \rho w {\bf g}\cdot{\bf \hat{k}} \end{array}\right]
420  =
421  \left[\begin{array}{ccccc} 0 & p_0 \varrho^{-1} & 0 & 0 & p_0 u \varrho^{-1} \end{array}\right] \cdot \frac{\partial}{\partial x}\left[\begin{array}{c} 0 \\ \varphi \\ 0 \\ 0 \\ \varphi \end{array}\right]
422  +
423  \left[\begin{array}{ccccc} 0 & 0 & p_0 \varrho^{-1} & 0 & p_0 v \varrho^{-1} \end{array}\right] \cdot \frac{\partial}{\partial y}\left[\begin{array}{c} 0 \\ 0 \\ \varphi \\ 0 \\ \varphi \end{array}\right]
424  +
425  \left[\begin{array}{ccccc} 0 & 0 & 0 & p_0 \varrho^{-1} & p_0 w \varrho^{-1} \end{array}\right] \cdot \frac{\partial}{\partial y}\left[\begin{array}{c} 0 \\ 0 \\ 0 \\ \varphi \\ \varphi \end{array}\right]
426  \f}
427  where \f$\varphi = \varphi\left(x,y\right)\f$ and \f$\varrho = \varrho\left(x,y\right)\f$ are computed in
428  NavierStokes3DGravityField(). The derivatives are computed in an exactly identical manner as the hyperbolic
429  flux (i.e., using a conservative finite-difference formulation) (see HyperbolicFunction()).
430  \n\n
431  References:
432  + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
433  Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
434  7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
435  http://dx.doi.org/10.2514/6.2015-2889
436  + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
437  for Atmospheric Flows, AIAA Journal, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580.
438 */
439 extern "C" int gpuNavierStokes3DSource(
440  double * __restrict__ source,
441  double * __restrict__ u,
442  void * __restrict__ s,
443  void * __restrict__ m,
444  double t
445 )
446 {
447  HyPar *solver = (HyPar* ) s;
448  MPIVariables *mpi = (MPIVariables*) m;
449  NavierStokes3D *param = (NavierStokes3D*) solver->physics;
450 
451  if ((param->grav_x == 0.0) && (param->grav_y == 0.0) && (param->grav_z == 0.0))
452  return(0); /* no gravitational forces */
453 
454  int dir;
455  double *SourceI = solver->fluxI; /* interace source term */
456  double *SourceC = solver->fluxC; /* cell-centered source term */
457  double *SourceL = solver->fL;
458  double *SourceR = solver->fR;
459 
460  int ghosts = solver->ghosts;
461  int *dim = solver->gpu_dim_local;
462  double *x = solver->gpu_x;
463  double *dxinv = solver->gpu_dxinv;
464  double RT = param->p0 / param->rho0;
465  static double grav[_MODEL_NDIMS_];
466 
467  grav[_XDIR_] = param->grav_x;
468  grav[_YDIR_] = param->grav_y;
469  grav[_ZDIR_] = param->grav_z;
470 
471  int nblocks = (solver->npoints_local-1)/GPU_THREADS_PER_BLOCK + 1;
472  for (dir = 0; dir < _MODEL_NDIMS_; dir++) {
473  if (grav[dir] != 0.0) {
474  int bounds_inter[_MODEL_NDIMS_];
475  _ArrayCopy1D_(solver->dim_local,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
476  int npoints_fluxI; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_fluxI);
477 
478  /* calculate the split source function exp(-phi/RT) */
479  gpuNavierStokes3DSourceFunction(SourceC,u,x,solver,mpi,t,dir);
480  /* calculate the left and right interface source terms */
481  solver->InterpolateInterfacesHyp(SourceL,SourceC,u,x, 1,dir,solver,mpi,0);
482  solver->InterpolateInterfacesHyp(SourceR,SourceC,u,x,-1,dir,solver,mpi,0);
483  /* calculate the final interface source term */
484  gpuNavierStokes3DSourceUpwind(SourceI,SourceL,SourceR,u,dir,solver,t);
485  /* calculate the final cell-centered source term */
486  gpuNavierStokes3DSource_grav_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
487  solver->npoints_local, solver->npoints_local_wghosts, npoints_fluxI,
488  ghosts, dir, param->gamma, RT,
489  dim, dxinv, param->gpu_grav_field_f, SourceI, u, source
490  );
491  cudaDeviceSynchronize();
492  }
493  }
494 
495  return 0;
496 }
497 
498 #endif
499