HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
NavierStokes3DUpwind_GPU.cu
/*! @file NavierStokes3DUpwind_GPU.cu
    @author Youngdae Kim
    @brief Contains functions to compute the upwind flux at grid interfaces for the 3D Navier-Stokes equations.
*/
#include <math.h>
#include <basic_gpu.h>
#include <arrayfunctions_gpu.h>
#include <mathfunctions.h>
#include <matmult_native.h>
#include <physicalmodels/navierstokes3d.h>
#include <hypar.h>

/* stride of 1, used where a kernel accesses a contiguous local array (e.g., uavg) */
static const int dummy = 1;

#ifdef CUDA_VAR_ORDERDING_AOS

/* kernel for gpuNavierStokes3DUpwindRusanov() */
__global__
void gpuNavierStokes3DUpwindRusanov_kernel(
  int npoints_grid,
  int dir,
  int ghosts,
  double gamma,
  const int    * __restrict__ dim,
  const double * __restrict__ grav_field_g,
  const double * __restrict__ fL,
  const double * __restrict__ fR,
  const double * __restrict__ uL,
  const double * __restrict__ uR,
  const double * __restrict__ u,
  double       * __restrict__ fI
)
{
  int p = blockDim.x * blockIdx.x + threadIdx.x;

  if (p < npoints_grid) {
    double udiff[_MODEL_NVARS_], uavg[_MODEL_NVARS_];
    int bounds_inter[_MODEL_NDIMS_], index_inter[_MODEL_NDIMS_],
        indexL[_MODEL_NDIMS_], indexR[_MODEL_NDIMS_];

    bounds_inter[0] = dim[0]; bounds_inter[1] = dim[1]; bounds_inter[2] = dim[2]; bounds_inter[dir] += 1;
    _ArrayIndexnD_(_MODEL_NDIMS_,p,bounds_inter,index_inter,0);
    _ArrayCopy1D_(index_inter,indexL,_MODEL_NDIMS_); indexL[dir]--;
    _ArrayCopy1D_(index_inter,indexR,_MODEL_NDIMS_);
    int pL; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexL,ghosts,pL);
    int pR; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexR,ghosts,pR);
    int q = p*_MODEL_NVARS_;

    /* Modified Rusanov's upwinding scheme */

    udiff[0] = 0.5 * (uR[q+0] - uL[q+0]);
    udiff[1] = 0.5 * (uR[q+1] - uL[q+1]);
    udiff[2] = 0.5 * (uR[q+2] - uL[q+2]);
    udiff[3] = 0.5 * (uR[q+3] - uL[q+3]);
    udiff[4] = 0.5 * (uR[q+4] - uL[q+4]);

    _NavierStokes3DRoeAverage_(uavg,_NavierStokes3D_stride_,(u+_MODEL_NVARS_*pL),(u+_MODEL_NVARS_*pR),gamma);

    double c, vel[_MODEL_NDIMS_], rho, E, P;
    _NavierStokes3DGetFlowVar_((u+_MODEL_NVARS_*pL),_NavierStokes3D_stride_,rho,vel[0],vel[1],vel[2],E,P,gamma);
    c = sqrt(gamma*P/rho);
    double alphaL = c + absolute(vel[dir]);
    _NavierStokes3DGetFlowVar_((u+_MODEL_NVARS_*pR),_NavierStokes3D_stride_,rho,vel[0],vel[1],vel[2],E,P,gamma);
    c = sqrt(gamma*P/rho);
    double alphaR = c + absolute(vel[dir]);
    _NavierStokes3DGetFlowVar_(uavg,dummy,rho,vel[0],vel[1],vel[2],E,P,gamma);
    c = sqrt(gamma*P/rho);
    double alphaavg = c + absolute(vel[dir]);

    double kappa = max(grav_field_g[pL],grav_field_g[pR]);
    double alpha = kappa*max3(alphaL,alphaR,alphaavg);

    fI[q+0] = 0.5*(fL[q+0]+fR[q+0]) - alpha*udiff[0];
    fI[q+1] = 0.5*(fL[q+1]+fR[q+1]) - alpha*udiff[1];
    fI[q+2] = 0.5*(fL[q+2]+fR[q+2]) - alpha*udiff[2];
    fI[q+3] = 0.5*(fL[q+3]+fR[q+3]) - alpha*udiff[3];
    fI[q+4] = 0.5*(fL[q+4]+fR[q+4]) - alpha*udiff[4];
  }
  return;
}

/*! Rusanov's upwinding scheme.
    \f{equation}{
      {\bf f}_{j+1/2} = \frac{1}{2}\left[ {\bf f}_{j+1/2}^L + {\bf f}_{j+1/2}^R
                        - \max_{j,j+1} \nu_j \left( {\bf u}_{j+1/2}^R - {\bf u}_{j+1/2}^L \right)\right]
    \f}
    where \f$\nu = c + \left|u\right|\f$.
    + Rusanov, V. V., "The calculation of the interaction of non-stationary shock waves and obstacles," USSR
      Computational Mathematics and Mathematical Physics, Vol. 1, No. 2, 1962, pp. 304–320

    This upwinding scheme is modified for the balanced discretization of the 3D Navier-Stokes equations when
    there is a non-zero gravitational force. See the references below. For flows without any gravitational forces,
    it reduces to its original form.
    + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
      Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
      7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
      http://dx.doi.org/10.2514/6.2015-2889
    + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
      for Atmospheric Flows, AIAA Journal, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580

*/
extern "C" int gpuNavierStokes3DUpwindRusanov(
  double *fI, /*!< Computed upwind interface flux */
  double *fL, /*!< Left-biased reconstructed interface flux */
  double *fR, /*!< Right-biased reconstructed interface flux */
  double *uL, /*!< Left-biased reconstructed interface solution */
  double *uR, /*!< Right-biased reconstructed interface solution */
  double *u,  /*!< Cell-centered solution */
  int dir,    /*!< Spatial dimension (x, y, or z) */
  void *s,    /*!< Solver object of type #HyPar */
  double t    /*!< Current solution time */
)
{
  HyPar          *solver = (HyPar*)          s;
  NavierStokes3D *param  = (NavierStokes3D*) solver->physics;
  int *dim = solver->dim_local;

  int bounds_inter[_MODEL_NDIMS_];
  _ArrayCopy1D_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
  int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
  int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;

  gpuNavierStokes3DUpwindRusanov_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
    npoints_grid, dir, solver->ghosts, param->gamma, solver->gpu_dim_local,
    param->gpu_grav_field_g, fL, fR, uL, uR, u, fI
  );
  cudaDeviceSynchronize();

  return 0;
}
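
/* Illustrative sketch (not part of HyPar): the per-component update that the Rusanov kernel
   above applies at each interface point. The helper name rusanovFluxScalar and its argument
   names are hypothetical; alpha stands for the kappa*max(alphaL,alphaR,alphaavg) value
   computed in the kernel. */
static __host__ __device__ double rusanovFluxScalar(
  double fL,    /* left-biased flux at the interface */
  double fR,    /* right-biased flux at the interface */
  double uL,    /* left-biased solution at the interface */
  double uR,    /* right-biased solution at the interface */
  double alpha  /* local maximum wave speed estimate */
)
{
  /* f_{j+1/2} = 0.5*(fL + fR) - alpha * 0.5*(uR - uL) */
  return 0.5*(fL + fR) - alpha * 0.5*(uR - uL);
}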

/* kernel for gpuNavierStokes3DUpwindRoe() */
__global__
void gpuNavierStokes3DUpwindRoe_kernel(
  int npoints_grid,
  int dir,
  int ghosts,
  double gamma,
  const int    * __restrict__ dim,
  const double * __restrict__ grav_field_g,
  const double * __restrict__ fL,
  const double * __restrict__ fR,
  const double * __restrict__ uL,
  const double * __restrict__ uR,
  const double * __restrict__ u,
  double       * __restrict__ fI
)
{
  int p = blockDim.x * blockIdx.x + threadIdx.x;

  if (p < npoints_grid) {
    double R[_MODEL_NVARS_*_MODEL_NVARS_], D[_MODEL_NVARS_*_MODEL_NVARS_],
           L[_MODEL_NVARS_*_MODEL_NVARS_], DL[_MODEL_NVARS_*_MODEL_NVARS_],
           modA[_MODEL_NVARS_*_MODEL_NVARS_];
    double udiff[_MODEL_NVARS_], uavg[_MODEL_NVARS_], udiss[_MODEL_NVARS_];
    int bounds_inter[_MODEL_NDIMS_], index_inter[_MODEL_NDIMS_],
        indexL[_MODEL_NDIMS_], indexR[_MODEL_NDIMS_];

    bounds_inter[0] = dim[0]; bounds_inter[1] = dim[1]; bounds_inter[2] = dim[2]; bounds_inter[dir] += 1;
    _ArrayIndexnD_(_MODEL_NDIMS_,p,bounds_inter,index_inter,0);
    _ArrayCopy1D_(index_inter,indexL,_MODEL_NDIMS_); indexL[dir]--;
    _ArrayCopy1D_(index_inter,indexR,_MODEL_NDIMS_);
    int pL; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexL,ghosts,pL);
    int pR; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexR,ghosts,pR);

    p *= _MODEL_NVARS_;

    /* Modified Roe's upwinding scheme */

    udiff[0] = 0.5 * (uR[p+0] - uL[p+0]);
    udiff[1] = 0.5 * (uR[p+1] - uL[p+1]);
    udiff[2] = 0.5 * (uR[p+2] - uL[p+2]);
    udiff[3] = 0.5 * (uR[p+3] - uL[p+3]);
    udiff[4] = 0.5 * (uR[p+4] - uL[p+4]);

    _NavierStokes3DRoeAverage_        (uavg,_NavierStokes3D_stride_,(u+_MODEL_NVARS_*pL),(u+_MODEL_NVARS_*pR),gamma);
    _NavierStokes3DEigenvalues_       (uavg,dummy,D,gamma,dir);
    _NavierStokes3DLeftEigenvectors_  (uavg,dummy,L,gamma,dir);
    _NavierStokes3DRightEigenvectors_ (uavg,dummy,R,gamma,dir);
    /* Harten's Entropy Fix - Page 362 of LeVeque; applied to the diagonal entries
       (indices 0, 6, 12, 18, 24) of the 5x5 eigenvalue matrix D */
    int k;
    double delta = 0.000001, delta2 = delta*delta;
    k=0;  D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
    k=6;  D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
    k=12; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
    k=18; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
    k=24; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );

    MatMult5(_MODEL_NVARS_,DL,D,L);
    MatMult5(_MODEL_NVARS_,modA,R,DL);
    MatVecMult5(_MODEL_NVARS_,udiss,modA,udiff);

    fI[p+0] = 0.5 * (fL[p+0]+fR[p+0]) - udiss[0];
    fI[p+1] = 0.5 * (fL[p+1]+fR[p+1]) - udiss[1];
    fI[p+2] = 0.5 * (fL[p+2]+fR[p+2]) - udiss[2];
    fI[p+3] = 0.5 * (fL[p+3]+fR[p+3]) - udiss[3];
    fI[p+4] = 0.5 * (fL[p+4]+fR[p+4]) - udiss[4];
  }
  return;
}

/*! Roe's upwinding scheme.
    \f{equation}{
      {\bf f}_{j+1/2} = \frac{1}{2}\left[ {\bf f}_{j+1/2}^L + {\bf f}_{j+1/2}^R
                        - \left| A\left({\bf u}_{j+1/2}^L,{\bf u}_{j+1/2}^R\right) \right|
                          \left( {\bf u}_{j+1/2}^R - {\bf u}_{j+1/2}^L \right)\right]
    \f}
    + Roe, P. L., “Approximate Riemann solvers, parameter vectors, and difference schemes,” Journal of
      Computational Physics, Vol. 43, No. 2, 1981, pp. 357–372, http://dx.doi.org/10.1016/0021-9991(81)90128-5.

    This upwinding scheme is modified for the balanced discretization of the 3D Navier-Stokes equations when
    there is a non-zero gravitational force. See the references below. For flows without any gravitational forces,
    it reduces to its original form.
    + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
      Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
      7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
      http://dx.doi.org/10.2514/6.2015-2889
    + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
      for Atmospheric Flows, AIAA Journal, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580

*/
extern "C" int gpuNavierStokes3DUpwindRoe(
  double * __restrict__ fI, /*!< Computed upwind interface flux */
  double * __restrict__ fL, /*!< Left-biased reconstructed interface flux */
  double * __restrict__ fR, /*!< Right-biased reconstructed interface flux */
  double * __restrict__ uL, /*!< Left-biased reconstructed interface solution */
  double * __restrict__ uR, /*!< Right-biased reconstructed interface solution */
  double * __restrict__ u,  /*!< Cell-centered solution */
  int dir,                  /*!< Spatial dimension (x, y, or z) */
  void *s,                  /*!< Solver object of type #HyPar */
  double t                  /*!< Current solution time */
)
{
  HyPar          *solver = (HyPar*)          s;
  NavierStokes3D *param  = (NavierStokes3D*) solver->physics;
  int *dim = solver->dim_local;

  int bounds_inter[_MODEL_NDIMS_];
  _ArrayCopy1D3_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
  int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
  int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;

  gpuNavierStokes3DUpwindRoe_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
    npoints_grid, dir, solver->ghosts, param->gamma, solver->gpu_dim_local,
    param->gpu_grav_field_g, fL, fR, uL, uR, u, fI
  );
  cudaDeviceSynchronize();

  return 0;
}
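
/* Illustrative sketch (not part of HyPar): what the MatMult5/MatVecMult5 calls in the Roe
   kernel above compute, i.e., the dissipation term |A| du = R |Lambda| L du built from 5x5
   matrices. Row-major storage is assumed; the function name roeDissipationSketch and its
   argument names are hypothetical. */
static __host__ __device__ void roeDissipationSketch(
  const double *Rmat,   /* right eigenvectors (5x5) */
  const double *absD,   /* eigenvalue matrix with |lambda| on the diagonal (5x5) */
  const double *Lmat,   /* left eigenvectors (5x5) */
  const double *udiff,  /* 0.5*(uR - uL) at the interface (5 components) */
  double       *udiss   /* output: |A|*udiff (5 components) */
)
{
  double DL[25], modA[25];
  int i, j, k;
  /* DL = absD * Lmat */
  for (i=0; i<5; i++) for (j=0; j<5; j++) {
    DL[5*i+j] = 0.0;
    for (k=0; k<5; k++) DL[5*i+j] += absD[5*i+k] * Lmat[5*k+j];
  }
  /* modA = Rmat * DL (this is |A|) */
  for (i=0; i<5; i++) for (j=0; j<5; j++) {
    modA[5*i+j] = 0.0;
    for (k=0; k<5; k++) modA[5*i+j] += Rmat[5*i+k] * DL[5*k+j];
  }
  /* udiss = modA * udiff */
  for (i=0; i<5; i++) {
    udiss[i] = 0.0;
    for (j=0; j<5; j++) udiss[i] += modA[5*i+j] * udiff[j];
  }
}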

#else

/* kernel for gpuNavierStokes3DUpwindRusanov() */
__global__
void gpuNavierStokes3DUpwindRusanov_kernel(
  int npoints_grid,
  int npoints_local_wghosts,
  int dir,
  int ghosts,
  double gamma,
  const int    * __restrict__ dim,
  const double * __restrict__ grav_field_g,
  const double * __restrict__ fL,
  const double * __restrict__ fR,
  const double * __restrict__ uL,
  const double * __restrict__ uR,
  const double * __restrict__ u,
  double       * __restrict__ fI
)
{
  int p = blockDim.x * blockIdx.x + threadIdx.x;

  if (p < npoints_grid) {
    double udiff[_MODEL_NVARS_], uavg[_MODEL_NVARS_];
    int bounds_inter[_MODEL_NDIMS_], index_inter[_MODEL_NDIMS_],
        indexL[_MODEL_NDIMS_], indexR[_MODEL_NDIMS_];

    bounds_inter[0] = dim[0]; bounds_inter[1] = dim[1]; bounds_inter[2] = dim[2]; bounds_inter[dir] += 1;
    _ArrayIndexnD_(_MODEL_NDIMS_,p,bounds_inter,index_inter,0);
    _ArrayCopy1D_(index_inter,indexL,_MODEL_NDIMS_); indexL[dir]--;
    _ArrayCopy1D_(index_inter,indexR,_MODEL_NDIMS_);
    int pL; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexL,ghosts,pL);
    int pR; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexR,ghosts,pR);

    /* Modified Rusanov's upwinding scheme */

    udiff[0] = 0.5 * (uR[p+0] - uL[p+0]);
    udiff[1] = 0.5 * (uR[p+npoints_grid] - uL[p+npoints_grid]);
    udiff[2] = 0.5 * (uR[p+2*npoints_grid] - uL[p+2*npoints_grid]);
    udiff[3] = 0.5 * (uR[p+3*npoints_grid] - uL[p+3*npoints_grid]);
    udiff[4] = 0.5 * (uR[p+4*npoints_grid] - uL[p+4*npoints_grid]);

    _NavierStokes3DRoeAverage_(uavg,npoints_local_wghosts,(u+pL),(u+pR),gamma);

    double c, vel[_MODEL_NDIMS_], rho, E, P;
    _NavierStokes3DGetFlowVar_((u+pL),npoints_local_wghosts,rho,vel[0],vel[1],vel[2],E,P,gamma);
    c = sqrt(gamma*P/rho);
    double alphaL = c + absolute(vel[dir]);
    _NavierStokes3DGetFlowVar_((u+pR),npoints_local_wghosts,rho,vel[0],vel[1],vel[2],E,P,gamma);
    c = sqrt(gamma*P/rho);
    double alphaR = c + absolute(vel[dir]);
    _NavierStokes3DGetFlowVar_(uavg,dummy,rho,vel[0],vel[1],vel[2],E,P,gamma);
    c = sqrt(gamma*P/rho);
    double alphaavg = c + absolute(vel[dir]);

    double kappa = max(grav_field_g[pL],grav_field_g[pR]);
    double alpha = kappa*max3(alphaL,alphaR,alphaavg);

    fI[p+0] = 0.5*(fL[p+0]+fR[p+0]) - alpha*udiff[0];
    fI[p+npoints_grid] = 0.5*(fL[p+npoints_grid]+fR[p+npoints_grid]) - alpha*udiff[1];
    fI[p+2*npoints_grid] = 0.5*(fL[p+2*npoints_grid]+fR[p+2*npoints_grid]) - alpha*udiff[2];
    fI[p+3*npoints_grid] = 0.5*(fL[p+3*npoints_grid]+fR[p+3*npoints_grid]) - alpha*udiff[3];
    fI[p+4*npoints_grid] = 0.5*(fL[p+4*npoints_grid]+fR[p+4*npoints_grid]) - alpha*udiff[4];
  }
  return;
}

/*! Rusanov's upwinding scheme.
    \f{equation}{
      {\bf f}_{j+1/2} = \frac{1}{2}\left[ {\bf f}_{j+1/2}^L + {\bf f}_{j+1/2}^R
                        - \max_{j,j+1} \nu_j \left( {\bf u}_{j+1/2}^R - {\bf u}_{j+1/2}^L \right)\right]
    \f}
    where \f$\nu = c + \left|u\right|\f$.
    + Rusanov, V. V., "The calculation of the interaction of non-stationary shock waves and obstacles," USSR
      Computational Mathematics and Mathematical Physics, Vol. 1, No. 2, 1962, pp. 304–320

    This upwinding scheme is modified for the balanced discretization of the 3D Navier-Stokes equations when
    there is a non-zero gravitational force. See the references below. For flows without any gravitational forces,
    it reduces to its original form.
    + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
      Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
      7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
      http://dx.doi.org/10.2514/6.2015-2889
    + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
      for Atmospheric Flows, AIAA Journal, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580

*/
extern "C" int gpuNavierStokes3DUpwindRusanov(
  double *fI, /*!< Computed upwind interface flux */
  double *fL, /*!< Left-biased reconstructed interface flux */
  double *fR, /*!< Right-biased reconstructed interface flux */
  double *uL, /*!< Left-biased reconstructed interface solution */
  double *uR, /*!< Right-biased reconstructed interface solution */
  double *u,  /*!< Cell-centered solution */
  int dir,    /*!< Spatial dimension (x, y, or z) */
  void *s,    /*!< Solver object of type #HyPar */
  double t    /*!< Current solution time */
)
{
  HyPar          *solver = (HyPar*)          s;
  NavierStokes3D *param  = (NavierStokes3D*) solver->physics;
  int *dim = solver->dim_local;

  int bounds_inter[_MODEL_NDIMS_];
  _ArrayCopy1D_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
  int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
  int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;

  gpuNavierStokes3DUpwindRusanov_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
    npoints_grid, solver->npoints_local_wghosts, dir, solver->ghosts, param->gamma, solver->gpu_dim_local,
    param->gpu_grav_field_g, fL, fR, uL, uR, u, fI
  );
  cudaDeviceSynchronize();

  return 0;
}
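
/* Note (sketch, for illustration only): in this non-AOS branch the interface arrays
   fL, fR, uL, uR, and fI appear to be stored component-major ("structure of arrays"),
   so component v at interface point p is accessed at index p + v*npoints_grid, whereas
   in the AOS branch above it is accessed at p*_MODEL_NVARS_ + v. The cell-centered array
   u is likewise indexed with the stride npoints_local_wghosts. The helper names below
   (idxAOS, idxSOA) are hypothetical and only illustrate the two layouts. */
static __host__ __device__ int idxAOS(int p, int v)              { return p*_MODEL_NVARS_ + v; }
static __host__ __device__ int idxSOA(int p, int v, int npoints) { return p + v*npoints; }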

/* kernel for gpuNavierStokes3DUpwindRoe() */
__global__
void gpuNavierStokes3DUpwindRoe_kernel(
  int npoints_grid,
  int npoints_local_wghosts,
  int dir,
  int ghosts,
  double gamma,
  const int    * __restrict__ dim,
  const double * __restrict__ grav_field_g,
  const double * __restrict__ fL,
  const double * __restrict__ fR,
  const double * __restrict__ uL,
  const double * __restrict__ uR,
  const double * __restrict__ u,
  double       * __restrict__ fI
)
{
  int p = blockDim.x * blockIdx.x + threadIdx.x;

  if (p < npoints_grid) {
    double R[_MODEL_NVARS_*_MODEL_NVARS_], D[_MODEL_NVARS_*_MODEL_NVARS_],
           L[_MODEL_NVARS_*_MODEL_NVARS_], DL[_MODEL_NVARS_*_MODEL_NVARS_],
           modA[_MODEL_NVARS_*_MODEL_NVARS_];
    double udiff[_MODEL_NVARS_], uavg[_MODEL_NVARS_], udiss[_MODEL_NVARS_];
    int bounds_inter[_MODEL_NDIMS_], index_inter[_MODEL_NDIMS_],
        indexL[_MODEL_NDIMS_], indexR[_MODEL_NDIMS_];

    bounds_inter[0] = dim[0]; bounds_inter[1] = dim[1]; bounds_inter[2] = dim[2]; bounds_inter[dir] += 1;
    _ArrayIndexnD_(_MODEL_NDIMS_,p,bounds_inter,index_inter,0);
    _ArrayCopy1D_(index_inter,indexL,_MODEL_NDIMS_); indexL[dir]--;
    _ArrayCopy1D_(index_inter,indexR,_MODEL_NDIMS_);
    int pL; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexL,ghosts,pL);
    int pR; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexR,ghosts,pR);

    /* Modified Roe's upwinding scheme */

    udiff[0] = 0.5 * (uR[p+0] - uL[p+0]);
    udiff[1] = 0.5 * (uR[p+npoints_grid] - uL[p+npoints_grid]);
    udiff[2] = 0.5 * (uR[p+2*npoints_grid] - uL[p+2*npoints_grid]);
    udiff[3] = 0.5 * (uR[p+3*npoints_grid] - uL[p+3*npoints_grid]);
    udiff[4] = 0.5 * (uR[p+4*npoints_grid] - uL[p+4*npoints_grid]);

    _NavierStokes3DRoeAverage_        (uavg,npoints_local_wghosts,(u+pL),(u+pR),gamma);
    _NavierStokes3DEigenvalues_       (uavg,dummy,D,gamma,dir);
    _NavierStokes3DLeftEigenvectors_  (uavg,dummy,L,gamma,dir);
    _NavierStokes3DRightEigenvectors_ (uavg,dummy,R,gamma,dir);

    /* Harten's Entropy Fix - Page 362 of LeVeque; applied to the diagonal entries
       (indices 0, 6, 12, 18, 24) of the 5x5 eigenvalue matrix D */
    int k;
    double delta = 0.000001, delta2 = delta*delta;
    k=0;  D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
    k=6;  D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
    k=12; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
    k=18; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
    k=24; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );

    MatMult5(_MODEL_NVARS_,DL,D,L);
    MatMult5(_MODEL_NVARS_,modA,R,DL);
    MatVecMult5(_MODEL_NVARS_,udiss,modA,udiff);

    fI[p+0] = 0.5 * (fL[p+0]+fR[p+0]) - udiss[0];
    fI[p+npoints_grid] = 0.5 * (fL[p+npoints_grid]+fR[p+npoints_grid]) - udiss[1];
    fI[p+2*npoints_grid] = 0.5 * (fL[p+2*npoints_grid]+fR[p+2*npoints_grid]) - udiss[2];
    fI[p+3*npoints_grid] = 0.5 * (fL[p+3*npoints_grid]+fR[p+3*npoints_grid]) - udiss[3];
    fI[p+4*npoints_grid] = 0.5 * (fL[p+4*npoints_grid]+fR[p+4*npoints_grid]) - udiss[4];
  }
  return;
}

/*! Roe's upwinding scheme.
    \f{equation}{
      {\bf f}_{j+1/2} = \frac{1}{2}\left[ {\bf f}_{j+1/2}^L + {\bf f}_{j+1/2}^R
                        - \left| A\left({\bf u}_{j+1/2}^L,{\bf u}_{j+1/2}^R\right) \right|
                          \left( {\bf u}_{j+1/2}^R - {\bf u}_{j+1/2}^L \right)\right]
    \f}
    + Roe, P. L., “Approximate Riemann solvers, parameter vectors, and difference schemes,” Journal of
      Computational Physics, Vol. 43, No. 2, 1981, pp. 357–372, http://dx.doi.org/10.1016/0021-9991(81)90128-5.

    This upwinding scheme is modified for the balanced discretization of the 3D Navier-Stokes equations when
    there is a non-zero gravitational force. See the references below. For flows without any gravitational forces,
    it reduces to its original form.
    + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
      Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
      7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
      http://dx.doi.org/10.2514/6.2015-2889
    + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
      for Atmospheric Flows, AIAA Journal, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580

*/
extern "C" int gpuNavierStokes3DUpwindRoe(
  double * __restrict__ fI, /*!< Computed upwind interface flux */
  double * __restrict__ fL, /*!< Left-biased reconstructed interface flux */
  double * __restrict__ fR, /*!< Right-biased reconstructed interface flux */
  double * __restrict__ uL, /*!< Left-biased reconstructed interface solution */
  double * __restrict__ uR, /*!< Right-biased reconstructed interface solution */
  double * __restrict__ u,  /*!< Cell-centered solution */
  int dir,                  /*!< Spatial dimension (x, y, or z) */
  void *s,                  /*!< Solver object of type #HyPar */
  double t                  /*!< Current solution time */
)
{
  HyPar          *solver = (HyPar*)          s;
  NavierStokes3D *param  = (NavierStokes3D*) solver->physics;
  int *dim = solver->dim_local;

  int bounds_inter[_MODEL_NDIMS_];
  _ArrayCopy1D3_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
  int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
  int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;

#if defined(GPU_STAT)
  cudaEvent_t startEvent, stopEvent;
  float milliseconds = 0;

  checkCuda( cudaEventCreate(&startEvent) );
  checkCuda( cudaEventCreate(&stopEvent) );
  checkCuda( cudaEventRecord(startEvent, 0) );
#endif

  gpuNavierStokes3DUpwindRoe_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
    npoints_grid, solver->npoints_local_wghosts, dir, solver->ghosts, param->gamma, solver->gpu_dim_local,
    param->gpu_grav_field_g, fL, fR, uL, uR, u, fI
  );

#if defined(GPU_STAT)
  checkCuda( cudaEventRecord(stopEvent, 0) );
  checkCuda( cudaEventSynchronize(stopEvent) );
#endif

  cudaDeviceSynchronize();

#if defined(GPU_STAT)
  checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
  printf("%-50s GPU time (secs) = %.6f dir = %d\n",
         "NavierStokes3DUpwindRoe2", milliseconds*1e-3, dir);
  checkCuda( cudaEventDestroy(startEvent) );
  checkCuda( cudaEventDestroy(stopEvent) );
#endif

  return 0;
}

#endif