1 /*! @file NavierStokes3DUpwind_GPU.cu
3 @brief Contains functions to compute the upwind flux at grid interfaces for the 3D Navier Stokes equations.
7 #include <arrayfunctions_gpu.h>
8 #include <mathfunctions.h>
9 #include <matmult_native.h>
10 #include <physicalmodels/navierstokes3d.h>
13 static const int dummy = 1;
15 #ifdef CUDA_VAR_ORDERDING_AOS
17 /* kernel for gpuNavierStokes3DUpwindRusanov() */
19 void gpuNavierStokes3DUpwindRusanov_kernel(
24 const int * __restrict__ dim,
25 const double * __restrict__ grav_field_g,
26 const double * __restrict__ fL,
27 const double * __restrict__ fR,
28 const double * __restrict__ uL,
29 const double * __restrict__ uR,
30 const double * __restrict__ u,
31 double * __restrict__ fI
34 int p = blockDim.x * blockIdx.x + threadIdx.x;
36 if (p < npoints_grid) {
37 double udiff[_MODEL_NVARS_],uavg[_MODEL_NVARS_];
38 int bounds_inter[_MODEL_NDIMS_], index_inter[_MODEL_NDIMS_],
39 indexL[_MODEL_NDIMS_], indexR[_MODEL_NDIMS_];
41 bounds_inter[0] = dim[0]; bounds_inter[1] = dim[1]; bounds_inter[2] = dim[2]; bounds_inter[dir] += 1;
42 _ArrayIndexnD_(_MODEL_NDIMS_,p,bounds_inter,index_inter,0);
43 _ArrayCopy1D_(index_inter,indexL,_MODEL_NDIMS_); indexL[dir]--;
44 _ArrayCopy1D_(index_inter,indexR,_MODEL_NDIMS_);
45 int pL; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexL,ghosts,pL);
46 int pR; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexR,ghosts,pR);
47 int q = p*_MODEL_NVARS_;
49 /* Modified Rusanov's upwinding scheme */
51 udiff[0] = 0.5 * (uR[q+0] - uL[q+0]);
52 udiff[1] = 0.5 * (uR[q+1] - uL[q+1]);
53 udiff[2] = 0.5 * (uR[q+2] - uL[q+2]);
54 udiff[3] = 0.5 * (uR[q+3] - uL[q+3]);
55 udiff[4] = 0.5 * (uR[q+4] - uL[q+4]);
57 _NavierStokes3DRoeAverage_(uavg,_NavierStokes3D_stride_,(u+_MODEL_NVARS_*pL),(u+_MODEL_NVARS_*pR),gamma);
59 double c, vel[_MODEL_NDIMS_], rho,E,P;
60 _NavierStokes3DGetFlowVar_((u+_MODEL_NVARS_*pL),_NavierStokes3D_stride_,rho,vel[0],vel[1],vel[2],E,P,gamma);
61 c = sqrt(gamma*P/rho);
62 double alphaL = c + absolute(vel[dir]);
63 _NavierStokes3DGetFlowVar_((u+_MODEL_NVARS_*pR),_NavierStokes3D_stride_,rho,vel[0],vel[1],vel[2],E,P,gamma);
64 c = sqrt(gamma*P/rho);
65 double alphaR = c + absolute(vel[dir]);
66 _NavierStokes3DGetFlowVar_(uavg,dummy,rho,vel[0],vel[1],vel[2],E,P,gamma);
67 c = sqrt(gamma*P/rho);
68 double alphaavg = c + absolute(vel[dir]);
70 double kappa = max(grav_field_g[pL],grav_field_g[pR]);
71 double alpha = kappa*max3(alphaL,alphaR,alphaavg);
73 fI[q+0] = 0.5*(fL[q+0]+fR[q+0])-alpha*udiff[0];
74 fI[q+1] = 0.5*(fL[q+1]+fR[q+1])-alpha*udiff[1];
75 fI[q+2] = 0.5*(fL[q+2]+fR[q+2])-alpha*udiff[2];
76 fI[q+3] = 0.5*(fL[q+3]+fR[q+3])-alpha*udiff[3];
77 fI[q+4] = 0.5*(fL[q+4]+fR[q+4])-alpha*udiff[4];
82 /*! Rusanov's upwinding scheme.
84 {\bf f}_{j+1/2} = \frac{1}{2}\left[ {\bf f}_{j+1/2}^L + {\bf f}_{j+1/2}^R
85 - \max_{j,j+1} \nu_j \left( {\bf u}_{j+1/2}^R - {\bf u}_{j+1/2}^L \right)\right]
87 where \f$\nu = c + \left|u\right|\f$.
88 + Rusanov, V. V., "The calculation of the interaction of non-stationary shock waves and obstacles," USSR
89 Computational Mathematics and Mathematical Physics, Vol. 1, No. 2, 1962, pp. 304–320
91 This upwinding scheme is modified for the balanced discretization of the 3D Navier Stokes equations when
92 there is a non-zero gravitational force. See the reference below. For flows without any gravitational forces,
93 it reduces to its original form.
94 + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
95 Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
96 7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
97 http://dx.doi.org/10.2514/6.2015-2889
98 + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
99 for Atmospheric Flows, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580
102 extern "C" int gpuNavierStokes3DUpwindRusanov(
103 double *fI, /*!< Computed upwind interface flux */
104 double *fL, /*!< Left-biased reconstructed interface flux */
105 double *fR, /*!< Right-biased reconstructed interface flux */
106 double *uL, /*!< Left-biased reconstructed interface solution */
107 double *uR, /*!< Right-biased reconstructed interface solution */
108 double *u, /*!< Cell-centered solution */
109 int dir, /*!< Spatial dimension (x,y, or z) */
110 void *s, /*!< Solver object of type #HyPar */
111 double t /*!< Current solution time */
114 HyPar *solver = (HyPar*) s;
115 NavierStokes3D *param = (NavierStokes3D*) solver->physics;
116 int *dim = solver->dim_local;
118 int bounds_inter[_MODEL_NDIMS_];
119 _ArrayCopy1D_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
120 int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
121 int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;
123 gpuNavierStokes3DUpwindRusanov_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
124 npoints_grid, dir, solver->ghosts, param->gamma, solver->gpu_dim_local,
125 param->gpu_grav_field_g, fL, fR, uL, uR, u, fI
127 cudaDeviceSynchronize();
132 /* kernel for gpuNavierStokes3DUpwindRoe() */
134 void gpuNavierStokes3DUpwindRoe_kernel(
139 const int * __restrict__ dim,
140 const double * __restrict__ grav_field_g,
141 const double * __restrict__ fL,
142 const double * __restrict__ fR,
143 const double * __restrict__ uL,
144 const double * __restrict__ uR,
145 const double * __restrict__ u,
146 double * __restrict__ fI
149 int p = blockDim.x * blockIdx.x + threadIdx.x;
151 if (p < npoints_grid) {
152 double R[_MODEL_NVARS_*_MODEL_NVARS_], D[_MODEL_NVARS_*_MODEL_NVARS_],
153 L[_MODEL_NVARS_*_MODEL_NVARS_], DL[_MODEL_NVARS_*_MODEL_NVARS_],
154 modA[_MODEL_NVARS_*_MODEL_NVARS_];
155 double udiff[_MODEL_NVARS_],uavg[_MODEL_NVARS_],udiss[_MODEL_NVARS_];
156 int bounds_inter[_MODEL_NDIMS_], index_inter[_MODEL_NDIMS_],
157 indexL[_MODEL_NDIMS_], indexR[_MODEL_NDIMS_];
159 bounds_inter[0] = dim[0]; bounds_inter[1] = dim[1]; bounds_inter[2] = dim[2]; bounds_inter[dir] += 1;
160 _ArrayIndexnD_(_MODEL_NDIMS_,p,bounds_inter,index_inter,0);
161 _ArrayCopy1D_(index_inter,indexL,_MODEL_NDIMS_); indexL[dir]--;
162 _ArrayCopy1D_(index_inter,indexR,_MODEL_NDIMS_);
163 int pL; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexL,ghosts,pL);
164 int pR; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexR,ghosts,pR);
168 /* Modified Roe's upwinding scheme */
170 udiff[0] = 0.5 * (uR[p+0] - uL[p+0]);
171 udiff[1] = 0.5 * (uR[p+1] - uL[p+1]);
172 udiff[2] = 0.5 * (uR[p+2] - uL[p+2]);
173 udiff[3] = 0.5 * (uR[p+3] - uL[p+3]);
174 udiff[4] = 0.5 * (uR[p+4] - uL[p+4]);
176 _NavierStokes3DRoeAverage_ (uavg,_NavierStokes3D_stride_,(u+_MODEL_NVARS_*pL),(u+_MODEL_NVARS_*pR),gamma);
177 _NavierStokes3DEigenvalues_ (uavg,dummy,D,gamma,dir);
178 _NavierStokes3DLeftEigenvectors_ (uavg,dummy,L,gamma,dir);
179 _NavierStokes3DRightEigenvectors_ (uavg,dummy,R,gamma,dir);
181 /* Harten's Entropy Fix - Page 362 of Leveque */
183 double delta = 0.000001, delta2 = delta*delta;
184 k=0; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
185 k=6; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
186 k=12; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
187 k=18; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
188 k=24; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
190 MatMult5(_MODEL_NVARS_,DL,D,L);
191 MatMult5(_MODEL_NVARS_,modA,R,DL);
192 MatVecMult5(_MODEL_NVARS_,udiss,modA,udiff);
194 fI[p+0] = 0.5 * (fL[p+0]+fR[p+0]) - udiss[0];
195 fI[p+1] = 0.5 * (fL[p+1]+fR[p+1]) - udiss[1];
196 fI[p+2] = 0.5 * (fL[p+2]+fR[p+2]) - udiss[2];
197 fI[p+3] = 0.5 * (fL[p+3]+fR[p+3]) - udiss[3];
198 fI[p+4] = 0.5 * (fL[p+4]+fR[p+4]) - udiss[4];
203 /*! Roe's upwinding scheme.
205 {\bf f}_{j+1/2} = \frac{1}{2}\left[ {\bf f}_{j+1/2}^L + {\bf f}_{j+1/2}^R
206 - \left| A\left({\bf u}_{j+1/2}^L,{\bf u}_{j+1/2}^R\right) \right|
207 \left( {\bf u}_{j+1/2}^R - {\bf u}_{j+1/2}^L \right)\right]
209 + Roe, P. L., “Approximate Riemann solvers, parameter vectors, and difference schemes,” Journal of
210 Computational Physics, Vol. 43, No. 2, 1981, pp. 357–372, http://dx.doi.org/10.1016/0021-9991(81)90128-5.
212 This upwinding scheme is modified for the balanced discretization of the 3D Navier Stokes equations when
213 there is a non-zero gravitational force. See the reference below. For flows without any gravitational forces,
214 it reduces to its original form.
215 + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
216 Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
217 7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
218 http://dx.doi.org/10.2514/6.2015-2889
219 + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
220 for Atmospheric Flows, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580
223 extern "C" int gpuNavierStokes3DUpwindRoe(
224 double * __restrict__ fI, /*!< Computed upwind interface flux */
225 double * __restrict__ fL, /*!< Left-biased reconstructed interface flux */
226 double * __restrict__ fR, /*!< Right-biased reconstructed interface flux */
227 double * __restrict__ uL, /*!< Left-biased reconstructed interface solution */
228 double * __restrict__ uR, /*!< Right-biased reconstructed interface solution */
229 double * __restrict__ u, /*!< Cell-centered solution */
230 int dir, /*!< Spatial dimension (x,y, or z) */
231 void *s, /*!< Solver object of type #HyPar */
232 double t /*!< Current solution time */
235 HyPar *solver = (HyPar*) s;
236 NavierStokes3D *param = (NavierStokes3D*) solver->physics;
237 int *dim = solver->dim_local;
239 int bounds_inter[_MODEL_NDIMS_];
240 _ArrayCopy1D3_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
241 int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
242 int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;
244 gpuNavierStokes3DUpwindRoe_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
245 npoints_grid, dir, solver->ghosts, param->gamma, solver->gpu_dim_local,
246 param->gpu_grav_field_g, fL, fR, uL, uR, u, fI
248 cudaDeviceSynchronize();
255 /* kernel for gpuNavierStokes3DUpwindRusanov() */
257 void gpuNavierStokes3DUpwindRusanov_kernel(
259 int npoints_local_wghosts,
263 const int * __restrict__ dim,
264 const double * __restrict__ grav_field_g,
265 const double * __restrict__ fL,
266 const double * __restrict__ fR,
267 const double * __restrict__ uL,
268 const double * __restrict__ uR,
269 const double * __restrict__ u,
270 double * __restrict__ fI
273 int p = blockDim.x * blockIdx.x + threadIdx.x;
275 if (p < npoints_grid) {
276 double udiff[_MODEL_NVARS_],uavg[_MODEL_NVARS_];
277 int bounds_inter[_MODEL_NDIMS_], index_inter[_MODEL_NDIMS_],
278 indexL[_MODEL_NDIMS_], indexR[_MODEL_NDIMS_];
280 bounds_inter[0] = dim[0]; bounds_inter[1] = dim[1]; bounds_inter[2] = dim[2]; bounds_inter[dir] += 1;
281 _ArrayIndexnD_(_MODEL_NDIMS_,p,bounds_inter,index_inter,0);
282 _ArrayCopy1D_(index_inter,indexL,_MODEL_NDIMS_); indexL[dir]--;
283 _ArrayCopy1D_(index_inter,indexR,_MODEL_NDIMS_);
284 int pL; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexL,ghosts,pL);
285 int pR; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexR,ghosts,pR);
287 /* Modified Rusanov's upwinding scheme */
289 udiff[0] = 0.5 * (uR[p+ 0] - uL[p+ 0]);
290 udiff[1] = 0.5 * (uR[p+ npoints_grid] - uL[p+ npoints_grid]);
291 udiff[2] = 0.5 * (uR[p+2*npoints_grid] - uL[p+2*npoints_grid]);
292 udiff[3] = 0.5 * (uR[p+3*npoints_grid] - uL[p+3*npoints_grid]);
293 udiff[4] = 0.5 * (uR[p+4*npoints_grid] - uL[p+4*npoints_grid]);
295 _NavierStokes3DRoeAverage_(uavg,npoints_local_wghosts,(u+pL),(u+pR),gamma);
297 double c, vel[_MODEL_NDIMS_], rho,E,P;
298 _NavierStokes3DGetFlowVar_((u+pL),npoints_local_wghosts,rho,vel[0],vel[1],vel[2],E,P,gamma);
299 c = sqrt(gamma*P/rho);
300 double alphaL = c + absolute(vel[dir]);
301 _NavierStokes3DGetFlowVar_((u+pR),npoints_local_wghosts,rho,vel[0],vel[1],vel[2],E,P,gamma);
302 c = sqrt(gamma*P/rho);
303 double alphaR = c + absolute(vel[dir]);
304 _NavierStokes3DGetFlowVar_(uavg,dummy,rho,vel[0],vel[1],vel[2],E,P,gamma);
305 c = sqrt(gamma*P/rho);
306 double alphaavg = c + absolute(vel[dir]);
308 double kappa = max(grav_field_g[pL],grav_field_g[pR]);
309 double alpha = kappa*max3(alphaL,alphaR,alphaavg);
311 fI[p+0] = 0.5*(fL[p+ 0]+fR[p+ 0])-alpha*udiff[0];
312 fI[p+ npoints_grid] = 0.5*(fL[p+ npoints_grid]+fR[p+ npoints_grid])-alpha*udiff[1];
313 fI[p+2*npoints_grid] = 0.5*(fL[p+2*npoints_grid]+fR[p+2*npoints_grid])-alpha*udiff[2];
314 fI[p+3*npoints_grid] = 0.5*(fL[p+3*npoints_grid]+fR[p+3*npoints_grid])-alpha*udiff[3];
315 fI[p+4*npoints_grid] = 0.5*(fL[p+4*npoints_grid]+fR[p+4*npoints_grid])-alpha*udiff[4];
320 /*! Rusanov's upwinding scheme.
322 {\bf f}_{j+1/2} = \frac{1}{2}\left[ {\bf f}_{j+1/2}^L + {\bf f}_{j+1/2}^R
323 - \max_{j,j+1} \nu_j \left( {\bf u}_{j+1/2}^R - {\bf u}_{j+1/2}^L \right)\right]
325 where \f$\nu = c + \left|u\right|\f$.
326 + Rusanov, V. V., "The calculation of the interaction of non-stationary shock waves and obstacles," USSR
327 Computational Mathematics and Mathematical Physics, Vol. 1, No. 2, 1962, pp. 304–320
329 This upwinding scheme is modified for the balanced discretization of the 3D Navier Stokes equations when
330 there is a non-zero gravitational force. See the reference below. For flows without any gravitational forces,
331 it reduces to its original form.
332 + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
333 Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
334 7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
335 http://dx.doi.org/10.2514/6.2015-2889
336 + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
337 for Atmospheric Flows, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580
340 extern "C" int gpuNavierStokes3DUpwindRusanov(
341 double *fI, /*!< Computed upwind interface flux */
342 double *fL, /*!< Left-biased reconstructed interface flux */
343 double *fR, /*!< Right-biased reconstructed interface flux */
344 double *uL, /*!< Left-biased reconstructed interface solution */
345 double *uR, /*!< Right-biased reconstructed interface solution */
346 double *u, /*!< Cell-centered solution */
347 int dir, /*!< Spatial dimension (x,y, or z) */
348 void *s, /*!< Solver object of type #HyPar */
349 double t /*!< Current solution time */
352 HyPar *solver = (HyPar*) s;
353 NavierStokes3D *param = (NavierStokes3D*) solver->physics;
354 int *dim = solver->dim_local;
356 int bounds_inter[_MODEL_NDIMS_];
357 _ArrayCopy1D_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
358 int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
359 int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;
361 gpuNavierStokes3DUpwindRusanov_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
362 npoints_grid, solver->npoints_local_wghosts, dir, solver->ghosts, param->gamma, solver->gpu_dim_local,
363 param->gpu_grav_field_g, fL, fR, uL, uR, u, fI
365 cudaDeviceSynchronize();
370 /* kernel for gpuNavierStokes3DUpwindRoe() */
372 void gpuNavierStokes3DUpwindRoe_kernel(
374 int npoints_local_wghosts,
378 const int * __restrict__ dim,
379 const double * __restrict__ grav_field_g,
380 const double * __restrict__ fL,
381 const double * __restrict__ fR,
382 const double * __restrict__ uL,
383 const double * __restrict__ uR,
384 const double * __restrict__ u,
385 double * __restrict__ fI
388 int p = blockDim.x * blockIdx.x + threadIdx.x;
390 if (p < npoints_grid) {
391 double R[_MODEL_NVARS_*_MODEL_NVARS_], D[_MODEL_NVARS_*_MODEL_NVARS_],
392 L[_MODEL_NVARS_*_MODEL_NVARS_], DL[_MODEL_NVARS_*_MODEL_NVARS_],
393 modA[_MODEL_NVARS_*_MODEL_NVARS_];
394 double udiff[_MODEL_NVARS_],uavg[_MODEL_NVARS_],udiss[_MODEL_NVARS_];
395 int bounds_inter[_MODEL_NDIMS_], index_inter[_MODEL_NDIMS_],
396 indexL[_MODEL_NDIMS_], indexR[_MODEL_NDIMS_];
398 bounds_inter[0] = dim[0]; bounds_inter[1] = dim[1]; bounds_inter[2] = dim[2]; bounds_inter[dir] += 1;
399 _ArrayIndexnD_(_MODEL_NDIMS_,p,bounds_inter,index_inter,0);
400 _ArrayCopy1D_(index_inter,indexL,_MODEL_NDIMS_); indexL[dir]--;
401 _ArrayCopy1D_(index_inter,indexR,_MODEL_NDIMS_);
402 int pL; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexL,ghosts,pL);
403 int pR; _ArrayIndex1D_(_MODEL_NDIMS_,dim,indexR,ghosts,pR);
405 /* Modified Roe's upwinding scheme */
407 udiff[0] = 0.5 * (uR[p+0] - uL[p+0]);
408 udiff[1] = 0.5 * (uR[p+ npoints_grid] - uL[p+ npoints_grid]);
409 udiff[2] = 0.5 * (uR[p+2*npoints_grid] - uL[p+2*npoints_grid]);
410 udiff[3] = 0.5 * (uR[p+3*npoints_grid] - uL[p+3*npoints_grid]);
411 udiff[4] = 0.5 * (uR[p+4*npoints_grid] - uL[p+4*npoints_grid]);
413 _NavierStokes3DRoeAverage_ (uavg,npoints_local_wghosts,(u+pL),(u+pR),gamma);
414 _NavierStokes3DEigenvalues_ (uavg,dummy,D,gamma,dir);
415 _NavierStokes3DLeftEigenvectors_ (uavg,dummy,L,gamma,dir);
416 _NavierStokes3DRightEigenvectors_ (uavg,dummy,R,gamma,dir);
418 /* Harten's Entropy Fix - Page 362 of Leveque */
420 double delta = 0.000001, delta2 = delta*delta;
421 k=0; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
422 k=6; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
423 k=12; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
424 k=18; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
425 k=24; D[k] = (absolute(D[k]) < delta ? (D[k]*D[k]+delta2)/(2*delta) : absolute(D[k]) );
427 MatMult5(_MODEL_NVARS_,DL,D,L);
428 MatMult5(_MODEL_NVARS_,modA,R,DL);
429 MatVecMult5(_MODEL_NVARS_,udiss,modA,udiff);
431 fI[p+0] = 0.5 * (fL[p+0]+fR[p+0]) - udiss[0];
432 fI[p+ npoints_grid] = 0.5 * (fL[p+ npoints_grid]+fR[p+ npoints_grid]) - udiss[1];
433 fI[p+2*npoints_grid] = 0.5 * (fL[p+2*npoints_grid]+fR[p+2*npoints_grid]) - udiss[2];
434 fI[p+3*npoints_grid] = 0.5 * (fL[p+3*npoints_grid]+fR[p+3*npoints_grid]) - udiss[3];
435 fI[p+4*npoints_grid] = 0.5 * (fL[p+4*npoints_grid]+fR[p+4*npoints_grid]) - udiss[4];
440 /*! Roe's upwinding scheme.
442 {\bf f}_{j+1/2} = \frac{1}{2}\left[ {\bf f}_{j+1/2}^L + {\bf f}_{j+1/2}^R
443 - \left| A\left({\bf u}_{j+1/2}^L,{\bf u}_{j+1/2}^R\right) \right|
444 \left( {\bf u}_{j+1/2}^R - {\bf u}_{j+1/2}^L \right)\right]
446 + Roe, P. L., “Approximate Riemann solvers, parameter vectors, and difference schemes,” Journal of
447 Computational Physics, Vol. 43, No. 2, 1981, pp. 357–372, http://dx.doi.org/10.1016/0021-9991(81)90128-5.
449 This upwinding scheme is modified for the balanced discretization of the 3D Navier Stokes equations when
450 there is a non-zero gravitational force. See the reference below. For flows without any gravitational forces,
451 it reduces to its original form.
452 + Ghosh, D., Constantinescu, E.M., Well-Balanced Formulation of Gravitational Source
453 Terms for Conservative Finite-Difference Atmospheric Flow Solvers, AIAA Paper 2015-2889,
454 7th AIAA Atmospheric and Space Environments Conference, June 22-26, 2015, Dallas, TX,
455 http://dx.doi.org/10.2514/6.2015-2889
456 + Ghosh, D., Constantinescu, E.M., A Well-Balanced, Conservative Finite-Difference Algorithm
457 for Atmospheric Flows, 54 (4), 2016, pp. 1370-1385, http://dx.doi.org/10.2514/1.J054580
460 extern "C" int gpuNavierStokes3DUpwindRoe(
461 double * __restrict__ fI, /*!< Computed upwind interface flux */
462 double * __restrict__ fL, /*!< Left-biased reconstructed interface flux */
463 double * __restrict__ fR, /*!< Right-biased reconstructed interface flux */
464 double * __restrict__ uL, /*!< Left-biased reconstructed interface solution */
465 double * __restrict__ uR, /*!< Right-biased reconstructed interface solution */
466 double * __restrict__ u, /*!< Cell-centered solution */
467 int dir, /*!< Spatial dimension (x,y, or z) */
468 void *s, /*!< Solver object of type #HyPar */
469 double t /*!< Current solution time */
472 HyPar *solver = (HyPar*) s;
473 NavierStokes3D *param = (NavierStokes3D*) solver->physics;
474 int *dim = solver->dim_local;
476 int bounds_inter[_MODEL_NDIMS_];
477 _ArrayCopy1D3_(dim,bounds_inter,_MODEL_NDIMS_); bounds_inter[dir] += 1;
478 int npoints_grid; _ArrayProduct1D_(bounds_inter,_MODEL_NDIMS_,npoints_grid);
479 int nblocks = (npoints_grid-1)/GPU_THREADS_PER_BLOCK + 1;
481 #if defined(GPU_STAT)
482 cudaEvent_t startEvent, stopEvent;
483 float milliseconds = 0;
485 checkCuda( cudaEventCreate(&startEvent) );
486 checkCuda( cudaEventCreate(&stopEvent) );
487 checkCuda( cudaEventRecord(startEvent, 0) );
490 gpuNavierStokes3DUpwindRoe_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
491 npoints_grid, solver->npoints_local_wghosts, dir, solver->ghosts, param->gamma, solver->gpu_dim_local,
492 param->gpu_grav_field_g, fL, fR, uL, uR, u, fI
495 #if defined(GPU_STAT)
496 checkCuda( cudaEventRecord(stopEvent, 0) );
497 checkCuda( cudaEventSynchronize(stopEvent) );
500 cudaDeviceSynchronize();
502 #if defined(GPU_STAT)
503 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
504 printf("%-50s GPU time (secs) = %.6f dir = %d\n",
505 "NavierStokes3DUpwindRoe2", milliseconds*1e-3, dir);
506 checkCuda( cudaEventDestroy(startEvent) );
507 checkCuda( cudaEventDestroy(stopEvent) );