1 /*! @file NavierStokes3DParabolicFunction_GPU.cu
3 @brief Compute the viscous terms for the 3D Navier Stokes equations
6 #include <arrayfunctions_gpu.h>
7 #include <mathfunctions.h>
8 #include <physicalmodels/navierstokes3d.h>
12 #ifdef CUDA_VAR_ORDERDING_AOS
14 /*! Kernel function */
16 void gpuNavierStokes3DParabolicFunction_Q_kernel(
19 const double * __restrict__ u,
20 double * __restrict__ Q
23 int p = blockDim.x * blockIdx.x + threadIdx.x;
25 if (p < npoints_grid) {
26 double energy, pressure;
28 _NavierStokes3DGetFlowVar_(
30 _NavierStokes3D_stride_,
39 Q[p+4] = gamma*pressure/Q[p+0]; /* temperature */
44 /*! Kernel function */
46 void gpuNavierStokes3DParabolicFunction_QDeriv_kernel(
49 const int * __restrict__ dim,
50 const double * __restrict__ dxinv,
51 double * __restrict__ QDerivX,
52 double * __restrict__ QDerivY,
53 double * __restrict__ QDerivZ
56 int p = blockDim.x * blockIdx.x + threadIdx.x;
58 if (p < npoints_grid) {
60 double dxinv_val, dyinv_val, dzinv_val;
62 _ArrayIndexnD_(_MODEL_NDIMS_,p,dim,index,ghosts); p *= _MODEL_NVARS_;
63 _GetCoordinate_(_XDIR_,index[_XDIR_],dim,ghosts,dxinv,dxinv_val);
64 _GetCoordinate_(_YDIR_,index[_YDIR_],dim,ghosts,dxinv,dyinv_val);
65 _GetCoordinate_(_ZDIR_,index[_ZDIR_],dim,ghosts,dxinv,dzinv_val);
66 _ArrayScale1D_((QDerivX+p),dxinv_val,_MODEL_NVARS_);
67 _ArrayScale1D_((QDerivY+p),dyinv_val,_MODEL_NVARS_);
68 _ArrayScale1D_((QDerivZ+p),dzinv_val,_MODEL_NVARS_);
73 /*! Kernel function */
75 void gpuNavierStokes3DParabolicFunction_Xdir_kernel(
81 const double * __restrict__ Q,
82 const double * __restrict__ QDerivX,
83 const double * __restrict__ QDerivY,
84 const double * __restrict__ QDerivZ,
85 double * __restrict__ FViscous
88 int p = blockDim.x * blockIdx.x + threadIdx.x;
90 if (p < npoints_grid) {
91 double uvel, vvel, wvel, T, Tx,
92 ux, uy, uz, vx, vy, wx, wz;
109 /* calculate viscosity coeff based on Sutherland's law */
110 double mu = raiseto(T, 0.76);
111 double tau_xx, tau_xy, tau_xz, qx;
112 tau_xx = two_third * (mu*inv_Re) * (2*ux - vy - wz);
113 tau_xy = (mu*inv_Re) * (uy + vx);
114 tau_xz = (mu*inv_Re) * (uz + wx);
115 qx = ( mu*inv_Re * inv_gamma_m1 * inv_Pr ) * Tx;
117 (FViscous+p)[0] = 0.0;
118 (FViscous+p)[1] = tau_xx;
119 (FViscous+p)[2] = tau_xy;
120 (FViscous+p)[3] = tau_xz;
121 (FViscous+p)[4] = uvel*tau_xx + vvel*tau_xy + wvel*tau_xz + qx;
126 /*! Kernel function */
128 void gpuNavierStokes3DParabolicFunction_Ydir_kernel(
134 const double * __restrict__ Q,
135 const double * __restrict__ QDerivX,
136 const double * __restrict__ QDerivY,
137 const double * __restrict__ QDerivZ,
138 double * __restrict__ FViscous
141 int p = blockDim.x * blockIdx.x + threadIdx.x;
142 if (p < npoints_grid) {
143 double uvel, vvel, wvel, T, Ty,
144 ux, uy, vx, vy, vz, wy, wz;
161 /* calculate viscosity coeff based on Sutherland's law */
162 double mu = raiseto(T, 0.76);
164 double tau_yx, tau_yy, tau_yz, qy;
165 tau_yx = (mu*inv_Re) * (uy + vx);
166 tau_yy = two_third * (mu*inv_Re) * (-ux + 2*vy - wz);
167 tau_yz = (mu*inv_Re) * (vz + wy);
168 qy = ( mu*inv_Re * inv_gamma_m1 * inv_Pr ) * Ty;
170 (FViscous+p)[0] = 0.0;
171 (FViscous+p)[1] = tau_yx;
172 (FViscous+p)[2] = tau_yy;
173 (FViscous+p)[3] = tau_yz;
174 (FViscous+p)[4] = uvel*tau_yx + vvel*tau_yy + wvel*tau_yz + qy;
179 /*! Kernel function */
181 void gpuNavierStokes3DParabolicFunction_Zdir_kernel(
187 const double * __restrict__ Q,
188 const double * __restrict__ QDerivX,
189 const double * __restrict__ QDerivY,
190 const double * __restrict__ QDerivZ,
191 double * __restrict__ FViscous
194 int p = blockDim.x * blockIdx.x + threadIdx.x;
195 if (p < npoints_grid) {
196 double uvel, vvel, wvel, T, Tz,
197 ux, uz, vy, vz, wx, wy, wz;
214 /* calculate viscosity coeff based on Sutherland's law */
215 double mu = raiseto(T,0.76);
217 double tau_zx, tau_zy, tau_zz, qz;
218 tau_zx = (mu*inv_Re) * (uz + wx);
219 tau_zy = (mu*inv_Re) * (vz + wy);
220 tau_zz = two_third * (mu*inv_Re) * (-ux - vy + 2*wz);
221 qz = ( mu*inv_Re * inv_gamma_m1 * inv_Pr ) * Tz;
223 (FViscous+p)[0] = 0.0;
224 (FViscous+p)[1] = tau_zx;
225 (FViscous+p)[2] = tau_zy;
226 (FViscous+p)[3] = tau_zz;
227 (FViscous+p)[4] = uvel*tau_zx + vvel*tau_zy + wvel*tau_zz + qz;
232 /*! Kernel function */
234 void gpuNavierStokes3DParabolicFunction_par_kernel(
238 const int * __restrict__ dim,
239 const double * __restrict__ dxinv,
240 const double * __restrict__ FDeriv,
241 double * __restrict__ par
244 int p = blockDim.x * blockIdx.x + threadIdx.x;
246 if (p < npoints_grid) {
250 _ArrayIndexnD_(_MODEL_NDIMS_,p,dim,index,0);
251 _ArrayIndex1D_(_MODEL_NDIMS_,dim,index,ghosts,p); p *= _MODEL_NVARS_;
252 _GetCoordinate_(dir,index[dir],dim,ghosts,dxinv,dxinv_val);
255 for (int v=0; v<_MODEL_NVARS_; v++) (par+p)[v] += (dxinv_val * (FDeriv+p)[v] );
261 Compute the viscous terms in the 3D Navier Stokes equations: this function computes
264 \frac {\partial} {\partial x} \left[\begin{array}{c} 0 \\ \tau_{xx} \\ \tau_{yx} \\ \tau_{zx} \\ u \tau_{xx} + v \tau_{yx} + w \tau_{zx} - q_x \end{array}\right]
265 + \frac {\partial} {\partial y} \left[\begin{array}{c} 0 \\ \tau_{xy} \\ \tau_{yy} \\ \tau_{zy} \\ u \tau_{xy} + v \tau_{yy} + w \tau_{zy} - q_y \end{array}\right]
266 + \frac {\partial} {\partial z} \left[\begin{array}{c} 0 \\ \tau_{xz} \\ \tau_{yz} \\ \tau_{zz} \\ u \tau_{xz} + v \tau_{yz} + w \tau_{zz} - q_z \end{array}\right]
270 \tau_{xx} &= \frac{2}{3}\left(\frac{\mu}{Re}\right)\left(2\frac{\partial u}{\partial x} - \frac{\partial v}{\partial y} - \frac{\partial w}{\partial z}\right),\\
271 \tau_{xy} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial u}{\partial y} + \frac{\partial v}{\partial x}\right),\\
272 \tau_{xz} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial u}{\partial z} + \frac{\partial w}{\partial x}\right),\\
273 \tau_{yx} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial u}{\partial y} + \frac{\partial v}{\partial x}\right),\\
274 \tau_{yy} &= \frac{2}{3}\left(\frac{\mu}{Re}\right)\left(-\frac{\partial u}{\partial x} +2\frac{\partial v}{\partial y} - \frac{\partial w}{\partial z}\right),\\
275 \tau_{yz} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial w}{\partial y} + \frac{\partial v}{\partial z}\right),\\
276 \tau_{zx} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial u}{\partial z} + \frac{\partial w}{\partial x}\right),\\
277 \tau_{zy} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial v}{\partial z} + \frac{\partial w}{\partial y}\right),\\
278 \tau_{zz} &= \frac{2}{3}\left(\frac{\mu}{Re}\right)\left(-\frac{\partial u}{\partial x} - \frac{\partial v}{\partial y} + 2\frac{\partial v}{\partial y}\right),\\
279 q_x &= -\frac{mu}{\left(\gamma-1\right)Re Pr}\frac{\partial T}{\partial x}, \\
280 q_y &= -\frac{mu}{\left(\gamma-1\right)Re Pr}\frac{\partial T}{\partial y}, \\
281 q_z &= -\frac{mu}{\left(\gamma-1\right)Re Pr}\frac{\partial T}{\partial z}
283 and the temperature is \f$T = \gamma p/\rho\f$. \f$Re\f$ and \f$Pr\f$ are the Reynolds and Prandtl numbers, respectively. Note that this function
284 computes the entire parabolic term, and thus bypasses HyPar's parabolic function calculation interfaces. NavierStokes3DInitialize() assigns this
285 function to #HyPar::ParabolicFunction.
288 + Tannehill, Anderson and Pletcher, Computational Fluid Mechanics and Heat Transfer,
289 Chapter 5, Section 5.1.7 (However, the non-dimensional velocity and the Reynolds
290 number is based on speed of sound, instead of the freestream velocity).
292 extern "C" int gpuNavierStokes3DParabolicFunction(
293 double * __restrict__ par, /*!< Array to hold the computed viscous terms */
294 double * __restrict__ u, /*!< Solution vector array */
295 void * __restrict__ s, /*!< Solver object of type #HyPar */
296 void * __restrict__ m, /*!< MPI object of type #MPIVariables */
297 double t /*!< Current simulation time */
300 HyPar *solver = (HyPar*) s;
301 MPIVariables *mpi = (MPIVariables*) m;
302 NavierStokes3D *physics = (NavierStokes3D*) solver->physics;
304 int ghosts = solver->ghosts;
305 int *dim = solver->gpu_dim_local;
306 int size = solver->npoints_local_wghosts;
308 gpuMemset(par, 0, size*_MODEL_NVARS_*sizeof(double));
309 if (physics->Re <= 0) return (0); /* inviscid flow */
312 static double two_third = 2.0/3.0;
313 double inv_gamma_m1 = 1.0 / (physics->gamma-1.0);
314 double inv_Re = 1.0 / physics->Re;
315 double inv_Pr = 1.0 / physics->Pr;
317 double *Q = physics->gpu_Q;
318 double *QDerivX = physics->gpu_QDerivX;
319 double *QDerivY = physics->gpu_QDerivY;
320 double *QDerivZ = physics->gpu_QDerivZ;
321 double *dxinv = solver->gpu_dxinv;
323 int nblocks = (size-1)/GPU_THREADS_PER_BLOCK + 1;
324 int nblocks_par = (solver->npoints_local-1)/GPU_THREADS_PER_BLOCK + 1;
326 #if defined(GPU_STAT)
327 cudaEvent_t startEvent, stopEvent;
330 checkCuda( cudaEventCreate(&startEvent) );
331 checkCuda( cudaEventCreate(&stopEvent) );
332 checkCuda( cudaEventRecord(startEvent, 0) );
335 gpuNavierStokes3DParabolicFunction_Q_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
336 size, physics->gamma, u, Q
338 #if defined(GPU_STAT)
339 checkCuda( cudaEventRecord(stopEvent, 0) );
340 checkCuda( cudaEventSynchronize(stopEvent) );
343 cudaDeviceSynchronize();
345 #if defined(GPU_STAT)
346 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
348 int memory_accessed = size*_MODEL_NVARS_*sizeof(double);
349 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
350 "NavierStokes3DParabolicFunction_Q", milliseconds*1e-3,
351 (1e-6*(memory_accessed)/milliseconds));
354 solver->FirstDerivativePar(QDerivX,Q,_XDIR_,1,solver,mpi);
355 solver->FirstDerivativePar(QDerivY,Q,_YDIR_,1,solver,mpi);
356 solver->FirstDerivativePar(QDerivZ,Q,_ZDIR_,1,solver,mpi);
358 gpuMPIExchangeBoundariesnD(_MODEL_NDIMS_,_MODEL_NVARS_,solver->gpu_dim_local,
359 solver->ghosts,mpi,QDerivX);
360 gpuMPIExchangeBoundariesnD(_MODEL_NDIMS_,_MODEL_NVARS_,solver->gpu_dim_local,
361 solver->ghosts,mpi,QDerivY);
362 gpuMPIExchangeBoundariesnD(_MODEL_NDIMS_,_MODEL_NVARS_,solver->gpu_dim_local,
363 solver->ghosts,mpi,QDerivY);
365 #if defined(GPU_STAT)
366 checkCuda( cudaEventRecord(startEvent, 0) );
369 gpuNavierStokes3DParabolicFunction_QDeriv_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
370 size, ghosts, dim, dxinv, QDerivX, QDerivY, QDerivZ
372 #if defined(GPU_STAT)
373 checkCuda( cudaEventRecord(stopEvent, 0) );
374 checkCuda( cudaEventSynchronize(stopEvent) );
377 cudaDeviceSynchronize();
379 #if defined(GPU_STAT)
380 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
382 memory_accessed = (3*size*_MODEL_NVARS_ + solver->size_x)*sizeof(double);
383 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
384 "NavierStokes3DParabolicFunction_QDeriv", milliseconds*1e-3,
385 (1e-6*(memory_accessed)/milliseconds));
388 double *FViscous = physics->gpu_FViscous;
389 double *FDeriv = physics->gpu_FDeriv;
390 gpuMemset(FViscous, 0, size*_MODEL_NVARS_*sizeof(double));
391 gpuMemset(FDeriv, 0, size*_MODEL_NVARS_*sizeof(double));
394 #if defined(GPU_STAT)
395 checkCuda( cudaEventRecord(startEvent, 0) );
398 gpuNavierStokes3DParabolicFunction_Xdir_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
399 size, two_third, inv_gamma_m1, inv_Re, inv_Pr, Q,
400 QDerivX, QDerivY, QDerivZ, FViscous
402 cudaDeviceSynchronize();
404 #if defined(GPU_STAT)
405 checkCuda( cudaEventRecord(stopEvent, 0) );
406 checkCuda( cudaEventSynchronize(stopEvent) );
407 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
409 memory_accessed = 17*size*sizeof(double);
410 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
411 "NavierStokes3DParabolicFunction_Xdir", milliseconds*1e-3,
412 (1e-6*(memory_accessed)/milliseconds));
415 solver->FirstDerivativePar(FDeriv,FViscous,_XDIR_,1,solver,mpi);
417 #if defined(GPU_STAT)
418 checkCuda( cudaEventRecord(startEvent, 0) );
421 gpuNavierStokes3DParabolicFunction_par_kernel<<<nblocks_par, GPU_THREADS_PER_BLOCK>>>(
422 solver->npoints_local, ghosts, _XDIR_, dim, dxinv, FDeriv, par
424 cudaDeviceSynchronize();
426 #if defined(GPU_STAT)
427 checkCuda( cudaEventRecord(stopEvent, 0) );
428 checkCuda( cudaEventSynchronize(stopEvent) );
429 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
431 memory_accessed = (solver->npoints_local + 2*solver->npoints_local*_MODEL_NVARS_)*sizeof(double);
432 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
433 "NavierStokes3DParabolicFunction_par", milliseconds*1e-3,
434 (1e-6*(memory_accessed)/milliseconds));
438 #if defined(GPU_STAT)
439 checkCuda( cudaEventRecord(startEvent, 0) );
442 gpuNavierStokes3DParabolicFunction_Ydir_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
443 size, two_third, inv_gamma_m1, inv_Re, inv_Pr, Q,
444 QDerivX, QDerivY, QDerivZ, FViscous
446 cudaDeviceSynchronize();
448 #if defined(GPU_STAT)
449 checkCuda( cudaEventRecord(stopEvent, 0) );
450 checkCuda( cudaEventSynchronize(stopEvent) );
451 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
452 //cudaDeviceSynchronize();
453 memory_accessed = 17*size*sizeof(double);
454 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
455 "NavierStokes3DParabolicFunction_Ydir", milliseconds*1e-3,
456 (1e-6*(memory_accessed)/milliseconds));
459 solver->FirstDerivativePar(FDeriv,FViscous,_YDIR_,1,solver,mpi);
460 gpuNavierStokes3DParabolicFunction_par_kernel<<<nblocks_par, GPU_THREADS_PER_BLOCK>>>(
461 solver->npoints_local, ghosts, _YDIR_, dim, dxinv, FDeriv, par
463 cudaDeviceSynchronize();
466 #if defined(GPU_STAT)
467 checkCuda( cudaEventRecord(startEvent, 0) );
470 gpuNavierStokes3DParabolicFunction_Zdir_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
471 size, two_third, inv_gamma_m1, inv_Re, inv_Pr, Q,
472 QDerivX, QDerivY, QDerivZ, FViscous
474 cudaDeviceSynchronize();
476 #if defined(GPU_STAT)
477 checkCuda( cudaEventRecord(stopEvent, 0) );
478 checkCuda( cudaEventSynchronize(stopEvent) );
479 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
481 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
482 "NavierStokes3DParabolicFunction_Zdir", milliseconds*1e-3,
483 (1e-6*(memory_accessed)/milliseconds));
486 solver->FirstDerivativePar(FDeriv,FViscous,_ZDIR_,1,solver,mpi);
487 gpuNavierStokes3DParabolicFunction_par_kernel<<<nblocks_par, GPU_THREADS_PER_BLOCK>>>(
488 solver->npoints_local, ghosts, _ZDIR_, dim, dxinv, FDeriv, par
490 cudaDeviceSynchronize();
492 #if defined(GPU_STAT)
493 checkCuda( cudaEventDestroy(startEvent) );
494 checkCuda( cudaEventDestroy(stopEvent) );
497 if (solver->flag_ib) gpuArrayBlockMultiply(par,solver->gpu_iblank,size,_MODEL_NVARS_);
504 /*! Kernel function */
506 void gpuNavierStokes3DParabolicFunction_Q_kernel(
509 const double * __restrict__ u,
510 double * __restrict__ Q
513 int p = blockDim.x * blockIdx.x + threadIdx.x;
515 if (p < npoints_grid) {
516 double energy, pressure;
517 _NavierStokes3DGetFlowVar_(
528 Q[p+4*npoints_grid] = gamma*pressure/Q[p+0]; /* temperature */
533 /*! Kernel function */
535 void gpuNavierStokes3DParabolicFunction_QDeriv_kernel(
538 const int * __restrict__ dim,
539 const double * __restrict__ dxinv,
540 double * __restrict__ QDerivX,
541 double * __restrict__ QDerivY,
542 double * __restrict__ QDerivZ
545 int p = blockDim.x * blockIdx.x + threadIdx.x;
547 if (p < npoints_grid) {
548 int index[_MODEL_NDIMS_];
549 double dxinv_val, dyinv_val, dzinv_val;
551 _ArrayIndexnD_(_MODEL_NDIMS_,p,dim,index,ghosts);
552 _GetCoordinate_(_XDIR_,index[_XDIR_],dim,ghosts,dxinv,dxinv_val);
553 _GetCoordinate_(_YDIR_,index[_YDIR_],dim,ghosts,dxinv,dyinv_val);
554 _GetCoordinate_(_ZDIR_,index[_ZDIR_],dim,ghosts,dxinv,dzinv_val);
557 for (int j=0; j<_MODEL_NVARS_; j++) {
558 QDerivX[p] *= dxinv_val;
559 QDerivY[p] *= dyinv_val;
560 QDerivZ[p] *= dzinv_val;
567 /*! Kernel function */
569 void gpuNavierStokes3DParabolicFunction_Xdir_kernel(
575 const double * __restrict__ Q,
576 const double * __restrict__ QDerivX,
577 const double * __restrict__ QDerivY,
578 const double * __restrict__ QDerivZ,
579 double * __restrict__ FViscous
582 int p = blockDim.x * blockIdx.x + threadIdx.x;
584 if (p < npoints_grid) {
585 double uvel, vvel, wvel, T, Tx,
586 ux, uy, uz, vx, vy, wx, wz;
588 uvel = (Q+p+ npoints_grid)[0];
589 vvel = (Q+p+2*npoints_grid)[0];
590 wvel = (Q+p+3*npoints_grid)[0];
591 T = (Q+p+4*npoints_grid)[0];
592 Tx = (QDerivX+p+4*npoints_grid)[0];
593 ux = (QDerivX+p+ npoints_grid)[0];
594 vx = (QDerivX+p+2*npoints_grid)[0];
595 wx = (QDerivX+p+3*npoints_grid)[0];
596 uy = (QDerivY+p+ npoints_grid)[0];
597 vy = (QDerivY+p+2*npoints_grid)[0];
598 uz = (QDerivZ+p+ npoints_grid)[0];
599 wz = (QDerivZ+p+3*npoints_grid)[0];
601 /* calculate viscosity coeff based on Sutherland's law */
602 double mu = raiseto(T, 0.76);
603 double tau_xx, tau_xy, tau_xz, qx;
604 tau_xx = two_third * (mu*inv_Re) * (2*ux - vy - wz);
605 tau_xy = (mu*inv_Re) * (uy + vx);
606 tau_xz = (mu*inv_Re) * (uz + wx);
607 qx = ( mu*inv_Re * inv_gamma_m1 * inv_Pr ) * Tx;
609 (FViscous+p)[0] = 0.0;
610 (FViscous+p+ npoints_grid)[0] = tau_xx;
611 (FViscous+p+2*npoints_grid)[0] = tau_xy;
612 (FViscous+p+3*npoints_grid)[0] = tau_xz;
613 (FViscous+p+4*npoints_grid)[0] = uvel*tau_xx + vvel*tau_xy + wvel*tau_xz + qx;
618 /*! Kernel function */
620 void gpuNavierStokes3DParabolicFunction_Ydir_kernel(
626 const double * __restrict__ Q,
627 const double * __restrict__ QDerivX,
628 const double * __restrict__ QDerivY,
629 const double * __restrict__ QDerivZ,
630 double * __restrict__ FViscous
633 int p = blockDim.x * blockIdx.x + threadIdx.x;
634 if (p < npoints_grid) {
635 double uvel, vvel, wvel, T, Ty,
636 ux, uy, vx, vy, vz, wy, wz;
638 uvel = (Q+p+ npoints_grid)[0];
639 vvel = (Q+p+2*npoints_grid)[0];
640 wvel = (Q+p+3*npoints_grid)[0];
641 T = (Q+p+4*npoints_grid)[0];
642 Ty = (QDerivY+p+4*npoints_grid)[0];
643 ux = (QDerivX+p+ npoints_grid)[0];
644 vx = (QDerivX+p+2*npoints_grid)[0];
645 uy = (QDerivY+p+ npoints_grid)[0];
646 vy = (QDerivY+p+2*npoints_grid)[0];
647 wy = (QDerivY+p+3*npoints_grid)[0];
648 vz = (QDerivZ+p+2*npoints_grid)[0];
649 wz = (QDerivZ+p+3*npoints_grid)[0];
651 /* calculate viscosity coeff based on Sutherland's law */
652 double mu = raiseto(T, 0.76);
654 double tau_yx, tau_yy, tau_yz, qy;
655 tau_yx = (mu*inv_Re) * (uy + vx);
656 tau_yy = two_third * (mu*inv_Re) * (-ux + 2*vy - wz);
657 tau_yz = (mu*inv_Re) * (vz + wy);
658 qy = ( mu*inv_Re * inv_gamma_m1 * inv_Pr ) * Ty;
660 (FViscous+p)[0] = 0.0;
661 (FViscous+p+ npoints_grid)[0] = tau_yx;
662 (FViscous+p+2*npoints_grid)[0] = tau_yy;
663 (FViscous+p+3*npoints_grid)[0] = tau_yz;
664 (FViscous+p+4*npoints_grid)[0] = uvel*tau_yx + vvel*tau_yy + wvel*tau_yz + qy;
669 /*! Kernel function */
671 void gpuNavierStokes3DParabolicFunction_Zdir_kernel(
677 const double * __restrict__ Q,
678 const double * __restrict__ QDerivX,
679 const double * __restrict__ QDerivY,
680 const double * __restrict__ QDerivZ,
681 double * __restrict__ FViscous
684 int p = blockDim.x * blockIdx.x + threadIdx.x;
685 if (p < npoints_grid) {
686 double uvel, vvel, wvel, T, Tz,
687 ux, uz, vy, vz, wx, wy, wz;
689 uvel = (Q+p+ npoints_grid)[0];
690 vvel = (Q+p+2*npoints_grid)[0];
691 wvel = (Q+p+3*npoints_grid)[0];
692 T = (Q+p+4*npoints_grid)[0];
693 Tz = (QDerivZ+p+4*npoints_grid)[0];
694 ux = (QDerivX+p+ npoints_grid)[0];
695 wx = (QDerivX+p+3*npoints_grid)[0];
696 vy = (QDerivY+p+2*npoints_grid)[0];
697 wy = (QDerivY+p+3*npoints_grid)[0];
698 uz = (QDerivZ+p+ npoints_grid)[0];
699 vz = (QDerivZ+p+2*npoints_grid)[0];
700 wz = (QDerivZ+p+3*npoints_grid)[0];
702 /* calculate viscosity coeff based on Sutherland's law */
703 double mu = raiseto(T,0.76);
705 double tau_zx, tau_zy, tau_zz, qz;
706 tau_zx = (mu*inv_Re) * (uz + wx);
707 tau_zy = (mu*inv_Re) * (vz + wy);
708 tau_zz = two_third * (mu*inv_Re) * (-ux - vy + 2*wz);
709 qz = ( mu*inv_Re * inv_gamma_m1 * inv_Pr ) * Tz;
711 (FViscous+p)[0] = 0.0;
712 (FViscous+p+ npoints_grid)[0] = tau_zx;
713 (FViscous+p+2*npoints_grid)[0] = tau_zy;
714 (FViscous+p+3*npoints_grid)[0] = tau_zz;
715 (FViscous+p+4*npoints_grid)[0] = uvel*tau_zx + vvel*tau_zy + wvel*tau_zz + qz;
720 /*! Kernel function */
722 void gpuNavierStokes3DParabolicFunction_par_kernel(
724 int npoints_local_wghosts,
727 const int * __restrict__ dim,
728 const double * __restrict__ dxinv,
729 const double * __restrict__ FDeriv,
730 double * __restrict__ par
733 int p = blockDim.x * blockIdx.x + threadIdx.x;
735 if (p < npoints_grid) {
739 _ArrayIndexnD_(_MODEL_NDIMS_,p,dim,index,0);
740 _ArrayIndex1D_(_MODEL_NDIMS_,dim,index,ghosts,p);
741 _GetCoordinate_(dir,index[dir],dim,ghosts,dxinv,dxinv_val);
744 for (int v=0; v<_MODEL_NVARS_; v++) {
745 (par+p+v*npoints_local_wghosts)[0] += (dxinv_val * (FDeriv+p+v*npoints_local_wghosts)[0]);
752 Compute the viscous terms in the 3D Navier Stokes equations: this function computes
755 \frac {\partial} {\partial x} \left[\begin{array}{c} 0 \\ \tau_{xx} \\ \tau_{yx} \\ \tau_{zx} \\ u \tau_{xx} + v \tau_{yx} + w \tau_{zx} - q_x \end{array}\right]
756 + \frac {\partial} {\partial y} \left[\begin{array}{c} 0 \\ \tau_{xy} \\ \tau_{yy} \\ \tau_{zy} \\ u \tau_{xy} + v \tau_{yy} + w \tau_{zy} - q_y \end{array}\right]
757 + \frac {\partial} {\partial z} \left[\begin{array}{c} 0 \\ \tau_{xz} \\ \tau_{yz} \\ \tau_{zz} \\ u \tau_{xz} + v \tau_{yz} + w \tau_{zz} - q_z \end{array}\right]
761 \tau_{xx} &= \frac{2}{3}\left(\frac{\mu}{Re}\right)\left(2\frac{\partial u}{\partial x} - \frac{\partial v}{\partial y} - \frac{\partial w}{\partial z}\right),\\
762 \tau_{xy} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial u}{\partial y} + \frac{\partial v}{\partial x}\right),\\
763 \tau_{xz} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial u}{\partial z} + \frac{\partial w}{\partial x}\right),\\
764 \tau_{yx} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial u}{\partial y} + \frac{\partial v}{\partial x}\right),\\
765 \tau_{yy} &= \frac{2}{3}\left(\frac{\mu}{Re}\right)\left(-\frac{\partial u}{\partial x} +2\frac{\partial v}{\partial y} - \frac{\partial w}{\partial z}\right),\\
766 \tau_{yz} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial w}{\partial y} + \frac{\partial v}{\partial z}\right),\\
767 \tau_{zx} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial u}{\partial z} + \frac{\partial w}{\partial x}\right),\\
768 \tau_{zy} &= \left(\frac{\mu}{Re}\right)\left(\frac{\partial v}{\partial z} + \frac{\partial w}{\partial y}\right),\\
769 \tau_{zz} &= \frac{2}{3}\left(\frac{\mu}{Re}\right)\left(-\frac{\partial u}{\partial x} - \frac{\partial v}{\partial y} + 2\frac{\partial v}{\partial y}\right),\\
770 q_x &= -\frac{mu}{\left(\gamma-1\right)Re Pr}\frac{\partial T}{\partial x}, \\
771 q_y &= -\frac{mu}{\left(\gamma-1\right)Re Pr}\frac{\partial T}{\partial y}, \\
772 q_z &= -\frac{mu}{\left(\gamma-1\right)Re Pr}\frac{\partial T}{\partial z}
774 and the temperature is \f$T = \gamma p/\rho\f$. \f$Re\f$ and \f$Pr\f$ are the Reynolds and Prandtl numbers, respectively. Note that this function
775 computes the entire parabolic term, and thus bypasses HyPar's parabolic function calculation interfaces. NavierStokes3DInitialize() assigns this
776 function to #HyPar::ParabolicFunction.
779 + Tannehill, Anderson and Pletcher, Computational Fluid Mechanics and Heat Transfer,
780 Chapter 5, Section 5.1.7 (However, the non-dimensional velocity and the Reynolds
781 number is based on speed of sound, instead of the freestream velocity).
783 extern "C" int gpuNavierStokes3DParabolicFunction(
784 double * __restrict__ par, /*!< Array to hold the computed viscous terms */
785 double * __restrict__ u, /*!< Solution vector array */
786 void * __restrict__ s, /*!< Solver object of type #HyPar */
787 void * __restrict__ m, /*!< MPI object of type #MPIVariables */
788 double t /*!< Current simulation time */
791 HyPar *solver = (HyPar*) s;
792 MPIVariables *mpi = (MPIVariables*) m;
793 NavierStokes3D *physics = (NavierStokes3D*) solver->physics;
795 int ghosts = solver->ghosts;
796 int *dim = solver->gpu_dim_local;
797 int size = solver->npoints_local_wghosts;
799 gpuMemset(par, 0, size*_MODEL_NVARS_*sizeof(double));
800 if (physics->Re <= 0) return (0); /* inviscid flow */
803 static double two_third = 2.0/3.0;
804 double inv_gamma_m1 = 1.0 / (physics->gamma-1.0);
805 double inv_Re = 1.0 / physics->Re;
806 double inv_Pr = 1.0 / physics->Pr;
808 double *Q = physics->gpu_Q;
809 double *QDerivX = physics->gpu_QDerivX;
810 double *QDerivY = physics->gpu_QDerivY;
811 double *QDerivZ = physics->gpu_QDerivZ;
812 double *dxinv = solver->gpu_dxinv;
814 int nblocks = (size-1)/GPU_THREADS_PER_BLOCK + 1;
815 int nblocks_par = (solver->npoints_local-1)/GPU_THREADS_PER_BLOCK + 1;
817 #if defined(GPU_STAT)
818 cudaEvent_t startEvent, stopEvent;
821 checkCuda( cudaEventCreate(&startEvent) );
822 checkCuda( cudaEventCreate(&stopEvent) );
824 checkCuda( cudaEventRecord(startEvent, 0) );
827 gpuNavierStokes3DParabolicFunction_Q_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
828 size, physics->gamma, u, Q
830 cudaDeviceSynchronize();
832 #if defined(GPU_STAT)
833 checkCuda( cudaEventRecord(stopEvent, 0) );
834 checkCuda( cudaEventSynchronize(stopEvent) );
835 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
837 int memory_accessed = size*_MODEL_NVARS_*sizeof(double);
838 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
839 "NavierStokes3DParabolicFunction_Q", milliseconds*1e-3,
840 (1e-6*(memory_accessed)/milliseconds));
843 solver->FirstDerivativePar(QDerivX,Q,_XDIR_,1,solver,mpi);
844 solver->FirstDerivativePar(QDerivY,Q,_YDIR_,1,solver,mpi);
845 solver->FirstDerivativePar(QDerivZ,Q,_ZDIR_,1,solver,mpi);
847 gpuMPIExchangeBoundariesnD(_MODEL_NDIMS_,_MODEL_NVARS_,solver->gpu_dim_local,
848 solver->ghosts,mpi,QDerivX);
849 gpuMPIExchangeBoundariesnD(_MODEL_NDIMS_,_MODEL_NVARS_,solver->gpu_dim_local,
850 solver->ghosts,mpi,QDerivY);
851 gpuMPIExchangeBoundariesnD(_MODEL_NDIMS_,_MODEL_NVARS_,solver->gpu_dim_local,
852 solver->ghosts,mpi,QDerivY);
854 #if defined(GPU_STAT)
855 checkCuda( cudaEventRecord(startEvent, 0) );
858 gpuNavierStokes3DParabolicFunction_QDeriv_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
859 size, ghosts, dim, dxinv, QDerivX, QDerivY, QDerivZ
861 cudaDeviceSynchronize();
863 #if defined(GPU_STAT)
864 checkCuda( cudaEventRecord(stopEvent, 0) );
865 checkCuda( cudaEventSynchronize(stopEvent) );
866 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
868 memory_accessed = (3*size*_MODEL_NVARS_ + solver->size_x)*sizeof(double);
869 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
870 "NavierStokes3DParabolicFunction_QDeriv", milliseconds*1e-3,
871 (1e-6*(memory_accessed)/milliseconds));
874 double *FViscous = physics->gpu_FViscous;
875 double *FDeriv = physics->gpu_FDeriv;
876 gpuMemset(FViscous, 0, size*_MODEL_NVARS_*sizeof(double));
877 gpuMemset(FDeriv, 0, size*_MODEL_NVARS_*sizeof(double));
881 #if defined(GPU_STAT)
882 checkCuda( cudaEventRecord(startEvent, 0) );
885 gpuNavierStokes3DParabolicFunction_Xdir_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
886 size, two_third, inv_gamma_m1, inv_Re, inv_Pr, Q,
887 QDerivX, QDerivY, QDerivZ, FViscous
889 cudaDeviceSynchronize();
891 #if defined(GPU_STAT)
892 checkCuda( cudaEventRecord(stopEvent, 0) );
893 checkCuda( cudaEventSynchronize(stopEvent) );
894 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
896 memory_accessed = 17*size*sizeof(double);
897 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
898 "NavierStokes3DParabolicFunction_Xdir", milliseconds*1e-3,
899 (1e-6*(memory_accessed)/milliseconds));
902 solver->FirstDerivativePar(FDeriv,FViscous,_XDIR_,1,solver,mpi);
904 #if defined(GPU_STAT)
905 checkCuda( cudaEventRecord(startEvent, 0) );
908 gpuNavierStokes3DParabolicFunction_par_kernel<<<nblocks_par, GPU_THREADS_PER_BLOCK>>>(
909 solver->npoints_local, size, ghosts, _XDIR_, dim, dxinv, FDeriv, par
911 cudaDeviceSynchronize();
913 #if defined(GPU_STAT)
914 checkCuda( cudaEventRecord(stopEvent, 0) );
915 checkCuda( cudaEventSynchronize(stopEvent) );
916 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
918 memory_accessed = (solver->npoints_local + 2*solver->npoints_local*_MODEL_NVARS_)*sizeof(double);
919 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
920 "NavierStokes3DParabolicFunction_par", milliseconds*1e-3,
921 (1e-6*(memory_accessed)/milliseconds));
926 #if defined(GPU_STAT)
927 checkCuda( cudaEventRecord(startEvent, 0) );
930 gpuNavierStokes3DParabolicFunction_Ydir_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
931 size, two_third, inv_gamma_m1, inv_Re, inv_Pr, Q,
932 QDerivX, QDerivY, QDerivZ, FViscous
934 cudaDeviceSynchronize();
936 #if defined(GPU_STAT)
937 checkCuda( cudaEventRecord(stopEvent, 0) );
938 checkCuda( cudaEventSynchronize(stopEvent) );
939 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
941 memory_accessed = 17*size*sizeof(double);
942 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
943 "NavierStokes3DParabolicFunction_Ydir", milliseconds*1e-3,
944 (1e-6*(memory_accessed)/milliseconds));
947 solver->FirstDerivativePar(FDeriv,FViscous,_YDIR_,1,solver,mpi);
948 gpuNavierStokes3DParabolicFunction_par_kernel<<<nblocks_par, GPU_THREADS_PER_BLOCK>>>(
949 solver->npoints_local, size, ghosts, _YDIR_, dim, dxinv, FDeriv, par
951 cudaDeviceSynchronize();
954 #if defined(GPU_STAT)
955 checkCuda( cudaEventRecord(startEvent, 0) );
958 gpuNavierStokes3DParabolicFunction_Zdir_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
959 size, two_third, inv_gamma_m1, inv_Re, inv_Pr, Q,
960 QDerivX, QDerivY, QDerivZ, FViscous
962 cudaDeviceSynchronize();
964 #if defined(GPU_STAT)
965 checkCuda( cudaEventRecord(stopEvent, 0) );
966 checkCuda( cudaEventSynchronize(stopEvent) );
967 checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );
969 memory_accessed = 17*size*sizeof(double);
970 printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
971 "NavierStokes3DParabolicFunction_Zdir", milliseconds*1e-3,
972 (1e-6*(memory_accessed)/milliseconds));
975 solver->FirstDerivativePar(FDeriv,FViscous,_ZDIR_,1,solver,mpi);
976 gpuNavierStokes3DParabolicFunction_par_kernel<<<nblocks_par, GPU_THREADS_PER_BLOCK>>>(
977 solver->npoints_local, size, ghosts, _ZDIR_, dim, dxinv, FDeriv, par
979 cudaDeviceSynchronize();
981 #if defined(GPU_STAT)
982 checkCuda( cudaEventDestroy(startEvent) );
983 checkCuda( cudaEventDestroy(stopEvent) );
986 if (solver->flag_ib) gpuArrayBlockMultiply(par,solver->gpu_iblank,size,_MODEL_NVARS_);