/*! @file NavierStokes3DPreStep_GPU.cu
    @brief Pre-step function for 3D Navier Stokes equations
*/
#include <basic_gpu.h>
#include <arrayfunctions_gpu.h>
#include <mathfunctions.h>
#include <matmult_native.h>
#include <physicalmodels/navierstokes3d.h>
#include <hypar.h>
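
/* CUDA_VAR_ORDERDING_AOS selects the array-of-structures (AOS) variable
   ordering, in which the _MODEL_NVARS_ components of each grid point are
   stored contiguously; the #else branch provides the structure-of-arrays
   (SOA) version, in which each component is stored as a separate array. */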
#ifdef CUDA_VAR_ORDERDING_AOS

/*! Kernel for gpuNavierStokes3DPreStep() */
__global__
void gpuNavierStokes3DPreStep_kernel(
  int npoints_grid,
  double gamma,
  const double * __restrict__ u,
  double * __restrict__ fast_jac
)
{
  int p = blockDim.x * blockIdx.x + threadIdx.x;
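
  /* each thread handles one grid point (ghost points included); excess
     threads in the last block do nothing */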
  if (p < npoints_grid) {
    double * __restrict__ A;
    double D[_MODEL_NVARS_*_MODEL_NVARS_],L[_MODEL_NVARS_*_MODEL_NVARS_],
           R[_MODEL_NVARS_*_MODEL_NVARS_],DL[_MODEL_NVARS_*_MODEL_NVARS_];
    int JacSize = _MODEL_NVARS_*_MODEL_NVARS_;
    int q = _MODEL_NVARS_*p;
    int dir;

    dir = _XDIR_;
    A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
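    /* fast_jac stores _MODEL_NDIMS_ Jacobian blocks of size
       _MODEL_NVARS_ x _MODEL_NVARS_ per grid point; A points to this
       point's block for the current direction */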
    /* get the eigenvalues, and left & right eigenvectors */
    _NavierStokes3DEigenvalues_      ((u+q),_NavierStokes3D_stride_,D,gamma,dir);
    _NavierStokes3DLeftEigenvectors_ ((u+q),_NavierStokes3D_stride_,L,gamma,dir);
    _NavierStokes3DRightEigenvectors_((u+q),_NavierStokes3D_stride_,R,gamma,dir);
    /* remove the entropy modes (corresponding to eigenvalues u) */
    D[0] = D[12] = D[18] = 0.0;
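    /* the diagonal entries of the 5x5 matrix D are at indices 0, 6, 12, 18,
       and 24; the three zeroed entries are the repeated eigenvalue u, so only
       the acoustic eigenvalues u-c and u+c remain */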
    /* assemble the Jacobian */
    MatMult5(_MODEL_NVARS_,DL,D,L );
    MatMult5(_MODEL_NVARS_,A ,R,DL);

    dir = _YDIR_;
    A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
    /* get the eigenvalues, and left & right eigenvectors */
    _NavierStokes3DEigenvalues_      ((u+q),_NavierStokes3D_stride_,D,gamma,dir);
    _NavierStokes3DLeftEigenvectors_ ((u+q),_NavierStokes3D_stride_,L,gamma,dir);
    _NavierStokes3DRightEigenvectors_((u+q),_NavierStokes3D_stride_,R,gamma,dir);
    /* remove the entropy modes (corresponding to eigenvalues v) */
    D[0] = D[6] = D[18] = 0.0;
    /* assemble the Jacobian */
    MatMult5(_MODEL_NVARS_,DL,D,L );
    MatMult5(_MODEL_NVARS_,A ,R,DL);

    dir = _ZDIR_;
    A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
    /* get the eigenvalues, and left & right eigenvectors */
    _NavierStokes3DEigenvalues_      ((u+q),_NavierStokes3D_stride_,D,gamma,dir);
    _NavierStokes3DLeftEigenvectors_ ((u+q),_NavierStokes3D_stride_,L,gamma,dir);
    _NavierStokes3DRightEigenvectors_((u+q),_NavierStokes3D_stride_,R,gamma,dir);
    /* remove the entropy modes (corresponding to eigenvalues w) */
    D[0] = D[6] = D[12] = 0.0;
    /* assemble the Jacobian */
    MatMult5(_MODEL_NVARS_,DL,D,L );
    MatMult5(_MODEL_NVARS_,A ,R,DL);
  }
  return;
}

/*! Pre-step function for the 3D Navier Stokes equations: This function
    is called at the beginning of each time step.
    + The solution at the beginning of the step is copied into #NavierStokes3D::solution
      for the linearized flux partitioning.
    + The linearized "fast" Jacobian (representing the acoustic modes) #NavierStokes3D::fast_jac
      is computed as:
      \f{equation}{
        A_f\left({\bf u}\right) = R\left({\bf u}\right)\Lambda_f\left({\bf u}\right)L\left({\bf u}\right)
      \f}
      where \f$R\f$ and \f$L\f$ are the matrices of right and left eigenvectors, and,
      \f{equation}{
        \Lambda_f = diag\left[0,0,0,u+c,u-c \right]
      \f}
      with \f$c\f$ being the speed of sound.

    Reference:
    + Ghosh, D., Constantinescu, E. M., Semi-Implicit Time Integration of Atmospheric Flows
      with Characteristic-Based Flux Partitioning, SIAM Journal on Scientific Computing,
      38 (3), 2016, A1848-A1875, http://dx.doi.org/10.1137/15M1044369.
*/
extern "C" int gpuNavierStokes3DPreStep(
  double *u,   /*!< Solution vector */
  void   *s,   /*!< Solver object of type #HyPar */
  void   *m,   /*!< MPI object of type #MPIVariables */
  double waqt  /*!< Current simulation time */
)
{
  HyPar          *solver = (HyPar*) s;
  NavierStokes3D *param  = (NavierStokes3D*) solver->physics;

  int nblocks = (solver->npoints_local_wghosts-1)/GPU_THREADS_PER_BLOCK + 1;
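  /* ceiling division: enough blocks to cover all grid points, including ghosts */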

  gpuArrayCopy1D(u,param->gpu_solution,(solver->npoints_local_wghosts)*_MODEL_NVARS_);
  gpuNavierStokes3DPreStep_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
    solver->npoints_local_wghosts, param->gamma, u, param->gpu_fast_jac
  );
  cudaDeviceSynchronize();

  return 0;
}

#else
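
/* In the structure-of-arrays (SOA) ordering used below, component k of grid
   point p is stored at u[p+k*npoints_grid]: the point offset is (u+p) and the
   stride between a point's components is npoints_grid. */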

/*! Kernel for gpuNavierStokes3DPreStep() */
__global__
void gpuNavierStokes3DPreStep_kernel(
  int npoints_grid,
  double gamma,
  const double * __restrict__ u,
  double * __restrict__ fast_jac
)
{
  int p = blockDim.x * blockIdx.x + threadIdx.x;

  if (p < npoints_grid) {
    double * __restrict__ A;
    double D[_MODEL_NVARS_*_MODEL_NVARS_],L[_MODEL_NVARS_*_MODEL_NVARS_],
           R[_MODEL_NVARS_*_MODEL_NVARS_],DL[_MODEL_NVARS_*_MODEL_NVARS_];
    int JacSize = _MODEL_NVARS_*_MODEL_NVARS_;
    int dir;

    dir = _XDIR_;
    A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
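    /* SOA indexing: (u+p) addresses grid point p, and npoints_grid is the
       stride between its components */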
    /* get the eigenvalues, and left & right eigenvectors */
    _NavierStokes3DEigenvalues_      ((u+p),npoints_grid,D,gamma,dir);
    _NavierStokes3DLeftEigenvectors_ ((u+p),npoints_grid,L,gamma,dir);
    _NavierStokes3DRightEigenvectors_((u+p),npoints_grid,R,gamma,dir);
    /* remove the entropy modes (corresponding to eigenvalues u) */
    D[0] = D[12] = D[18] = 0.0;
    /* assemble the Jacobian */
    MatMult5(_MODEL_NVARS_,DL,D,L );
    MatMult5(_MODEL_NVARS_,A ,R,DL);

    dir = _YDIR_;
    A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
    /* get the eigenvalues, and left & right eigenvectors */
    _NavierStokes3DEigenvalues_      ((u+p),npoints_grid,D,gamma,dir);
    _NavierStokes3DLeftEigenvectors_ ((u+p),npoints_grid,L,gamma,dir);
    _NavierStokes3DRightEigenvectors_((u+p),npoints_grid,R,gamma,dir);
    /* remove the entropy modes (corresponding to eigenvalues v) */
    D[0] = D[6] = D[18] = 0.0;
    /* assemble the Jacobian */
    MatMult5(_MODEL_NVARS_,DL,D,L );
    MatMult5(_MODEL_NVARS_,A ,R,DL);

    dir = _ZDIR_;
    A = (fast_jac + _MODEL_NDIMS_*JacSize*p + dir*JacSize);
    /* get the eigenvalues, and left & right eigenvectors */
    _NavierStokes3DEigenvalues_      ((u+p),npoints_grid,D,gamma,dir);
    _NavierStokes3DLeftEigenvectors_ ((u+p),npoints_grid,L,gamma,dir);
    _NavierStokes3DRightEigenvectors_((u+p),npoints_grid,R,gamma,dir);
    /* remove the entropy modes (corresponding to eigenvalues w) */
    D[0] = D[6] = D[12] = 0.0;
    /* assemble the Jacobian */
    MatMult5(_MODEL_NVARS_,DL,D,L );
    MatMult5(_MODEL_NVARS_,A ,R,DL);
  }
  return;
}

/*! Pre-step function for the 3D Navier Stokes equations: This function
    is called at the beginning of each time step.
    + The solution at the beginning of the step is copied into #NavierStokes3D::solution
      for the linearized flux partitioning.
    + The linearized "fast" Jacobian (representing the acoustic modes) #NavierStokes3D::fast_jac
      is computed as:
      \f{equation}{
        A_f\left({\bf u}\right) = R\left({\bf u}\right)\Lambda_f\left({\bf u}\right)L\left({\bf u}\right)
      \f}
      where \f$R\f$ and \f$L\f$ are the matrices of right and left eigenvectors, and,
      \f{equation}{
        \Lambda_f = diag\left[0,0,0,u+c,u-c \right]
      \f}
      with \f$c\f$ being the speed of sound.

    Reference:
    + Ghosh, D., Constantinescu, E. M., Semi-Implicit Time Integration of Atmospheric Flows
      with Characteristic-Based Flux Partitioning, SIAM Journal on Scientific Computing,
      38 (3), 2016, A1848-A1875, http://dx.doi.org/10.1137/15M1044369.
*/
extern "C" int gpuNavierStokes3DPreStep(
  double *u,   /*!< Solution vector */
  void   *s,   /*!< Solver object of type #HyPar */
  void   *m,   /*!< MPI object of type #MPIVariables */
  double waqt  /*!< Current simulation time */
)
{
  HyPar          *solver = (HyPar*) s;
  NavierStokes3D *param  = (NavierStokes3D*) solver->physics;

  int nblocks = (solver->npoints_local_wghosts-1)/GPU_THREADS_PER_BLOCK + 1;

  gpuArrayCopy1D(u,param->gpu_solution,(solver->npoints_local_wghosts)*_MODEL_NVARS_);
  gpuNavierStokes3DPreStep_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
    solver->npoints_local_wghosts, param->gamma, u, param->gpu_fast_jac
  );
  cudaDeviceSynchronize();

  return 0;
}

#endif