/*! @file NavierStokes2DPreStep_GPU.cu
    @brief Pre-step function for 2D Navier Stokes equations
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <arrayfunctions_gpu.h>
#include <matmult_native.h>
#include <basic_gpu.h>
#include <physicalmodels/navierstokes2d.h>
#include <hypar.h>
#ifdef CUDA_VAR_ORDERDING_AOS

/*! Kernel for gpuNavierStokes2DPreStep() */
__global__
void NavierStokes2DPreStep_kernel(
  const int JacSize = _MODEL_NVARS_*_MODEL_NVARS_;
  double D[_MODEL_NVARS_*_MODEL_NVARS_], L[_MODEL_NVARS_*_MODEL_NVARS_],
         R[_MODEL_NVARS_*_MODEL_NVARS_], DL[_MODEL_NVARS_*_MODEL_NVARS_];
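  /* one thread per grid point: for each point p, fast_jac stores two
     _MODEL_NVARS_ x _MODEL_NVARS_ Jacobian blocks back-to-back
     (x-direction block first, then y-direction) */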
  p = threadIdx.x + blockDim.x * blockIdx.x;

  dir = _XDIR_;
  A = (fast_jac + 2*JacSize*p + dir*JacSize);
  /* get the eigenvalues, and left & right eigenvectors */
  _NavierStokes2DEigenvalues_      ((u+_MODEL_NVARS_*p),D,gamma,dir);
  _NavierStokes2DLeftEigenvectors_ ((u+_MODEL_NVARS_*p),L,gamma,dir);
  _NavierStokes2DRightEigenvectors_((u+_MODEL_NVARS_*p),R,gamma,dir);
  /* remove the entropy modes (corresponding to eigenvalues u) */
  D[2*_MODEL_NVARS_+2] = D[3*_MODEL_NVARS_+3] = 0.0;
  /* assemble the Jacobian */
  MatMult4(_MODEL_NVARS_,DL,D,L );
  MatMult4(_MODEL_NVARS_,A,R,DL);

  dir = _YDIR_;
  A = (fast_jac + 2*JacSize*p + dir*JacSize);
  /* get the eigenvalues, and left & right eigenvectors */
  _NavierStokes2DEigenvalues_      ((u+_MODEL_NVARS_*p),D,gamma,dir);
  _NavierStokes2DLeftEigenvectors_ ((u+_MODEL_NVARS_*p),L,gamma,dir);
  _NavierStokes2DRightEigenvectors_((u+_MODEL_NVARS_*p),R,gamma,dir);
  /* remove the entropy modes (corresponding to eigenvalues v) */
  D[1*_MODEL_NVARS_+1] = D[3*_MODEL_NVARS_+3] = 0.0;
  /* assemble the Jacobian */
  MatMult4(_MODEL_NVARS_,DL,D,L );
  MatMult4(_MODEL_NVARS_,A,R,DL);
/*! Pre-step function for the 2D Navier Stokes equations: This function
    is called at the beginning of each time step.
    + The solution at the beginning of the step is copied into #NavierStokes2D::solution
      for the linearized flux partitioning.
    + The linearized "fast" Jacobian (representing the acoustic modes) #NavierStokes2D::fast_jac
      is computed as
      \f{equation}{
        A_f\left({\bf u}\right) = R\left({\bf u}\right)\Lambda_f\left({\bf u}\right)L\left({\bf u}\right)
      \f}
      where \f$R\f$ and \f$L\f$ are the matrices of right and left eigenvectors, and
      \f{equation}{
        \Lambda_f = diag\left[0,0,u+c,u-c\right]
      \f}
      with \f$c\f$ being the speed of sound.
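      (The y-direction fast Jacobian is assembled analogously, keeping the acoustic
      eigenvalues \f$v \pm c\f$ and setting the entropy modes corresponding to the
      eigenvalue \f$v\f$ to zero.)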
    + Ghosh, D., Constantinescu, E. M., Semi-Implicit Time Integration of Atmospheric Flows
      with Characteristic-Based Flux Partitioning, SIAM Journal on Scientific Computing,
      38 (3), 2016, A1848-A1875, http://dx.doi.org/10.1137/15M1044369.
extern "C" int gpuNavierStokes2DPreStep(
  double *u,    /*!< Solution vector */
  void   *s,    /*!< Solver object of type #HyPar */
  void   *m,    /*!< MPI object of type #MPIVariables */
  double waqt   /*!< Current simulation time */
  HyPar          *solver = (HyPar*)          s;
  NavierStokes2D *param  = (NavierStokes2D*) solver->physics;
  int *dim    = solver->dim_local;
  int  ghosts = solver->ghosts;
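  /* total number of grid points (including ghost points) and the size of the
     fast Jacobian array: two _MODEL_NVARS_ x _MODEL_NVARS_ blocks per point */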
  static int bounds[_MODEL_NDIMS_];
  _ArrayAddCopy1D_(dim,(2*ghosts),bounds,_MODEL_NDIMS_);
  int N_grid; _ArrayProduct1D_(bounds,_MODEL_NDIMS_,N_grid);
  int size = 2*N_grid*_MODEL_NVARS_*_MODEL_NVARS_;
  double  cpu_time = 0.0;
  clock_t cpu_start, cpu_end;
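  /* copy the current solution to the device: into the solver's GPU solution array
     (input to the kernel below) and into the stored beginning-of-step solution
     used for the linearized flux partitioning */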
  gpuMemcpy(solver->gpu_u, u, N_grid*_MODEL_NVARS_*sizeof(double), gpuMemcpyHostToDevice);
  gpuMemcpy(param->gpu_solution, u, N_grid*_MODEL_NVARS_*sizeof(double), gpuMemcpyHostToDevice);
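  /* launch one thread per grid point, rounded up to a whole number of blocks */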
  int nblocks = (N_grid - 1) / GPU_THREADS_PER_BLOCK + 1;

  cpu_start = clock();
  NavierStokes2DPreStep_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(N_grid, param->gamma, solver->gpu_u, param->gpu_fast_jac);
  cudaDeviceSynchronize();
  cpu_end = clock();
  cpu_time += (double)(cpu_end - cpu_start) / CLOCKS_PER_SEC;
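  /* debug check: copy the GPU-computed fast Jacobians back to the host and compare
     them entry-by-entry with the CPU result stored in param->fast_jac */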
  double *fast_jac_h = (double*) malloc(size*sizeof(double));
  gpuMemcpy(fast_jac_h, param->gpu_fast_jac, size*sizeof(double), gpuMemcpyDeviceToHost);

  double err = 0.0;
  for (int i = 0; i < size; i++) {
    if (fabs(fast_jac_h[i] - param->fast_jac[i]) > 1e-8)
      err += fabs(fast_jac_h[i] - param->fast_jac[i]);