HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
HyperbolicFunction_GPU.cu
Go to the documentation of this file.
1 
6 #include <arrayfunctions_gpu.h>
7 #include <basic_gpu.h>
8 #include <mpivars.h>
9 #include <hypar.h>
10 
11 static int ReconstructHyperbolic (double*,double*,double*,double*,int,void*,void*,double,int,
12  int(*)(double*,double*,double*,double*,
13  double*,double*,int,void*,double));
14 static int DefaultUpwinding (double*,double*,double*,double*,
15  double*,double*,int,void*,double);
16 
18 template <int block_size>
19 __global__
21  int n,
22  int nvars,
23  int sign,
24  const double * __restrict__ FluxI,
25  double * __restrict__ StageBoundaryIntegral
26 )
27 {
28  extern __shared__ double smem[];
29 
30  int tid = threadIdx.x;
31  int grid_size = block_size * gridDim.x;
32  int i = blockIdx.x * block_size + threadIdx.x;
33  int j;
34  double tid_sum[GPU_MAX_NVARS] = { 0 };
35 
36  while (i < n) {
37  for (j=0; j<nvars; j++) tid_sum[j] += FluxI[i*nvars+j];
38  i += grid_size;
39  }
40  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j];
41  __syncthreads();
42 
43 
44  if (block_size >= 512 && tid < 256) {
45  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+256)*nvars+j];
46  }
47  __syncthreads();
48  if (block_size >= 256 && tid < 128) {
49  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+128)*nvars+j];
50  }
51  __syncthreads();
52  if (block_size >= 128 && tid < 64) {
53  for (j=0; j<nvars; j++) smem[tid*nvars+j] = tid_sum[j] = tid_sum[j] + smem[(tid+64)*nvars+j];
54  }
55  __syncthreads();
56 
57  if (tid < 32) {
58  if (block_size >= 64) {
59  for (j=0; j<nvars; j++) tid_sum[j] += smem[(tid+32)*nvars+j];
60  }
61  for (int offset=16; offset>0; offset/=2) {
62  for (j=0; j<nvars; j++) {
63  tid_sum[j] += __shfl_down_sync(0xffffffff, tid_sum[j], offset);
64  }
65  }
66  }
67  __syncthreads();
68 
69  if (tid == 0) {
70  for (j=0; j<nvars; j++) StageBoundaryIntegral[j] = (sign == 1) ? tid_sum[j] : -tid_sum[j];
71  }
72  return;
73 }
74 
75 #ifdef CUDA_VAR_ORDERDING_AOS
76 
78 __global__
80  int npoints_grid,
81  int d,
82  int nvars,
83  int ghosts,
84  int offset,
85  const int * __restrict__ dim,
86  const double * __restrict__ FluxI,
87  const double * __restrict__ dxinv,
88  double * __restrict__ hyp,
89  double * __restrict__ FluxI_p1,
90  double * __restrict__ FluxI_p2
91 )
92 {
93  int tx = threadIdx.x + (blockDim.x * blockIdx.x);
94  if (tx < npoints_grid) {
95  int v, p, p1, p2, b;
96  int dim_interface[3];
97  int index[3], index1[3], index2[3];
98 
99  _ArrayIndexnD_(3,tx,dim,index,0);
100  _ArrayCopy1D_(dim,dim_interface,3);
101  dim_interface[d] += 1;
102 
103  _ArrayCopy1D_(index,index1,3);
104  _ArrayCopy1D_(index,index2,3); index2[d]++;
105  _ArrayIndex1D_(3,dim ,index ,ghosts,p);
106  _ArrayIndex1D_(3,dim_interface,index1,0 ,p1);
107  _ArrayIndex1D_(3,dim_interface,index2,0 ,p2);
108  for (v=0; v<nvars; v++) hyp[nvars*p+v] += dxinv[offset+ghosts+index[d]]
109  * (FluxI[nvars*p2+v]-FluxI[nvars*p1+v]);
110  if (index[d] == 0 || index[d] == dim[d]-1) {
111  if (d == 0) {
112  b = index[1] + index[2]*dim[1];
113  } else if (d == 1) {
114  b = index[0] + index[2]*dim[0];
115  } else {
116  b = index[0] + index[1]*dim[0];
117  }
118  if (index[d] == 0)
119  for (v=0; v<nvars; v++) FluxI_p1[b*nvars+v] = FluxI[nvars*p1+v];
120  else
121  for (v=0; v<nvars; v++) FluxI_p2[b*nvars+v] = FluxI[nvars*p2+v];
122  }
123  }
124  return;
125 }
126 
128 __global__
129 void HyperbolicFunction_dim2_kernel(
130  int npoints_grid,
131  int d,
132  int nvars,
133  int ghosts,
134  int offset,
135  const int * __restrict__ dim,
136  const double * __restrict__ FluxI,
137  const double * __restrict__ dxinv,
138  double * __restrict__ hyp,
139  double * __restrict__ FluxI_p1,
140  double * __restrict__ FluxI_p2
141 )
142 {
143  int tx = threadIdx.x + (blockDim.x * blockIdx.x);
144  if (tx < npoints_grid) {
145  int v, p, p1, p2, b;
146  int dim_interface[2];
147  int index[2], index1[2], index2[2];
148 
149  _ArrayIndexnD_(2,tx,dim,index,0);
150  _ArrayCopy1D_(dim,dim_interface,2);
151  dim_interface[d] += 1;
152 
153  _ArrayCopy1D_(index,index1,2);
154  _ArrayCopy1D_(index,index2,2); index2[d]++;
155  _ArrayIndex1D_(2,dim ,index ,ghosts,p);
156  _ArrayIndex1D_(2,dim_interface,index1,0 ,p1);
157  _ArrayIndex1D_(2,dim_interface,index2,0 ,p2);
158  for (v=0; v<nvars; v++) hyp[nvars*p+v] += dxinv[offset+ghosts+index[d]]
159  * (FluxI[nvars*p2+v]-FluxI[nvars*p1+v]);
160  if (index[d] == 0 || index[d] == dim[d]-1) {
161  b = (d == 0) ? index[1] : index[0];
162  if (index[d] == 0)
163  for (v=0; v<nvars; v++) FluxI_p1[b*nvars+v] = FluxI[nvars*p1+v];
164  else
165  for (v=0; v<nvars; v++) FluxI_p2[b*nvars+v] = FluxI[nvars*p2+v];
166  }
167  }
168  return;
169 }
170 
184 extern "C" int gpuHyperbolicFunction(
185  double *hyp,
186  double *gpu_u,
187  void *s,
188  void *m,
189  double t,
190  int LimFlag,
194  int(*FluxFunction)(double*,double*,int,void*,double),
196  int(*UpwindFunction)(double*,double*,double*,double*,double*,double*,int,void*,double)
197 )
198 {
199  HyPar *solver = (HyPar*) s;
200  MPIVariables *mpi = (MPIVariables*) m;
201  int d;
202  double *gpu_FluxI = solver->fluxI; /* interface flux */
203  double *gpu_FluxC = solver->fluxC; /* cell centered flux */
204 
205  int ndims = solver->ndims;
206  int nvars = solver->nvars;
207  int ghosts = solver->ghosts;
208  int *dim = solver->dim_local;
209  int size = solver->npoints_local_wghosts;
210  double *gpu_x = solver->gpu_x;
211  double *gpu_dxinv = solver->gpu_dxinv;
212  double *gpu_FluxI_p1, *gpu_FluxI_p2;
213 
214  LimFlag = (LimFlag && solver->flag_nonlinearinterp && solver->SetInterpLimiterVar);
215 
216  gpuMemset(hyp, 0, size*nvars*sizeof(double));
217  gpuMemset(solver->StageBoundaryBuffer, 0, solver->StageBoundaryBuffer_size*sizeof(double));
218  gpuMemset(solver->StageBoundaryIntegral, 0, 2*ndims*nvars*sizeof(double));
219 
220  if (!FluxFunction) return(0); /* zero hyperbolic term */
221  /*solver->count_hyp++;*/
222 
223  int npoints_grid = 1; for (d = 0; d < ndims; d++) npoints_grid *= dim[d];
224  int nblocks = (npoints_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
225  int offset = 0;
226 
227 #if defined(GPU_STAT)
228  cudaEvent_t start, stop;
229  float milliseconds = 0;
230 
231  checkCuda( cudaEventCreate(&start) );
232  checkCuda( cudaEventCreate(&stop) );
233 #endif
234 
235  for (d = 0; d < ndims; d++) {
236  /* evaluate cell-centered flux */
237  FluxFunction(gpu_FluxC,gpu_u,d,solver,t);
238  /* compute interface fluxes */
239  ReconstructHyperbolic(gpu_FluxI,gpu_FluxC,gpu_u,gpu_x+offset,d,solver,mpi,t,LimFlag,UpwindFunction);
240 
241  gpu_FluxI_p1 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]*nvars);
242  gpu_FluxI_p2 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]+solver->gpu_npoints_boundary[d])*nvars;
243 
244 #if defined(GPU_STAT)
245  checkCuda( cudaEventRecord(start, 0) );
246 #endif
247 
248  if (ndims == 3) {
249  HyperbolicFunction_dim3_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
250  npoints_grid, d, nvars, ghosts, offset,
251  solver->gpu_dim_local, gpu_FluxI, gpu_dxinv,
252  hyp, gpu_FluxI_p1, gpu_FluxI_p2
253  );
254  } else if (ndims == 2) {
255  HyperbolicFunction_dim2_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
256  npoints_grid, d, nvars, ghosts, offset,
257  solver->gpu_dim_local, gpu_FluxI, gpu_dxinv,
258  hyp, gpu_FluxI_p1, gpu_FluxI_p2
259  );
260  } else {
261  fprintf(stderr,"gpuHyperbolicFunction(): Not implemented for ndims = %d!\n", ndims);
262  exit(1);
263  }
264  cudaDeviceSynchronize();
265 
266 #if defined(GPU_STAT)
267  checkCuda( cudaEventRecord(stop, 0) );
268  checkCuda( cudaEventSynchronize(stop) );
269  checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
270  printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
271  "HyperbolicFunction", milliseconds*1e-3, d, (1e-6*2*npoints_grid*nvars*sizeof(double))/milliseconds);
272 
273  checkCuda( cudaEventRecord(start, 0) );
274 #endif
275 
276  StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
277  solver->gpu_npoints_boundary[d], nvars, -1, gpu_FluxI_p1,
278  solver->StageBoundaryIntegral + 2*d*nvars
279  );
280  StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
281  solver->gpu_npoints_boundary[d], nvars, 1, gpu_FluxI_p2,
282  solver->StageBoundaryIntegral + (2*d+1)*nvars
283  );
284  cudaDeviceSynchronize();
285 
286 #if defined(GPU_STAT)
287  checkCuda( cudaEventRecord(stop, 0) );
288  checkCuda( cudaEventSynchronize(stop) );
289  checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
290  printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
291  "StageBoundaryIntegral", milliseconds*1e-3, d, (1e-6*2*solver->gpu_npoints_boundary[d]*nvars*sizeof(double))/milliseconds);
292 #endif
293 
294  offset += dim[d] + 2*ghosts;
295  }
296 
297  if (solver->flag_ib) gpuArrayBlockMultiply(hyp, solver->gpu_iblank, size, nvars);
298 
299 #if defined(GPU_STAT)
300  checkCuda(cudaEventDestroy(start));
301  checkCuda(cudaEventDestroy(stop));
302 #endif
303 
304  return(0);
305 }
306 
347  double *gpu_fluxI,
350  double *gpu_fluxC,
352  double *gpu_u,
353  double *gpu_x,
354  int dir,
355  void *s,
356  void *m,
357  double t,
358  int LimFlag,
362  int(*UpwindFunction)(double*,double*,double*,double*,
363  double*,double*,int,void*,double)
364 )
365 {
366  HyPar *solver = (HyPar*) s;
367  MPIVariables *mpi = (MPIVariables*) m;
368 
369  double *gpu_uC = NULL;
370  double *gpu_uL = solver->uL;
371  double *gpu_uR = solver->uR;
372  double *gpu_fluxL = solver->fL;
373  double *gpu_fluxR = solver->fR;
374 
375  /*
376  precalculate the non-linear interpolation coefficients if required
377  else reuse the weights previously calculated
378  */
379  if (LimFlag) solver->SetInterpLimiterVar(gpu_fluxC,gpu_u,gpu_x,dir,solver,mpi);
380 
381  /* if defined, calculate the modified u-function to be used for upwinding
382  e.g.: used in well-balanced schemes for Euler/Navier-Stokes with gravity
383  otherwise, just copy u to uC */
384  if (solver->UFunction) {
385  gpu_uC = solver->uC;
386  solver->UFunction(gpu_uC,gpu_u,dir,solver,mpi,t);
387  } else gpu_uC = gpu_u;
388 
389  /* Interpolation -> to calculate left and right-biased interface flux and state variable*/
390  solver->InterpolateInterfacesHyp(gpu_uL ,gpu_uC ,gpu_u,gpu_x, 1,dir,solver,mpi,1);
391  solver->InterpolateInterfacesHyp(gpu_uR ,gpu_uC ,gpu_u,gpu_x,-1,dir,solver,mpi,1);
392  solver->InterpolateInterfacesHyp(gpu_fluxL,gpu_fluxC,gpu_u,gpu_x, 1,dir,solver,mpi,0);
393  solver->InterpolateInterfacesHyp(gpu_fluxR,gpu_fluxC,gpu_u,gpu_x,-1,dir,solver,mpi,0);
394 
395  /* Upwind -> to calculate the final interface flux */
396  if (UpwindFunction) { UpwindFunction (gpu_fluxI,gpu_fluxL,gpu_fluxR,gpu_uL,gpu_uR,gpu_u,dir,solver,t); }
397  else { DefaultUpwinding (gpu_fluxI,gpu_fluxL,gpu_fluxR,NULL,NULL,NULL,dir,solver,t); }
398 
399  return(0);
400 }
401 
402 #else
403 
405 __global__
407  int npoints_grid,
408  int npoints_local_wghosts,
409  int npoints_dim_interface,
410  int d,
411  int nvars,
412  int ghosts,
413  int offset,
414  const int * __restrict__ dim,
415  const double * __restrict__ FluxI,
416  const double * __restrict__ dxinv,
417  double * __restrict__ hyp,
418  double * __restrict__ FluxI_p1,
419  double * __restrict__ FluxI_p2
420 )
421 {
422  int tx = threadIdx.x + (blockDim.x * blockIdx.x);
423  if (tx < npoints_grid) {
424  int v, p, p1, p2, b;
425  int dim_interface[3];
426  int index[3], index1[3], index2[3];
427 
428  _ArrayIndexnD_(3,tx,dim,index,0);
429  _ArrayCopy1D_(dim,dim_interface,3);
430  dim_interface[d] += 1;
431 
432  _ArrayCopy1D_(index,index1,3);
433  _ArrayCopy1D_(index,index2,3); index2[d]++;
434  _ArrayIndex1D_(3,dim ,index ,ghosts,p);
435  _ArrayIndex1D_(3,dim_interface,index1,0 ,p1);
436  _ArrayIndex1D_(3,dim_interface,index2,0 ,p2);
437  for (v=0; v<nvars; v++) {
438  hyp[p+v*npoints_local_wghosts] += dxinv[offset+ghosts+index[d]]
439  * (FluxI[p2+v*npoints_dim_interface]-FluxI[p1+v*npoints_dim_interface]);
440  }
441  if (index[d] == 0 || index[d] == dim[d]-1) {
442  if (d == 0) {
443  b = index[1] + index[2]*dim[1];
444  } else if (d == 1) {
445  b = index[0] + index[2]*dim[0];
446  } else {
447  b = index[0] + index[1]*dim[0];
448  }
449  if (index[d] == 0) {
450  for (v=0; v<nvars; v++) FluxI_p1[b*nvars+v] = FluxI[p1+v*npoints_dim_interface];
451  } else {
452  for (v=0; v<nvars; v++) FluxI_p2[b*nvars+v] = FluxI[p2+v*npoints_dim_interface];
453  }
454  }
455  }
456  return;
457 }
458 
459 
473 extern "C" int gpuHyperbolicFunction(
474  double *hyp,
475  double *u,
476  void *s,
477  void *m,
478  double t,
479  int LimFlag,
483  int(*FluxFunction)(double*,double*,int,void*,double),
485  int(*UpwindFunction)( double*,double*,double*,double*,
486  double*,double*,int,void*,double)
487 )
488 {
489  HyPar *solver = (HyPar*) s;
490  MPIVariables *mpi = (MPIVariables*) m;
491  int d;
492  double *FluxI = solver->fluxI; /* interface flux */
493  double *FluxC = solver->fluxC; /* cell centered flux */
494 
495  int ndims = solver->ndims;
496  int nvars = solver->nvars;
497  int ghosts = solver->ghosts;
498  int *dim = solver->dim_local;
499  int size = solver->npoints_local_wghosts;
500  double *x = solver->gpu_x;
501  double *dxinv = solver->gpu_dxinv;
502  double *FluxI_p1, *FluxI_p2;
503 
504  LimFlag = (LimFlag && solver->flag_nonlinearinterp && solver->SetInterpLimiterVar);
505 
506  gpuMemset(hyp, 0, size*nvars*sizeof(double));
507  gpuMemset(solver->StageBoundaryBuffer, 0, solver->StageBoundaryBuffer_size*sizeof(double));
508  gpuMemset(solver->StageBoundaryIntegral, 0, 2*ndims*nvars*sizeof(double));
509 
510  if (!FluxFunction) return(0); /* zero hyperbolic term */
511  /*solver->count_hyp++;*/
512 
513  int npoints_grid = 1; for (d = 0; d < ndims; d++) npoints_grid *= dim[d];
514  int nblocks = (npoints_grid - 1) / GPU_THREADS_PER_BLOCK + 1;
515  int offset = 0;
516 
517 #if defined(GPU_STAT)
518  cudaEvent_t start, stop;
519  float milliseconds = 0;
520 
521  checkCuda( cudaEventCreate(&start) );
522  checkCuda( cudaEventCreate(&stop) );
523 #endif
524 
525  for (d = 0; d < ndims; d++) {
526  int npoints_dim_interface = 1; for (int i=0; i<ndims; i++) npoints_dim_interface *= (i==d) ? (dim[i]+1) : dim[i];
527  /* evaluate cell-centered flux */
528  FluxFunction(FluxC,u,d,solver,t);
529  /* compute interface fluxes */
530  ReconstructHyperbolic(FluxI,FluxC,u,x+offset,d,solver,mpi,t,LimFlag,UpwindFunction);
531 
532  FluxI_p1 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]*nvars);
533  FluxI_p2 = solver->StageBoundaryBuffer+(solver->gpu_npoints_boundary_offset[d]+solver->gpu_npoints_boundary[d])*nvars;
534 
535 #if defined(GPU_STAT)
536  checkCuda( cudaEventRecord(start, 0) );
537 #endif
538  if (ndims == 3) {
539  HyperbolicFunction_dim3_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(
540  npoints_grid, size, npoints_dim_interface, d, nvars, ghosts, offset,
541  solver->gpu_dim_local, FluxI, dxinv,
542  hyp, FluxI_p1, FluxI_p2
543  );
544  } else {
545  fprintf(stderr,"gpuHyperbolicFunction(): Not implemented for ndims = %d!\n", ndims);
546  exit(1);
547  }
548  cudaDeviceSynchronize();
549 
550 #if defined(GPU_STAT)
551  checkCuda( cudaEventRecord(stop, 0) );
552  checkCuda( cudaEventSynchronize(stop) );
553  checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
554  printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
555  "HyperbolicFunction", milliseconds*1e-3, d, (1e-6*2*npoints_grid*nvars*sizeof(double))/milliseconds);
556 
557  checkCuda( cudaEventRecord(start, 0) );
558 #endif
559 
560  StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
561  solver->gpu_npoints_boundary[d], nvars, -1, FluxI_p1,
562  solver->StageBoundaryIntegral + 2*d*nvars
563  );
564  StageBoundaryIntegral_kernel<GPU_THREADS_PER_BLOCK><<<1, GPU_THREADS_PER_BLOCK, GPU_THREADS_PER_BLOCK*nvars*sizeof(double)>>>(
565  solver->gpu_npoints_boundary[d], nvars, 1, FluxI_p2,
566  solver->StageBoundaryIntegral + (2*d+1)*nvars
567  );
568  cudaDeviceSynchronize();
569 
570 #if defined(GPU_STAT)
571  checkCuda( cudaEventRecord(stop, 0) );
572  checkCuda( cudaEventSynchronize(stop) );
573  checkCuda( cudaEventElapsedTime(&milliseconds, start, stop) );
574  printf("%-50s GPU time = %.6f dir = %d bandwidth (GB/s) = %.2f\n",
575  "StageBoundaryIntegral", milliseconds*1e-3, d, (1e-6*2*solver->gpu_npoints_boundary[d]*nvars*sizeof(double))/milliseconds);
576 #endif
577  offset += dim[d] + 2*ghosts;
578  }
579 
580  if (solver->flag_ib) gpuArrayBlockMultiply(hyp, solver->gpu_iblank, size, nvars);
581 
582 #if defined(GPU_STAT)
583  checkCuda(cudaEventDestroy(start));
584  checkCuda(cudaEventDestroy(stop));
585 #endif
586 
587  return(0);
588 }
589 
630  double *fluxI,
633  double *fluxC,
635  double *u,
636  double *x,
637  int dir,
638  void *s,
639  void *m,
640  double t,
641  int LimFlag,
645  int(*UpwindFunction)(double*,double*,double*,double*,
646  double*,double*,int,void*,double)
647 )
648 {
649  HyPar *solver = (HyPar*) s;
650  MPIVariables *mpi = (MPIVariables*) m;
651 
652  double *uC = NULL;
653  double *uL = solver->uL;
654  double *uR = solver->uR;
655  double *fluxL = solver->fL;
656  double *fluxR = solver->fR;
657 
658  /*
659  precalculate the non-linear interpolation coefficients if required
660  else reuse the weights previously calculated
661  */
662 
663  if (LimFlag) solver->SetInterpLimiterVar(fluxC,u,x,dir,solver,mpi);
664 
665  /* if defined, calculate the modified u-function to be used for upwinding
666  e.g.: used in well-balanced schemes for Euler/Navier-Stokes with gravity
667  otherwise, just copy u to uC */
668  if (solver->UFunction) {
669  uC = solver->uC;
670  solver->UFunction(uC,u,dir,solver,mpi,t);
671  } else uC = u;
672 
673 
674  /* Interpolation -> to calculate left and right-biased interface flux and state variable*/
675  solver->InterpolateInterfacesHyp(uL ,uC ,u,x, 1,dir,solver,mpi,1);
676  solver->InterpolateInterfacesHyp(uR ,uC ,u,x,-1,dir,solver,mpi,1);
677  solver->InterpolateInterfacesHyp(fluxL,fluxC,u,x, 1,dir,solver,mpi,0);
678  solver->InterpolateInterfacesHyp(fluxR,fluxC,u,x,-1,dir,solver,mpi,0);
679 
680  /* Upwind -> to calculate the final interface flux */
681  if (UpwindFunction) { UpwindFunction (fluxI,fluxL,fluxR,uL,uR,u,dir,solver,t); }
682  else { DefaultUpwinding (fluxI,fluxL,fluxR,NULL,NULL,NULL,dir,solver,t); }
683 
684  return(0);
685 }
686 
687 #endif
688 
692  double *fI,
693  double *fL,
694  double *fR,
695  double *uL,
696  double *uR,
697  double *u,
698  int dir,
699  void *s,
700  double t
701 )
702 {
703  HyPar *solver = (HyPar*) s;
704  int done;
705 
706  int *dim = solver->dim_local;
707  int ndims = solver->ndims;
708  int nvars = solver->nvars;
709 
710  int bounds_outer[ndims], bounds_inter[ndims];
711  _ArrayCopy1D_(dim,bounds_outer,ndims); bounds_outer[dir] = 1;
712  _ArrayCopy1D_(dim,bounds_inter,ndims); bounds_inter[dir] += 1;
713 
714  done = 0; int index_outer[ndims], index_inter[ndims];
715  _ArraySetValue_(index_outer,ndims,0);
716  while (!done) {
717  _ArrayCopy1D_(index_outer,index_inter,ndims);
718  for (index_inter[dir] = 0; index_inter[dir] < bounds_inter[dir]; index_inter[dir]++) {
719  int p; _ArrayIndex1D_(ndims,bounds_inter,index_inter,0,p);
720  int v; for (v=0; v<nvars; v++) fI[nvars*p+v] = 0.5 * (fL[nvars*p+v]+fR[nvars*p+v]);
721  }
722  _ArrayIncrementIndex_(ndims,bounds_outer,index_outer,done);
723  }
724 
725  return(0);
726 }
int npoints_local_wghosts
Definition: hypar.h:42
#define _ArraySetValue_(x, size, value)
double * uR
Definition: hypar.h:139
__global__ void HyperbolicFunction_dim3_kernel(int npoints_grid, int npoints_local_wghosts, int npoints_dim_interface, int d, int nvars, int ghosts, int offset, const int *__restrict__ dim, const double *__restrict__ FluxI, const double *__restrict__ dxinv, double *__restrict__ hyp, double *__restrict__ FluxI_p1, double *__restrict__ FluxI_p2)
double * uL
Definition: hypar.h:139
double * StageBoundaryBuffer
Definition: hypar.h:462
#define _ArrayIncrementIndex_(N, imax, i, done)
void gpuArrayBlockMultiply(double *, const double *, int, int)
int gpuHyperbolicFunction(double *hyp, double *u, void *s, void *m, double t, int LimFlag, int(*FluxFunction)(double *, double *, int, void *, double), int(*UpwindFunction)(double *, double *, double *, double *, double *, double *, int, void *, double))
#define _ArrayIndexnD_(N, index, imax, i, ghost)
double * fL
Definition: hypar.h:139
int flag_ib
Definition: hypar.h:441
static int ReconstructHyperbolic(double *, double *, double *, double *, int, void *, void *, double, int, int(*)(double *, double *, double *, double *, double *, double *, int, void *, double))
int * dim_local
Definition: hypar.h:37
MPI related function definitions.
double * gpu_dxinv
Definition: hypar.h:458
int gpu_npoints_boundary[3]
Definition: hypar.h:453
double * gpu_iblank
Definition: hypar.h:456
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
#define GPU_MAX_NVARS
Definition: basic_gpu.h:9
int ghosts
Definition: hypar.h:52
#define _ArrayIndex1D_(N, imax, i, ghost, index)
int(* SetInterpLimiterVar)(double *, double *, double *, int, void *, void *)
Definition: hypar.h:234
__global__ void StageBoundaryIntegral_kernel(int n, int nvars, int sign, const double *__restrict__ FluxI, double *__restrict__ StageBoundaryIntegral)
double * fluxI
Definition: hypar.h:136
int * gpu_dim_local
Definition: hypar.h:455
double * gpu_x
Definition: hypar.h:457
int gpu_npoints_boundary_offset[3]
Definition: hypar.h:452
double * uC
Definition: hypar.h:131
Contains function definitions for common array operations on GPU.
#define _ArrayCopy1D_(x, y, size)
double * fluxC
Definition: hypar.h:128
int nvars
Definition: hypar.h:29
int(* InterpolateInterfacesHyp)(double *, double *, double *, double *, int, int, void *, void *, int)
Definition: hypar.h:224
void gpuMemset(void *, int, size_t)
Contains structure definition for hypar.
double * StageBoundaryIntegral
Definition: hypar.h:382
int(* UFunction)(double *, double *, int, void *, void *, double)
Definition: hypar.h:321
int ndims
Definition: hypar.h:26
int StageBoundaryBuffer_size
Definition: hypar.h:461
#define sign(a)
Definition: math_ops.h:54
static int DefaultUpwinding(double *, double *, double *, double *, double *, double *, int, void *, double)
Structure of MPI-related variables.
int flag_nonlinearinterp
Definition: hypar.h:411
Structure containing all solver-specific variables and functions.
Definition: hypar.h:23
double * fR
Definition: hypar.h:139