HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
BCPeriodic_GPU.cu
/*! @file BCPeriodic_GPU.cu
    @author Youngdae Kim
    @brief GPU implementations of periodic boundary conditions
*/
#include <basic_gpu.h>
#include <arrayfunctions_gpu.h>
#include <mpivars.h>
#include <boundaryconditions.h>

#ifdef CUDA_VAR_ORDERDING_AOS

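/* In this AoS ("array of structures") branch, the nvars solution components of a
   grid point are stored contiguously in phi, so a grid point is copied with a
   single _ArrayCopy1D_ of nvars values. The #else branch below assumes the
   alternative ordering, where each component occupies its own contiguous block of
   npoints_local_wghosts entries and the copy strides through phi one component
   at a time. */
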
/*! Kernel of gpuBCPeriodicU() */
__global__
void BCPeriodicU_kernel(
  int npoints_bounds,
  int npoints_local_wghosts,
  int face,
  int ndims,
  int dim,
  int ghosts,
  int nvars,
  const int * __restrict__ bounds,
  const int * __restrict__ size,
  const int * __restrict__ boundary_is,
  double * __restrict__ phi
)
{
  int p = blockDim.x * blockIdx.x + threadIdx.x;

  if (p < npoints_bounds) {
    int p1 = 0, p2 = 0;
    int index1[GPU_MAX_NDIMS], index2[GPU_MAX_NDIMS];

    _ArrayIndexnD_(ndims,p,bounds,index1,0);
    _ArrayCopy1D_(index1,index2,ndims);
    if (face == 1) {
      index2[dim] = index1[dim] + size[dim]-ghosts;
      _ArrayIndex1DWO_(ndims,size,index1,boundary_is,ghosts,p1);
      _ArrayIndex1D_(ndims,size,index2,ghosts,p2);
    } else if (face == -1) {
      _ArrayIndex1DWO_(ndims,size,index1,boundary_is,ghosts,p1);
      _ArrayIndex1D_(ndims,size,index1,ghosts,p2);
    }
    _ArrayCopy1D_((phi+nvars*p2),(phi+nvars*p1),nvars);
  }
  return;
}

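/* Illustration of the index mapping above for one spatial dimension with
   N = size[dim] interior points and g = ghosts ghost points:
     face == +1 (low side):  ghost point  -g+i  <-  interior point  N-g+i,  i = 0,...,g-1
     face == -1 (high side): ghost point   N+i  <-  interior point      i,  i = 0,...,g-1
   i.e., each physical-boundary ghost point receives the solution from the
   opposite end of the (periodic) domain. */
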
/*! Applies periodic boundary conditions: Implemented by copying the solution
    from the other end of the domain into the physical boundary ghost points.
    \n\n
    **Note**: This function only acts if the number of processors is 1 along
    the spatial dimension this boundary corresponds to. If there is more than one
    processor along this dimension, periodicity is handled by MPIExchangeBoundariesnD()
    to minimize communication.
    \sa BCPeriodicU() */
extern "C" int gpuBCPeriodicU(
  void   * __restrict__ b,    /*!< Boundary object of type #DomainBoundary */
  void   * __restrict__ m,    /*!< MPI object of type #MPIVariables */
  int    ndims,               /*!< Number of spatial dimensions */
  int    nvars,               /*!< Number of variables/DoFs per grid point */
  int    * __restrict__ size, /*!< Integer array with the number of grid points in
                                   each spatial dimension */
  int    ghosts,              /*!< Number of ghost points */
  double * __restrict__ phi,  /*!< The solution array on which to apply the boundary condition */
  double waqt                 /*!< Current solution time */
)
{
  DomainBoundary *boundary = (DomainBoundary*) b;
  MPIVariables   *mpi      = (MPIVariables*)   m;

  int dim  = boundary->dim;
  int face = boundary->face;

  if ((boundary->on_this_proc) && (mpi->iproc[dim] == 1)) {
    int nblocks;
    nblocks = (boundary->gpu_npoints_bounds-1) / GPU_THREADS_PER_BLOCK + 1;

#if defined(GPU_STAT)
    cudaEvent_t startEvent, stopEvent;
    float milliseconds = 0;

    int memory_accessed = 2*boundary->gpu_npoints_bounds*nvars*sizeof(double);

    checkCuda( cudaEventCreate(&startEvent) );
    checkCuda( cudaEventCreate(&stopEvent) );

    checkCuda( cudaEventRecord(startEvent, 0) );
#endif

    BCPeriodicU_kernel<<<nblocks,GPU_THREADS_PER_BLOCK>>>(boundary->gpu_npoints_bounds,
      boundary->gpu_npoints_local_wghosts, face, ndims, dim, ghosts, nvars,
      boundary->gpu_bounds, size, boundary->gpu_is, phi
    );
    cudaDeviceSynchronize();

#if defined(GPU_STAT)
    checkCuda( cudaEventRecord(stopEvent, 0) );
    checkCuda( cudaEventSynchronize(stopEvent) );
    checkCuda( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent) );

    printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
           "BCPeriodicU", milliseconds*1e-3,
           (1e-6*(memory_accessed)/milliseconds));
#endif
  }

  return(0);
}

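/* Illustrative call (a sketch, not taken from this file): in HyPar this function
   is invoked through the boundary-condition machinery set up elsewhere; the names
   solver, solver->dim_local, and solver->gpu_u below are assumptions about the
   surrounding data structures, shown only to indicate how the arguments map to
   the signature above.

     if (!strcmp(boundary->bctype, _PERIODIC_)) {
       gpuBCPeriodicU((void*)boundary, (void*)mpi, solver->ndims, solver->nvars,
                      solver->dim_local, solver->ghosts, solver->gpu_u, waqt);
     }
*/
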
#else

/*! Kernel of gpuBCPeriodicU() */
__global__
void BCPeriodicU_kernel(
  int npoints_bounds,
  int npoints_local_wghosts,
  int face,
  int ndims,
  int dim,
  int ghosts,
  int nvars,
  const int * __restrict__ bounds,
  const int * __restrict__ size,
  const int * __restrict__ boundary_is,
  double * __restrict__ phi
)
{
  int p = blockDim.x * blockIdx.x + threadIdx.x;

  if (p < npoints_bounds) {
    int p1 = 0, p2 = 0;
    int index1[GPU_MAX_NDIMS], index2[GPU_MAX_NDIMS];

    _ArrayIndexnD_(ndims,p,bounds,index1,0);
    _ArrayCopy1D_(index1,index2,ndims);
    if (face == 1) {
      index2[dim] = index1[dim] + size[dim]-ghosts;
      _ArrayIndex1DWO_(ndims,size,index1,boundary_is,ghosts,p1);
      _ArrayIndex1D_(ndims,size,index2,ghosts,p2);
    } else if (face == -1) {
      _ArrayIndex1DWO_(ndims,size,index1,boundary_is,ghosts,p1);
      _ArrayIndex1D_(ndims,size,index1,ghosts,p2);
    }

    for (int j=0; j<nvars; j++) {
      phi[p1] = phi[p2];
      p1 += npoints_local_wghosts;
      p2 += npoints_local_wghosts;
    }
  }
  return;
}

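/* Unlike the AoS kernel in the branch above, this variant assumes that component j
   of grid point p is stored at phi[j*npoints_local_wghosts + p], so the copy loops
   over the nvars components and advances both indices by npoints_local_wghosts per
   iteration instead of copying nvars contiguous values. */
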
/*! Applies periodic boundary conditions: Implemented by copying the solution
    from the other end of the domain into the physical boundary ghost points.
    \n\n
    **Note**: This function only acts if the number of processors is 1 along
    the spatial dimension this boundary corresponds to. If there is more than one
    processor along this dimension, periodicity is handled by MPIExchangeBoundariesnD()
    to minimize communication.
    \sa BCPeriodicU(), gpuBCPeriodicU() */
extern "C" int gpuBCPeriodicU(
  void   * __restrict__ b,    /*!< Boundary object of type #DomainBoundary */
  void   * __restrict__ m,    /*!< MPI object of type #MPIVariables */
  int    ndims,               /*!< Number of spatial dimensions */
  int    nvars,               /*!< Number of variables/DoFs per grid point */
  int    * __restrict__ size, /*!< Integer array with the number of grid points in
                                   each spatial dimension */
  int    ghosts,              /*!< Number of ghost points */
  double * __restrict__ phi,  /*!< The solution array on which to apply the boundary condition */
  double waqt                 /*!< Current solution time */
)
{
  DomainBoundary *boundary = (DomainBoundary*) b;
  MPIVariables   *mpi      = (MPIVariables*)   m;

  int dim  = boundary->dim;
  int face = boundary->face;

  if ((boundary->on_this_proc) && (mpi->iproc[dim] == 1)) {
    int nblocks;
    nblocks = (boundary->gpu_npoints_bounds-1) / GPU_THREADS_PER_BLOCK + 1;

#if defined(GPU_STAT)
    cudaEvent_t startEvent, stopEvent;
    float milliseconds = 0;

    int memory_accessed = 2*boundary->gpu_npoints_bounds*nvars*sizeof(double);

    checkCuda(cudaEventCreate(&startEvent));
    checkCuda(cudaEventCreate(&stopEvent));

    checkCuda(cudaEventRecord(startEvent, 0));
#endif

    BCPeriodicU_kernel<<<nblocks,GPU_THREADS_PER_BLOCK>>>(boundary->gpu_npoints_bounds,
      boundary->gpu_npoints_local_wghosts, face, ndims, dim, ghosts, nvars,
      boundary->gpu_bounds, size, boundary->gpu_is, phi
    );
    cudaDeviceSynchronize();

#if defined(GPU_STAT)
    checkCuda(cudaEventRecord(stopEvent, 0));
    checkCuda(cudaEventSynchronize(stopEvent));
    checkCuda(cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));

    printf("%-50s GPU time (secs) = %.6f bandwidth (GB/s) = %6.2f\n",
           "BCPeriodicU2", milliseconds*1e-3,
           (1e-6*(memory_accessed)/milliseconds));
#endif
  }

  return(0);
}

#endif
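
/* Note on the GPU_STAT blocks above: memory_accessed counts one read and one
   write of nvars doubles per boundary point, so the printed bandwidth is the
   bytes moved divided by the kernel's elapsed time, reported in GB/s as
   1e-6 * memory_accessed / milliseconds. */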