HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
ArrayImplementations_GPU.cu File Reference

GPU implementations of array functions. More...

#include <assert.h>
#include <basic_gpu.h>
#include <arrayfunctions_gpu.h>

Go to the source code of this file.

Functions

__global__ void ArrayCopy1D_kernel (const double *x, double *y, int n)
 
__global__ void ArraySetValue_kernel (double *x, int n, double value)
 
__global__ void ArrayAXPY_kernel (const double *x, double a, double *y, int n)
 
__global__ void ArrayBlockMultiply_kernel (double *x, const double *a, int n, int bs)
 
__global__ void ArrayCopy1DNewScheme_kernel (const double *__restrict__ src, double *__restrict__ dest, int npoints, int nvars)
 
void gpuSetDevice (int device)
 
void gpuMemcpy (void *dest, const void *src, size_t count, enum gpuMemcpyKind kind)
 
void gpuMalloc (void **devPtr, size_t size)
 
void gpuMemset (void *devPtr, int value, size_t count)
 
void gpuFree (void *devPtr)
 
void gpuArrayCopy1D (const double *x, double *y, int n)
 
void gpuArraySetValue (double *devPtr, int n, double value)
 
void gpuArrayAXPY (const double *x, double a, double *y, int n)
 
void gpuArrayBlockMultiply (double *x, const double *a, int n, int bs)
 
double gpuArraySumSquarenD (int nvars, int ndims, int *dim, int ghosts, int *index, double *x)
 
void gpuArrayCopy1DNewScheme (const double *src, double *dest, int npoints, int nvars)
 
void gpuArrayCheckEqual (const char *msg, const double *var_adj, const double *var_sep, int npoints)
 

Detailed Description

GPU implementations of array functions.

Author
Youngdae Kim

Definition in file ArrayImplementations_GPU.cu.

Function Documentation

__global__ void ArrayCopy1D_kernel ( const double *  x,
double *  y,
int  n 
)

Element-wise copy y = x, where x, y are 1-dimensional arrays of length size.

See Also
_ArrayCopy1D_
Parameters
xcopy-from array
ycopy-to array
nsize of array

Definition at line 13 of file ArrayImplementations_GPU.cu.

16 {
17  int tx = threadIdx.x + (blockIdx.x * blockDim.x);
18  if (tx < n) y[tx] = x[tx];
19  return;
20 }
__global__ void ArraySetValue_kernel ( double *  x,
int  n,
double  value 
)

Set all elements of a 1-dimensional array x (any datatype) of length size to a scalar value.

See Also
_ArraySetValue_
Parameters
xarray
nsize of array
valuescalar value

Definition at line 25 of file ArrayImplementations_GPU.cu.

28 {
29  int tx = threadIdx.x + (blockIdx.x * blockDim.x);
30  if (tx < n) x[tx] = value;
31  return;
32 }
__global__ void ArrayAXPY_kernel ( const double *  x,
double  a,
double *  y,
int  n 
)
See Also
_ArrayAXPY_

Element-wise AXPY y = a x + y, where a is a scalar, and x, y, z are 1-dimensional arrays of length size.

Parameters
xx
aa
yy
nsize of array

Definition at line 40 of file ArrayImplementations_GPU.cu.

44 {
45  int tx = threadIdx.x + (blockIdx.x * blockDim.x);
46  if (tx < n) y[tx] += a*x[tx];
47  return;
48 }
__global__ void ArrayBlockMultiply_kernel ( double *  x,
const double *  a,
int  n,
int  bs 
)
See Also
_ArrayBlockMultiply_

Given two arrays: x of size n*bs, and a of size n, this function implements: x[i][j] *= a[i] where i = 1,...,n, j = 1,...,bs, and x is stored as a 1D array in row-major format, i.e., x[i][j] = x[i*bs+j].

Parameters
xx
aa
nsize of array
bsblock size

Definition at line 56 of file ArrayImplementations_GPU.cu.

60 {
61  int tx = threadIdx.x + (blockIdx.x * blockDim.x);
62  if (tx < n) {
63  for (int i = 0; i < bs; i++) x[tx*bs + i] *= a[tx];
64  }
65 }
__global__ void ArrayCopy1DNewScheme_kernel ( const double *__restrict__  src,
double *__restrict__  dest,
int  npoints,
int  nvars 
)

Alternative implementation of _ArrayCopy1D_

Parameters
srcsource array
destdestination array
npointsnumber of points
nvarsnumber of components

Definition at line 69 of file ArrayImplementations_GPU.cu.

73 {
74  int p = blockDim.x * blockIdx.x + threadIdx.x;
75  if (p < npoints) {
76  for (int v=0; v<nvars; v++) {
77  dest[p+v*npoints] = src[p*nvars+v];
78  }
79  }
80  return;
81 }
void gpuSetDevice ( int  device)

Set device

Parameters
devicedevice

Definition at line 84 of file ArrayImplementations_GPU.cu.

85 {
86  cudaSetDevice(device);
87  cudaError_t err = cudaGetLastError();
88  if (err != cudaSuccess) {
89  fprintf(stderr,"Error in gpuSetDevice(): device=%d error message=\"%s\"\n", device, cudaGetErrorString(err));
90  }
91 }
void gpuMemcpy ( void *  dest,
const void *  src,
size_t  count,
enum gpuMemcpyKind  kind 
)

GPU memory copy

Parameters
destdestination
srcsource
countcount
kindkind of copy

Definition at line 94 of file ArrayImplementations_GPU.cu.

98 {
99  switch (kind) {
101  checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyHostToDevice) );
102  break;
104  checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyDeviceToHost) );
105  break;
107  checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyDeviceToDevice) );
108  break;
109  default:
110  fprintf(stderr, "Error: invalid gpuMemcpyKind: %d\n", kind);
111  assert(0);
112  break;
113  }
114  return;
115 }
void gpuMalloc ( void **  devPtr,
size_t  size 
)

Allocate memory

Parameters
devPtrpointer to memory
sizesize of memory

Definition at line 118 of file ArrayImplementations_GPU.cu.

120 {
121  cudaMalloc(devPtr, size);
122  cudaError_t err = cudaGetLastError();
123  if (err != cudaSuccess) {
124  fprintf( stderr,"Error in gpuMalloc(): size=%d, error message=\"%s\"\n", size,
125  cudaGetErrorString(err) );
126  }
127  return;
128 }
void gpuMemset ( void *  devPtr,
int  value,
size_t  count 
)

Set value

Parameters
devPtrPointer to memory
valuevalue to set
countsize of data

Definition at line 131 of file ArrayImplementations_GPU.cu.

134 {
135  checkCuda( cudaMemset(devPtr, value, count) );
136  return;
137 }
void gpuFree ( void *  devPtr)

deallocate memory

Parameters
devPtrPointer to memory

Definition at line 140 of file ArrayImplementations_GPU.cu.

141 {
142  checkCuda( cudaFree(devPtr) );
143  return;
144 }
void gpuArrayCopy1D ( const double *  x,
double *  y,
int  n 
)

Element-wise copy y = x, where x, y are 1-dimensional arrays of length size.

See Also
_ArrayCopy1D_, ArrayCopy1D_kernel()
Parameters
xcopy-from array
ycopy-to array
nsize of array

Definition at line 148 of file ArrayImplementations_GPU.cu.

151 {
152  int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
153  ArrayCopy1D_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, y, n);
154  cudaDeviceSynchronize();
155  return;
156 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
void gpuArraySetValue ( double *  devPtr,
int  n,
double  value 
)

Set all elements of a 1-dimensional array x (any datatype) of length size to a scalar value.

See Also
_ArraySetValue_, ArraySetValue_kernel()
Parameters
devPtrarray
nsize of array
valuescalar value

Definition at line 161 of file ArrayImplementations_GPU.cu.

164 {
165  int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
166  ArraySetValue_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(devPtr, n, value);
167  cudaDeviceSynchronize();
168  return;
169 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
void gpuArrayAXPY ( const double *  x,
double  a,
double *  y,
int  n 
)
See Also
_ArrayAXPY_, ArrayAXPY_kernel()

Element-wise AXPY y = a x + y, where a is a scalar, and x, y, z are 1-dimensional arrays of length size.

Parameters
xx
aa
yy
nsize of array

Definition at line 177 of file ArrayImplementations_GPU.cu.

181 {
182  int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
183  ArrayAXPY_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, a, y, n);
184  cudaDeviceSynchronize();
185  return;
186 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
void gpuArrayBlockMultiply ( double *  x,
const double *  a,
int  n,
int  bs 
)
See Also
_ArrayBlockMultiply_, ArrayBlockMultiply_kernel()

Given two arrays: x of size n*bs, and a of size n, this function implements: x[i][j] *= a[i] where i = 1,...,n, j = 1,...,bs, and x is stored as a 1D array in row-major format, i.e., x[i][j] = x[i*bs+j].

Parameters
xx
aa
nsize of array
bsblock size

Definition at line 193 of file ArrayImplementations_GPU.cu.

197 {
198  int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
199  ArrayBlockMultiply_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, a, n, bs);
200  cudaDeviceSynchronize();
201  return;
202 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
double gpuArraySumSquarenD ( int  nvars,
int  ndims,
int *  dim,
int  ghosts,
int *  index,
double *  x 
)

Returns the sum-of-squares of the elements in an n-D array (useful for L_2 norm)

See Also
ArraySumSquarenD()
Parameters
nvarsnumber of elements at one array location, can be > 1 for systems of equations
ndimsnumber of dimensions
diminteger array of size in each dimension
ghostsnumber of ghost points in the array x
indexpre-allocated (by the calling function) integer array of size ndims
xthe array

Definition at line 206 of file ArrayImplementations_GPU.cu.

215 {
216  double sum = 0;
217  printf("gpuArraySumSquarenD hasn't been implemented, yet.\n");
218  exit(0);
219  return (sum);
220 }
long sum(const std::vector< int > &a_iv)
Definition: std_vec_ops.h:18
void gpuArrayCopy1DNewScheme ( const double *  src,
double *  dest,
int  npoints,
int  nvars 
)

Alternative implementation of _ArrayCopy1D_

Parameters
srcsource array
destdestination array
npointsnumber of points
nvarsnumber of components

Definition at line 223 of file ArrayImplementations_GPU.cu.

227 {
228  int nblocks = (npoints-1) / GPU_THREADS_PER_BLOCK + 1;
229  ArrayCopy1DNewScheme_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(src, dest, npoints, nvars);
230  cudaDeviceSynchronize();
231  return;
232 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
void gpuArrayCheckEqual ( const char *  msg,
const double *  var_adj,
const double *  var_sep,
int  npoints 
)

Check if two arrays are equal, if not, report the difference

Check if two arrays are equal, if not, report the difference

Parameters
msgmessage
var_adjarray
var_separray
npointssize of array

Definition at line 236 of file ArrayImplementations_GPU.cu.

240 {
241  double *h_var_adj = (double *) malloc(npoints*sizeof(double));
242  double *h_var_sep = (double *) malloc(npoints*sizeof(double));
243 
244  gpuMemcpy(h_var_adj, var_adj, npoints*sizeof(double), gpuMemcpyDeviceToHost);
245  gpuMemcpy(h_var_sep, var_sep, npoints*sizeof(double), gpuMemcpyDeviceToHost);
246 
247  double max_err = 0.0;
248  for (int j=0; j<npoints; j++) {
249  if (h_var_sep[j] != h_var_adj[j]) {
250  max_err = max(max_err, fabs(h_var_sep[j]-h_var_adj[j]));
251  }
252  }
253 
254  free(h_var_adj);
255  free(h_var_sep);
256 
257  if (max_err > 1e-10) {
258  printf("gpuArrayCheckEqual: %-30s max_err = %e\n", msg, max_err);
259  exit(0);
260  }
261  return;
262 }
void gpuMemcpy(void *, const void *, size_t, enum gpuMemcpyKind)
#define max(a, b)
Definition: math_ops.h:18