HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
arrayfunctions_gpu.h File Reference

Contains function definitions for common array operations on GPU. More...

#include <arrayfunctions.h>

Go to the source code of this file.

Enumerations

enum  gpuMemcpyKind { gpuMemcpyHostToDevice = 0, gpuMemcpyDeviceToHost, gpuMemcpyDeviceToDevice }
 

Functions

void gpuSetDevice (int device)
 
void gpuMemcpy (void *, const void *, size_t, enum gpuMemcpyKind)
 
void gpuMalloc (void **, size_t)
 
void gpuMemset (void *, int, size_t)
 
void gpuFree (void *)
 
void gpuArrayCopy1D (const double *, double *, int)
 
void gpuArraySetValue (double *, int, double)
 
void gpuArrayAXPY (const double *, double, double *, int)
 
void gpuArrayBlockMultiply (double *, const double *, int, int)
 
double gpuArraySumSquarenD (int, int, int *, int, int *, double *)
 
void gpuArrayCopy1DNewScheme (const double *, double *, int, int)
 
void gpuArrayCheckEqual (const char *, const double *, const double *, int, int)
 
void gpuArrayCheckEqual2 (const char *, const double *, const double *, int)
 

Detailed Description

Contains function definitions for common array operations on GPU.

Author
Youngdae Kim

Definition in file arrayfunctions_gpu.h.

Enumeration Type Documentation

Enumerator
gpuMemcpyHostToDevice 
gpuMemcpyDeviceToHost 
gpuMemcpyDeviceToDevice 

Definition at line 15 of file arrayfunctions_gpu.h.

Function Documentation

void gpuSetDevice ( int  device)

Set device

Parameters
devicedevice

Definition at line 84 of file ArrayImplementations_GPU.cu.

85 {
86  cudaSetDevice(device);
87  cudaError_t err = cudaGetLastError();
88  if (err != cudaSuccess) {
89  fprintf(stderr,"Error in gpuSetDevice(): device=%d error message=\"%s\"\n", device, cudaGetErrorString(err));
90  }
91 }
void gpuMemcpy ( void *  dest,
const void *  src,
size_t  count,
enum gpuMemcpyKind  kind 
)

GPU memory copy

Parameters
destdestination
srcsource
countcount
kindkind of copy

Definition at line 94 of file ArrayImplementations_GPU.cu.

98 {
99  switch (kind) {
101  checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyHostToDevice) );
102  break;
104  checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyDeviceToHost) );
105  break;
107  checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyDeviceToDevice) );
108  break;
109  default:
110  fprintf(stderr, "Error: invalid gpuMemcpyKind: %d\n", kind);
111  assert(0);
112  break;
113  }
114  return;
115 }
void gpuMalloc ( void **  devPtr,
size_t  size 
)

Allocate memory

Parameters
devPtrpointer to memory
sizesize of memory

Definition at line 118 of file ArrayImplementations_GPU.cu.

120 {
121  cudaMalloc(devPtr, size);
122  cudaError_t err = cudaGetLastError();
123  if (err != cudaSuccess) {
124  fprintf( stderr,"Error in gpuMalloc(): size=%d, error message=\"%s\"\n", size,
125  cudaGetErrorString(err) );
126  }
127  return;
128 }
void gpuMemset ( void *  devPtr,
int  value,
size_t  count 
)

Set value

Parameters
devPtrPointer to memory
valuevalue to set
countsize of data

Definition at line 131 of file ArrayImplementations_GPU.cu.

134 {
135  checkCuda( cudaMemset(devPtr, value, count) );
136  return;
137 }
void gpuFree ( void *  devPtr)

deallocate memory

Parameters
devPtrPointer to memory

Definition at line 140 of file ArrayImplementations_GPU.cu.

141 {
142  checkCuda( cudaFree(devPtr) );
143  return;
144 }
void gpuArrayCopy1D ( const double *  x,
double *  y,
int  n 
)

Element-wise copy y = x, where x, y are 1-dimensional arrays of length size.

See Also
_ArrayCopy1D_, ArrayCopy1D_kernel()
Parameters
xcopy-from array
ycopy-to array
nsize of array

Definition at line 148 of file ArrayImplementations_GPU.cu.

151 {
152  int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
153  ArrayCopy1D_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, y, n);
154  cudaDeviceSynchronize();
155  return;
156 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
void gpuArraySetValue ( double *  devPtr,
int  n,
double  value 
)

Set all elements of a 1-dimensional array x (any datatype) of length size to a scalar value.

See Also
_ArraySetValue_, ArraySetValue_kernel()
Parameters
devPtrarray
nsize of array
valuescalar value

Definition at line 161 of file ArrayImplementations_GPU.cu.

164 {
165  int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
166  ArraySetValue_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(devPtr, n, value);
167  cudaDeviceSynchronize();
168  return;
169 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
void gpuArrayAXPY ( const double *  x,
double  a,
double *  y,
int  n 
)
See Also
_ArrayAXPY_, ArrayAXPY_kernel()

Element-wise AXPY y = a x + y, where a is a scalar, and x, y, z are 1-dimensional arrays of length size.

Parameters
xx
aa
yy
nsize of array

Definition at line 177 of file ArrayImplementations_GPU.cu.

181 {
182  int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
183  ArrayAXPY_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, a, y, n);
184  cudaDeviceSynchronize();
185  return;
186 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
void gpuArrayBlockMultiply ( double *  x,
const double *  a,
int  n,
int  bs 
)
See Also
_ArrayBlockMultiply_, ArrayBlockMultiply_kernel()

Given two arrays: x of size n*bs, and a of size n, this function implements: x[i][j] *= a[i] where i = 1,...,n, j = 1,...,bs, and x is stored as a 1D array in row-major format, i.e., x[i][j] = x[i*bs+j].

Parameters
xx
aa
nsize of array
bsblock size

Definition at line 193 of file ArrayImplementations_GPU.cu.

197 {
198  int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
199  ArrayBlockMultiply_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, a, n, bs);
200  cudaDeviceSynchronize();
201  return;
202 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
double gpuArraySumSquarenD ( int  nvars,
int  ndims,
int *  dim,
int  ghosts,
int *  index,
double *  x 
)

Returns the sum-of-squares of the elements in an n-D array (useful for L_2 norm)

See Also
ArraySumSquarenD()
Parameters
nvarsnumber of elements at one array location, can be > 1 for systems of equations
ndimsnumber of dimensions
diminteger array of size in each dimension
ghostsnumber of ghost points in the array x
indexpre-allocated (by the calling function) integer array of size ndims
xthe array

Definition at line 206 of file ArrayImplementations_GPU.cu.

215 {
216  double sum = 0;
217  printf("gpuArraySumSquarenD hasn't been implemented, yet.\n");
218  exit(0);
219  return (sum);
220 }
long sum(const std::vector< int > &a_iv)
Definition: std_vec_ops.h:18
void gpuArrayCopy1DNewScheme ( const double *  src,
double *  dest,
int  npoints,
int  nvars 
)

Alternative implementation of _ArrayCopy1D_

Parameters
srcsource array
destdestination array
npointsnumber of points
nvarsnumber of components

Definition at line 223 of file ArrayImplementations_GPU.cu.

227 {
228  int nblocks = (npoints-1) / GPU_THREADS_PER_BLOCK + 1;
229  ArrayCopy1DNewScheme_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(src, dest, npoints, nvars);
230  cudaDeviceSynchronize();
231  return;
232 }
#define GPU_THREADS_PER_BLOCK
Definition: basic_gpu.h:7
void gpuArrayCheckEqual ( const char *  ,
const double *  ,
const double *  ,
int  ,
int   
)
void gpuArrayCheckEqual2 ( const char *  ,
const double *  ,
const double *  ,
int   
)