Contains function definitions for common array operations on GPU.
More...
Go to the source code of this file.
|
void | gpuSetDevice (int device) |
|
void | gpuMemcpy (void *, const void *, size_t, enum gpuMemcpyKind) |
|
void | gpuMalloc (void **, size_t) |
|
void | gpuMemset (void *, int, size_t) |
|
void | gpuFree (void *) |
|
void | gpuArrayCopy1D (const double *, double *, int) |
|
void | gpuArraySetValue (double *, int, double) |
|
void | gpuArrayAXPY (const double *, double, double *, int) |
|
void | gpuArrayBlockMultiply (double *, const double *, int, int) |
|
double | gpuArraySumSquarenD (int, int, int *, int, int *, double *) |
|
void | gpuArrayCopy1DNewScheme (const double *, double *, int, int) |
|
void | gpuArrayCheckEqual (const char *, const double *, const double *, int, int) |
|
void | gpuArrayCheckEqual2 (const char *, const double *, const double *, int) |
|
Contains function definitions for common array operations on GPU.
- Author
- Youngdae Kim
Definition in file arrayfunctions_gpu.h.
Enumerator |
---|
gpuMemcpyHostToDevice |
|
gpuMemcpyDeviceToHost |
|
gpuMemcpyDeviceToDevice |
|
Definition at line 15 of file arrayfunctions_gpu.h.
void gpuSetDevice |
( |
int |
device | ) |
|
Set the active CUDA device (wraps cudaSetDevice() and prints an error message to stderr on failure).
- Parameters
-
Definition at line 84 of file ArrayImplementations_GPU.cu.
86 cudaSetDevice(device);
87 cudaError_t err = cudaGetLastError();
88 if (err != cudaSuccess) {
89 fprintf(stderr,
"Error in gpuSetDevice(): device=%d error message=\"%s\"\n", device, cudaGetErrorString(err));
void gpuMemcpy |
( |
void * |
dest, |
|
|
const void * |
src, |
|
|
size_t |
count, |
|
|
enum gpuMemcpyKind |
kind |
|
) |
| |
GPU memory copy
- Parameters
-
dest | destination |
src | source |
count | number of bytes to copy |
kind | kind of copy |
Definition at line 94 of file ArrayImplementations_GPU.cu.
101 checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyHostToDevice) );
104 checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyDeviceToHost) );
107 checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyDeviceToDevice) );
110 fprintf(stderr,
"Error: invalid gpuMemcpyKind: %d\n", kind);
void gpuMalloc |
( |
void ** |
devPtr, |
|
|
size_t |
size |
|
) |
| |
Allocate memory on the GPU device (wraps cudaMalloc() and prints an error message to stderr on failure).
- Parameters
-
devPtr | pointer to memory |
size | size of memory |
Definition at line 118 of file ArrayImplementations_GPU.cu.
121 cudaMalloc(devPtr, size);
122 cudaError_t err = cudaGetLastError();
123 if (err != cudaSuccess) {
124 fprintf( stderr,
"Error in gpuMalloc(): size=%d, error message=\"%s\"\n", size,
125 cudaGetErrorString(err) );
void gpuMemset |
( |
void * |
devPtr, |
|
|
int |
value, |
|
|
size_t |
count |
|
) |
| |
Set the first count bytes of the device memory pointed to by devPtr to value (wraps cudaMemset()).
- Parameters
-
devPtr | Pointer to memory |
value | value to set |
count | number of bytes to set |
Definition at line 131 of file ArrayImplementations_GPU.cu.
135 checkCuda( cudaMemset(devPtr, value, count) );
void gpuFree |
( |
void * |
devPtr | ) |
|
void gpuArrayCopy1D |
( |
const double * |
x, |
|
|
double * |
y, |
|
|
int |
n |
|
) |
| |
Element-wise copy y = x, where x and y are 1-dimensional arrays of length n.
- See Also
- _ArrayCopy1D_, ArrayCopy1D_kernel()
- Parameters
-
x | copy-from array |
y | copy-to array |
n | size of array |
Definition at line 148 of file ArrayImplementations_GPU.cu.
153 ArrayCopy1D_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, y, n);
154 cudaDeviceSynchronize();
#define GPU_THREADS_PER_BLOCK
void gpuArraySetValue |
( |
double * |
devPtr, |
|
|
int |
n, |
|
|
double |
value |
|
) |
| |
Set all elements of the 1-dimensional double-precision array devPtr of length n to the scalar value.
- See Also
- _ArraySetValue_, ArraySetValue_kernel()
- Parameters
-
devPtr | array |
n | size of array |
value | scalar value |
Definition at line 161 of file ArrayImplementations_GPU.cu.
166 ArraySetValue_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(devPtr, n, value);
167 cudaDeviceSynchronize();
#define GPU_THREADS_PER_BLOCK
void gpuArrayAXPY |
( |
const double * |
x, |
|
|
double |
a, |
|
|
double * |
y, |
|
|
int |
n |
|
) |
| |
- See Also
- _ArrayAXPY_, ArrayAXPY_kernel()
Element-wise AXPY y = a x + y, where a is a scalar, and x, y are 1-dimensional arrays of length n.
- Parameters
-
Definition at line 177 of file ArrayImplementations_GPU.cu.
183 ArrayAXPY_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, a, y, n);
184 cudaDeviceSynchronize();
#define GPU_THREADS_PER_BLOCK
void gpuArrayBlockMultiply |
( |
double * |
x, |
|
|
const double * |
a, |
|
|
int |
n, |
|
|
int |
bs |
|
) |
| |
- See Also
- _ArrayBlockMultiply_, ArrayBlockMultiply_kernel()
Given two arrays: x of size n*bs, and a of size n, this function implements: x[i][j] *= a[i] where i = 0,...,n-1, j = 0,...,bs-1, and x is stored as a 1D array in row-major format, i.e., x[i][j] = x[i*bs+j].
- Parameters
-
x | array of size n*bs, scaled in place |
a | array of size n containing the multiplier for each block |
n | size of array |
bs | block size |
Definition at line 193 of file ArrayImplementations_GPU.cu.
199 ArrayBlockMultiply_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, a, n, bs);
200 cudaDeviceSynchronize();
#define GPU_THREADS_PER_BLOCK
double gpuArraySumSquarenD |
( |
int |
nvars, |
|
|
int |
ndims, |
|
|
int * |
dim, |
|
|
int |
ghosts, |
|
|
int * |
index, |
|
|
double * |
x |
|
) |
| |
Returns the sum-of-squares of the elements in an n-D array (useful for L_2 norm)
- See Also
- ArraySumSquarenD()
- Parameters
-
nvars | number of elements at one array location, can be > 1 for systems of equations |
ndims | number of dimensions |
dim | integer array of size in each dimension |
ghosts | number of ghost points in the array x |
index | pre-allocated (by the calling function) integer array of size ndims |
x | the array |
Definition at line 206 of file ArrayImplementations_GPU.cu.
217 printf(
"gpuArraySumSquarenD hasn't been implemented, yet.\n");
long sum(const std::vector< int > &a_iv)
void gpuArrayCopy1DNewScheme |
( |
const double * |
src, |
|
|
double * |
dest, |
|
|
int |
npoints, |
|
|
int |
nvars |
|
) |
| |
Alternative implementation of _ArrayCopy1D_
- Parameters
-
src | source array |
dest | destination array |
npoints | number of points |
nvars | number of components |
Definition at line 223 of file ArrayImplementations_GPU.cu.
229 ArrayCopy1DNewScheme_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(src, dest, npoints, nvars);
230 cudaDeviceSynchronize();
#define GPU_THREADS_PER_BLOCK
void gpuArrayCheckEqual |
( |
const char * |
, |
|
|
const double * |
, |
|
|
const double * |
, |
|
|
int |
, |
|
|
int |
|
|
) |
| |
void gpuArrayCheckEqual2 |
( |
const char * |
, |
|
|
const double * |
, |
|
|
const double * |
, |
|
|
int |
|
|
) |
| |