Contains function definitions for common array operations on GPU. More...

#include <arrayfunctions.h>

Enumerations
enum	gpuMemcpyKind { gpuMemcpyHostToDevice = 0, gpuMemcpyDeviceToHost, gpuMemcpyDeviceToDevice }

Functions
void	gpuSetDevice (int device)

void	gpuMemcpy (void *, const void *, size_t, enum gpuMemcpyKind)

void	gpuMalloc (void **, size_t)

void	gpuMemset (void *, int, size_t)

void	gpuFree (void *)

void	gpuArrayCopy1D (const double *, double *, int)

void	gpuArraySetValue (double *, int, double)

void	gpuArrayAXPY (const double *, double, double *, int)

void	gpuArrayBlockMultiply (double *, const double *, int, int)

double	gpuArraySumSquarenD (int, int, int *, int, int *, double *)

void	gpuArrayCopy1DNewScheme (const double *, double *, int, int)

void	gpuArrayCheckEqual (const char *, const double *, const double *, int, int)

void	gpuArrayCheckEqual2 (const char *, const double *, const double *, int)

Detailed Description

Contains function definitions for common array operations on GPU.

Author: Youngdae Kim

Definition in file arrayfunctions_gpu.h.

Enumeration Type Documentation

enum gpuMemcpyKind

Enumerator
gpuMemcpyHostToDevice
gpuMemcpyDeviceToHost
gpuMemcpyDeviceToDevice

Definition at line 15 of file arrayfunctions_gpu.h.

                    {
     /* Host-to-device copy; gpuMemcpy() forwards this as cudaMemcpyHostToDevice. */
     gpuMemcpyHostToDevice = 0,
     /* Device-to-host copy; gpuMemcpy() forwards this as cudaMemcpyDeviceToHost. */
     gpuMemcpyDeviceToHost,
     /* Device-to-device copy; gpuMemcpy() forwards this as cudaMemcpyDeviceToDevice. */
     gpuMemcpyDeviceToDevice
 };

Function Documentation

void gpuSetDevice ( int device )

Set device

Parameters

device index of the GPU device to select (passed to cudaSetDevice())

Definition at line 84 of file ArrayImplementations_GPU.cu.

 {
     cudaSetDevice(device);
     cudaError_t err = cudaGetLastError();
     if (err != cudaSuccess) {
       fprintf(stderr,"Error in gpuSetDevice(): device=%d error message=\"%s\"\n", device, cudaGetErrorString(err));
     }
 }

void gpuMemcpy	(	void *	dest,
		const void *	src,
		size_t	count,
		enum gpuMemcpyKind	kind
	)

GPU memory copy

Parameters

dest	destination
src	source
count	count
kind	kind of copy

Definition at line 94 of file ArrayImplementations_GPU.cu.

 {
     switch (kind) {
         case gpuMemcpyHostToDevice:
             checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyHostToDevice) );
             break;
         case gpuMemcpyDeviceToHost:
             checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyDeviceToHost) );
             break;
         case gpuMemcpyDeviceToDevice:
             checkCuda( cudaMemcpy(dest, src, count, cudaMemcpyDeviceToDevice) );
             break;
         default:
             fprintf(stderr, "Error: invalid gpuMemcpyKind: %d\n", kind);
             assert(0);
             break;
     }
     return;
 }

void gpuMalloc	(	void **	devPtr,
		size_t	size
	)

Allocate memory

Parameters

devPtr	pointer to memory
size	size of memory

Definition at line 118 of file ArrayImplementations_GPU.cu.

 {
     cudaMalloc(devPtr, size);
     cudaError_t err = cudaGetLastError();
     if (err != cudaSuccess) {
       fprintf(  stderr,"Error in gpuMalloc(): size=%d, error message=\"%s\"\n", size, 
                 cudaGetErrorString(err) );
     }
     return;
 }

void gpuMemset	(	void *	devPtr,
		int	value,
		size_t	count
	)

Set value

Parameters

devPtr	Pointer to memory
value	value to set
count	size of data

Definition at line 131 of file ArrayImplementations_GPU.cu.

 {
     checkCuda( cudaMemset(devPtr, value, count) );
     return;
 }

void gpuFree ( void * devPtr )

Deallocate memory

Parameters

devPtr Pointer to memory

Definition at line 140 of file ArrayImplementations_GPU.cu.

 {
     checkCuda( cudaFree(devPtr) );
     return;
 }

void gpuArrayCopy1D	(	const double *	x,
		double *	y,
		int	n
	)

Element-wise copy y = x, where x, y are 1-dimensional arrays of length n.

See Also: _ArrayCopy1D_, ArrayCopy1D_kernel()

Parameters

x	copy-from array
y	copy-to array
n	size of array

Definition at line 148 of file ArrayImplementations_GPU.cu.

 {
     int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
     ArrayCopy1D_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, y, n);
     cudaDeviceSynchronize();
     return;
 }

void gpuArraySetValue	(	double *	devPtr,
		int	n,
		double	value
	)

Set all elements of a 1-dimensional array devPtr of length n to a scalar value.

See Also: _ArraySetValue_, ArraySetValue_kernel()

Parameters

devPtr	array
n	size of array
value	scalar value

Definition at line 161 of file ArrayImplementations_GPU.cu.

 {
     int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
     ArraySetValue_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(devPtr, n, value);
     cudaDeviceSynchronize();
     return;
 }

void gpuArrayAXPY	(	const double *	x,
		double	a,
		double *	y,
		int	n
	)

See Also: _ArrayAXPY_, ArrayAXPY_kernel()

Element-wise AXPY y = a x + y, where a is a scalar, and x, y are 1-dimensional arrays of length n.

Parameters

x	x
a	a
y	y
n	size of array

Definition at line 177 of file ArrayImplementations_GPU.cu.

 {
     int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
     ArrayAXPY_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, a, y, n);
     cudaDeviceSynchronize();
     return;
 }

void gpuArrayBlockMultiply	(	double *	x,
		const double *	a,
		int	n,
		int	bs
	)

See Also: _ArrayBlockMultiply_, ArrayBlockMultiply_kernel()

Given two arrays: x of size n*bs, and a of size n, this function implements: x[i][j] *= a[i] where i = 0,...,n-1, j = 0,...,bs-1, and x is stored as a 1D array in row-major format, i.e., x[i][j] = x[i*bs+j].

Parameters

x	x
a	a
n	size of array
bs	block size

Definition at line 193 of file ArrayImplementations_GPU.cu.

 {
     int nblocks = (n - 1) / GPU_THREADS_PER_BLOCK + 1;
     ArrayBlockMultiply_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(x, a, n, bs);
     cudaDeviceSynchronize();
     return;
 }

double gpuArraySumSquarenD	(	int	nvars,
		int	ndims,
		int *	dim,
		int	ghosts,
		int *	index,
		double *	x
	)

Returns the sum-of-squares of the elements in an n-D array (useful for L_2 norm)

See Also: ArraySumSquarenD()

Parameters

nvars	number of elements at one array location, can be > 1 for systems of equations
ndims	number of dimensions
dim	integer array of size in each dimension
ghosts	number of ghost points in the array x
index	pre-allocated (by the calling function) integer array of size ndims
x	the array

Definition at line 206 of file ArrayImplementations_GPU.cu.

 {
     double sum = 0;
     printf("gpuArraySumSquarenD hasn't been implemented, yet.\n");
     exit(0);
     return (sum);
 }

void gpuArrayCopy1DNewScheme	(	const double *	src,
		double *	dest,
		int	npoints,
		int	nvars
	)

Alternative implementation of _ArrayCopy1D_

Parameters

src	source array
dest	destination array
npoints	number of points
nvars	number of components

Definition at line 223 of file ArrayImplementations_GPU.cu.

 {
     int nblocks = (npoints-1) / GPU_THREADS_PER_BLOCK + 1;
     ArrayCopy1DNewScheme_kernel<<<nblocks, GPU_THREADS_PER_BLOCK>>>(src, dest, npoints, nvars);
     cudaDeviceSynchronize();
     return;
 }

void gpuArrayCheckEqual	(	const char *	,
		const double *	,
		const double *	,
		int	,
		int
	)

void gpuArrayCheckEqual2	(	const char *	,
		const double *	,
		const double *	,
		int
	)

Enumerations

Functions

Detailed Description

Enumeration Type Documentation

Function Documentation