97 struct timeval start,stage1,stage2,stage3,stage4;
103 MPI_Comm *comm = (MPI_Comm*) m;
108 MPI_Comm_size(*comm,&nproc);
109 MPI_Comm_rank(*comm,&rank);
117 fprintf(stderr,
"Error in tridiagLU(): NULL pointer passed for parameters.\n");
122 gettimeofday(&start,NULL);
124 if ((ns == 0) || (n == 0))
return(0);
126 xs1 = (
double*) calloc (ns,
sizeof(
double));
127 xp1 = (
double*) calloc (ns,
sizeof(
double));
128 for (i=0; i<ns; i++) xs1[i] = xp1[i] = 0;
131 istart = (rank == 0 ? 1 : 2);
133 for (i = istart; i < iend; i++) {
134 for (d = 0; d < ns; d++) {
135 if (b[(i-1)*ns+d] == 0)
return(-1);
136 double factor = a[i*ns+d] / b[(i-1)*ns+d];
137 b[i*ns+d] -= factor * c[(i-1)*ns+d];
138 a[i*ns+d] = -factor * a[(i-1)*ns+d];
139 x[i*ns+d] -= factor * x[(i-1)*ns+d];
141 double factor = c[d] / b[(i-1)*ns+d];
142 c[d] = -factor * c[(i-1)*ns+d];
143 b[d] -= factor * a[(i-1)*ns+d];
144 x[d] -= factor * x[(i-1)*ns+d];
150 gettimeofday(&stage1,NULL);
155 double *sendbuf, *recvbuf;
156 sendbuf = (
double*) calloc (ns*nvar,
sizeof(
double));
157 recvbuf = (
double*) calloc (ns*nvar,
sizeof(
double));
158 for (d=0; d<ns; d++) {
159 sendbuf[d*nvar+0] = a[(n-1)*ns+d];
160 sendbuf[d*nvar+1] = b[(n-1)*ns+d];
161 sendbuf[d*nvar+2] = c[(n-1)*ns+d];
162 sendbuf[d*nvar+3] = x[(n-1)*ns+d];
165 MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
166 if (rank) MPI_Irecv(recvbuf,nvar*ns,MPI_DOUBLE,rank-1,1436,*comm,&req[0]);
167 if (rank != nproc-1) MPI_Isend(sendbuf,nvar*ns,MPI_DOUBLE,rank+1,1436,*comm,&req[1]);
168 MPI_Status status_arr[2];
169 MPI_Waitall(2,&req[0],status_arr);
173 for (d = 0; d < ns; d++) {
174 double am1, bm1, cm1, xm1;
175 am1 = recvbuf[d*nvar+0];
176 bm1 = recvbuf[d*nvar+1];
177 cm1 = recvbuf[d*nvar+2];
178 xm1 = recvbuf[d*nvar+3];
180 if (bm1 == 0)
return(-1);
182 b[d] -= factor * cm1;
183 a[d] = -factor * am1;
184 x[d] -= factor * xm1;
185 if (b[(n-1)*ns+d] == 0)
return(-1);
186 factor = c[d] / b[(n-1)*ns+d];
187 b[d] -= factor * a[(n-1)*ns+d];
188 c[d] = -factor * c[(n-1)*ns+d];
189 x[d] -= factor * x[(n-1)*ns+d];
197 gettimeofday(&stage2,NULL);
203 zero = (
double*) calloc (ns,
sizeof(
double));
204 one = (
double*) calloc (ns,
sizeof(
double));
205 for (d=0; d<ns; d++) {
211 if (rank) ierr =
tridiagLUGS(a,b,c,x,1,ns,params,comm);
212 else ierr =
tridiagLUGS(zero,one,zero,zero,1,ns,params,comm);
213 if (ierr)
return(ierr);
223 MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
224 for (d=0; d<ns; d++) xs1[d] = x[d];
225 if (rank+1 < nproc) MPI_Irecv(xp1,ns,MPI_DOUBLE,rank+1,1323,*comm,&req[0]);
226 if (rank) MPI_Isend(xs1,ns,MPI_DOUBLE,rank-1,1323,*comm,&req[1]);
227 MPI_Status status_arr[2];
228 MPI_Waitall(2,&req[0],status_arr);
232 fprintf(stderr,
"Error: nproc > 1 for a serial run!\n");
237 gettimeofday(&stage3,NULL);
241 iend = (rank == 0 ? 0 : 1);
243 for (d = 0; d < ns; d++) {
244 if (b[istart*ns+d] == 0)
return(-1);
245 x[istart*ns+d] = (x[istart*ns+d]-a[istart*ns+d]*x[d]-c[istart*ns+d]*xp1[d]) / b[istart*ns+d];
247 for (i = istart-1; i > iend-1; i--) {
248 for (d = 0; d < ns; d++) {
249 if (b[i*ns+d] == 0)
return(-1);
250 x[i*ns+d] = (x[i*ns+d]-c[i*ns+d]*x[(i+1)*ns+d]-a[i*ns+d]*x[d]) / b[i*ns+d];
255 gettimeofday(&stage4,NULL);
263 walltime = ((stage1.tv_sec * 1000000 + stage1.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
264 params->
stage1_time = (double) walltime / 1000000.0;
265 walltime = ((stage2.tv_sec * 1000000 + stage2.tv_usec) - (stage1.tv_sec * 1000000 + stage1.tv_usec));
266 params->
stage2_time = (double) walltime / 1000000.0;
267 walltime = ((stage3.tv_sec * 1000000 + stage3.tv_usec) - (stage2.tv_sec * 1000000 + stage2.tv_usec));
268 params->
stage3_time = (double) walltime / 1000000.0;
269 walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (stage3.tv_sec * 1000000 + stage3.tv_usec));
270 params->
stage4_time = (double) walltime / 1000000.0;
271 walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
272 params->
total_time = (double) walltime / 1000000.0;
Header file for TridiagLU.
int tridiagLU(double *, double *, double *, double *, int, int, void *, void *)
char reducedsolvetype[50]
int tridiagLUGS(double *, double *, double *, double *, int, int, void *, void *)
int tridiagIterJacobi(double *, double *, double *, double *, int, int, void *, void *)