HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
blocktridiagLU.c
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <sys/time.h>
9 #include <string.h>
10 #ifndef serial
11 #include <mpi.h>
12 #endif
13 #include <tridiagLU.h>
14 #include <matops.h>
15 
108  double *a,
109  double *b,
110  double *c,
111  double *x,
112  int n,
114  int ns,
115  int bs,
116  void *r,
117  void *m
118  )
119 {
120  TridiagLU *params = (TridiagLU*) r;
121  int d,i,j,istart,iend,size;
122  int rank,nproc,bs2=bs*bs,nsbs=ns*bs;
123  struct timeval start,stage1,stage2,stage3,stage4;
124 
125 #ifdef serial
126  rank = 0;
127  nproc = 1;
128 #else
129  MPI_Comm *comm = (MPI_Comm*) m;
130  const int nvar = 4;
131  int ierr = 0;
132 
133  if (comm) {
134  MPI_Comm_size(*comm,&nproc);
135  MPI_Comm_rank(*comm,&rank);
136  } else {
137  rank = 0;
138  nproc = 1;
139  }
140 #endif
141 
142  if (!params) {
143  fprintf(stderr,"Error in tridiagLU(): NULL pointer passed for parameters.\n");
144  return(1);
145  }
146 
147  /* start */
148  gettimeofday(&start,NULL);
149 
150  if ((ns == 0) || (n == 0) || (bs == 0)) return(0);
151  double *xs1, *xp1;
152  xs1 = (double*) calloc (nsbs, sizeof(double));
153  xp1 = (double*) calloc (nsbs, sizeof(double));
154  for (i=0; i<nsbs; i++) xs1[i] = xp1[i] = 0;
155 
156  /* Stage 1 - Parallel elimination of subdiagonal entries */
157  istart = (rank == 0 ? 1 : 2);
158  iend = n;
159  for (i = istart; i < iend; i++) {
160  double binv[bs2], factor[bs2];
161  for (d = 0; d < ns; d++) {
162  _MatrixInvert_ (b+((i-1)*ns+d)*bs2,binv,bs);
163  _MatrixMultiply_ (a+(i*ns+d)*bs2,binv,factor,bs);
164  _MatrixMultiplySubtract_ (b+(i*ns+d)*bs2,factor,c+((i-1)*ns+d)*bs2,bs);
165  _MatrixZero_ (a+(i*ns+d)*bs2,bs);
166  _MatrixMultiplySubtract_ (a+(i*ns+d)*bs2,factor,a+((i-1)*ns+d)*bs2,bs);
167  _MatVecMultiplySubtract_ (x+(i*ns+d)*bs ,factor,x+((i-1)*ns+d)*bs ,bs);
168  if (rank) {
169  _MatrixMultiply_ (c+d*bs2,binv,factor,bs);
170  _MatrixZero_ (c+d*bs2,bs);
171  _MatrixMultiplySubtract_ (c+d*bs2,factor,c+((i-1)*ns+d)*bs2,bs);
172  _MatrixMultiplySubtract_ (b+d*bs2,factor,a+((i-1)*ns+d)*bs2,bs);
173  _MatVecMultiplySubtract_ (x+d*bs ,factor,x+((i-1)*ns+d)*bs ,bs);
174  }
175  }
176  }
177 
178  /* end of stage 1 */
179  gettimeofday(&stage1,NULL);
180 
181  /* Stage 2 - Eliminate the first sub- & super-diagonal entries */
182  /* This needs the last (a,b,c,x) from the previous process */
183 #ifndef serial
184  double *sendbuf, *recvbuf;
185  size = ns*bs2*(nvar-1)+nsbs;
186  sendbuf = (double*) calloc (size, sizeof(double));
187  recvbuf = (double*) calloc (size, sizeof(double));
188  for (d=0; d<ns; d++) {
189  for (i=0; i<bs2; i++) {
190  sendbuf[(0*ns+d)*bs2+i] = a[((n-1)*ns+d)*bs2+i];
191  sendbuf[(1*ns+d)*bs2+i] = b[((n-1)*ns+d)*bs2+i];
192  sendbuf[(2*ns+d)*bs2+i] = c[((n-1)*ns+d)*bs2+i];
193  }
194  for (i=0; i<bs; i++) sendbuf[3*ns*bs2+d*bs+i] = x[((n-1)*ns+d)*bs+i];
195  }
196  if (nproc > 1) {
197  MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
198  if (rank) MPI_Irecv(recvbuf,size,MPI_DOUBLE,rank-1,1436,*comm,&req[0]);
199  if (rank != nproc-1) MPI_Isend(sendbuf,size,MPI_DOUBLE,rank+1,1436,*comm,&req[1]);
200  MPI_Status status_arr[2];
201  MPI_Waitall(2,&req[0],status_arr);
202  }
203  /* The first process sits this one out */
204  if (rank) {
205  for (d = 0; d < ns; d++) {
206  double am1[bs2], bm1[bs2], cm1[bs2], xm1[bs];
207  for (i=0; i<bs2; i++) {
208  am1[i] = recvbuf[(0*ns+d)*bs2+i];
209  bm1[i] = recvbuf[(1*ns+d)*bs2+i];
210  cm1[i] = recvbuf[(2*ns+d)*bs2+i];
211  }
212  for (i=0; i<bs; i++) xm1[i] = recvbuf[3*ns*bs2+d*bs+i];
213  double factor[bs2], binv[bs2];
214  _MatrixInvert_ (bm1,binv,bs);
215  _MatrixMultiply_ (a+d*bs2,binv,factor,bs);
216  _MatrixMultiplySubtract_ (b+d*bs2,factor,cm1,bs);
217  _MatrixZero_ (a+d*bs2,bs);
218  _MatrixMultiplySubtract_ (a+d*bs2,factor,am1,bs);
219  _MatVecMultiplySubtract_ (x+d*bs ,factor,xm1,bs);
220 
221  _MatrixInvert_ (b+((n-1)*ns+d)*bs2,binv,bs); if (ierr) return(ierr);
222  _MatrixMultiply_ (c+d*bs2,binv,factor,bs);
223  _MatrixMultiplySubtract_ (b+d*bs2,factor,a+((n-1)*ns+d)*bs2,bs);
224  _MatrixZero_ (c+d*bs2,bs);
225  _MatrixMultiplySubtract_ (c+d*bs2,factor,c+((n-1)*ns+d)*bs2,bs);
226  _MatVecMultiplySubtract_ (x+d*bs ,factor,x+((n-1)*ns+d)*bs ,bs);
227  }
228  }
229  free(sendbuf);
230  free(recvbuf);
231 #endif
232 
233  /* end of stage 2 */
234  gettimeofday(&stage2,NULL);
235 
236  /* Stage 3 - Solve the reduced (nproc-1) X (nproc-1) tridiagonal system */
237 #ifndef serial
238  if (nproc > 1) {
239  double *zero, *eye;
240  zero = (double*) calloc (ns*bs2, sizeof(double));
241  eye = (double*) calloc (ns*bs2, sizeof(double));
242  for (d=0; d<ns*bs2; d++) zero[d] = eye[d] = 0.0;
243  for (d=0; d<ns; d++) {
244  for (i=0; i<bs; i++) eye[d*bs2+(i*bs+i)] = 1.0;
245  }
246 
247  if (!strcmp(params->reducedsolvetype,_TRIDIAG_GS_)) {
248  /* not supported */
249  fprintf(stderr,"Error in blocktridiagLU(): Gather-and-solve for reduced system not available.\n");
250  return(1);
251  } else if (!strcmp(params->reducedsolvetype,_TRIDIAG_JACOBI_)) {
252  /* Solving the reduced system iteratively with the Jacobi method */
253  if (rank) ierr = blocktridiagIterJacobi(a,b,c,x,1,ns,bs,params,comm);
254  else ierr = blocktridiagIterJacobi(zero,eye,zero,zero,1,ns,bs,params,comm);
255  }
256  free(zero);
257  free(eye);
258 
259  /* Each process, get the first x of the next process */
260  MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
261  for (d=0; d<nsbs; d++) xs1[d] = x[d];
262  if (rank+1 < nproc) MPI_Irecv(xp1,nsbs,MPI_DOUBLE,rank+1,1323,*comm,&req[0]);
263  if (rank) MPI_Isend(xs1,nsbs,MPI_DOUBLE,rank-1,1323,*comm,&req[1]);
264  MPI_Status status_arr[2];
265  MPI_Waitall(2,&req[0],status_arr);
266  }
267 #else
268  if (nproc > 1) {
269  fprintf(stderr,"Error: nproc > 1 for a serial run!\n");
270  return(1);
271  }
272 #endif /* if not serial */
273  /* end of stage 3 */
274  gettimeofday(&stage3,NULL);
275 
276  /* Stage 4 - Parallel back-substitution to get the solution */
277  istart = n-1;
278  iend = (rank == 0 ? 0 : 1);
279 
280  for (d = 0; d < ns; d++) {
281  double binv[bs2],xt[bs];
282  _MatrixInvert_ (b+(istart*ns+d)*bs2,binv,bs);
283  _MatVecMultiplySubtract_ (x+(istart*ns+d)*bs ,a+(istart*ns+d)*bs2,x +d*bs,bs);
284  _MatVecMultiplySubtract_ (x+(istart*ns+d)*bs ,c+(istart*ns+d)*bs2,xp1+d*bs,bs);
285  _MatVecMultiply_ (binv,x+(istart*ns+d)*bs,xt,bs);
286  for (j=0; j<bs; j++) x[(istart*ns+d)*bs+j]=xt[j];
287  }
288  for (i = istart-1; i > iend-1; i--) {
289  for (d = 0; d < ns; d++) {
290  double binv[bs2],xt[bs];
291  _MatrixInvert_ (b+(i*ns+d)*bs2,binv,bs);
292  _MatVecMultiplySubtract_ (x+(i*ns+d)*bs ,c+(i*ns+d)*bs2,x+((i+1)*ns+d)*bs,bs);
293  _MatVecMultiplySubtract_ (x+(i*ns+d)*bs ,a+(i*ns+d)*bs2,x+d*bs,bs);
294  _MatVecMultiply_ (binv,x+(i*ns+d)*bs,xt,bs);
295  for (j=0; j<bs; j++) x[(i*ns+d)*bs+j] = xt[j];
296  }
297  }
298 
299  /* end of stage 4 */
300  gettimeofday(&stage4,NULL);
301 
302  /* Done - now x contains the solution */
303  free(xs1);
304  free(xp1);
305 
306  /* save runtimes if needed */
307  long long walltime;
308  walltime = ((stage1.tv_sec * 1000000 + stage1.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
309  params->stage1_time = (double) walltime / 1000000.0;
310  walltime = ((stage2.tv_sec * 1000000 + stage2.tv_usec) - (stage1.tv_sec * 1000000 + stage1.tv_usec));
311  params->stage2_time = (double) walltime / 1000000.0;
312  walltime = ((stage3.tv_sec * 1000000 + stage3.tv_usec) - (stage2.tv_sec * 1000000 + stage2.tv_usec));
313  params->stage3_time = (double) walltime / 1000000.0;
314  walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (stage3.tv_sec * 1000000 + stage3.tv_usec));
315  params->stage4_time = (double) walltime / 1000000.0;
316  walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
317  params->total_time = (double) walltime / 1000000.0;
318  return(0);
319 }
double total_time
Definition: tridiagLU.h:101
Contains macros and function definitions for common matrix operations.
char reducedsolvetype[50]
Definition: tridiagLU.h:91
int blocktridiagIterJacobi(double *, double *, double *, double *, int, int, int, void *, void *)
Header file for TridiagLU.
double stage4_time
Definition: tridiagLU.h:105
double stage3_time
Definition: tridiagLU.h:104
#define _MatVecMultiplySubtract_(y, A, x, N)
Definition: matops.h:81
#define _MatrixMultiply_(A, B, C, N)
Definition: matops.h:25
int blocktridiagLU(double *a, double *b, double *c, double *x, int n, int ns, int bs, void *r, void *m)
#define _TRIDIAG_GS_
Definition: tridiagLU.h:76
double stage2_time
Definition: tridiagLU.h:103
#define _TRIDIAG_JACOBI_
Definition: tridiagLU.h:74
double stage1_time
Definition: tridiagLU.h:102
#define _MatVecMultiply_(A, x, y, N)
Definition: matops.h:67
#define _MatrixInvert_(A, B, N)
Definition: matops.h:93
#define _MatrixZero_(A, N)
Definition: matops.h:15
#define _MatrixMultiplySubtract_(C, A, B, N)
Definition: matops.h:54