HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
tridiagLU.c
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <sys/time.h>
9 #include <string.h>
10 #ifndef serial
11 #include <mpi.h>
12 #endif
13 #include <tridiagLU.h>
14 
84  double *a,
85  double *b,
86  double *c,
87  double *x,
88  int n,
89  int ns,
90  void *r,
91  void *m
92  )
93 {
94  TridiagLU *params = (TridiagLU*) r;
95  int d,i,istart,iend;
96  int rank,nproc;
97  struct timeval start,stage1,stage2,stage3,stage4;
98 
99 #ifdef serial
100  rank = 0;
101  nproc = 1;
102 #else
103  MPI_Comm *comm = (MPI_Comm*) m;
104  int ierr = 0;
105  const int nvar = 4;
106 
107  if (comm) {
108  MPI_Comm_size(*comm,&nproc);
109  MPI_Comm_rank(*comm,&rank);
110  } else {
111  rank = 0;
112  nproc = 1;
113  }
114 #endif
115 
116  if (!params) {
117  fprintf(stderr,"Error in tridiagLU(): NULL pointer passed for parameters.\n");
118  return(1);
119  }
120 
121  /* start */
122  gettimeofday(&start,NULL);
123 
124  if ((ns == 0) || (n == 0)) return(0);
125  double *xs1, *xp1;
126  xs1 = (double*) calloc (ns, sizeof(double));
127  xp1 = (double*) calloc (ns, sizeof(double));
128  for (i=0; i<ns; i++) xs1[i] = xp1[i] = 0;
129 
130  /* Stage 1 - Parallel elimination of subdiagonal entries */
131  istart = (rank == 0 ? 1 : 2);
132  iend = n;
133  for (i = istart; i < iend; i++) {
134  for (d = 0; d < ns; d++) {
135  if (b[(i-1)*ns+d] == 0) return(-1);
136  double factor = a[i*ns+d] / b[(i-1)*ns+d];
137  b[i*ns+d] -= factor * c[(i-1)*ns+d];
138  a[i*ns+d] = -factor * a[(i-1)*ns+d];
139  x[i*ns+d] -= factor * x[(i-1)*ns+d];
140  if (rank) {
141  double factor = c[d] / b[(i-1)*ns+d];
142  c[d] = -factor * c[(i-1)*ns+d];
143  b[d] -= factor * a[(i-1)*ns+d];
144  x[d] -= factor * x[(i-1)*ns+d];
145  }
146  }
147  }
148 
149  /* end of stage 1 */
150  gettimeofday(&stage1,NULL);
151 
152  /* Stage 2 - Eliminate the first sub- & super-diagonal entries */
153  /* This needs the last (a,b,c,x) from the previous process */
154 #ifndef serial
155  double *sendbuf, *recvbuf;
156  sendbuf = (double*) calloc (ns*nvar, sizeof(double));
157  recvbuf = (double*) calloc (ns*nvar, sizeof(double));
158  for (d=0; d<ns; d++) {
159  sendbuf[d*nvar+0] = a[(n-1)*ns+d];
160  sendbuf[d*nvar+1] = b[(n-1)*ns+d];
161  sendbuf[d*nvar+2] = c[(n-1)*ns+d];
162  sendbuf[d*nvar+3] = x[(n-1)*ns+d];
163  }
164  if (nproc > 1) {
165  MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
166  if (rank) MPI_Irecv(recvbuf,nvar*ns,MPI_DOUBLE,rank-1,1436,*comm,&req[0]);
167  if (rank != nproc-1) MPI_Isend(sendbuf,nvar*ns,MPI_DOUBLE,rank+1,1436,*comm,&req[1]);
168  MPI_Status status_arr[2];
169  MPI_Waitall(2,&req[0],status_arr);
170  }
171  /* The first process sits this one out */
172  if (rank) {
173  for (d = 0; d < ns; d++) {
174  double am1, bm1, cm1, xm1;
175  am1 = recvbuf[d*nvar+0];
176  bm1 = recvbuf[d*nvar+1];
177  cm1 = recvbuf[d*nvar+2];
178  xm1 = recvbuf[d*nvar+3];
179  double factor;
180  if (bm1 == 0) return(-1);
181  factor = a[d] / bm1;
182  b[d] -= factor * cm1;
183  a[d] = -factor * am1;
184  x[d] -= factor * xm1;
185  if (b[(n-1)*ns+d] == 0) return(-1);
186  factor = c[d] / b[(n-1)*ns+d];
187  b[d] -= factor * a[(n-1)*ns+d];
188  c[d] = -factor * c[(n-1)*ns+d];
189  x[d] -= factor * x[(n-1)*ns+d];
190  }
191  }
192  free(sendbuf);
193  free(recvbuf);
194 #endif
195 
196  /* end of stage 2 */
197  gettimeofday(&stage2,NULL);
198 
199  /* Stage 3 - Solve the reduced (nproc-1) X (nproc-1) tridiagonal system */
200 #ifndef serial
201  if (nproc > 1) {
202  double *zero, *one;
203  zero = (double*) calloc (ns, sizeof(double));
204  one = (double*) calloc (ns, sizeof(double));
205  for (d=0; d<ns; d++) {
206  zero[d] = 0.0;
207  one [d] = 1.0;
208  }
209  if (!strcmp(params->reducedsolvetype,_TRIDIAG_GS_)) {
210  /* Solving the reduced system by gather-and-solve algorithm */
211  if (rank) ierr = tridiagLUGS(a,b,c,x,1,ns,params,comm);
212  else ierr = tridiagLUGS(zero,one,zero,zero,1,ns,params,comm);
213  if (ierr) return(ierr);
214  } else if (!strcmp(params->reducedsolvetype,_TRIDIAG_JACOBI_)) {
215  /* Solving the reduced system iteratively with the Jacobi method */
216  if (rank) ierr = tridiagIterJacobi(a,b,c,x,1,ns,params,comm);
217  else ierr = tridiagIterJacobi(zero,one,zero,zero,1,ns,params,comm);
218  }
219  free(zero);
220  free(one);
221 
222  /* Each process, get the first x of the next process */
223  MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
224  for (d=0; d<ns; d++) xs1[d] = x[d];
225  if (rank+1 < nproc) MPI_Irecv(xp1,ns,MPI_DOUBLE,rank+1,1323,*comm,&req[0]);
226  if (rank) MPI_Isend(xs1,ns,MPI_DOUBLE,rank-1,1323,*comm,&req[1]);
227  MPI_Status status_arr[2];
228  MPI_Waitall(2,&req[0],status_arr);
229  }
230 #else
231  if (nproc > 1) {
232  fprintf(stderr,"Error: nproc > 1 for a serial run!\n");
233  return(1);
234  }
235 #endif /* if not serial */
236  /* end of stage 3 */
237  gettimeofday(&stage3,NULL);
238 
239  /* Stage 4 - Parallel back-substitution to get the solution */
240  istart = n-1;
241  iend = (rank == 0 ? 0 : 1);
242 
243  for (d = 0; d < ns; d++) {
244  if (b[istart*ns+d] == 0) return(-1);
245  x[istart*ns+d] = (x[istart*ns+d]-a[istart*ns+d]*x[d]-c[istart*ns+d]*xp1[d]) / b[istart*ns+d];
246  }
247  for (i = istart-1; i > iend-1; i--) {
248  for (d = 0; d < ns; d++) {
249  if (b[i*ns+d] == 0) return(-1);
250  x[i*ns+d] = (x[i*ns+d]-c[i*ns+d]*x[(i+1)*ns+d]-a[i*ns+d]*x[d]) / b[i*ns+d];
251  }
252  }
253 
254  /* end of stage 4 */
255  gettimeofday(&stage4,NULL);
256 
257  /* Done - now x contains the solution */
258  free(xs1);
259  free(xp1);
260 
261  /* save runtimes if needed */
262  long long walltime;
263  walltime = ((stage1.tv_sec * 1000000 + stage1.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
264  params->stage1_time = (double) walltime / 1000000.0;
265  walltime = ((stage2.tv_sec * 1000000 + stage2.tv_usec) - (stage1.tv_sec * 1000000 + stage1.tv_usec));
266  params->stage2_time = (double) walltime / 1000000.0;
267  walltime = ((stage3.tv_sec * 1000000 + stage3.tv_usec) - (stage2.tv_sec * 1000000 + stage2.tv_usec));
268  params->stage3_time = (double) walltime / 1000000.0;
269  walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (stage3.tv_sec * 1000000 + stage3.tv_usec));
270  params->stage4_time = (double) walltime / 1000000.0;
271  walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
272  params->total_time = (double) walltime / 1000000.0;
273  return(0);
274 }
double total_time
Definition: tridiagLU.h:101
char reducedsolvetype[50]
Definition: tridiagLU.h:91
Header file for TridiagLU.
double stage4_time
Definition: tridiagLU.h:105
double stage3_time
Definition: tridiagLU.h:104
int tridiagLUGS(double *, double *, double *, double *, int, int, void *, void *)
Definition: tridiagLUGS.c:64
int tridiagIterJacobi(double *, double *, double *, double *, int, int, void *, void *)
#define _TRIDIAG_GS_
Definition: tridiagLU.h:76
double stage2_time
Definition: tridiagLU.h:103
int tridiagLU(double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m)
Definition: tridiagLU.c:83
#define _TRIDIAG_JACOBI_
Definition: tridiagLU.h:74
double stage1_time
Definition: tridiagLU.h:102