HyPar  1.0
Finite-Difference Hyperbolic-Parabolic PDE Solver on Cartesian Grids
tridiagLU.c File Reference

Solve tridiagonal systems of equations using parallel LU decomposition.

#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <string.h>
#include <mpi.h>
#include <tridiagLU.h>


Functions

int tridiagLU (double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m)
 

Detailed Description

Solve tridiagonal systems of equations using parallel LU decomposition.

Author
Debojyoti Ghosh

Definition in file tridiagLU.c.

Function Documentation

◆ tridiagLU()

int tridiagLU ( double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m )

Solve tridiagonal (non-periodic) systems of equations using parallel LU decomposition: this function can solve multiple independent systems with one call. The systems need not share the same left- or right-hand sides. The function implements the iterative substructuring method, which can be briefly described through the following four stages (a serial sketch of the degenerate one-process case follows the list):

  • Stage 1: Parallel elimination of the tridiagonal blocks on each processor, comprising all points of the subdomain except the 1st point (unless it is the 1st global point, i.e., a physical boundary).
  • Stage 2: Elimination of the 1st row on each processor (except the 1st processor) using the last row of the previous processor.
  • Stage 3: Solution of the reduced tridiagonal system that represents the coupling of the system across the processors, using tridiagLUGS() (gather-and-solve) or tridiagIterJacobi() (iterative Jacobi) in this implementation, depending on the reducedsolvetype setting.
  • Stage 4: Backward-solve to obtain the final solution.
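
In the single-process, single-system case (nproc = 1, ns = 1), stages 2 and 3 are empty, and stages 1 and 4 reduce to the classical Thomas algorithm. The following is a minimal serial sketch of that special case, for illustration only; it is not this file's implementation:

#include <stdio.h>

/* Serial Thomas algorithm for one tridiagonal system: the
   nproc = 1, ns = 1 special case of stages 1 and 4 below.
   a: sub-diagonal, b: diagonal, c: super-diagonal;
   x holds the RHS on entry and the solution on exit.
   Like tridiagLU(), this overwrites its inputs. */
int thomas_serial(double *a, double *b, double *c, double *x, int n)
{
  /* stage-1 analogue: forward elimination of the sub-diagonal */
  for (int i = 1; i < n; i++) {
    if (b[i-1] == 0) return -1;
    double factor = a[i] / b[i-1];
    b[i] -= factor * c[i-1];
    x[i] -= factor * x[i-1];
  }
  /* stage-4 analogue: back-substitution */
  if (b[n-1] == 0) return -1;
  x[n-1] /= b[n-1];
  for (int i = n-2; i >= 0; i--)
    x[i] = (x[i] - c[i]*x[i+1]) / b[i];
  return 0;
}

int main(void)
{
  /* 5-point example: tridiag(-1, 2, -1) with RHS of all ones */
  double a[] = {0,-1,-1,-1,-1}, b[] = {2,2,2,2,2};
  double c[] = {-1,-1,-1,-1,0}, x[] = {1,1,1,1,1};
  if (!thomas_serial(a,b,c,x,5))
    for (int i = 0; i < 5; i++) printf("x[%d] = %f\n", i, x[i]);
  return 0;
}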

Specific details of the method implemented here are available in:

  • D. Ghosh, E. M. Constantinescu, and J. Brown, "Efficient Implementation of Nonlinear Compact Schemes on Massively Parallel Platforms", SIAM J. Sci. Comput., 37 (3), 2015, C354–C383.

More references on this class of parallel tridiagonal solvers:

  • E. Polizzi and A. H. Sameh, "A parallel hybrid banded system solver: The SPIKE algorithm", Parallel Comput., 32 (2006), pp. 177–194.
  • E. Polizzi and A. H. Sameh, "SPIKE: A parallel environment for solving banded linear systems", Comput. & Fluids, 36 (2007), pp. 113–120.

Array layout: The arguments a, b, c, and x are local 1D arrays (containing this processor's portion of the subdiagonal, diagonal, superdiagonal, and right-hand side, respectively) of size n × ns, where n is the local size of the system and ns is the number of independent systems to solve. The ordering of the elements in these arrays is as follows:

  • Elements of the same row for each of the independent systems are stored adjacent to each other.

For example, consider the following systems:

\begin{equation} \left[\begin{array}{ccccc} b_0^k & c_0^k & & & \\ a_1^k & b_1^k & c_1^k & & \\ & a_2^k & b_2^k & c_2^k & \\ & & a_3^k & b_3^k & c_3^k \\ & & & a_4^k & b_4^k \end{array}\right] \left[\begin{array}{c} x_0^k \\ x_1^k \\ x_2^k \\ x_3^k \\ x_4^k \end{array}\right] = \left[\begin{array}{c} r_0^k \\ r_1^k \\ r_2^k \\ r_3^k \\ r_4^k \end{array}\right]; \ \ k = 1,\cdots,ns \end{equation}

and let \( ns = 3\). Note that in the code, \(x\) and \(r\) are the same array x.

Then, the array b must be a 1D array with the following layout of elements:
[
b_0^0, b_0^1, b_0^2, (diagonal element of the first row in each system)
b_1^0, b_1^1, b_1^2, (diagonal element of the second row in each system)
...,
b_{n-1}^0, b_{n-1}^1, b_{n-1}^2 (diagonal element of the last row in each system)
]
The arrays a, c, and x are stored similarly.
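
Equivalently, the entry of row i belonging to system d is at index i*ns+d, which is the indexing used throughout the function body below. A minimal sketch of filling such an array (the values are arbitrary):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
  int n = 5, ns = 3;  /* local size 5, three independent systems */
  double *b = (double*) calloc(n*ns, sizeof(double));

  /* row i of system d lives at index i*ns + d, so the ns entries
     of a given row are adjacent in memory */
  for (int i = 0; i < n; i++)
    for (int d = 0; d < ns; d++)
      b[i*ns+d] = 2.0 + 0.1*d;  /* arbitrary value for b_i^d */

  printf("b_2^1 is b[2*ns+1] = %f\n", b[2*ns+1]);
  free(b);
  return 0;
}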

Notes:

  • This function does not preserve the sub-diagonal, diagonal, and super-diagonal elements, or the right-hand sides; they are overwritten.
  • The input array x contains the right-hand-side on entering the function, and the solution on exiting it.
Parameters
    a	Array containing the sub-diagonal elements
    b	Array containing the diagonal elements
    c	Array containing the super-diagonal elements
    x	Right-hand side; will contain the solution on exit
    n	Local size of the system on this processor
    ns	Number of systems to solve
    r	Object of type TridiagLU
    m	MPI communicator
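
A minimal usage sketch (not part of this file): it assumes tridiagLUInit() (declared in tridiagLU.h) fills the TridiagLU object with default solver options; the diagonally dominant test values are arbitrary. Since tridiagLU() overwrites a, b, c, and x, copies must be made beforehand if the coefficients are needed later.

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <tridiagLU.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  MPI_Comm comm = MPI_COMM_WORLD;
  int rank, nproc;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &nproc);

  int n = 10, ns = 2;  /* local system size, number of systems */
  double *a = (double*) calloc(n*ns, sizeof(double));
  double *b = (double*) calloc(n*ns, sizeof(double));
  double *c = (double*) calloc(n*ns, sizeof(double));
  double *x = (double*) calloc(n*ns, sizeof(double));

  /* arbitrary diagonally dominant test system, RHS = 1 */
  for (int i = 0; i < n; i++) {
    for (int d = 0; d < ns; d++) {
      a[i*ns+d] = -1.0;
      b[i*ns+d] =  4.0;
      c[i*ns+d] = -1.0;
      x[i*ns+d] =  1.0;
    }
  }
  /* coefficients outside the global matrix (physical boundaries) are zero */
  for (int d = 0; d < ns; d++) {
    if (rank == 0)       a[d]          = 0.0;
    if (rank == nproc-1) c[(n-1)*ns+d] = 0.0;
  }

  TridiagLU params;
  tridiagLUInit(&params, &comm);  /* assumption: sets default solver options */

  int ierr = tridiagLU(a, b, c, x, n, ns, &params, &comm);
  if (ierr) fprintf(stderr, "tridiagLU() returned %d on rank %d\n", ierr, rank);

  free(a); free(b); free(c); free(x);
  MPI_Finalize();
  return 0;
}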

Definition at line 83 of file tridiagLU.c.

{
  TridiagLU *params = (TridiagLU*) r;
  int d,i,istart,iend;
  int rank,nproc;
  struct timeval start,stage1,stage2,stage3,stage4;

#ifdef serial
  rank  = 0;
  nproc = 1;
#else
  MPI_Comm *comm = (MPI_Comm*) m;
  int ierr = 0;
  const int nvar = 4;

  if (comm) {
    MPI_Comm_size(*comm,&nproc);
    MPI_Comm_rank(*comm,&rank);
  } else {
    rank  = 0;
    nproc = 1;
  }
#endif

  if (!params) {
    fprintf(stderr,"Error in tridiagLU(): NULL pointer passed for parameters.\n");
    return(1);
  }

  /* start */
  gettimeofday(&start,NULL);

  if ((ns == 0) || (n == 0)) return(0);
  double *xs1, *xp1;
  xs1 = (double*) calloc (ns, sizeof(double));
  xp1 = (double*) calloc (ns, sizeof(double));
  for (i=0; i<ns; i++) xs1[i] = xp1[i] = 0;

  /* Stage 1 - Parallel elimination of subdiagonal entries */
  istart = (rank == 0 ? 1 : 2);
  iend   = n;
  for (i = istart; i < iend; i++) {
    for (d = 0; d < ns; d++) {
      if (b[(i-1)*ns+d] == 0) return(-1);
      double factor = a[i*ns+d] / b[(i-1)*ns+d];
      b[i*ns+d] -=  factor * c[(i-1)*ns+d];
      a[i*ns+d]  = -factor * a[(i-1)*ns+d];
      x[i*ns+d] -=  factor * x[(i-1)*ns+d];
      if (rank) {
        double factor = c[d] / b[(i-1)*ns+d];
        c[d]  = -factor * c[(i-1)*ns+d];
        b[d] -=  factor * a[(i-1)*ns+d];
        x[d] -=  factor * x[(i-1)*ns+d];
      }
    }
  }

  /* end of stage 1 */
  gettimeofday(&stage1,NULL);

  /* Stage 2 - Eliminate the first sub- & super-diagonal entries */
  /* This needs the last (a,b,c,x) from the previous process     */
#ifndef serial
  double *sendbuf, *recvbuf;
  sendbuf = (double*) calloc (ns*nvar, sizeof(double));
  recvbuf = (double*) calloc (ns*nvar, sizeof(double));
  for (d=0; d<ns; d++) {
    sendbuf[d*nvar+0] = a[(n-1)*ns+d];
    sendbuf[d*nvar+1] = b[(n-1)*ns+d];
    sendbuf[d*nvar+2] = c[(n-1)*ns+d];
    sendbuf[d*nvar+3] = x[(n-1)*ns+d];
  }
  if (nproc > 1) {
    MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
    if (rank)            MPI_Irecv(recvbuf,nvar*ns,MPI_DOUBLE,rank-1,1436,*comm,&req[0]);
    if (rank != nproc-1) MPI_Isend(sendbuf,nvar*ns,MPI_DOUBLE,rank+1,1436,*comm,&req[1]);
    MPI_Status status_arr[2];
    MPI_Waitall(2,&req[0],status_arr);
  }
  /* The first process sits this one out */
  if (rank) {
    for (d = 0; d < ns; d++) {
      double am1, bm1, cm1, xm1;
      am1 = recvbuf[d*nvar+0];
      bm1 = recvbuf[d*nvar+1];
      cm1 = recvbuf[d*nvar+2];
      xm1 = recvbuf[d*nvar+3];
      double factor;
      if (bm1 == 0) return(-1);
      factor = a[d] / bm1;
      b[d] -=  factor * cm1;
      a[d]  = -factor * am1;
      x[d] -=  factor * xm1;
      if (b[(n-1)*ns+d] == 0) return(-1);
      factor = c[d] / b[(n-1)*ns+d];
      b[d] -=  factor * a[(n-1)*ns+d];
      c[d]  = -factor * c[(n-1)*ns+d];
      x[d] -=  factor * x[(n-1)*ns+d];
    }
  }
  free(sendbuf);
  free(recvbuf);
#endif

  /* end of stage 2 */
  gettimeofday(&stage2,NULL);

  /* Stage 3 - Solve the reduced (nproc-1) X (nproc-1) tridiagonal system */
#ifndef serial
  if (nproc > 1) {
    double *zero, *one;
    zero = (double*) calloc (ns, sizeof(double));
    one  = (double*) calloc (ns, sizeof(double));
    for (d=0; d<ns; d++) {
      zero[d] = 0.0;
      one [d] = 1.0;
    }
    if (!strcmp(params->reducedsolvetype,_TRIDIAG_GS_)) {
      /* Solving the reduced system by gather-and-solve algorithm */
      if (rank) ierr = tridiagLUGS(a,b,c,x,1,ns,params,comm);
      else      ierr = tridiagLUGS(zero,one,zero,zero,1,ns,params,comm);
      if (ierr) return(ierr);
    } else if (!strcmp(params->reducedsolvetype,_TRIDIAG_JACOBI_)) {
      /* Solving the reduced system iteratively with the Jacobi method */
      if (rank) ierr = tridiagIterJacobi(a,b,c,x,1,ns,params,comm);
      else      ierr = tridiagIterJacobi(zero,one,zero,zero,1,ns,params,comm);
    }
    free(zero);
    free(one);

    /* Each process, get the first x of the next process */
    MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
    for (d=0; d<ns; d++) xs1[d] = x[d];
    if (rank+1 < nproc) MPI_Irecv(xp1,ns,MPI_DOUBLE,rank+1,1323,*comm,&req[0]);
    if (rank)           MPI_Isend(xs1,ns,MPI_DOUBLE,rank-1,1323,*comm,&req[1]);
    MPI_Status status_arr[2];
    MPI_Waitall(2,&req[0],status_arr);
  }
#else
  if (nproc > 1) {
    fprintf(stderr,"Error: nproc > 1 for a serial run!\n");
    return(1);
  }
#endif /* if not serial */
  /* end of stage 3 */
  gettimeofday(&stage3,NULL);

  /* Stage 4 - Parallel back-substitution to get the solution */
  istart = n-1;
  iend   = (rank == 0 ? 0 : 1);

  for (d = 0; d < ns; d++) {
    if (b[istart*ns+d] == 0) return(-1);
    x[istart*ns+d] = (x[istart*ns+d]-a[istart*ns+d]*x[d]-c[istart*ns+d]*xp1[d]) / b[istart*ns+d];
  }
  for (i = istart-1; i > iend-1; i--) {
    for (d = 0; d < ns; d++) {
      if (b[i*ns+d] == 0) return(-1);
      x[i*ns+d] = (x[i*ns+d]-c[i*ns+d]*x[(i+1)*ns+d]-a[i*ns+d]*x[d]) / b[i*ns+d];
    }
  }

  /* end of stage 4 */
  gettimeofday(&stage4,NULL);

  /* Done - now x contains the solution */
  free(xs1);
  free(xp1);

  /* save runtimes if needed */
  long long walltime;
  walltime = ((stage1.tv_sec * 1000000 + stage1.tv_usec) - (start.tv_sec  * 1000000 + start.tv_usec ));
  params->stage1_time = (double) walltime / 1000000.0;
  walltime = ((stage2.tv_sec * 1000000 + stage2.tv_usec) - (stage1.tv_sec * 1000000 + stage1.tv_usec));
  params->stage2_time = (double) walltime / 1000000.0;
  walltime = ((stage3.tv_sec * 1000000 + stage3.tv_usec) - (stage2.tv_sec * 1000000 + stage2.tv_usec));
  params->stage3_time = (double) walltime / 1000000.0;
  walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (stage3.tv_sec * 1000000 + stage3.tv_usec));
  params->stage4_time = (double) walltime / 1000000.0;
  walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (start.tv_sec  * 1000000 + start.tv_usec ));
  params->total_time  = (double) walltime / 1000000.0;
  return(0);
}
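
After a successful call, the per-stage wall times (in seconds) recorded in the TridiagLU object by the assignments at the end of the function can be inspected. Continuing the usage sketch above (with its params object):

/* report the stage timings stored by tridiagLU() */
printf("tridiagLU times: stage1 %f, stage2 %f, stage3 %f, stage4 %f, total %f\n",
       params.stage1_time, params.stage2_time, params.stage3_time,
       params.stage4_time, params.total_time);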