exch-swp.c


/*
 * exchange-sweep.c
 *
 *   routine for one exchange-sweep-loop with overlapping of
 *      communication and computation
 *   replaces one consecutive call of exchange and sweep 
 */

#include "array.h"
#include "poisson.h"

double exchange_and_sweep1d(dmatrix a, dmatrix f, dmatrix b,
			    Decomposition oned, Grid grid) {
  int             lx, ly;
  MPI_Status      recv_status, send_status[2];
  MPI_Request     recv_req[2], send_req[2];
  int             i, j;
  int             index, it;
  double          error = 0.0;
  double          resid = 0.0;

  /*
   *   start all receives and sends  
   */
  lx  =  oned.lx;
  ly  =  oned.ly;
  
  /* extra boundary points in y are not exchanged, they are constant anyway */
  MPI_Irecv(&INDEX(a, 0, 1), ly, MPI_DOUBLE, grid.down,  1,
            grid.comm, &recv_req[0]);
  MPI_Irecv(&INDEX(a, lx+1, 1), ly, MPI_DOUBLE, grid.up, 0, 
            grid.comm, &recv_req[1]);
  MPI_Isend(&INDEX(a, 1, 1), ly, MPI_DOUBLE, grid.down, 0, 
	    grid.comm, &send_req[0]);
  MPI_Isend(&INDEX(a, lx, 1), ly, MPI_DOUBLE, grid.up,  1,
	    grid.comm, &send_req[1]);
  /*
   *   compute inner points
   */
  for (i = 2; i <= lx-1; i++) {
    for (j = 1; j <= ly; j++) {
      resid = INDEX(a, i-1, j) + INDEX(a, i, j+1) 
   	      + INDEX(a, i+1, j) + INDEX(a, i, j-1) - 4*INDEX(a, i, j)
	      - INDEX(f, i, j);
      error += fabs(resid);
      INDEX(b, i, j) = INDEX(a, i, j) + 0.25*resid;
    }
  }
  /*
   *  wait for the receives and compute corresponding edge points
   */ 
  for (it=0; it<=1; it++) {
    MPI_Waitany(2, recv_req, &index, &recv_status);
    
    /* which receive returned ? */
    if (index == 0) {
      i = 1;
    } else if (index == 1) {
      i = lx;
    } else {     /* index = MPI_UNDEFINED */
      printf("MPI_Waitany returned MPI_UNDEFINED!\n");
      MPI_Abort(MPI_COMM_WORLD, -1);
    }

    /* compute corresponding row */
    for (j = 1; j <= ly; j++) {
      resid = INDEX(a, i-1, j) + INDEX(a, i, j+1) 
  	      + INDEX(a, i+1, j) + INDEX(a, i, j-1) - 4*INDEX(a, i, j)
              - INDEX(f, i, j);
      error += fabs(resid);
      INDEX(b, i, j) = INDEX(a, i, j) + 0.25*resid;
    }
  }

  /*
   *   finally finish the sends
   */
  MPI_Waitall(2, send_req, send_status);
  
  return(error);
}

previous    contents     next

Peter Junglas 11.5.2000