commit 6ced3abfff5224d46f8359a6fdd293eeff936b22
parent a383ae33f19e6bf8ec03dcf6b89242a7a42f779d
Author: Fabian Wermelinger <fabianw@mavt.ethz.ch>
Date: Wed, 30 Nov 2016 19:30:22 +0100
finished MPI implementation of iso extractor
Diffstat:
2 files changed, 485 insertions(+), 84 deletions(-)
diff --git a/src/InterpolatorMPI.h b/src/InterpolatorMPI.h
@@ -9,6 +9,7 @@
#include <cassert>
#include <vector>
#include <cmath>
+#include <functional>
#include <mpi.h>
#include "Interpolator.h"
@@ -161,24 +162,51 @@ private:
void _load_wavelet_MPI(ArgumentParser& p);
// halo handling
+ using func_t = std::function<void(const int, const int, const int)>;
void _fetch_halo(const MPI_Comm comm);
void _send_faces(const MPI_Comm comm, const MPInbr& nbrs, const Faces& f, std::vector<MPI_Request>& f_req) const;
void _recv_faces(const MPI_Comm comm, const MPInbr& nbrs, Faces& f, std::vector<MPI_Request>& f_req) const;
- void _pack_faces(Faces& f, const MPInbr& nbr) const;
+ void _pack_faces(Faces& f, const MPInbr& nbr);
void _unpack_faces(const Faces& f, const MPInbr& nbr);
- inline void _pack_faceX(Face_t& f, const int _s) const;
- inline void _pack_faceY(Face_t& f, const int _s) const;
- inline void _pack_faceZ(Face_t& f, const int _s) const;
- inline void _upack_faceX(const Face_t& f, const int _d);
- inline void _upack_faceY(const Face_t& f, const int _d);
- inline void _upack_faceZ(const Face_t& f, const int _d);
- inline void _set_halo_faceX(const int _s, const int _d);
- inline void _set_halo_faceY(const int _s, const int _d);
- inline void _set_halo_faceZ(const int _s, const int _d);
-
- void _send_edges();
- void _send_corners();
+ inline void _process_faceX(func_t& eval);
+ inline void _process_faceY(func_t& eval);
+ inline void _process_faceZ(func_t& eval);
+ inline void _pack_faceX(Face_t& f, const int _s0);
+ inline void _pack_faceY(Face_t& f, const int _s0);
+ inline void _pack_faceZ(Face_t& f, const int _s0);
+ inline void _upack_faceX(const Face_t& f, const int _d0);
+ inline void _upack_faceY(const Face_t& f, const int _d0);
+ inline void _upack_faceZ(const Face_t& f, const int _d0);
+ inline void _halo_faceX(const int _s0, const int _d0);
+ inline void _halo_faceY(const int _s0, const int _d0);
+ inline void _halo_faceZ(const int _s0, const int _d0);
+
+ void _send_edges(const MPI_Comm comm, const MPInbr& nbrs, const Edges& e, std::vector<MPI_Request>& e_req) const;
+ void _recv_edges(const MPI_Comm comm, const MPInbr& nbrs, Edges& e, std::vector<MPI_Request>& e_req) const;
+ void _pack_edges(Edges& f, const MPInbr& nbr);
+ void _unpack_edges(const Edges& e, const MPInbr& nbr);
+ inline void _process_edgeX(func_t& eval);
+ inline void _process_edgeY(func_t& eval);
+ inline void _process_edgeZ(func_t& eval);
+ inline void _pack_edgeX(Edge_t& e, const int _s0, const int _s1);
+ inline void _pack_edgeY(Edge_t& e, const int _s0, const int _s1);
+ inline void _pack_edgeZ(Edge_t& e, const int _s0, const int _s1);
+ inline void _upack_edgeX(const Edge_t& e, const int _d0, const int _d1);
+ inline void _upack_edgeY(const Edge_t& e, const int _d0, const int _d1);
+ inline void _upack_edgeZ(const Edge_t& e, const int _d0, const int _d1);
+ inline void _halo_edgeX(const int _s0, const int _s1, const int _d0, const int _d1);
+ inline void _halo_edgeY(const int _s0, const int _s1, const int _d0, const int _d1);
+ inline void _halo_edgeZ(const int _s0, const int _s1, const int _d0, const int _d1);
+
+ void _send_corners(const MPI_Comm comm, const MPInbr& nbrs, const Corners& c, std::vector<MPI_Request>& c_req) const;
+ void _recv_corners(const MPI_Comm comm, const MPInbr& nbrs, Corners& c, std::vector<MPI_Request>& c_req) const;
+ void _pack_corners(Corners& c, const MPInbr& nbr);
+ void _unpack_corners(const Corners& c, const MPInbr& nbr);
+ inline void _process_corner(func_t& eval);
+ inline void _pack_corner(Corner_t& c, const int _s0, const int _s1, const int _s2);
+ inline void _upack_corner(const Corner_t& c, const int _d0, const int _d1, const int _d2);
+ inline void _halo_corner(const int _s0, const int _s1, const int _s2, const int _d0, const int _d1, const int _d2);
};
@@ -302,71 +330,79 @@ void InterpolatorMPI<Tinterp>::_load_wavelet_MPI(ArgumentParser& p)
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_fetch_halo(const MPI_Comm comm)
{
- int myrank;
- MPI_Comm_rank(comm, &myrank);
- MPInbr mynbrs(comm);
+ // concept:
+ // 0.) prepare neighbor info
+ // 1.) pack buffers and send asynchronous messages
+ // 2.) unpack buffers
- // int myrank;
- // int size;
- // MPI_Comm_rank(comm, &myrank);
- // MPI_Comm_size(comm, &size);
-
- // for (int i=0; i<size; ++i)
- // {
- // if (i==myrank)
- // {
- // std::cout << "Rank " << myrank << " nbrs" << std::endl;
- // for (int i=0; i<27; ++i)
- // {
- // int off[3] = {(i%3)-1, ((i/3)%3)-1, (i/9)-1};
- // std::cout << mynbrs(off[0], off[1], off[2]) << " ";
- // }
- // std::cout << std::endl << std::endl;
- // }
- // MPI_Barrier(comm);
- // }
+ // 0.)
+ MPInbr mynbrs(comm);
// 1.)
const int Nx = this->m_data.Nx();
const int Ny = this->m_data.Ny();
const int Nz = this->m_data.Nz();
- // recv faces
+ // faces
std::vector<MPI_Request> fpend_recv;
Faces frecv(Ny,Nz, Nx,Nz, Nx,Ny);
_recv_faces(comm, mynbrs, frecv, fpend_recv);
- // send faces
std::vector<MPI_Request> fpend_send;
Faces fsend(Ny,Nz, Nx,Nz, Nx,Ny);
_pack_faces(fsend, mynbrs);
_send_faces(comm, mynbrs, fsend, fpend_send);
+ // edges
+ std::vector<MPI_Request> epend_recv;
+ Edges erecv(Nx, Ny, Nz);
+ _recv_edges(comm, mynbrs, erecv, epend_recv);
+
+ std::vector<MPI_Request> epend_send;
+ Edges esend(Nx, Ny, Nz);
+ _pack_edges(esend, mynbrs);
+ _send_edges(comm, mynbrs, esend, epend_send);
+
+ // corners
+ std::vector<MPI_Request> cpend_recv;
+ Corners crecv;
+ _recv_corners(comm, mynbrs, crecv, cpend_recv);
+
+ std::vector<MPI_Request> cpend_send;
+ Corners csend;
+ _pack_corners(csend, mynbrs);
+ _send_corners(comm, mynbrs, csend, cpend_send);
+
+ // 2.)
std::vector<MPI_Status> fstat_recv(fpend_recv.size());
MPI_Waitall(fpend_recv.size(), fpend_recv.data(), fstat_recv.data());
_unpack_faces(frecv, mynbrs);
- // // send edges
- // Edges esend;
+ std::vector<MPI_Status> estat_recv(epend_recv.size());
+ MPI_Waitall(epend_recv.size(), epend_recv.data(), estat_recv.data());
+ _unpack_edges(erecv, mynbrs);
- // // send corners
- // Corners csend;
+ std::vector<MPI_Status> cstat_recv(cpend_recv.size());
+ MPI_Waitall(cpend_recv.size(), cpend_recv.data(), cstat_recv.data());
+ _unpack_corners(crecv, mynbrs);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_send_faces(const MPI_Comm comm, const MPInbr& nbrs, const Faces& f, std::vector<MPI_Request>& f_req) const
{
- const int me = nbrs(0,0,0);
const int dst[6] = {nbrs(-1,0,0), nbrs(1,0,0), nbrs(0,-1,0), nbrs(0,1,0), nbrs(0,0,-1), nbrs(0,0,1)};
const Real* const data[6] = {f.x0.data, f.x1.data, f.y0.data, f.y1.data, f.z0.data, f.z1.data};
const int Nelem[6] = {f.x0._Nelem, f.x1._Nelem, f.y0._Nelem, f.y1._Nelem, f.z0._Nelem, f.z1._Nelem};
+ const int tagbase = 0;
for (int i = 0; i < 6; ++i)
{
+ const int dir = i/2;
+ const int side = i%2;
if (dst[i] >= 0)
{
MPI_Request req;
- MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], me, comm, &req);
+ MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], tagbase+dir*2+side, comm, &req);
f_req.push_back(req);
}
}
@@ -379,12 +415,15 @@ void InterpolatorMPI<Tinterp>::_recv_faces(const MPI_Comm comm, const MPInbr& nb
const int src[6] = {nbrs(-1,0,0), nbrs(1,0,0), nbrs(0,-1,0), nbrs(0,1,0), nbrs(0,0,-1), nbrs(0,0,1)};
Real* const data[6] = {f.x0.data, f.x1.data, f.y0.data, f.y1.data, f.z0.data, f.z1.data};
const int Nelem[6] = {f.x0._Nelem, f.x1._Nelem, f.y0._Nelem, f.y1._Nelem, f.z0._Nelem, f.z1._Nelem};
+ const int tagbase = 0;
for (int i = 0; i < 6; ++i)
{
+ const int dir = i/2;
+ const int side = (i+1)%2;
if (src[i] >= 0)
{
MPI_Request req;
- MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], src[i], comm, &req);
+ MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], tagbase+dir*2+side, comm, &req);
f_req.push_back(req);
}
}
@@ -392,7 +431,125 @@ void InterpolatorMPI<Tinterp>::_recv_faces(const MPI_Comm comm, const MPInbr& nb
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_pack_faces(Faces& f, const MPInbr& nbr) const
+void InterpolatorMPI<Tinterp>::_send_edges(const MPI_Comm comm, const MPInbr& nbrs, const Edges& e, std::vector<MPI_Request>& e_req) const
+{
+ const int dstX[4] = {nbrs(0,-1,-1), nbrs(0,1,-1), nbrs(0,1,1), nbrs(0,-1,1)};
+ const int dstY[4] = {nbrs(-1,0,-1), nbrs(-1,0,1), nbrs(1,0,1), nbrs(1,0,-1)};
+ const int dstZ[4] = {nbrs(-1,-1,0), nbrs(1,-1,0), nbrs(1,1,0), nbrs(-1,1,0)};
+ const Real* const dataX[4] = {e.x0.data, e.x1.data, e.x2.data, e.x3.data};
+ const Real* const dataY[4] = {e.y0.data, e.y1.data, e.y2.data, e.y3.data};
+ const Real* const dataZ[4] = {e.z0.data, e.z1.data, e.z2.data, e.z3.data};
+ const int NelemX[4] = {e.x0._Nelem, e.x1._Nelem, e.x2._Nelem, e.x3._Nelem};
+ const int NelemY[4] = {e.y0._Nelem, e.y1._Nelem, e.y2._Nelem, e.y3._Nelem};
+ const int NelemZ[4] = {e.z0._Nelem, e.z1._Nelem, e.z2._Nelem, e.z3._Nelem};
+ const int tagbase = 6;
+ for (int i = 0; i < 4; ++i)
+ {
+ const int edge = i;
+ if (dstX[i] >= 0)
+ {
+ MPI_Request req;
+ MPI_Isend(dataX[i], NelemX[i], MPIREAL, dstX[i], tagbase+0*4+edge, comm, &req);
+ e_req.push_back(req);
+ }
+ if (dstY[i] >= 0)
+ {
+ MPI_Request req;
+ MPI_Isend(dataY[i], NelemY[i], MPIREAL, dstY[i], tagbase+1*4+edge, comm, &req);
+ e_req.push_back(req);
+ }
+ if (dstZ[i] >= 0)
+ {
+ MPI_Request req;
+ MPI_Isend(dataZ[i], NelemZ[i], MPIREAL, dstZ[i], tagbase+2*4+edge, comm, &req);
+ e_req.push_back(req);
+ }
+ }
+}
+
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_recv_edges(const MPI_Comm comm, const MPInbr& nbrs, Edges& e, std::vector<MPI_Request>& e_req) const
+{
+ const int srcX[4] = {nbrs(0,-1,-1), nbrs(0,1,-1), nbrs(0,1,1), nbrs(0,-1,1)};
+ const int srcY[4] = {nbrs(-1,0,-1), nbrs(-1,0,1), nbrs(1,0,1), nbrs(1,0,-1)};
+ const int srcZ[4] = {nbrs(-1,-1,0), nbrs(1,-1,0), nbrs(1,1,0), nbrs(-1,1,0)};
+ Real* const dataX[4] = {e.x0.data, e.x1.data, e.x2.data, e.x3.data};
+ Real* const dataY[4] = {e.y0.data, e.y1.data, e.y2.data, e.y3.data};
+ Real* const dataZ[4] = {e.z0.data, e.z1.data, e.z2.data, e.z3.data};
+ const int NelemX[4] = {e.x0._Nelem, e.x1._Nelem, e.x2._Nelem, e.x3._Nelem};
+ const int NelemY[4] = {e.y0._Nelem, e.y1._Nelem, e.y2._Nelem, e.y3._Nelem};
+ const int NelemZ[4] = {e.z0._Nelem, e.z1._Nelem, e.z2._Nelem, e.z3._Nelem};
+ const int tagbase = 6;
+ for (int i = 0; i < 4; ++i)
+ {
+ const int edge = (i+2)%4;
+ if (srcX[i] >= 0)
+ {
+ MPI_Request req;
+ MPI_Irecv(dataX[i], NelemX[i], MPIREAL, srcX[i], tagbase+0*4+edge, comm, &req);
+ e_req.push_back(req);
+ }
+ if (srcY[i] >= 0)
+ {
+ MPI_Request req;
+ MPI_Irecv(dataY[i], NelemY[i], MPIREAL, srcY[i], tagbase+1*4+edge, comm, &req);
+ e_req.push_back(req);
+ }
+ if (srcZ[i] >= 0)
+ {
+ MPI_Request req;
+ MPI_Irecv(dataZ[i], NelemZ[i], MPIREAL, srcZ[i], tagbase+2*4+edge, comm, &req);
+ e_req.push_back(req);
+ }
+ }
+}
+
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_send_corners(const MPI_Comm comm, const MPInbr& nbrs, const Corners& c, std::vector<MPI_Request>& c_req) const
+{
+ const int dst[8] = {nbrs(-1,-1,-1), nbrs(-1,1,-1), nbrs(-1,1,1), nbrs(-1,-1,1), nbrs(1,-1,-1), nbrs(1,1,-1), nbrs(1,1,1), nbrs(1,-1,1)};
+ const Real* const data[8] = {c.x00.data, c.x01.data, c.x02.data, c.x03.data, c.x10.data, c.x11.data, c.x12.data, c.x13.data};
+ const int Nelem[8] = {c.x00._Nelem, c.x01._Nelem, c.x02._Nelem, c.x03._Nelem, c.x10._Nelem, c.x11._Nelem, c.x12._Nelem, c.x13._Nelem};
+ const int tagbase = 18;
+ for (int i = 0; i < 8; ++i)
+ {
+ const int cycl = i%4;
+ const int side = i/4;
+ if (dst[i] >= 0)
+ {
+ MPI_Request req;
+ MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], tagbase+side*4+cycl, comm, &req);
+ c_req.push_back(req);
+ }
+ }
+}
+
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_recv_corners(const MPI_Comm comm, const MPInbr& nbrs, Corners& c, std::vector<MPI_Request>& c_req) const
+{
+ const int src[8] = {nbrs(-1,-1,-1), nbrs(-1,1,-1), nbrs(-1,1,1), nbrs(-1,-1,1), nbrs(1,-1,-1), nbrs(1,1,-1), nbrs(1,1,1), nbrs(1,-1,1)};
+ Real* const data[8] = {c.x00.data, c.x01.data, c.x02.data, c.x03.data, c.x10.data, c.x11.data, c.x12.data, c.x13.data};
+ const int Nelem[8] = {c.x00._Nelem, c.x01._Nelem, c.x02._Nelem, c.x03._Nelem, c.x10._Nelem, c.x11._Nelem, c.x12._Nelem, c.x13._Nelem};
+ const int tagbase = 18;
+ for (int i = 0; i < 8; ++i)
+ {
+ const int cycl = (i+2)%4;
+ const int side = (7-i)/4;
+ if (src[i] >= 0)
+ {
+ MPI_Request req;
+ MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], tagbase+side*4+cycl, comm, &req);
+ c_req.push_back(req);
+ }
+ }
+}
+
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_pack_faces(Faces& f, const MPInbr& nbr)
{
const int Nx = this->m_data.Nx();
const int Ny = this->m_data.Ny();
@@ -417,19 +574,125 @@ void InterpolatorMPI<Tinterp>::_unpack_faces(const Faces& f, const MPInbr& nbr)
const int Nz = this->m_data.Nz();
if (nbr(-1,0,0) >= 0) _upack_faceX(f.x0, -Face_t::_P);
- else _set_halo_faceX(0, -Face_t::_P);
+ else _halo_faceX(0, -Face_t::_P);
if (nbr(1,0,0) >= 0) _upack_faceX(f.x1, Nx);
- else _set_halo_faceX(Nx-1, Nx);
+ else _halo_faceX(Nx-1, Nx);
if (nbr(0,-1,0) >= 0) _upack_faceY(f.y0, -Face_t::_P);
- else _set_halo_faceY(0, -Face_t::_P);
+ else _halo_faceY(0, -Face_t::_P);
if (nbr(0,1,0) >= 0) _upack_faceY(f.y1, Ny);
- else _set_halo_faceY(Ny-1, Ny);
+ else _halo_faceY(Ny-1, Ny);
if (nbr(0,0,-1) >= 0) _upack_faceZ(f.z0, -Face_t::_P);
- else _set_halo_faceZ(0, -Face_t::_P);
+ else _halo_faceZ(0, -Face_t::_P);
if (nbr(0,0,1) >= 0) _upack_faceZ(f.z1, Nz);
- else _set_halo_faceZ(Nz-1, Nz);
+ else _halo_faceZ(Nz-1, Nz);
+}
+
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_pack_edges(Edges& e, const MPInbr& nbr)
+{
+ const int Nx = this->m_data.Nx();
+ const int Ny = this->m_data.Ny();
+ const int Nz = this->m_data.Nz();
+
+ if (nbr(0,-1,-1) >= 0) _pack_edgeX(e.x0, 0, 0);
+ if (nbr(0,1,-1) >= 0) _pack_edgeX(e.x1, Ny-Edge_t::_P, 0);
+ if (nbr(0,1,1) >= 0) _pack_edgeX(e.x2, Ny-Edge_t::_P, Nz-Edge_t::_Q);
+ if (nbr(0,-1,1) >= 0) _pack_edgeX(e.x3, 0, Nz-Edge_t::_Q);
+
+ if (nbr(-1,0,-1) >= 0) _pack_edgeY(e.y0, 0, 0);
+ if (nbr(-1,0,1) >= 0) _pack_edgeY(e.y1, 0, Nz-Edge_t::_Q);
+ if (nbr(1,0,1) >= 0) _pack_edgeY(e.y2, Nx-Edge_t::_P, Nz-Edge_t::_Q);
+ if (nbr(1,0,-1) >= 0) _pack_edgeY(e.y3, Nx-Edge_t::_P, 0);
+
+ if (nbr(-1,-1,0) >= 0) _pack_edgeZ(e.z0, 0, 0);
+ if (nbr(1,-1,0) >= 0) _pack_edgeZ(e.z1, Nx-Edge_t::_P, 0);
+ if (nbr(1,1,0) >= 0) _pack_edgeZ(e.z2, Nx-Edge_t::_P, Ny-Edge_t::_Q);
+ if (nbr(-1,1,0) >= 0) _pack_edgeZ(e.z3, 0, Ny-Edge_t::_Q);
+}
+
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_unpack_edges(const Edges& e, const MPInbr& nbr)
+{
+ const int Nx = this->m_data.Nx();
+ const int Ny = this->m_data.Ny();
+ const int Nz = this->m_data.Nz();
+
+ if (nbr(0,-1,-1) >= 0) _upack_edgeX(e.x0, -Edge_t::_P, -Edge_t::_Q);
+ else _halo_edgeX(0, 0, -Edge_t::_P, -Edge_t::_Q);
+ if (nbr(0,1,-1) >= 0) _upack_edgeX(e.x1, Ny, -Edge_t::_Q);
+ else _halo_edgeX(Ny-1, 0, Ny, -Edge_t::_Q);
+ if (nbr(0,1,1) >= 0) _upack_edgeX(e.x2, Ny, Nz);
+ else _halo_edgeX(Ny-1, Nz-1, Ny, Nz);
+ if (nbr(0,-1,1) >= 0) _upack_edgeX(e.x3, -Edge_t::_P, Nz);
+ else _halo_edgeX(0, Nz-1, -Edge_t::_P, Nz);
+
+ if (nbr(-1,0,-1) >= 0) _upack_edgeY(e.y0, -Edge_t::_P, -Edge_t::_Q);
+ else _halo_edgeY(0, 0, -Edge_t::_P, -Edge_t::_Q);
+ if (nbr(-1,0,1) >= 0) _upack_edgeY(e.y1, -Edge_t::_P, Nz);
+ else _halo_edgeY(0, Nz-1, -Edge_t::_P, Nz);
+ if (nbr(1,0,1) >= 0) _upack_edgeY(e.y2, Nx, Nz);
+ else _halo_edgeY(Nx-1, Nz-1, Nx, Nz);
+ if (nbr(1,0,-1) >= 0) _upack_edgeY(e.y3, Nx, -Edge_t::_Q);
+ else _halo_edgeY(Nx-1, 0, Nx, -Edge_t::_Q);
+
+ if (nbr(-1,-1,0) >= 0) _upack_edgeZ(e.z0, -Edge_t::_P, -Edge_t::_Q);
+ else _halo_edgeZ(0, 0, -Edge_t::_P, -Edge_t::_Q);
+ if (nbr(1,-1,0) >= 0) _upack_edgeZ(e.z1, Nx, -Edge_t::_Q);
+ else _halo_edgeZ(Nx-1, 0, Nx, -Edge_t::_Q);
+ if (nbr(1,1,0) >= 0) _upack_edgeZ(e.z2, Nx, Ny);
+ else _halo_edgeZ(Nx-1, Ny-1, Nx, Ny);
+ if (nbr(-1,1,0) >= 0) _upack_edgeZ(e.z3, -Edge_t::_P, Ny);
+ else _halo_edgeZ(0, Ny-1, -Edge_t::_P, Ny);
+}
+
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_pack_corners(Corners& c, const MPInbr& nbr)
+{
+ const int Nx = this->m_data.Nx();
+ const int Ny = this->m_data.Ny();
+ const int Nz = this->m_data.Nz();
+
+ if (nbr(-1,-1,-1) >= 0) _pack_corner(c.x00, 0, 0, 0);
+ if (nbr(-1,1,-1) >= 0) _pack_corner(c.x01, 0, Ny-Corner_t::_Q, 0);
+ if (nbr(-1,1,1) >= 0) _pack_corner(c.x02, 0, Ny-Corner_t::_Q, Nz-Corner_t::_R);
+ if (nbr(-1,-1,1) >= 0) _pack_corner(c.x03, 0, 0, Nz-Corner_t::_R);
+
+ if (nbr(1,-1,-1) >= 0) _pack_corner(c.x10, Nx-Corner_t::_P, 0, 0);
+ if (nbr(1,1,-1) >= 0) _pack_corner(c.x11, Nx-Corner_t::_P, Ny-Corner_t::_Q, 0);
+ if (nbr(1,1,1) >= 0) _pack_corner(c.x12, Nx-Corner_t::_P, Ny-Corner_t::_Q, Nz-Corner_t::_R);
+ if (nbr(1,-1,1) >= 0) _pack_corner(c.x13, Nx-Corner_t::_P, 0, Nz-Corner_t::_R);
+}
+
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_unpack_corners(const Corners& c, const MPInbr& nbr)
+{
+ const int Nx = this->m_data.Nx();
+ const int Ny = this->m_data.Ny();
+ const int Nz = this->m_data.Nz();
+
+ if (nbr(-1,-1,-1) >= 0) _upack_corner(c.x00, -Corner_t::_P, -Corner_t::_Q, -Corner_t::_R);
+ else _halo_corner(0, 0, 0, -Corner_t::_P, -Corner_t::_Q, -Corner_t::_R);
+ if (nbr(-1,1,-1) >= 0) _upack_corner(c.x01, -Corner_t::_P, Ny, -Corner_t::_R);
+ else _halo_corner(0, Ny-1, 0, -Corner_t::_P, Ny, -Corner_t::_R);
+ if (nbr(-1,1,1) >= 0) _upack_corner(c.x02, -Corner_t::_P, Ny, Nz);
+ else _halo_corner(0, Ny-1, Nz-1, -Corner_t::_P, Ny, Nz);
+ if (nbr(-1,-1,1) >= 0) _upack_corner(c.x03, -Corner_t::_P, -Corner_t::_Q, Nz);
+ else _halo_corner(0, 0, Nz-1, -Corner_t::_P, -Corner_t::_Q, Nz);
+
+ if (nbr(1,-1,-1) >= 0) _upack_corner(c.x10, Nx, -Corner_t::_Q, -Corner_t::_R);
+ else _halo_corner(Nx-1, 0, 0, Nx, -Corner_t::_Q, -Corner_t::_R);
+ if (nbr(1,1,-1) >= 0) _upack_corner(c.x11, Nx, Ny, -Corner_t::_R);
+ else _halo_corner(Nx-1, Ny-1, 0, Nx, Ny, -Corner_t::_R);
+ if (nbr(1,1,1) >= 0) _upack_corner(c.x12, Nx, Ny, Nz);
+ else _halo_corner(Nx-1, Ny-1, Nz-1, Nx, Ny, Nz);
+ if (nbr(1,-1,1) >= 0) _upack_corner(c.x13, Nx, -Corner_t::_Q, Nz);
+ else _halo_corner(Nx-1, 0, Nz-1, Nx, -Corner_t::_Q, Nz);
}
// PUP kernels
diff --git a/src/InterpolatorPUPMPI.h b/src/InterpolatorPUPMPI.h
@@ -6,85 +6,223 @@
#ifndef INTERPOLATORPUPMPI_H_EPMRY0VT
#define INTERPOLATORPUPMPI_H_EPMRY0VT
+///////////////////////////////////////////////////////////////////////////////
+// FACES
+///////////////////////////////////////////////////////////////////////////////
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_pack_faceX(Face_t& f, const int _s) const
+void InterpolatorMPI<Tinterp>::_process_faceX(func_t& eval)
{
for (int iz=0; iz<this->m_data.Nz(); ++iz)
for (int iy=0; iy<this->m_data.Ny(); ++iy)
for (int ix=0; ix<Face_t::_P; ++ix)
- f(iy,iz,ix) = this->m_data(ix+_s,iy,iz);
+ eval(ix,iy,iz);
}
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_pack_faceY(Face_t& f, const int _s) const
+void InterpolatorMPI<Tinterp>::_process_faceY(func_t& eval)
{
for (int iz=0; iz<this->m_data.Nz(); ++iz)
for (int iy=0; iy<Face_t::_P; ++iy)
for (int ix=0; ix<this->m_data.Nx(); ++ix)
- f(ix,iz,iy) = this->m_data(ix,iy+_s,iz);
+ eval(ix,iy,iz);
}
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_pack_faceZ(Face_t& f, const int _s) const
+void InterpolatorMPI<Tinterp>::_process_faceZ(func_t& eval)
{
for (int iz=0; iz<Face_t::_P; ++iz)
for (int iy=0; iy<this->m_data.Ny(); ++iy)
for (int ix=0; ix<this->m_data.Nx(); ++ix)
- f(ix,iy,iz) = this->m_data(ix,iy,iz+_s);
+ eval(ix,iy,iz);
}
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_upack_faceX(const Face_t& f, const int _d)
+void InterpolatorMPI<Tinterp>::_pack_faceX(Face_t& f, const int _s0)
{
- for (int iz=0; iz<this->m_data.Nz(); ++iz)
- for (int iy=0; iy<this->m_data.Ny(); ++iy)
- for (int ix=0; ix<Face_t::_P; ++ix)
- this->m_data(ix+_d,iy,iz) = f(iy,iz,ix);
+ func_t pack = [this, &f, _s0](const int ix, const int iy, const int iz) { f(iy,iz,ix) = this->m_data(ix+_s0,iy,iz); };
+ _process_faceX(pack);
}
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_upack_faceY(const Face_t& f, const int _d)
+void InterpolatorMPI<Tinterp>::_pack_faceY(Face_t& f, const int _s0)
{
- for (int iz=0; iz<this->m_data.Nz(); ++iz)
- for (int iy=0; iy<Face_t::_P; ++iy)
- for (int ix=0; ix<this->m_data.Nx(); ++ix)
- this->m_data(ix,iy+_d,iz) = f(ix,iz,iy);
+ func_t pack = [this, &f, _s0](const int ix, const int iy, const int iz) { f(ix,iz,iy) = this->m_data(ix,iy+_s0,iz); };
+ _process_faceY(pack);
}
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_upack_faceZ(const Face_t& f, const int _d)
+void InterpolatorMPI<Tinterp>::_pack_faceZ(Face_t& f, const int _s0)
{
- for (int iz=0; iz<Face_t::_P; ++iz)
- for (int iy=0; iy<this->m_data.Ny(); ++iy)
+ func_t pack = [this, &f, _s0](const int ix, const int iy, const int iz) { f(ix,iy,iz) = this->m_data(ix,iy,iz+_s0); };
+ _process_faceZ(pack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_upack_faceX(const Face_t& f, const int _d0)
+{
+ func_t unpack = [this, &f, _d0](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy,iz) = f(iy,iz,ix); };
+ _process_faceX(unpack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_upack_faceY(const Face_t& f, const int _d0)
+{
+ func_t unpack = [this, &f, _d0](const int ix, const int iy, const int iz) { this->m_data(ix,iy+_d0,iz) = f(ix,iz,iy); };
+ _process_faceY(unpack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_upack_faceZ(const Face_t& f, const int _d0)
+{
+ func_t unpack = [this, &f, _d0](const int ix, const int iy, const int iz) { this->m_data(ix,iy,iz+_d0) = f(ix,iy,iz); };
+ _process_faceZ(unpack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_halo_faceX(const int _s0, const int _d0)
+{
+ func_t halo = [this, _s0, _d0](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy,iz) = this->m_data(_s0,iy,iz); };
+ _process_faceX(halo);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_halo_faceY(const int _s0, const int _d0)
+{
+ func_t halo = [this, _s0, _d0](const int ix, const int iy, const int iz) { this->m_data(ix,iy+_d0,iz) = this->m_data(ix,_s0,iz); };
+ _process_faceY(halo);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_halo_faceZ(const int _s0, const int _d0)
+{
+ func_t halo = [this, _s0, _d0](const int ix, const int iy, const int iz) { this->m_data(ix,iy,iz+_d0) = this->m_data(ix,iy,_s0); };
+ _process_faceZ(halo);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// EDGES
+///////////////////////////////////////////////////////////////////////////////
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_process_edgeX(func_t& eval)
+{
+ for (int iz=0; iz<Edge_t::_Q; ++iz)
+ for (int iy=0; iy<Edge_t::_P; ++iy)
for (int ix=0; ix<this->m_data.Nx(); ++ix)
- this->m_data(ix,iy,iz+_d) = f(ix,iy,iz);
+ eval(ix,iy,iz);
}
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_set_halo_faceX(const int _s, const int _d)
+void InterpolatorMPI<Tinterp>::_process_edgeY(func_t& eval)
{
- for (int iz=0; iz<this->m_data.Nz(); ++iz)
+ for (int iz=0; iz<Edge_t::_Q; ++iz)
for (int iy=0; iy<this->m_data.Ny(); ++iy)
- for (int ix=0; ix<Face_t::_P; ++ix)
- this->m_data(ix+_d,iy,iz) = this->m_data(_s,iy,iz);
+ for (int ix=0; ix<Edge_t::_P; ++ix)
+ eval(ix,iy,iz);
}
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_set_halo_faceY(const int _s, const int _d)
+void InterpolatorMPI<Tinterp>::_process_edgeZ(func_t& eval)
{
for (int iz=0; iz<this->m_data.Nz(); ++iz)
- for (int iy=0; iy<Face_t::_P; ++iy)
- for (int ix=0; ix<this->m_data.Nx(); ++ix)
- this->m_data(ix,iy+_d,iz) = this->m_data(ix,_s,iz);
+ for (int iy=0; iy<Edge_t::_Q; ++iy)
+ for (int ix=0; ix<Edge_t::_P; ++ix)
+ eval(ix,iy,iz);
}
template <typename Tinterp>
-void InterpolatorMPI<Tinterp>::_set_halo_faceZ(const int _s, const int _d)
+void InterpolatorMPI<Tinterp>::_pack_edgeX(Edge_t& e, const int _s0, const int _s1)
{
- for (int iz=0; iz<Face_t::_P; ++iz)
- for (int iy=0; iy<this->m_data.Ny(); ++iy)
- for (int ix=0; ix<this->m_data.Nx(); ++ix)
- this->m_data(ix,iy,iz+_d) = this->m_data(ix,iy,_s);
+ func_t pack = [this, &e, _s0, _s1](const int ix, const int iy, const int iz) { e(ix,iy,iz) = this->m_data(ix,iy+_s0,iz+_s1); };
+ _process_edgeX(pack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_pack_edgeY(Edge_t& e, const int _s0, const int _s1)
+{
+ func_t pack = [this, &e, _s0, _s1](const int ix, const int iy, const int iz) { e(iy,ix,iz) = this->m_data(ix+_s0,iy,iz+_s1); };
+ _process_edgeY(pack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_pack_edgeZ(Edge_t& e, const int _s0, const int _s1)
+{
+ func_t pack = [this, &e, _s0, _s1](const int ix, const int iy, const int iz) { e(iz,ix,iy) = this->m_data(ix+_s0,iy+_s1,iz); };
+ _process_edgeZ(pack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_upack_edgeX(const Edge_t& e, const int _d0, const int _d1)
+{
+ func_t unpack = [this, &e, _d0, _d1](const int ix, const int iy, const int iz) { this->m_data(ix,iy+_d0,iz+_d1) = e(ix,iy,iz); };
+ _process_edgeX(unpack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_upack_edgeY(const Edge_t& e, const int _d0, const int _d1)
+{
+ func_t unpack = [this, &e, _d0, _d1](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy,iz+_d1) = e(iy,ix,iz); };
+ _process_edgeY(unpack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_upack_edgeZ(const Edge_t& e, const int _d0, const int _d1)
+{
+ func_t unpack = [this, &e, _d0, _d1](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy+_d1,iz) = e(iz,ix,iy); };
+ _process_edgeZ(unpack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_halo_edgeX(const int _s0, const int _s1, const int _d0, const int _d1)
+{
+ func_t halo = [this, _s0, _s1, _d0, _d1](const int ix, const int iy, const int iz) { this->m_data(ix,iy+_d0,iz+_d1) = this->m_data(ix,_s0,_s1); };
+ _process_edgeX(halo);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_halo_edgeY(const int _s0, const int _s1, const int _d0, const int _d1)
+{
+ func_t halo = [this, _s0, _s1, _d0, _d1](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy,iz+_d1) = this->m_data(_s0,iy,_s1); };
+ _process_edgeY(halo);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_halo_edgeZ(const int _s0, const int _s1, const int _d0, const int _d1)
+{
+ func_t halo = [this, _s0, _s1, _d0, _d1](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy+_d1,iz) = this->m_data(_s0,_s1,iz); };
+ _process_edgeZ(halo);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// CORNERS
+///////////////////////////////////////////////////////////////////////////////
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_process_corner(func_t& eval)
+{
+ for (int iz=0; iz<Corner_t::_R; ++iz)
+ for (int iy=0; iy<Corner_t::_Q; ++iy)
+ for (int ix=0; ix<Corner_t::_P; ++ix)
+ eval(ix,iy,iz);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_pack_corner(Corner_t& c, const int _s0, const int _s1, const int _s2)
+{
+ func_t pack = [this, &c, _s0, _s1, _s2](const int ix, const int iy, const int iz) { c(ix,iy,iz) = this->m_data(ix+_s0,iy+_s1,iz+_s2); };
+ _process_corner(pack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_upack_corner(const Corner_t& c, const int _d0, const int _d1, const int _d2)
+{
+ func_t unpack = [this, &c, _d0, _d1, _d2](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy+_d1,iz+_d2) = c(ix,iy,iz); };
+ _process_corner(unpack);
+}
+
+template <typename Tinterp>
+void InterpolatorMPI<Tinterp>::_halo_corner(const int _s0, const int _s1, const int _s2, const int _d0, const int _d1, const int _d2)
+{
+ func_t halo = [this, _s0, _s1, _s2, _d0, _d1, _d2](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy+_d1,iz+_d2) = this->m_data(_s0,_s1,_s2); };
+ _process_corner(halo);
}
#endif /* INTERPOLATORPUPMPI_H_EPMRY0VT */