easy-iso

Iso-surface extraction from volume data
git clone https://git.0xfab.ch/easy-iso.git
Log | Files | Refs | Submodules | README | LICENSE

InterpolatorMPI.h (31046B)


      1 // File       : InterpolatorMPI.h
      2 // Date       : Mon Nov 28 16:15:41 2016
      3 // Author     : Fabian Wermelinger
      4 // Description: MPI'ed Interpolator
      5 // Copyright 2016 ETH Zurich. All Rights Reserved.
      6 #ifndef INTERPOLATORMPI_H_FKS9KH2J
      7 #define INTERPOLATORMPI_H_FKS9KH2J
      8 
      9 #include <cassert>
     10 #include <vector>
     11 #include <cmath>
     12 #include <functional>
     13 #include <mpi.h>
     14 #include "Interpolator.h"
     15 
     16 #ifdef _FLOAT_PRECISION_
     17 #define MPIREAL MPI_FLOAT
     18 #else
     19 #define MPIREAL MPI_DOUBLE
     20 #endif
     21 
     22 class MPInbr
     23 {
     24 public:
     25     MPInbr(const MPI_Comm comm)
     26     {
     27         // assumes comm is a communicator created with MPI_Cart_create
     28         int myidx[3];
     29         int dims[3];
     30         int periods[3];
     31         MPI_Cart_get(comm, 3, dims, periods, myidx);
     32 
     33         for (int i=0; i<27; ++i)
     34         {
     35             m_nbr[i] = -1;
     36             int off[3] = {(i%3)-1, ((i/3)%3)-1, (i/9)-1};
     37             int coords[3];
     38             for (int j=0; j<3; ++j)
     39                 coords[j] = myidx[j] + off[j];
     40 
     41             if (coords[0] < 0 || coords[0] >= dims[0]) continue;
     42             if (coords[1] < 0 || coords[1] >= dims[1]) continue;
     43             if (coords[2] < 0 || coords[2] >= dims[2]) continue;
     44 
     45             int& nbr = m_nbr[(off[0]+1) + (off[1]+1)*3 + (off[2]+1)*9];
     46             MPI_Cart_rank(comm, coords, &nbr);
     47         }
     48     }
     49 
     50     inline int operator()(const int ix, const int iy, const int iz) const
     51     {
     52         assert(ix >= -1 && ix <= 1);
     53         assert(iy >= -1 && iy <= 1);
     54         assert(iz >= -1 && iz <= 1);
     55         return m_nbr[(ix+1) + (iy+1)*3 + (iz+1)*9];
     56     }
     57 
     58 private:
     59     int m_nbr[27];
     60 };
     61 
     62 struct CubeHalo
     63 {
     64     Faces faces;
     65     Edges edges;
     66     Corners corners;
     67 
     68     CubeHalo() {}
     69     CubeHalo(const int Nx, const int Ny, const int Nz) :
     70         faces(Ny,Nz, Nx,Nz, Nx,Ny),
     71         edges(Nx, Ny, Nz)
     72     {}
     73     void alloc(const int Nx, const int Ny, const int Nz)
     74     {
     75         faces.alloc(Ny,Nz, Nx,Nz, Nx,Ny);
     76         edges.alloc(Nx, Ny, Nz);
     77     }
     78 };
     79 
     80 template <typename Tinterp>
     81 class InterpolatorMPI : public Interpolator<Tinterp>
     82 {
     83 public:
     84     InterpolatorMPI(ArgumentParser& p, const MPI_Comm comm=MPI_COMM_WORLD) :
     85         Interpolator<Tinterp>(),
     86         m_PESize{p("xpesize").asInt(1),p("ypesize").asInt(1),p("zpesize").asInt(1)}
     87     {
     88         this->m_extent = p("extent").asDouble(1.0);
     89         int periodic[3] = {false};
     90 
     91         MPI_Cart_create(comm, 3, m_PESize, periodic, true, &m_cartcomm);
     92 
     93         int world_size;
     94         MPI_Comm_size(m_cartcomm, &world_size);
     95         assert(m_PESize[0]*m_PESize[1]*m_PESize[2] == world_size);
     96 
     97         int myrank;
     98         MPI_Comm_rank(m_cartcomm, &myrank);
     99         MPI_Cart_coords(m_cartcomm, myrank, 3, m_myPEIndex);
    100         m_isroot = (0 == myrank);
    101 
    102         // load the data
    103         std::chrono::time_point<std::chrono::system_clock> start, end;
    104         start = std::chrono::system_clock::now();
    105         if (p("load").asString("h5") == "h5")           this->_load_h5_MPI(p);
    106         else if (p("load").asString("h5") == "wavelet") this->_load_wavelet_MPI(p);
    107         else
    108         {
    109             if (m_isroot)
    110                 std::cerr << "ERROR: InterpolatorMPI: Undefined loader \"" << p("load").asString() << "\"" << std::endl;
    111             abort();
    112         }
    113         end = std::chrono::system_clock::now();
    114         std::chrono::duration<double> elapsed_seconds = end-start;
    115         if (m_isroot)
    116             std::cout << "Loading data done. Elapsed time = " << elapsed_seconds.count() << "s" << std::endl;
    117 
    118         // cell centered or nodal values?
    119         if (p("data").asString("cell") == "cell")      this->_nCells = [](const int N) { return N; };
    120         else if (p("data").asString("cell") == "node") this->_nCells = [](const int N) { return N-1; };
    121         else
    122         {
    123             if (m_isroot)
    124                 std::cerr << "ERROR: InterpolatorMPI: Unknown data attribute \"" << p("data").asString() << "\"" << std::endl;
    125             abort();
    126         }
    127 
    128         const int Nx = m_PESize[0]*this->m_data.Nx();
    129         const int Ny = m_PESize[1]*this->m_data.Ny();
    130         const int Nz = m_PESize[2]*this->m_data.Nz();
    131         const int Nmax = std::max(Nx, std::max(Ny, Nz));
    132         assert(Nmax > 1);
    133         this->m_h = this->m_extent/this->_nCells(Nmax);
    134         this->m_invh = 1.0/this->m_h;
    135         this->m_extentX = this->m_h*this->_nCells(Nx);
    136         this->m_extentY = this->m_h*this->_nCells(Ny);
    137         this->m_extentZ = this->m_h*this->_nCells(Nz);
    138 
    139         // init coordinate transform
    140         this->_physicalDataPos = [this](const int i) { return i*this->m_h; };
    141         this->m_isNodal = 1;
    142         if (p("data").asString("cell") == "cell")
    143         {
    144             this->_physicalDataPos = [this](const int i) { return (0.5+i)*this->m_h; };
    145             this->m_isNodal = 0;
    146         }
    147         const Real Ox = (this->m_extentX / m_PESize[0]) * m_myPEIndex[0];
    148         const Real Oy = (this->m_extentY / m_PESize[1]) * m_myPEIndex[1];
    149         const Real Oz = (this->m_extentZ / m_PESize[2]) * m_myPEIndex[2];
    150         Real origin[3];
    151         origin[0] = this->_physicalDataPos(ceil(Ox*this->m_invh)) - Ox;
    152         origin[1] = this->_physicalDataPos(ceil(Oy*this->m_invh)) - Oy;
    153         origin[2] = this->_physicalDataPos(ceil(Oz*this->m_invh)) - Oz;
    154         assert(origin[0] >= 0.0);
    155         assert(origin[1] >= 0.0);
    156         assert(origin[2] >= 0.0);
    157         for (int i = 0; i < 3; ++i)
    158             this->m_origin_off[i] = origin[i];
    159 
    160         // fetch halo cells
    161         m_sendbuf.alloc(this->m_data.Nx(), this->m_data.Ny(), this->m_data.Nz());
    162         m_recvbuf.alloc(this->m_data.Nx(), this->m_data.Ny(), this->m_data.Nz());
    163 
    164         start = std::chrono::system_clock::now();
    165         _fetch_halo();
    166         end = std::chrono::system_clock::now();
    167         elapsed_seconds = end-start;
    168         if (m_isroot)
    169             std::cout << "Exchanging messages data done. Elapsed time = " << elapsed_seconds.count() << "s" << std::endl;
    170     }
    171     virtual ~InterpolatorMPI() {}
    172 
    173     inline int getNx() const { return m_PESize[0]*this->m_data.Nx(); }
    174     inline int getNy() const { return m_PESize[1]*this->m_data.Ny(); }
    175     inline int getNz() const { return m_PESize[2]*this->m_data.Nz(); }
    176     inline int getLocalNx() const { return this->m_data.Nx(); }
    177     inline int getLocalNy() const { return this->m_data.Ny(); }
    178     inline int getLocalNz() const { return this->m_data.Nz(); }
    179     inline MPI_Comm getComm() const { return m_cartcomm; }
    180     inline void getPESize(int ps[3]) const { for (int i = 0; i < 3; ++i) ps[i] = m_PESize[i]; }
    181     inline void getPEIndex(int pi[3]) const { for (int i = 0; i < 3; ++i) pi[i] = m_myPEIndex[i]; }
    182     inline bool isroot() const {return m_isroot; }
    183 
    184 
    185 private:
    186     MPI_Comm m_cartcomm;
    187     int m_PESize[3];
    188     int m_myPEIndex[3];
    189     bool m_isroot;
    190 
    191     // loader
    192     void _load_h5_MPI(ArgumentParser& p);
    193     void _load_wavelet_MPI(ArgumentParser& p);
    194 
    195     // halo handling
    196     using func_t = std::function<void(const int, const int, const int)>;
    197     CubeHalo m_sendbuf;
    198     CubeHalo m_recvbuf;
    199     void _fetch_halo();
    200 
    201     void _send_faces(const MPInbr& nbrs, const Faces& f, std::vector<MPI_Request>& f_req) const;
    202     void _recv_faces(const MPInbr& nbrs, Faces& f, std::vector<MPI_Request>& f_req) const;
    203     void _pack_faces(Faces& f, const MPInbr& nbr);
    204     void _unpack_faces(const Faces& f, const MPInbr& nbr);
    205     inline void _process_faceX(func_t& eval);
    206     inline void _process_faceY(func_t& eval);
    207     inline void _process_faceZ(func_t& eval);
    208     inline void _pack_faceX(Face_t& f, const int _s0);
    209     inline void _pack_faceY(Face_t& f, const int _s0);
    210     inline void _pack_faceZ(Face_t& f, const int _s0);
    211     inline void _upack_faceX(const Face_t& f, const int _d0);
    212     inline void _upack_faceY(const Face_t& f, const int _d0);
    213     inline void _upack_faceZ(const Face_t& f, const int _d0);
    214     inline void _halo_faceX(const int _s0, const int _d0);
    215     inline void _halo_faceY(const int _s0, const int _d0);
    216     inline void _halo_faceZ(const int _s0, const int _d0);
    217 
    218     void _send_edges(const MPInbr& nbrs, const Edges& e, std::vector<MPI_Request>& e_req) const;
    219     void _recv_edges(const MPInbr& nbrs, Edges& e, std::vector<MPI_Request>& e_req) const;
    220     void _pack_edges(Edges& f, const MPInbr& nbr);
    221     void _unpack_edges(const Edges& e, const MPInbr& nbr);
    222     inline void _process_edgeX(func_t& eval);
    223     inline void _process_edgeY(func_t& eval);
    224     inline void _process_edgeZ(func_t& eval);
    225     inline void _pack_edgeX(Edge_t& e, const int _s0, const int _s1);
    226     inline void _pack_edgeY(Edge_t& e, const int _s0, const int _s1);
    227     inline void _pack_edgeZ(Edge_t& e, const int _s0, const int _s1);
    228     inline void _upack_edgeX(const Edge_t& e, const int _d0, const int _d1);
    229     inline void _upack_edgeY(const Edge_t& e, const int _d0, const int _d1);
    230     inline void _upack_edgeZ(const Edge_t& e, const int _d0, const int _d1);
    231     inline void _halo_edgeX(const int _s0, const int _s1, const int _d0, const int _d1);
    232     inline void _halo_edgeY(const int _s0, const int _s1, const int _d0, const int _d1);
    233     inline void _halo_edgeZ(const int _s0, const int _s1, const int _d0, const int _d1);
    234 
    235     void _send_corners(const MPInbr& nbrs, const Corners& c, std::vector<MPI_Request>& c_req) const;
    236     void _recv_corners(const MPInbr& nbrs, Corners& c, std::vector<MPI_Request>& c_req) const;
    237     void _pack_corners(Corners& c, const MPInbr& nbr);
    238     void _unpack_corners(const Corners& c, const MPInbr& nbr);
    239     inline void _process_corner(func_t& eval);
    240     inline void _pack_corner(Corner_t& c, const int _s0, const int _s1, const int _s2);
    241     inline void _upack_corner(const Corner_t& c, const int _d0, const int _d1, const int _d2);
    242     inline void _halo_corner(const int _s0, const int _s1, const int _s2, const int _d0, const int _d1, const int _d2);
    243 };
    244 
    245 
    246 template <typename Tinterp>
    247 void InterpolatorMPI<Tinterp>::_load_h5_MPI(ArgumentParser& parser)
    248 {
    249 #ifdef _USE_HDF_
    250     const std::string group = parser("hdf5_group").asString("/data");
    251     const std::string filename = parser("file").asString("");
    252     if (filename == "")
    253     {
    254         if (m_isroot)
    255             std::cerr << "ERROR: InterpolatorMPI: -file is not specified. No input file given" << std::endl;
    256         abort();
    257     }
    258 
    259     herr_t status;
    260     hid_t file_id, dataset_id, dataspace_id, file_dataspace_id, fspace_id, fapl_id, mspace_id;
    261     int ndims, NCH;
    262     int maxDim[4];
    263 
    264     if (m_isroot)
    265     {
    266         hsize_t dims[4];
    267         file_id           = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
    268         dataset_id        = H5Dopen(file_id, group.c_str(), H5P_DEFAULT);
    269         file_dataspace_id = H5Dget_space(dataset_id);
    270         ndims             = H5Sget_simple_extent_dims(file_dataspace_id, dims, NULL);
    271         status = H5Dclose(dataset_id);
    272         status = H5Sclose(file_dataspace_id);
    273         status = H5Fclose(file_id);
    274         maxDim[2] = dims[0];
    275         maxDim[1] = dims[1];
    276         maxDim[0] = dims[2];
    277         maxDim[3] = dims[3];
    278     }
    279     MPI_Bcast(maxDim, 4, MPI_INT, 0, m_cartcomm);
    280     NCH = maxDim[3];
    281 
    282     if (maxDim[0] % m_PESize[0] != 0)
    283     {
    284         if (m_isroot)
    285             std::cerr << "ERROR: InterpolatorMPI: Number of processes in x-direction is not an integer multiple of available data." << std::endl;
    286         abort();
    287     }
    288     if (maxDim[1] % m_PESize[1] != 0)
    289     {
    290         if (m_isroot)
    291             std::cerr << "ERROR: InterpolatorMPI: Number of processes in y-direction is not an integer multiple of available data." << std::endl;
    292         abort();
    293     }
    294     if (maxDim[2] % m_PESize[2] != 0)
    295     {
    296         if (m_isroot)
    297             std::cerr << "ERROR: InterpolatorMPI: Number of processes in z-direction is not an integer multiple of available data." << std::endl;
    298         abort();
    299     }
    300     const size_t NX = maxDim[0]/m_PESize[0];
    301     const size_t NY = maxDim[1]/m_PESize[1];
    302     const size_t NZ = maxDim[2]/m_PESize[2];
    303 
    304     const size_t num_elem = NX*NY*NZ*NCH;
    305     Real* const data = new Real[num_elem];
    306 
    307     hsize_t count[4] = {NZ, NY, NX, NCH};
    308     hsize_t dims[4] = {maxDim[2], maxDim[1], maxDim[0], NCH};
    309     hsize_t offset[4] = {m_myPEIndex[2]*NZ, m_myPEIndex[1]*NY, m_myPEIndex[0]*NX, 0};
    310 
    311     H5open();
    312     fapl_id = H5Pcreate(H5P_FILE_ACCESS);
    313     status = H5Pset_fapl_mpio(fapl_id, m_cartcomm, MPI_INFO_NULL);
    314     file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, fapl_id);
    315     status = H5Pclose(fapl_id);
    316 
    317     dataset_id = H5Dopen2(file_id, group.c_str(), H5P_DEFAULT);
    318     fapl_id = H5Pcreate(H5P_DATASET_XFER);
    319     H5Pset_dxpl_mpio(fapl_id, H5FD_MPIO_COLLECTIVE);
    320 
    321     fspace_id = H5Dget_space(dataset_id);
    322     H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset, NULL, count, NULL);
    323 
    324     mspace_id = H5Screate_simple(4, count, NULL);
    325     status = H5Dread(dataset_id, HDF_PRECISION, mspace_id, fspace_id, fapl_id, data);
    326 
    327     if (NCH > 1)
    328     {
    329         const int channel    = parser("channel").asInt(0); // data channel
    330         const bool magnitude = parser("magnitude").asBool(false); // vector magnitude (only if NCH == 3)
    331         if (magnitude && NCH == 3)
    332         {
    333             int k = 0;
    334             for (int i = 0; i < num_elem; i += NCH)
    335             {
    336                 const Real a = data[i+0];
    337                 const Real b = data[i+1];
    338                 const Real c = data[i+2];
    339                 data[k++] = std::sqrt(a*a + b*b + c*c);
    340             }
    341 
    342         }
    343         else
    344         {
    345             assert(channel < NCH);
    346             int k = 0;
    347             for (int i = 0; i < num_elem; i += NCH)
    348                 data[k++] = data[i+channel];
    349         }
    350     }
    351     this->m_data = std::move(Matrix_t(NX, NY, NZ, data));
    352     delete [] data;
    353 #endif /* _USE_HDF_ */
    354 }
    355 
    356 
    357 template <typename Tinterp>
    358 void InterpolatorMPI<Tinterp>::_load_wavelet_MPI(ArgumentParser& p)
    359 {
    360 #ifdef _USE_CUBISMZ_
    361     const bool byte_swap   = p("-swap").asBool(false);
    362     const int wavelet_type = p("-wtype").asInt(1);
    363     const std::string filename = p("file").asString("");
    364     if (filename == "")
    365     {
    366         if (m_isroot)
    367             std::cerr << "ERROR: InterpolatorMPI: -file is not specified. No input file given" << std::endl;
    368         abort();
    369     }
    370 
    371     Reader_WaveletCompressionMPI myreader(m_cartcomm, filename, byte_swap, wavelet_type);
    372     myreader.load_file();
    373     const int NBX = myreader.xblocks();
    374     const int NBY = myreader.yblocks();
    375     const int NBZ = myreader.zblocks();
    376     assert(NBX % m_PESize[0] == 0);
    377     assert(NBY % m_PESize[1] == 0);
    378     assert(NBZ % m_PESize[2] == 0);
    379     const int myNBX = NBX/m_PESize[0];
    380     const int myNBY = NBY/m_PESize[1];
    381     const int myNBZ = NBZ/m_PESize[2];
    382 
    383     const int maxDim[3] = {myNBX*_BLOCKSIZE_, myNBY*_BLOCKSIZE_, myNBZ*_BLOCKSIZE_};
    384     const size_t num_elem = static_cast<size_t>(maxDim[0]) * maxDim[1] * maxDim[2];
    385     Real* const data = new Real[num_elem];
    386 
    387     const int nBlocks = myNBX * myNBY * myNBZ;
    388 #pragma omp parallel
    389     {
    390         Real blockdata[_BLOCKSIZE_][_BLOCKSIZE_][_BLOCKSIZE_];
    391 #pragma omp for
    392         for (int i = 0; i < nBlocks; ++i)
    393         {
    394             const int ix = i%myNBX;
    395             const int iy = (i/myNBX)%myNBY;
    396             const int iz = (i/(myNBX*myNBY))%myNBZ;
    397             const double zratio = myreader.load_block2(
    398                     ix+myNBX*m_myPEIndex[0],
    399                     iy+myNBY*m_myPEIndex[1],
    400                     iz+myNBZ*m_myPEIndex[2],
    401                     blockdata);
    402 
    403             for (int z=0; z < _BLOCKSIZE_; ++z)
    404                 for (int y=0; y < _BLOCKSIZE_; ++y)
    405                 {
    406                     assert(iy*_BLOCKSIZE_+y < maxDim[1]);
    407                     assert(iz*_BLOCKSIZE_+z < maxDim[2]);
    408                     const size_t offset = _BLOCKSIZE_*(static_cast<size_t>(ix) + myNBX*(y+iy*_BLOCKSIZE_ + (z+static_cast<size_t>(iz)*_BLOCKSIZE_)*myNBY*_BLOCKSIZE_));
    409                     Real* const dst = data + offset;
    410                     memcpy(dst, &blockdata[z][y][0], _BLOCKSIZE_*sizeof(Real));
    411                 }
    412         }
    413     }
    414 
    415     this->m_data = std::move(Matrix_t(maxDim[0], maxDim[1], maxDim[2], data));
    416     delete [] data;
    417 #else
    418     fprintf(stderr, "WARNING: Executable was compiled without wavelet compressor support...\n");
    419 #endif /* _USE_CUBISMZ_ */
    420 }
    421 
    422 
    423 template <typename Tinterp>
    424 void InterpolatorMPI<Tinterp>::_fetch_halo()
    425 {
    426     // concept:
    427     // 0.) prepare neighbor info
    428     // 1.) pack buffers and send asynchronous messages
    429     // 2.) unpack buffers
    430 
    431     // 0.)
    432     MPInbr mynbrs(m_cartcomm);
    433 
    434     // 1.)
    435     const int Nx = this->m_data.Nx();
    436     const int Ny = this->m_data.Ny();
    437     const int Nz = this->m_data.Nz();
    438 
    439     // faces
    440     std::vector<MPI_Request> fpend_recv;
    441     _recv_faces(mynbrs, m_recvbuf.faces, fpend_recv);
    442 
    443     std::vector<MPI_Request> fpend_send;
    444     _pack_faces(m_sendbuf.faces, mynbrs);
    445     _send_faces(mynbrs, m_sendbuf.faces, fpend_send);
    446 
    447     // edges
    448     std::vector<MPI_Request> epend_recv;
    449     _recv_edges(mynbrs, m_recvbuf.edges, epend_recv);
    450 
    451     std::vector<MPI_Request> epend_send;
    452     _pack_edges(m_sendbuf.edges, mynbrs);
    453     _send_edges(mynbrs, m_sendbuf.edges, epend_send);
    454 
    455     // corners
    456     std::vector<MPI_Request> cpend_recv;
    457     _recv_corners(mynbrs, m_recvbuf.corners, cpend_recv);
    458 
    459     std::vector<MPI_Request> cpend_send;
    460     _pack_corners(m_sendbuf.corners, mynbrs);
    461     _send_corners(mynbrs, m_sendbuf.corners, cpend_send);
    462 
    463     // 2.)
    464     std::vector<MPI_Status> fstat_recv(fpend_recv.size());
    465     MPI_Waitall(fpend_recv.size(), fpend_recv.data(), fstat_recv.data());
    466     _unpack_faces(m_recvbuf.faces, mynbrs);
    467 
    468     std::vector<MPI_Status> estat_recv(epend_recv.size());
    469     MPI_Waitall(epend_recv.size(), epend_recv.data(), estat_recv.data());
    470     _unpack_edges(m_recvbuf.edges, mynbrs);
    471 
    472     std::vector<MPI_Status> cstat_recv(cpend_recv.size());
    473     MPI_Waitall(cpend_recv.size(), cpend_recv.data(), cstat_recv.data());
    474     _unpack_corners(m_recvbuf.corners, mynbrs);
    475 }
    476 
    477 
    478 template <typename Tinterp>
    479 void InterpolatorMPI<Tinterp>::_send_faces(const MPInbr& nbrs, const Faces& f, std::vector<MPI_Request>& f_req) const
    480 {
    481     const int dst[6] = {nbrs(-1,0,0), nbrs(1,0,0), nbrs(0,-1,0), nbrs(0,1,0), nbrs(0,0,-1), nbrs(0,0,1)};
    482     Real* const data[6] = {f.x0.data, f.x1.data, f.y0.data, f.y1.data, f.z0.data, f.z1.data};
    483     const int Nelem[6] = {f.x0._Nelem, f.x1._Nelem, f.y0._Nelem, f.y1._Nelem, f.z0._Nelem, f.z1._Nelem};
    484     const int tagbase = 0;
    485     for (int i = 0; i < 6; ++i)
    486     {
    487         const int dir  = i/2;
    488         const int side = i%2;
    489         if (dst[i] >= 0)
    490         {
    491             MPI_Request req;
    492             MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], tagbase+dir*2+side, m_cartcomm, &req);
    493             f_req.push_back(req);
    494         }
    495     }
    496 }
    497 
    498 
    499 template <typename Tinterp>
    500 void InterpolatorMPI<Tinterp>::_recv_faces(const MPInbr& nbrs, Faces& f, std::vector<MPI_Request>& f_req) const
    501 {
    502     const int src[6] = {nbrs(-1,0,0), nbrs(1,0,0), nbrs(0,-1,0), nbrs(0,1,0), nbrs(0,0,-1), nbrs(0,0,1)};
    503     Real* const data[6] = {f.x0.data, f.x1.data, f.y0.data, f.y1.data, f.z0.data, f.z1.data};
    504     const int Nelem[6] = {f.x0._Nelem, f.x1._Nelem, f.y0._Nelem, f.y1._Nelem, f.z0._Nelem, f.z1._Nelem};
    505     const int tagbase = 0;
    506     for (int i = 0; i < 6; ++i)
    507     {
    508         const int dir  = i/2;
    509         const int side = (i+1)%2;
    510         if (src[i] >= 0)
    511         {
    512             MPI_Request req;
    513             MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], tagbase+dir*2+side, m_cartcomm, &req);
    514             f_req.push_back(req);
    515         }
    516     }
    517 }
    518 
    519 
    520 template <typename Tinterp>
    521 void InterpolatorMPI<Tinterp>::_send_edges(const MPInbr& nbrs, const Edges& e, std::vector<MPI_Request>& e_req) const
    522 {
    523     const int dstX[4] = {nbrs(0,-1,-1), nbrs(0,1,-1), nbrs(0,1,1), nbrs(0,-1,1)};
    524     const int dstY[4] = {nbrs(-1,0,-1), nbrs(-1,0,1), nbrs(1,0,1), nbrs(1,0,-1)};
    525     const int dstZ[4] = {nbrs(-1,-1,0), nbrs(1,-1,0), nbrs(1,1,0), nbrs(-1,1,0)};
    526     Real* const dataX[4] = {e.x0.data, e.x1.data, e.x2.data, e.x3.data};
    527     Real* const dataY[4] = {e.y0.data, e.y1.data, e.y2.data, e.y3.data};
    528     Real* const dataZ[4] = {e.z0.data, e.z1.data, e.z2.data, e.z3.data};
    529     const int NelemX[4] = {e.x0._Nelem, e.x1._Nelem, e.x2._Nelem, e.x3._Nelem};
    530     const int NelemY[4] = {e.y0._Nelem, e.y1._Nelem, e.y2._Nelem, e.y3._Nelem};
    531     const int NelemZ[4] = {e.z0._Nelem, e.z1._Nelem, e.z2._Nelem, e.z3._Nelem};
    532     const int tagbase = 6;
    533     for (int i = 0; i < 4; ++i)
    534     {
    535         const int edge = i;
    536         if (dstX[i] >= 0)
    537         {
    538             MPI_Request req;
    539             MPI_Isend(dataX[i], NelemX[i], MPIREAL, dstX[i], tagbase+0*4+edge, m_cartcomm, &req);
    540             e_req.push_back(req);
    541         }
    542         if (dstY[i] >= 0)
    543         {
    544             MPI_Request req;
    545             MPI_Isend(dataY[i], NelemY[i], MPIREAL, dstY[i], tagbase+1*4+edge, m_cartcomm, &req);
    546             e_req.push_back(req);
    547         }
    548         if (dstZ[i] >= 0)
    549         {
    550             MPI_Request req;
    551             MPI_Isend(dataZ[i], NelemZ[i], MPIREAL, dstZ[i], tagbase+2*4+edge, m_cartcomm, &req);
    552             e_req.push_back(req);
    553         }
    554     }
    555 }
    556 
    557 
    558 template <typename Tinterp>
    559 void InterpolatorMPI<Tinterp>::_recv_edges(const MPInbr& nbrs, Edges& e, std::vector<MPI_Request>& e_req) const
    560 {
    561     const int srcX[4] = {nbrs(0,-1,-1), nbrs(0,1,-1), nbrs(0,1,1), nbrs(0,-1,1)};
    562     const int srcY[4] = {nbrs(-1,0,-1), nbrs(-1,0,1), nbrs(1,0,1), nbrs(1,0,-1)};
    563     const int srcZ[4] = {nbrs(-1,-1,0), nbrs(1,-1,0), nbrs(1,1,0), nbrs(-1,1,0)};
    564     Real* const dataX[4] = {e.x0.data, e.x1.data, e.x2.data, e.x3.data};
    565     Real* const dataY[4] = {e.y0.data, e.y1.data, e.y2.data, e.y3.data};
    566     Real* const dataZ[4] = {e.z0.data, e.z1.data, e.z2.data, e.z3.data};
    567     const int NelemX[4] = {e.x0._Nelem, e.x1._Nelem, e.x2._Nelem, e.x3._Nelem};
    568     const int NelemY[4] = {e.y0._Nelem, e.y1._Nelem, e.y2._Nelem, e.y3._Nelem};
    569     const int NelemZ[4] = {e.z0._Nelem, e.z1._Nelem, e.z2._Nelem, e.z3._Nelem};
    570     const int tagbase = 6;
    571     for (int i = 0; i < 4; ++i)
    572     {
    573         const int edge = (i+2)%4;
    574         if (srcX[i] >= 0)
    575         {
    576             MPI_Request req;
    577             MPI_Irecv(dataX[i], NelemX[i], MPIREAL, srcX[i], tagbase+0*4+edge, m_cartcomm, &req);
    578             e_req.push_back(req);
    579         }
    580         if (srcY[i] >= 0)
    581         {
    582             MPI_Request req;
    583             MPI_Irecv(dataY[i], NelemY[i], MPIREAL, srcY[i], tagbase+1*4+edge, m_cartcomm, &req);
    584             e_req.push_back(req);
    585         }
    586         if (srcZ[i] >= 0)
    587         {
    588             MPI_Request req;
    589             MPI_Irecv(dataZ[i], NelemZ[i], MPIREAL, srcZ[i], tagbase+2*4+edge, m_cartcomm, &req);
    590             e_req.push_back(req);
    591         }
    592     }
    593 }
    594 
    595 
    596 template <typename Tinterp>
    597 void InterpolatorMPI<Tinterp>::_send_corners(const MPInbr& nbrs, const Corners& c, std::vector<MPI_Request>& c_req) const
    598 {
    599     const int dst[8] = {nbrs(-1,-1,-1), nbrs(-1,1,-1), nbrs(-1,1,1), nbrs(-1,-1,1), nbrs(1,-1,-1), nbrs(1,1,-1), nbrs(1,1,1), nbrs(1,-1,1)};
    600     Real* const data[8] = {c.x00.data, c.x01.data, c.x02.data, c.x03.data, c.x10.data, c.x11.data, c.x12.data, c.x13.data};
    601     const int Nelem[8] = {c.x00._Nelem, c.x01._Nelem, c.x02._Nelem, c.x03._Nelem, c.x10._Nelem, c.x11._Nelem, c.x12._Nelem, c.x13._Nelem};
    602     const int tagbase = 18;
    603     for (int i = 0; i < 8; ++i)
    604     {
    605         const int cycl = i%4;
    606         const int side = i/4;
    607         if (dst[i] >= 0)
    608         {
    609             MPI_Request req;
    610             MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], tagbase+side*4+cycl, m_cartcomm, &req);
    611             c_req.push_back(req);
    612         }
    613     }
    614 }
    615 
    616 
    617 template <typename Tinterp>
    618 void InterpolatorMPI<Tinterp>::_recv_corners(const MPInbr& nbrs, Corners& c, std::vector<MPI_Request>& c_req) const
    619 {
    620     const int src[8] = {nbrs(-1,-1,-1), nbrs(-1,1,-1), nbrs(-1,1,1), nbrs(-1,-1,1), nbrs(1,-1,-1), nbrs(1,1,-1), nbrs(1,1,1), nbrs(1,-1,1)};
    621     Real* const data[8] = {c.x00.data, c.x01.data, c.x02.data, c.x03.data, c.x10.data, c.x11.data, c.x12.data, c.x13.data};
    622     const int Nelem[8] = {c.x00._Nelem, c.x01._Nelem, c.x02._Nelem, c.x03._Nelem, c.x10._Nelem, c.x11._Nelem, c.x12._Nelem, c.x13._Nelem};
    623     const int tagbase = 18;
    624     for (int i = 0; i < 8; ++i)
    625     {
    626         const int cycl = (i+2)%4;
    627         const int side = (7-i)/4;
    628         if (src[i] >= 0)
    629         {
    630             MPI_Request req;
    631             MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], tagbase+side*4+cycl, m_cartcomm, &req);
    632             c_req.push_back(req);
    633         }
    634     }
    635 }
    636 
    637 
    638 template <typename Tinterp>
    639 void InterpolatorMPI<Tinterp>::_pack_faces(Faces& f, const MPInbr& nbr)
    640 {
    641     const int Nx = this->m_data.Nx();
    642     const int Ny = this->m_data.Ny();
    643     const int Nz = this->m_data.Nz();
    644 
    645     if (nbr(-1,0,0) >= 0) _pack_faceX(f.x0, 0);
    646     if (nbr(1,0,0) >= 0)  _pack_faceX(f.x1, Nx-Face_t::_P);
    647 
    648     if (nbr(0,-1,0) >= 0) _pack_faceY(f.y0, 0);
    649     if (nbr(0,1,0) >= 0)  _pack_faceY(f.y1, Ny-Face_t::_P);
    650 
    651     if (nbr(0,0,-1) >= 0) _pack_faceZ(f.z0, 0);
    652     if (nbr(0,0,1) >= 0)  _pack_faceZ(f.z1, Nz-Face_t::_P);
    653 }
    654 
    655 
    656 template <typename Tinterp>
    657 void InterpolatorMPI<Tinterp>::_unpack_faces(const Faces& f, const MPInbr& nbr)
    658 {
    659     const int Nx = this->m_data.Nx();
    660     const int Ny = this->m_data.Ny();
    661     const int Nz = this->m_data.Nz();
    662 
    663     if (nbr(-1,0,0) >= 0) _upack_faceX(f.x0, -Face_t::_P);
    664     else                  _halo_faceX(0, -Face_t::_P);
    665     if (nbr(1,0,0) >= 0)  _upack_faceX(f.x1, Nx);
    666     else                  _halo_faceX(Nx-1, Nx);
    667 
    668     if (nbr(0,-1,0) >= 0) _upack_faceY(f.y0, -Face_t::_P);
    669     else                  _halo_faceY(0, -Face_t::_P);
    670     if (nbr(0,1,0) >= 0)  _upack_faceY(f.y1, Ny);
    671     else                  _halo_faceY(Ny-1, Ny);
    672 
    673     if (nbr(0,0,-1) >= 0) _upack_faceZ(f.z0, -Face_t::_P);
    674     else                  _halo_faceZ(0, -Face_t::_P);
    675     if (nbr(0,0,1) >= 0)  _upack_faceZ(f.z1, Nz);
    676     else                  _halo_faceZ(Nz-1, Nz);
    677 }
    678 
    679 
    680 template <typename Tinterp>
    681 void InterpolatorMPI<Tinterp>::_pack_edges(Edges& e, const MPInbr& nbr)
    682 {
    683     const int Nx = this->m_data.Nx();
    684     const int Ny = this->m_data.Ny();
    685     const int Nz = this->m_data.Nz();
    686 
    687     if (nbr(0,-1,-1) >= 0) _pack_edgeX(e.x0, 0, 0);
    688     if (nbr(0,1,-1) >= 0)  _pack_edgeX(e.x1, Ny-Edge_t::_P, 0);
    689     if (nbr(0,1,1) >= 0)   _pack_edgeX(e.x2, Ny-Edge_t::_P, Nz-Edge_t::_Q);
    690     if (nbr(0,-1,1) >= 0)  _pack_edgeX(e.x3, 0, Nz-Edge_t::_Q);
    691 
    692     if (nbr(-1,0,-1) >= 0) _pack_edgeY(e.y0, 0, 0);
    693     if (nbr(-1,0,1) >= 0)  _pack_edgeY(e.y1, 0, Nz-Edge_t::_Q);
    694     if (nbr(1,0,1) >= 0)   _pack_edgeY(e.y2, Nx-Edge_t::_P, Nz-Edge_t::_Q);
    695     if (nbr(1,0,-1) >= 0)  _pack_edgeY(e.y3, Nx-Edge_t::_P, 0);
    696 
    697     if (nbr(-1,-1,0) >= 0) _pack_edgeZ(e.z0, 0, 0);
    698     if (nbr(1,-1,0) >= 0)  _pack_edgeZ(e.z1, Nx-Edge_t::_P, 0);
    699     if (nbr(1,1,0) >= 0)   _pack_edgeZ(e.z2, Nx-Edge_t::_P, Ny-Edge_t::_Q);
    700     if (nbr(-1,1,0) >= 0)  _pack_edgeZ(e.z3, 0, Ny-Edge_t::_Q);
    701 }
    702 
    703 
    704 template <typename Tinterp>
    705 void InterpolatorMPI<Tinterp>::_unpack_edges(const Edges& e, const MPInbr& nbr)
    706 {
    707     const int Nx = this->m_data.Nx();
    708     const int Ny = this->m_data.Ny();
    709     const int Nz = this->m_data.Nz();
    710 
    711     if (nbr(0,-1,-1) >= 0) _upack_edgeX(e.x0, -Edge_t::_P, -Edge_t::_Q);
    712     else                   _halo_edgeX(0, 0, -Edge_t::_P, -Edge_t::_Q);
    713     if (nbr(0,1,-1) >= 0)  _upack_edgeX(e.x1, Ny, -Edge_t::_Q);
    714     else                   _halo_edgeX(Ny-1, 0, Ny, -Edge_t::_Q);
    715     if (nbr(0,1,1) >= 0)   _upack_edgeX(e.x2, Ny, Nz);
    716     else                   _halo_edgeX(Ny-1, Nz-1, Ny, Nz);
    717     if (nbr(0,-1,1) >= 0)  _upack_edgeX(e.x3, -Edge_t::_P, Nz);
    718     else                   _halo_edgeX(0, Nz-1, -Edge_t::_P, Nz);
    719 
    720     if (nbr(-1,0,-1) >= 0) _upack_edgeY(e.y0, -Edge_t::_P, -Edge_t::_Q);
    721     else                   _halo_edgeY(0, 0, -Edge_t::_P, -Edge_t::_Q);
    722     if (nbr(-1,0,1) >= 0)  _upack_edgeY(e.y1, -Edge_t::_P, Nz);
    723     else                   _halo_edgeY(0, Nz-1, -Edge_t::_P, Nz);
    724     if (nbr(1,0,1) >= 0)   _upack_edgeY(e.y2, Nx, Nz);
    725     else                   _halo_edgeY(Nx-1, Nz-1, Nx, Nz);
    726     if (nbr(1,0,-1) >= 0)  _upack_edgeY(e.y3, Nx, -Edge_t::_Q);
    727     else                   _halo_edgeY(Nx-1, 0, Nx, -Edge_t::_Q);
    728 
    729     if (nbr(-1,-1,0) >= 0) _upack_edgeZ(e.z0, -Edge_t::_P, -Edge_t::_Q);
    730     else                   _halo_edgeZ(0, 0, -Edge_t::_P, -Edge_t::_Q);
    731     if (nbr(1,-1,0) >= 0)  _upack_edgeZ(e.z1, Nx, -Edge_t::_Q);
    732     else                   _halo_edgeZ(Nx-1, 0, Nx, -Edge_t::_Q);
    733     if (nbr(1,1,0) >= 0)   _upack_edgeZ(e.z2, Nx, Ny);
    734     else                   _halo_edgeZ(Nx-1, Ny-1, Nx, Ny);
    735     if (nbr(-1,1,0) >= 0)  _upack_edgeZ(e.z3, -Edge_t::_P, Ny);
    736     else                   _halo_edgeZ(0, Ny-1, -Edge_t::_P, Ny);
    737 }
    738 
    739 
    740 template <typename Tinterp>
    741 void InterpolatorMPI<Tinterp>::_pack_corners(Corners& c, const MPInbr& nbr)
    742 {
    743     const int Nx = this->m_data.Nx();
    744     const int Ny = this->m_data.Ny();
    745     const int Nz = this->m_data.Nz();
    746 
    747     if (nbr(-1,-1,-1) >= 0) _pack_corner(c.x00, 0, 0, 0);
    748     if (nbr(-1,1,-1) >= 0)  _pack_corner(c.x01, 0, Ny-Corner_t::_Q, 0);
    749     if (nbr(-1,1,1) >= 0)   _pack_corner(c.x02, 0, Ny-Corner_t::_Q, Nz-Corner_t::_R);
    750     if (nbr(-1,-1,1) >= 0)  _pack_corner(c.x03, 0, 0, Nz-Corner_t::_R);
    751 
    752     if (nbr(1,-1,-1) >= 0) _pack_corner(c.x10, Nx-Corner_t::_P, 0, 0);
    753     if (nbr(1,1,-1) >= 0)  _pack_corner(c.x11, Nx-Corner_t::_P, Ny-Corner_t::_Q, 0);
    754     if (nbr(1,1,1) >= 0)   _pack_corner(c.x12, Nx-Corner_t::_P, Ny-Corner_t::_Q, Nz-Corner_t::_R);
    755     if (nbr(1,-1,1) >= 0)  _pack_corner(c.x13, Nx-Corner_t::_P, 0, Nz-Corner_t::_R);
    756 }
    757 
    758 
    759 template <typename Tinterp>
    760 void InterpolatorMPI<Tinterp>::_unpack_corners(const Corners& c, const MPInbr& nbr)
    761 {
    762     const int Nx = this->m_data.Nx();
    763     const int Ny = this->m_data.Ny();
    764     const int Nz = this->m_data.Nz();
    765 
    766     if (nbr(-1,-1,-1) >= 0) _upack_corner(c.x00, -Corner_t::_P, -Corner_t::_Q, -Corner_t::_R);
    767     else                    _halo_corner(0, 0, 0, -Corner_t::_P, -Corner_t::_Q, -Corner_t::_R);
    768     if (nbr(-1,1,-1) >= 0)  _upack_corner(c.x01, -Corner_t::_P, Ny, -Corner_t::_R);
    769     else                    _halo_corner(0, Ny-1, 0, -Corner_t::_P, Ny, -Corner_t::_R);
    770     if (nbr(-1,1,1) >= 0)   _upack_corner(c.x02, -Corner_t::_P, Ny, Nz);
    771     else                    _halo_corner(0, Ny-1, Nz-1, -Corner_t::_P, Ny, Nz);
    772     if (nbr(-1,-1,1) >= 0)  _upack_corner(c.x03, -Corner_t::_P, -Corner_t::_Q, Nz);
    773     else                    _halo_corner(0, 0, Nz-1, -Corner_t::_P, -Corner_t::_Q, Nz);
    774 
    775     if (nbr(1,-1,-1) >= 0) _upack_corner(c.x10, Nx, -Corner_t::_Q, -Corner_t::_R);
    776     else                   _halo_corner(Nx-1, 0, 0, Nx, -Corner_t::_Q, -Corner_t::_R);
    777     if (nbr(1,1,-1) >= 0)  _upack_corner(c.x11, Nx, Ny, -Corner_t::_R);
    778     else                   _halo_corner(Nx-1, Ny-1, 0, Nx, Ny, -Corner_t::_R);
    779     if (nbr(1,1,1) >= 0)   _upack_corner(c.x12, Nx, Ny, Nz);
    780     else                   _halo_corner(Nx-1, Ny-1, Nz-1, Nx, Ny, Nz);
    781     if (nbr(1,-1,1) >= 0)  _upack_corner(c.x13, Nx, -Corner_t::_Q, Nz);
    782     else                   _halo_corner(Nx-1, 0, Nz-1, Nx, -Corner_t::_Q, Nz);
    783 }
    784 
    785 // PUP kernels
    786 #include "InterpolatorPUPMPI.h"
    787 
    788 #endif /* INTERPOLATORMPI_H_FKS9KH2J */