InterpolatorMPI.h (31046B)
1 // File : InterpolatorMPI.h 2 // Date : Mon Nov 28 16:15:41 2016 3 // Author : Fabian Wermelinger 4 // Description: MPI'ed Interpolator 5 // Copyright 2016 ETH Zurich. All Rights Reserved. 6 #ifndef INTERPOLATORMPI_H_FKS9KH2J 7 #define INTERPOLATORMPI_H_FKS9KH2J 8 9 #include <cassert> 10 #include <vector> 11 #include <cmath> 12 #include <functional> 13 #include <mpi.h> 14 #include "Interpolator.h" 15 16 #ifdef _FLOAT_PRECISION_ 17 #define MPIREAL MPI_FLOAT 18 #else 19 #define MPIREAL MPI_DOUBLE 20 #endif 21 22 class MPInbr 23 { 24 public: 25 MPInbr(const MPI_Comm comm) 26 { 27 // assumes comm is a communicator created with MPI_Cart_create 28 int myidx[3]; 29 int dims[3]; 30 int periods[3]; 31 MPI_Cart_get(comm, 3, dims, periods, myidx); 32 33 for (int i=0; i<27; ++i) 34 { 35 m_nbr[i] = -1; 36 int off[3] = {(i%3)-1, ((i/3)%3)-1, (i/9)-1}; 37 int coords[3]; 38 for (int j=0; j<3; ++j) 39 coords[j] = myidx[j] + off[j]; 40 41 if (coords[0] < 0 || coords[0] >= dims[0]) continue; 42 if (coords[1] < 0 || coords[1] >= dims[1]) continue; 43 if (coords[2] < 0 || coords[2] >= dims[2]) continue; 44 45 int& nbr = m_nbr[(off[0]+1) + (off[1]+1)*3 + (off[2]+1)*9]; 46 MPI_Cart_rank(comm, coords, &nbr); 47 } 48 } 49 50 inline int operator()(const int ix, const int iy, const int iz) const 51 { 52 assert(ix >= -1 && ix <= 1); 53 assert(iy >= -1 && iy <= 1); 54 assert(iz >= -1 && iz <= 1); 55 return m_nbr[(ix+1) + (iy+1)*3 + (iz+1)*9]; 56 } 57 58 private: 59 int m_nbr[27]; 60 }; 61 62 struct CubeHalo 63 { 64 Faces faces; 65 Edges edges; 66 Corners corners; 67 68 CubeHalo() {} 69 CubeHalo(const int Nx, const int Ny, const int Nz) : 70 faces(Ny,Nz, Nx,Nz, Nx,Ny), 71 edges(Nx, Ny, Nz) 72 {} 73 void alloc(const int Nx, const int Ny, const int Nz) 74 { 75 faces.alloc(Ny,Nz, Nx,Nz, Nx,Ny); 76 edges.alloc(Nx, Ny, Nz); 77 } 78 }; 79 80 template <typename Tinterp> 81 class InterpolatorMPI : public Interpolator<Tinterp> 82 { 83 public: 84 InterpolatorMPI(ArgumentParser& p, const MPI_Comm comm=MPI_COMM_WORLD) : 85 Interpolator<Tinterp>(), 86 m_PESize{p("xpesize").asInt(1),p("ypesize").asInt(1),p("zpesize").asInt(1)} 87 { 88 this->m_extent = p("extent").asDouble(1.0); 89 int periodic[3] = {false}; 90 91 MPI_Cart_create(comm, 3, m_PESize, periodic, true, &m_cartcomm); 92 93 int world_size; 94 MPI_Comm_size(m_cartcomm, &world_size); 95 assert(m_PESize[0]*m_PESize[1]*m_PESize[2] == world_size); 96 97 int myrank; 98 MPI_Comm_rank(m_cartcomm, &myrank); 99 MPI_Cart_coords(m_cartcomm, myrank, 3, m_myPEIndex); 100 m_isroot = (0 == myrank); 101 102 // load the data 103 std::chrono::time_point<std::chrono::system_clock> start, end; 104 start = std::chrono::system_clock::now(); 105 if (p("load").asString("h5") == "h5") this->_load_h5_MPI(p); 106 else if (p("load").asString("h5") == "wavelet") this->_load_wavelet_MPI(p); 107 else 108 { 109 if (m_isroot) 110 std::cerr << "ERROR: InterpolatorMPI: Undefined loader \"" << p("load").asString() << "\"" << std::endl; 111 abort(); 112 } 113 end = std::chrono::system_clock::now(); 114 std::chrono::duration<double> elapsed_seconds = end-start; 115 if (m_isroot) 116 std::cout << "Loading data done. Elapsed time = " << elapsed_seconds.count() << "s" << std::endl; 117 118 // cell centered or nodal values? 119 if (p("data").asString("cell") == "cell") this->_nCells = [](const int N) { return N; }; 120 else if (p("data").asString("cell") == "node") this->_nCells = [](const int N) { return N-1; }; 121 else 122 { 123 if (m_isroot) 124 std::cerr << "ERROR: InterpolatorMPI: Unknown data attribute \"" << p("data").asString() << "\"" << std::endl; 125 abort(); 126 } 127 128 const int Nx = m_PESize[0]*this->m_data.Nx(); 129 const int Ny = m_PESize[1]*this->m_data.Ny(); 130 const int Nz = m_PESize[2]*this->m_data.Nz(); 131 const int Nmax = std::max(Nx, std::max(Ny, Nz)); 132 assert(Nmax > 1); 133 this->m_h = this->m_extent/this->_nCells(Nmax); 134 this->m_invh = 1.0/this->m_h; 135 this->m_extentX = this->m_h*this->_nCells(Nx); 136 this->m_extentY = this->m_h*this->_nCells(Ny); 137 this->m_extentZ = this->m_h*this->_nCells(Nz); 138 139 // init coordinate transform 140 this->_physicalDataPos = [this](const int i) { return i*this->m_h; }; 141 this->m_isNodal = 1; 142 if (p("data").asString("cell") == "cell") 143 { 144 this->_physicalDataPos = [this](const int i) { return (0.5+i)*this->m_h; }; 145 this->m_isNodal = 0; 146 } 147 const Real Ox = (this->m_extentX / m_PESize[0]) * m_myPEIndex[0]; 148 const Real Oy = (this->m_extentY / m_PESize[1]) * m_myPEIndex[1]; 149 const Real Oz = (this->m_extentZ / m_PESize[2]) * m_myPEIndex[2]; 150 Real origin[3]; 151 origin[0] = this->_physicalDataPos(ceil(Ox*this->m_invh)) - Ox; 152 origin[1] = this->_physicalDataPos(ceil(Oy*this->m_invh)) - Oy; 153 origin[2] = this->_physicalDataPos(ceil(Oz*this->m_invh)) - Oz; 154 assert(origin[0] >= 0.0); 155 assert(origin[1] >= 0.0); 156 assert(origin[2] >= 0.0); 157 for (int i = 0; i < 3; ++i) 158 this->m_origin_off[i] = origin[i]; 159 160 // fetch halo cells 161 m_sendbuf.alloc(this->m_data.Nx(), this->m_data.Ny(), this->m_data.Nz()); 162 m_recvbuf.alloc(this->m_data.Nx(), this->m_data.Ny(), this->m_data.Nz()); 163 164 start = std::chrono::system_clock::now(); 165 _fetch_halo(); 166 end = std::chrono::system_clock::now(); 167 elapsed_seconds = end-start; 168 if (m_isroot) 169 std::cout << "Exchanging messages data done. Elapsed time = " << elapsed_seconds.count() << "s" << std::endl; 170 } 171 virtual ~InterpolatorMPI() {} 172 173 inline int getNx() const { return m_PESize[0]*this->m_data.Nx(); } 174 inline int getNy() const { return m_PESize[1]*this->m_data.Ny(); } 175 inline int getNz() const { return m_PESize[2]*this->m_data.Nz(); } 176 inline int getLocalNx() const { return this->m_data.Nx(); } 177 inline int getLocalNy() const { return this->m_data.Ny(); } 178 inline int getLocalNz() const { return this->m_data.Nz(); } 179 inline MPI_Comm getComm() const { return m_cartcomm; } 180 inline void getPESize(int ps[3]) const { for (int i = 0; i < 3; ++i) ps[i] = m_PESize[i]; } 181 inline void getPEIndex(int pi[3]) const { for (int i = 0; i < 3; ++i) pi[i] = m_myPEIndex[i]; } 182 inline bool isroot() const {return m_isroot; } 183 184 185 private: 186 MPI_Comm m_cartcomm; 187 int m_PESize[3]; 188 int m_myPEIndex[3]; 189 bool m_isroot; 190 191 // loader 192 void _load_h5_MPI(ArgumentParser& p); 193 void _load_wavelet_MPI(ArgumentParser& p); 194 195 // halo handling 196 using func_t = std::function<void(const int, const int, const int)>; 197 CubeHalo m_sendbuf; 198 CubeHalo m_recvbuf; 199 void _fetch_halo(); 200 201 void _send_faces(const MPInbr& nbrs, const Faces& f, std::vector<MPI_Request>& f_req) const; 202 void _recv_faces(const MPInbr& nbrs, Faces& f, std::vector<MPI_Request>& f_req) const; 203 void _pack_faces(Faces& f, const MPInbr& nbr); 204 void _unpack_faces(const Faces& f, const MPInbr& nbr); 205 inline void _process_faceX(func_t& eval); 206 inline void _process_faceY(func_t& eval); 207 inline void _process_faceZ(func_t& eval); 208 inline void _pack_faceX(Face_t& f, const int _s0); 209 inline void _pack_faceY(Face_t& f, const int _s0); 210 inline void _pack_faceZ(Face_t& f, const int _s0); 211 inline void _upack_faceX(const Face_t& f, const int _d0); 212 inline void _upack_faceY(const Face_t& f, const int _d0); 213 inline void _upack_faceZ(const Face_t& f, const int _d0); 214 inline void _halo_faceX(const int _s0, const int _d0); 215 inline void _halo_faceY(const int _s0, const int _d0); 216 inline void _halo_faceZ(const int _s0, const int _d0); 217 218 void _send_edges(const MPInbr& nbrs, const Edges& e, std::vector<MPI_Request>& e_req) const; 219 void _recv_edges(const MPInbr& nbrs, Edges& e, std::vector<MPI_Request>& e_req) const; 220 void _pack_edges(Edges& f, const MPInbr& nbr); 221 void _unpack_edges(const Edges& e, const MPInbr& nbr); 222 inline void _process_edgeX(func_t& eval); 223 inline void _process_edgeY(func_t& eval); 224 inline void _process_edgeZ(func_t& eval); 225 inline void _pack_edgeX(Edge_t& e, const int _s0, const int _s1); 226 inline void _pack_edgeY(Edge_t& e, const int _s0, const int _s1); 227 inline void _pack_edgeZ(Edge_t& e, const int _s0, const int _s1); 228 inline void _upack_edgeX(const Edge_t& e, const int _d0, const int _d1); 229 inline void _upack_edgeY(const Edge_t& e, const int _d0, const int _d1); 230 inline void _upack_edgeZ(const Edge_t& e, const int _d0, const int _d1); 231 inline void _halo_edgeX(const int _s0, const int _s1, const int _d0, const int _d1); 232 inline void _halo_edgeY(const int _s0, const int _s1, const int _d0, const int _d1); 233 inline void _halo_edgeZ(const int _s0, const int _s1, const int _d0, const int _d1); 234 235 void _send_corners(const MPInbr& nbrs, const Corners& c, std::vector<MPI_Request>& c_req) const; 236 void _recv_corners(const MPInbr& nbrs, Corners& c, std::vector<MPI_Request>& c_req) const; 237 void _pack_corners(Corners& c, const MPInbr& nbr); 238 void _unpack_corners(const Corners& c, const MPInbr& nbr); 239 inline void _process_corner(func_t& eval); 240 inline void _pack_corner(Corner_t& c, const int _s0, const int _s1, const int _s2); 241 inline void _upack_corner(const Corner_t& c, const int _d0, const int _d1, const int _d2); 242 inline void _halo_corner(const int _s0, const int _s1, const int _s2, const int _d0, const int _d1, const int _d2); 243 }; 244 245 246 template <typename Tinterp> 247 void InterpolatorMPI<Tinterp>::_load_h5_MPI(ArgumentParser& parser) 248 { 249 #ifdef _USE_HDF_ 250 const std::string group = parser("hdf5_group").asString("/data"); 251 const std::string filename = parser("file").asString(""); 252 if (filename == "") 253 { 254 if (m_isroot) 255 std::cerr << "ERROR: InterpolatorMPI: -file is not specified. No input file given" << std::endl; 256 abort(); 257 } 258 259 herr_t status; 260 hid_t file_id, dataset_id, dataspace_id, file_dataspace_id, fspace_id, fapl_id, mspace_id; 261 int ndims, NCH; 262 int maxDim[4]; 263 264 if (m_isroot) 265 { 266 hsize_t dims[4]; 267 file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); 268 dataset_id = H5Dopen(file_id, group.c_str(), H5P_DEFAULT); 269 file_dataspace_id = H5Dget_space(dataset_id); 270 ndims = H5Sget_simple_extent_dims(file_dataspace_id, dims, NULL); 271 status = H5Dclose(dataset_id); 272 status = H5Sclose(file_dataspace_id); 273 status = H5Fclose(file_id); 274 maxDim[2] = dims[0]; 275 maxDim[1] = dims[1]; 276 maxDim[0] = dims[2]; 277 maxDim[3] = dims[3]; 278 } 279 MPI_Bcast(maxDim, 4, MPI_INT, 0, m_cartcomm); 280 NCH = maxDim[3]; 281 282 if (maxDim[0] % m_PESize[0] != 0) 283 { 284 if (m_isroot) 285 std::cerr << "ERROR: InterpolatorMPI: Number of processes in x-direction is not an integer multiple of available data." << std::endl; 286 abort(); 287 } 288 if (maxDim[1] % m_PESize[1] != 0) 289 { 290 if (m_isroot) 291 std::cerr << "ERROR: InterpolatorMPI: Number of processes in y-direction is not an integer multiple of available data." << std::endl; 292 abort(); 293 } 294 if (maxDim[2] % m_PESize[2] != 0) 295 { 296 if (m_isroot) 297 std::cerr << "ERROR: InterpolatorMPI: Number of processes in z-direction is not an integer multiple of available data." << std::endl; 298 abort(); 299 } 300 const size_t NX = maxDim[0]/m_PESize[0]; 301 const size_t NY = maxDim[1]/m_PESize[1]; 302 const size_t NZ = maxDim[2]/m_PESize[2]; 303 304 const size_t num_elem = NX*NY*NZ*NCH; 305 Real* const data = new Real[num_elem]; 306 307 hsize_t count[4] = {NZ, NY, NX, NCH}; 308 hsize_t dims[4] = {maxDim[2], maxDim[1], maxDim[0], NCH}; 309 hsize_t offset[4] = {m_myPEIndex[2]*NZ, m_myPEIndex[1]*NY, m_myPEIndex[0]*NX, 0}; 310 311 H5open(); 312 fapl_id = H5Pcreate(H5P_FILE_ACCESS); 313 status = H5Pset_fapl_mpio(fapl_id, m_cartcomm, MPI_INFO_NULL); 314 file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, fapl_id); 315 status = H5Pclose(fapl_id); 316 317 dataset_id = H5Dopen2(file_id, group.c_str(), H5P_DEFAULT); 318 fapl_id = H5Pcreate(H5P_DATASET_XFER); 319 H5Pset_dxpl_mpio(fapl_id, H5FD_MPIO_COLLECTIVE); 320 321 fspace_id = H5Dget_space(dataset_id); 322 H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset, NULL, count, NULL); 323 324 mspace_id = H5Screate_simple(4, count, NULL); 325 status = H5Dread(dataset_id, HDF_PRECISION, mspace_id, fspace_id, fapl_id, data); 326 327 if (NCH > 1) 328 { 329 const int channel = parser("channel").asInt(0); // data channel 330 const bool magnitude = parser("magnitude").asBool(false); // vector magnitude (only if NCH == 3) 331 if (magnitude && NCH == 3) 332 { 333 int k = 0; 334 for (int i = 0; i < num_elem; i += NCH) 335 { 336 const Real a = data[i+0]; 337 const Real b = data[i+1]; 338 const Real c = data[i+2]; 339 data[k++] = std::sqrt(a*a + b*b + c*c); 340 } 341 342 } 343 else 344 { 345 assert(channel < NCH); 346 int k = 0; 347 for (int i = 0; i < num_elem; i += NCH) 348 data[k++] = data[i+channel]; 349 } 350 } 351 this->m_data = std::move(Matrix_t(NX, NY, NZ, data)); 352 delete [] data; 353 #endif /* _USE_HDF_ */ 354 } 355 356 357 template <typename Tinterp> 358 void InterpolatorMPI<Tinterp>::_load_wavelet_MPI(ArgumentParser& p) 359 { 360 #ifdef _USE_CUBISMZ_ 361 const bool byte_swap = p("-swap").asBool(false); 362 const int wavelet_type = p("-wtype").asInt(1); 363 const std::string filename = p("file").asString(""); 364 if (filename == "") 365 { 366 if (m_isroot) 367 std::cerr << "ERROR: InterpolatorMPI: -file is not specified. No input file given" << std::endl; 368 abort(); 369 } 370 371 Reader_WaveletCompressionMPI myreader(m_cartcomm, filename, byte_swap, wavelet_type); 372 myreader.load_file(); 373 const int NBX = myreader.xblocks(); 374 const int NBY = myreader.yblocks(); 375 const int NBZ = myreader.zblocks(); 376 assert(NBX % m_PESize[0] == 0); 377 assert(NBY % m_PESize[1] == 0); 378 assert(NBZ % m_PESize[2] == 0); 379 const int myNBX = NBX/m_PESize[0]; 380 const int myNBY = NBY/m_PESize[1]; 381 const int myNBZ = NBZ/m_PESize[2]; 382 383 const int maxDim[3] = {myNBX*_BLOCKSIZE_, myNBY*_BLOCKSIZE_, myNBZ*_BLOCKSIZE_}; 384 const size_t num_elem = static_cast<size_t>(maxDim[0]) * maxDim[1] * maxDim[2]; 385 Real* const data = new Real[num_elem]; 386 387 const int nBlocks = myNBX * myNBY * myNBZ; 388 #pragma omp parallel 389 { 390 Real blockdata[_BLOCKSIZE_][_BLOCKSIZE_][_BLOCKSIZE_]; 391 #pragma omp for 392 for (int i = 0; i < nBlocks; ++i) 393 { 394 const int ix = i%myNBX; 395 const int iy = (i/myNBX)%myNBY; 396 const int iz = (i/(myNBX*myNBY))%myNBZ; 397 const double zratio = myreader.load_block2( 398 ix+myNBX*m_myPEIndex[0], 399 iy+myNBY*m_myPEIndex[1], 400 iz+myNBZ*m_myPEIndex[2], 401 blockdata); 402 403 for (int z=0; z < _BLOCKSIZE_; ++z) 404 for (int y=0; y < _BLOCKSIZE_; ++y) 405 { 406 assert(iy*_BLOCKSIZE_+y < maxDim[1]); 407 assert(iz*_BLOCKSIZE_+z < maxDim[2]); 408 const size_t offset = _BLOCKSIZE_*(static_cast<size_t>(ix) + myNBX*(y+iy*_BLOCKSIZE_ + (z+static_cast<size_t>(iz)*_BLOCKSIZE_)*myNBY*_BLOCKSIZE_)); 409 Real* const dst = data + offset; 410 memcpy(dst, &blockdata[z][y][0], _BLOCKSIZE_*sizeof(Real)); 411 } 412 } 413 } 414 415 this->m_data = std::move(Matrix_t(maxDim[0], maxDim[1], maxDim[2], data)); 416 delete [] data; 417 #else 418 fprintf(stderr, "WARNING: Executable was compiled without wavelet compressor support...\n"); 419 #endif /* _USE_CUBISMZ_ */ 420 } 421 422 423 template <typename Tinterp> 424 void InterpolatorMPI<Tinterp>::_fetch_halo() 425 { 426 // concept: 427 // 0.) prepare neighbor info 428 // 1.) pack buffers and send asynchronous messages 429 // 2.) unpack buffers 430 431 // 0.) 432 MPInbr mynbrs(m_cartcomm); 433 434 // 1.) 435 const int Nx = this->m_data.Nx(); 436 const int Ny = this->m_data.Ny(); 437 const int Nz = this->m_data.Nz(); 438 439 // faces 440 std::vector<MPI_Request> fpend_recv; 441 _recv_faces(mynbrs, m_recvbuf.faces, fpend_recv); 442 443 std::vector<MPI_Request> fpend_send; 444 _pack_faces(m_sendbuf.faces, mynbrs); 445 _send_faces(mynbrs, m_sendbuf.faces, fpend_send); 446 447 // edges 448 std::vector<MPI_Request> epend_recv; 449 _recv_edges(mynbrs, m_recvbuf.edges, epend_recv); 450 451 std::vector<MPI_Request> epend_send; 452 _pack_edges(m_sendbuf.edges, mynbrs); 453 _send_edges(mynbrs, m_sendbuf.edges, epend_send); 454 455 // corners 456 std::vector<MPI_Request> cpend_recv; 457 _recv_corners(mynbrs, m_recvbuf.corners, cpend_recv); 458 459 std::vector<MPI_Request> cpend_send; 460 _pack_corners(m_sendbuf.corners, mynbrs); 461 _send_corners(mynbrs, m_sendbuf.corners, cpend_send); 462 463 // 2.) 464 std::vector<MPI_Status> fstat_recv(fpend_recv.size()); 465 MPI_Waitall(fpend_recv.size(), fpend_recv.data(), fstat_recv.data()); 466 _unpack_faces(m_recvbuf.faces, mynbrs); 467 468 std::vector<MPI_Status> estat_recv(epend_recv.size()); 469 MPI_Waitall(epend_recv.size(), epend_recv.data(), estat_recv.data()); 470 _unpack_edges(m_recvbuf.edges, mynbrs); 471 472 std::vector<MPI_Status> cstat_recv(cpend_recv.size()); 473 MPI_Waitall(cpend_recv.size(), cpend_recv.data(), cstat_recv.data()); 474 _unpack_corners(m_recvbuf.corners, mynbrs); 475 } 476 477 478 template <typename Tinterp> 479 void InterpolatorMPI<Tinterp>::_send_faces(const MPInbr& nbrs, const Faces& f, std::vector<MPI_Request>& f_req) const 480 { 481 const int dst[6] = {nbrs(-1,0,0), nbrs(1,0,0), nbrs(0,-1,0), nbrs(0,1,0), nbrs(0,0,-1), nbrs(0,0,1)}; 482 Real* const data[6] = {f.x0.data, f.x1.data, f.y0.data, f.y1.data, f.z0.data, f.z1.data}; 483 const int Nelem[6] = {f.x0._Nelem, f.x1._Nelem, f.y0._Nelem, f.y1._Nelem, f.z0._Nelem, f.z1._Nelem}; 484 const int tagbase = 0; 485 for (int i = 0; i < 6; ++i) 486 { 487 const int dir = i/2; 488 const int side = i%2; 489 if (dst[i] >= 0) 490 { 491 MPI_Request req; 492 MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], tagbase+dir*2+side, m_cartcomm, &req); 493 f_req.push_back(req); 494 } 495 } 496 } 497 498 499 template <typename Tinterp> 500 void InterpolatorMPI<Tinterp>::_recv_faces(const MPInbr& nbrs, Faces& f, std::vector<MPI_Request>& f_req) const 501 { 502 const int src[6] = {nbrs(-1,0,0), nbrs(1,0,0), nbrs(0,-1,0), nbrs(0,1,0), nbrs(0,0,-1), nbrs(0,0,1)}; 503 Real* const data[6] = {f.x0.data, f.x1.data, f.y0.data, f.y1.data, f.z0.data, f.z1.data}; 504 const int Nelem[6] = {f.x0._Nelem, f.x1._Nelem, f.y0._Nelem, f.y1._Nelem, f.z0._Nelem, f.z1._Nelem}; 505 const int tagbase = 0; 506 for (int i = 0; i < 6; ++i) 507 { 508 const int dir = i/2; 509 const int side = (i+1)%2; 510 if (src[i] >= 0) 511 { 512 MPI_Request req; 513 MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], tagbase+dir*2+side, m_cartcomm, &req); 514 f_req.push_back(req); 515 } 516 } 517 } 518 519 520 template <typename Tinterp> 521 void InterpolatorMPI<Tinterp>::_send_edges(const MPInbr& nbrs, const Edges& e, std::vector<MPI_Request>& e_req) const 522 { 523 const int dstX[4] = {nbrs(0,-1,-1), nbrs(0,1,-1), nbrs(0,1,1), nbrs(0,-1,1)}; 524 const int dstY[4] = {nbrs(-1,0,-1), nbrs(-1,0,1), nbrs(1,0,1), nbrs(1,0,-1)}; 525 const int dstZ[4] = {nbrs(-1,-1,0), nbrs(1,-1,0), nbrs(1,1,0), nbrs(-1,1,0)}; 526 Real* const dataX[4] = {e.x0.data, e.x1.data, e.x2.data, e.x3.data}; 527 Real* const dataY[4] = {e.y0.data, e.y1.data, e.y2.data, e.y3.data}; 528 Real* const dataZ[4] = {e.z0.data, e.z1.data, e.z2.data, e.z3.data}; 529 const int NelemX[4] = {e.x0._Nelem, e.x1._Nelem, e.x2._Nelem, e.x3._Nelem}; 530 const int NelemY[4] = {e.y0._Nelem, e.y1._Nelem, e.y2._Nelem, e.y3._Nelem}; 531 const int NelemZ[4] = {e.z0._Nelem, e.z1._Nelem, e.z2._Nelem, e.z3._Nelem}; 532 const int tagbase = 6; 533 for (int i = 0; i < 4; ++i) 534 { 535 const int edge = i; 536 if (dstX[i] >= 0) 537 { 538 MPI_Request req; 539 MPI_Isend(dataX[i], NelemX[i], MPIREAL, dstX[i], tagbase+0*4+edge, m_cartcomm, &req); 540 e_req.push_back(req); 541 } 542 if (dstY[i] >= 0) 543 { 544 MPI_Request req; 545 MPI_Isend(dataY[i], NelemY[i], MPIREAL, dstY[i], tagbase+1*4+edge, m_cartcomm, &req); 546 e_req.push_back(req); 547 } 548 if (dstZ[i] >= 0) 549 { 550 MPI_Request req; 551 MPI_Isend(dataZ[i], NelemZ[i], MPIREAL, dstZ[i], tagbase+2*4+edge, m_cartcomm, &req); 552 e_req.push_back(req); 553 } 554 } 555 } 556 557 558 template <typename Tinterp> 559 void InterpolatorMPI<Tinterp>::_recv_edges(const MPInbr& nbrs, Edges& e, std::vector<MPI_Request>& e_req) const 560 { 561 const int srcX[4] = {nbrs(0,-1,-1), nbrs(0,1,-1), nbrs(0,1,1), nbrs(0,-1,1)}; 562 const int srcY[4] = {nbrs(-1,0,-1), nbrs(-1,0,1), nbrs(1,0,1), nbrs(1,0,-1)}; 563 const int srcZ[4] = {nbrs(-1,-1,0), nbrs(1,-1,0), nbrs(1,1,0), nbrs(-1,1,0)}; 564 Real* const dataX[4] = {e.x0.data, e.x1.data, e.x2.data, e.x3.data}; 565 Real* const dataY[4] = {e.y0.data, e.y1.data, e.y2.data, e.y3.data}; 566 Real* const dataZ[4] = {e.z0.data, e.z1.data, e.z2.data, e.z3.data}; 567 const int NelemX[4] = {e.x0._Nelem, e.x1._Nelem, e.x2._Nelem, e.x3._Nelem}; 568 const int NelemY[4] = {e.y0._Nelem, e.y1._Nelem, e.y2._Nelem, e.y3._Nelem}; 569 const int NelemZ[4] = {e.z0._Nelem, e.z1._Nelem, e.z2._Nelem, e.z3._Nelem}; 570 const int tagbase = 6; 571 for (int i = 0; i < 4; ++i) 572 { 573 const int edge = (i+2)%4; 574 if (srcX[i] >= 0) 575 { 576 MPI_Request req; 577 MPI_Irecv(dataX[i], NelemX[i], MPIREAL, srcX[i], tagbase+0*4+edge, m_cartcomm, &req); 578 e_req.push_back(req); 579 } 580 if (srcY[i] >= 0) 581 { 582 MPI_Request req; 583 MPI_Irecv(dataY[i], NelemY[i], MPIREAL, srcY[i], tagbase+1*4+edge, m_cartcomm, &req); 584 e_req.push_back(req); 585 } 586 if (srcZ[i] >= 0) 587 { 588 MPI_Request req; 589 MPI_Irecv(dataZ[i], NelemZ[i], MPIREAL, srcZ[i], tagbase+2*4+edge, m_cartcomm, &req); 590 e_req.push_back(req); 591 } 592 } 593 } 594 595 596 template <typename Tinterp> 597 void InterpolatorMPI<Tinterp>::_send_corners(const MPInbr& nbrs, const Corners& c, std::vector<MPI_Request>& c_req) const 598 { 599 const int dst[8] = {nbrs(-1,-1,-1), nbrs(-1,1,-1), nbrs(-1,1,1), nbrs(-1,-1,1), nbrs(1,-1,-1), nbrs(1,1,-1), nbrs(1,1,1), nbrs(1,-1,1)}; 600 Real* const data[8] = {c.x00.data, c.x01.data, c.x02.data, c.x03.data, c.x10.data, c.x11.data, c.x12.data, c.x13.data}; 601 const int Nelem[8] = {c.x00._Nelem, c.x01._Nelem, c.x02._Nelem, c.x03._Nelem, c.x10._Nelem, c.x11._Nelem, c.x12._Nelem, c.x13._Nelem}; 602 const int tagbase = 18; 603 for (int i = 0; i < 8; ++i) 604 { 605 const int cycl = i%4; 606 const int side = i/4; 607 if (dst[i] >= 0) 608 { 609 MPI_Request req; 610 MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], tagbase+side*4+cycl, m_cartcomm, &req); 611 c_req.push_back(req); 612 } 613 } 614 } 615 616 617 template <typename Tinterp> 618 void InterpolatorMPI<Tinterp>::_recv_corners(const MPInbr& nbrs, Corners& c, std::vector<MPI_Request>& c_req) const 619 { 620 const int src[8] = {nbrs(-1,-1,-1), nbrs(-1,1,-1), nbrs(-1,1,1), nbrs(-1,-1,1), nbrs(1,-1,-1), nbrs(1,1,-1), nbrs(1,1,1), nbrs(1,-1,1)}; 621 Real* const data[8] = {c.x00.data, c.x01.data, c.x02.data, c.x03.data, c.x10.data, c.x11.data, c.x12.data, c.x13.data}; 622 const int Nelem[8] = {c.x00._Nelem, c.x01._Nelem, c.x02._Nelem, c.x03._Nelem, c.x10._Nelem, c.x11._Nelem, c.x12._Nelem, c.x13._Nelem}; 623 const int tagbase = 18; 624 for (int i = 0; i < 8; ++i) 625 { 626 const int cycl = (i+2)%4; 627 const int side = (7-i)/4; 628 if (src[i] >= 0) 629 { 630 MPI_Request req; 631 MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], tagbase+side*4+cycl, m_cartcomm, &req); 632 c_req.push_back(req); 633 } 634 } 635 } 636 637 638 template <typename Tinterp> 639 void InterpolatorMPI<Tinterp>::_pack_faces(Faces& f, const MPInbr& nbr) 640 { 641 const int Nx = this->m_data.Nx(); 642 const int Ny = this->m_data.Ny(); 643 const int Nz = this->m_data.Nz(); 644 645 if (nbr(-1,0,0) >= 0) _pack_faceX(f.x0, 0); 646 if (nbr(1,0,0) >= 0) _pack_faceX(f.x1, Nx-Face_t::_P); 647 648 if (nbr(0,-1,0) >= 0) _pack_faceY(f.y0, 0); 649 if (nbr(0,1,0) >= 0) _pack_faceY(f.y1, Ny-Face_t::_P); 650 651 if (nbr(0,0,-1) >= 0) _pack_faceZ(f.z0, 0); 652 if (nbr(0,0,1) >= 0) _pack_faceZ(f.z1, Nz-Face_t::_P); 653 } 654 655 656 template <typename Tinterp> 657 void InterpolatorMPI<Tinterp>::_unpack_faces(const Faces& f, const MPInbr& nbr) 658 { 659 const int Nx = this->m_data.Nx(); 660 const int Ny = this->m_data.Ny(); 661 const int Nz = this->m_data.Nz(); 662 663 if (nbr(-1,0,0) >= 0) _upack_faceX(f.x0, -Face_t::_P); 664 else _halo_faceX(0, -Face_t::_P); 665 if (nbr(1,0,0) >= 0) _upack_faceX(f.x1, Nx); 666 else _halo_faceX(Nx-1, Nx); 667 668 if (nbr(0,-1,0) >= 0) _upack_faceY(f.y0, -Face_t::_P); 669 else _halo_faceY(0, -Face_t::_P); 670 if (nbr(0,1,0) >= 0) _upack_faceY(f.y1, Ny); 671 else _halo_faceY(Ny-1, Ny); 672 673 if (nbr(0,0,-1) >= 0) _upack_faceZ(f.z0, -Face_t::_P); 674 else _halo_faceZ(0, -Face_t::_P); 675 if (nbr(0,0,1) >= 0) _upack_faceZ(f.z1, Nz); 676 else _halo_faceZ(Nz-1, Nz); 677 } 678 679 680 template <typename Tinterp> 681 void InterpolatorMPI<Tinterp>::_pack_edges(Edges& e, const MPInbr& nbr) 682 { 683 const int Nx = this->m_data.Nx(); 684 const int Ny = this->m_data.Ny(); 685 const int Nz = this->m_data.Nz(); 686 687 if (nbr(0,-1,-1) >= 0) _pack_edgeX(e.x0, 0, 0); 688 if (nbr(0,1,-1) >= 0) _pack_edgeX(e.x1, Ny-Edge_t::_P, 0); 689 if (nbr(0,1,1) >= 0) _pack_edgeX(e.x2, Ny-Edge_t::_P, Nz-Edge_t::_Q); 690 if (nbr(0,-1,1) >= 0) _pack_edgeX(e.x3, 0, Nz-Edge_t::_Q); 691 692 if (nbr(-1,0,-1) >= 0) _pack_edgeY(e.y0, 0, 0); 693 if (nbr(-1,0,1) >= 0) _pack_edgeY(e.y1, 0, Nz-Edge_t::_Q); 694 if (nbr(1,0,1) >= 0) _pack_edgeY(e.y2, Nx-Edge_t::_P, Nz-Edge_t::_Q); 695 if (nbr(1,0,-1) >= 0) _pack_edgeY(e.y3, Nx-Edge_t::_P, 0); 696 697 if (nbr(-1,-1,0) >= 0) _pack_edgeZ(e.z0, 0, 0); 698 if (nbr(1,-1,0) >= 0) _pack_edgeZ(e.z1, Nx-Edge_t::_P, 0); 699 if (nbr(1,1,0) >= 0) _pack_edgeZ(e.z2, Nx-Edge_t::_P, Ny-Edge_t::_Q); 700 if (nbr(-1,1,0) >= 0) _pack_edgeZ(e.z3, 0, Ny-Edge_t::_Q); 701 } 702 703 704 template <typename Tinterp> 705 void InterpolatorMPI<Tinterp>::_unpack_edges(const Edges& e, const MPInbr& nbr) 706 { 707 const int Nx = this->m_data.Nx(); 708 const int Ny = this->m_data.Ny(); 709 const int Nz = this->m_data.Nz(); 710 711 if (nbr(0,-1,-1) >= 0) _upack_edgeX(e.x0, -Edge_t::_P, -Edge_t::_Q); 712 else _halo_edgeX(0, 0, -Edge_t::_P, -Edge_t::_Q); 713 if (nbr(0,1,-1) >= 0) _upack_edgeX(e.x1, Ny, -Edge_t::_Q); 714 else _halo_edgeX(Ny-1, 0, Ny, -Edge_t::_Q); 715 if (nbr(0,1,1) >= 0) _upack_edgeX(e.x2, Ny, Nz); 716 else _halo_edgeX(Ny-1, Nz-1, Ny, Nz); 717 if (nbr(0,-1,1) >= 0) _upack_edgeX(e.x3, -Edge_t::_P, Nz); 718 else _halo_edgeX(0, Nz-1, -Edge_t::_P, Nz); 719 720 if (nbr(-1,0,-1) >= 0) _upack_edgeY(e.y0, -Edge_t::_P, -Edge_t::_Q); 721 else _halo_edgeY(0, 0, -Edge_t::_P, -Edge_t::_Q); 722 if (nbr(-1,0,1) >= 0) _upack_edgeY(e.y1, -Edge_t::_P, Nz); 723 else _halo_edgeY(0, Nz-1, -Edge_t::_P, Nz); 724 if (nbr(1,0,1) >= 0) _upack_edgeY(e.y2, Nx, Nz); 725 else _halo_edgeY(Nx-1, Nz-1, Nx, Nz); 726 if (nbr(1,0,-1) >= 0) _upack_edgeY(e.y3, Nx, -Edge_t::_Q); 727 else _halo_edgeY(Nx-1, 0, Nx, -Edge_t::_Q); 728 729 if (nbr(-1,-1,0) >= 0) _upack_edgeZ(e.z0, -Edge_t::_P, -Edge_t::_Q); 730 else _halo_edgeZ(0, 0, -Edge_t::_P, -Edge_t::_Q); 731 if (nbr(1,-1,0) >= 0) _upack_edgeZ(e.z1, Nx, -Edge_t::_Q); 732 else _halo_edgeZ(Nx-1, 0, Nx, -Edge_t::_Q); 733 if (nbr(1,1,0) >= 0) _upack_edgeZ(e.z2, Nx, Ny); 734 else _halo_edgeZ(Nx-1, Ny-1, Nx, Ny); 735 if (nbr(-1,1,0) >= 0) _upack_edgeZ(e.z3, -Edge_t::_P, Ny); 736 else _halo_edgeZ(0, Ny-1, -Edge_t::_P, Ny); 737 } 738 739 740 template <typename Tinterp> 741 void InterpolatorMPI<Tinterp>::_pack_corners(Corners& c, const MPInbr& nbr) 742 { 743 const int Nx = this->m_data.Nx(); 744 const int Ny = this->m_data.Ny(); 745 const int Nz = this->m_data.Nz(); 746 747 if (nbr(-1,-1,-1) >= 0) _pack_corner(c.x00, 0, 0, 0); 748 if (nbr(-1,1,-1) >= 0) _pack_corner(c.x01, 0, Ny-Corner_t::_Q, 0); 749 if (nbr(-1,1,1) >= 0) _pack_corner(c.x02, 0, Ny-Corner_t::_Q, Nz-Corner_t::_R); 750 if (nbr(-1,-1,1) >= 0) _pack_corner(c.x03, 0, 0, Nz-Corner_t::_R); 751 752 if (nbr(1,-1,-1) >= 0) _pack_corner(c.x10, Nx-Corner_t::_P, 0, 0); 753 if (nbr(1,1,-1) >= 0) _pack_corner(c.x11, Nx-Corner_t::_P, Ny-Corner_t::_Q, 0); 754 if (nbr(1,1,1) >= 0) _pack_corner(c.x12, Nx-Corner_t::_P, Ny-Corner_t::_Q, Nz-Corner_t::_R); 755 if (nbr(1,-1,1) >= 0) _pack_corner(c.x13, Nx-Corner_t::_P, 0, Nz-Corner_t::_R); 756 } 757 758 759 template <typename Tinterp> 760 void InterpolatorMPI<Tinterp>::_unpack_corners(const Corners& c, const MPInbr& nbr) 761 { 762 const int Nx = this->m_data.Nx(); 763 const int Ny = this->m_data.Ny(); 764 const int Nz = this->m_data.Nz(); 765 766 if (nbr(-1,-1,-1) >= 0) _upack_corner(c.x00, -Corner_t::_P, -Corner_t::_Q, -Corner_t::_R); 767 else _halo_corner(0, 0, 0, -Corner_t::_P, -Corner_t::_Q, -Corner_t::_R); 768 if (nbr(-1,1,-1) >= 0) _upack_corner(c.x01, -Corner_t::_P, Ny, -Corner_t::_R); 769 else _halo_corner(0, Ny-1, 0, -Corner_t::_P, Ny, -Corner_t::_R); 770 if (nbr(-1,1,1) >= 0) _upack_corner(c.x02, -Corner_t::_P, Ny, Nz); 771 else _halo_corner(0, Ny-1, Nz-1, -Corner_t::_P, Ny, Nz); 772 if (nbr(-1,-1,1) >= 0) _upack_corner(c.x03, -Corner_t::_P, -Corner_t::_Q, Nz); 773 else _halo_corner(0, 0, Nz-1, -Corner_t::_P, -Corner_t::_Q, Nz); 774 775 if (nbr(1,-1,-1) >= 0) _upack_corner(c.x10, Nx, -Corner_t::_Q, -Corner_t::_R); 776 else _halo_corner(Nx-1, 0, 0, Nx, -Corner_t::_Q, -Corner_t::_R); 777 if (nbr(1,1,-1) >= 0) _upack_corner(c.x11, Nx, Ny, -Corner_t::_R); 778 else _halo_corner(Nx-1, Ny-1, 0, Nx, Ny, -Corner_t::_R); 779 if (nbr(1,1,1) >= 0) _upack_corner(c.x12, Nx, Ny, Nz); 780 else _halo_corner(Nx-1, Ny-1, Nz-1, Nx, Ny, Nz); 781 if (nbr(1,-1,1) >= 0) _upack_corner(c.x13, Nx, -Corner_t::_Q, Nz); 782 else _halo_corner(Nx-1, 0, Nz-1, Nx, -Corner_t::_Q, Nz); 783 } 784 785 // PUP kernels 786 #include "InterpolatorPUPMPI.h" 787 788 #endif /* INTERPOLATORMPI_H_FKS9KH2J */