/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2008-2019,2020,2021, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines functions for (mostly) the domdec module
 * to use MPI functionality
 *
 * \author Berk Hess <hess@kth.se>
 *
 * \ingroup module_domdec
 */
#include "gmxpre.h"

#include "domdec_network.h"

#include "config.h"

#include <cstring>

#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/utility/gmxmpi.h"

#include "domdec_internal.h"
/*! \brief Returns the MPI rank of the domain decomposition master rank */
#define DDMASTERRANK(dd) ((dd)->masterrank)
/*! \brief Move data of type \p T in the communication region one cell along
 * the domain decomposition
 *
 * Moves in the dimension indexed by ddDimensionIndex, either forward
 * (direction=dddirForward) or backward (direction=dddirBackward).
 */
template<typename T>
void ddSendrecv(const struct gmx_domdec_t* dd,
                int                        ddDimensionIndex,
                int                        direction,
                T*                         sendBuffer,
                int                        numElementsToSend,
                T*                         receiveBuffer,
                int                        numElementsToReceive)
{
#if GMX_MPI
    int sendRank    = dd->neighbor[ddDimensionIndex][direction == dddirForward ? 0 : 1];
    int receiveRank = dd->neighbor[ddDimensionIndex][direction == dddirForward ? 1 : 0];

    constexpr int mpiTag = 0;
    MPI_Status    mpiStatus;
    if (numElementsToSend > 0 && numElementsToReceive > 0)
    {
        MPI_Sendrecv(sendBuffer,
                     numElementsToSend * sizeof(T),
                     MPI_BYTE,
                     sendRank,
                     mpiTag,
                     receiveBuffer,
                     numElementsToReceive * sizeof(T),
                     MPI_BYTE,
                     receiveRank,
                     mpiTag,
                     dd->mpi_comm_all,
                     &mpiStatus);
    }
    else if (numElementsToSend > 0)
    {
        MPI_Send(sendBuffer, numElementsToSend * sizeof(T), MPI_BYTE, sendRank, mpiTag, dd->mpi_comm_all);
    }
    else if (numElementsToReceive > 0)
    {
        MPI_Recv(receiveBuffer, numElementsToReceive * sizeof(T), MPI_BYTE, receiveRank, mpiTag, dd->mpi_comm_all, &mpiStatus);
    }
#else  // GMX_MPI
    GMX_UNUSED_VALUE(dd);
    GMX_UNUSED_VALUE(ddDimensionIndex);
    GMX_UNUSED_VALUE(direction);
    GMX_UNUSED_VALUE(sendBuffer);
    GMX_UNUSED_VALUE(numElementsToSend);
    GMX_UNUSED_VALUE(receiveBuffer);
    GMX_UNUSED_VALUE(numElementsToReceive);
#endif // GMX_MPI
}
//! Specialization of extern template for int
template void ddSendrecv(const gmx_domdec_t*, int, int, int*, int, int*, int);
//! Specialization of extern template for real
template void ddSendrecv(const gmx_domdec_t*, int, int, real*, int, real*, int);
//! Specialization of extern template for rvec
template void ddSendrecv(const gmx_domdec_t*, int, int, rvec*, int, rvec*, int);
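
/* Usage sketch (illustrative only, not part of this module): moving a halo
 * of coordinates one cell forward in DD dimension 0. The buffer names and
 * counts are hypothetical; real callers take them from the domain-
 * decomposition setup, which guarantees that the send count on one rank
 * matches the receive count on its neighbor.
 *
 *   std::vector<gmx::RVec> xToSend(numAtomsToShip);
 *   std::vector<gmx::RVec> xToReceive(numAtomsExpected);
 *   ddSendrecv(dd, 0, dddirForward,
 *              as_rvec_array(xToSend.data()), xToSend.size(),
 *              as_rvec_array(xToReceive.data()), xToReceive.size());
 */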
template<typename T>
void ddSendrecv(const gmx_domdec_t* dd,
                int                 ddDimensionIndex,
                int                 direction,
                gmx::ArrayRef<T>    sendBuffer,
                gmx::ArrayRef<T>    receiveBuffer)
{
    ddSendrecv(dd,
               ddDimensionIndex,
               direction,
               sendBuffer.data(),
               sendBuffer.size(),
               receiveBuffer.data(),
               receiveBuffer.size());
}

//! Specialization of extern template for int
template void ddSendrecv(const gmx_domdec_t*, int, int, gmx::ArrayRef<int>, gmx::ArrayRef<int>);
//! Specialization of extern template for real
template void ddSendrecv(const gmx_domdec_t*, int, int, gmx::ArrayRef<real>, gmx::ArrayRef<real>);
//! Specialization of extern template for gmx::RVec
template void ddSendrecv(const gmx_domdec_t*, int, int, gmx::ArrayRef<gmx::RVec>, gmx::ArrayRef<gmx::RVec>);
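
/* Usage sketch (illustrative): with the ArrayRef overload the element counts
 * come from the views themselves, which removes one class of size-mismatch
 * bugs. The vector names are hypothetical.
 *
 *   std::vector<real> chargesToSend    = makeSendBuffer();
 *   std::vector<real> chargesToReceive(numExpected);
 *   ddSendrecv<real>(dd, 1, dddirBackward, chargesToSend, chargesToReceive);
 */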
void dd_sendrecv2_rvec(const struct gmx_domdec_t gmx_unused* dd,
                       int gmx_unused ddimind,
                       rvec gmx_unused* buf_s_fw,
                       int gmx_unused n_s_fw,
                       rvec gmx_unused* buf_r_fw,
                       int gmx_unused n_r_fw,
                       rvec gmx_unused* buf_s_bw,
                       int gmx_unused n_s_bw,
                       rvec gmx_unused* buf_r_bw,
                       int gmx_unused n_r_bw)
{
#if GMX_MPI
    MPI_Request req[4];
    MPI_Status  stat[4];

    int rank_fw = dd->neighbor[ddimind][0];
    int rank_bw = dd->neighbor[ddimind][1];

    if (!dd->comm->ddSettings.useSendRecv2)
    {
        /* Try to send and receive in two directions simultaneously.
         * Should be faster, especially on machines
         * with full 3D communication networks.
         * However, it could be that communication libraries are
         * optimized for MPI_Sendrecv and non-blocking MPI calls
         * are slower.
         * SendRecv2 can be turned on with the env.var. GMX_DD_SENDRECV2
         */
        int nreq = 0;
        if (n_r_fw)
        {
            MPI_Irecv(buf_r_fw[0], n_r_fw * sizeof(rvec), MPI_BYTE, rank_bw, 0, dd->mpi_comm_all, &req[nreq++]);
        }
        if (n_r_bw)
        {
            MPI_Irecv(buf_r_bw[0], n_r_bw * sizeof(rvec), MPI_BYTE, rank_fw, 1, dd->mpi_comm_all, &req[nreq++]);
        }
        if (n_s_fw)
        {
            MPI_Isend(buf_s_fw[0], n_s_fw * sizeof(rvec), MPI_BYTE, rank_fw, 0, dd->mpi_comm_all, &req[nreq++]);
        }
        if (n_s_bw)
        {
            MPI_Isend(buf_s_bw[0], n_s_bw * sizeof(rvec), MPI_BYTE, rank_bw, 1, dd->mpi_comm_all, &req[nreq++]);
        }
        if (nreq)
        {
            MPI_Waitall(nreq, req, stat); //NOLINT(clang-analyzer-optin.mpi.MPI-Checker)
        }
    }
    else
    {
        /* Communicate in two ordered phases.
         * This is slower, even on a dual-core Opteron cluster
         * with a single full-duplex network connection per machine.
         */
        /* Forward */
        MPI_Sendrecv(buf_s_fw[0], n_s_fw * sizeof(rvec), MPI_BYTE, rank_fw, 0,
                     buf_r_fw[0], n_r_fw * sizeof(rvec), MPI_BYTE, rank_bw, 0,
                     dd->mpi_comm_all, &stat[0]);
        /* Backward */
        MPI_Sendrecv(buf_s_bw[0], n_s_bw * sizeof(rvec), MPI_BYTE, rank_bw, 0,
                     buf_r_bw[0], n_r_bw * sizeof(rvec), MPI_BYTE, rank_fw, 0,
                     dd->mpi_comm_all, &stat[0]);
    }
#endif
}
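
/* Usage sketch (illustrative): exchanging coordinate halos with both
 * neighbors along one DD dimension in a single call. Buffer names and
 * counts are hypothetical; real callers size them from the DD zone setup
 * so that each send count matches the paired receive count on the
 * neighboring rank.
 *
 *   dd_sendrecv2_rvec(dd, dimIndex,
 *                     xSendFw, nSendFw, xRecvFw, nRecvFw,
 *                     xSendBw, nSendBw, xRecvBw, nRecvBw);
 */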
void dd_bcast(const gmx_domdec_t gmx_unused* dd, int gmx_unused nbytes, void gmx_unused* data)
{
#if GMX_MPI
    if (dd->nnodes > 1)
    {
        MPI_Bcast(data, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
    }
#endif
}
void dd_bcastc(const gmx_domdec_t* dd, int nbytes, void* src, void* dest)
{
    if (DDMASTER(dd) || dd->nnodes == 1)
    {
        memcpy(dest, src, nbytes);
    }
#if GMX_MPI
    if (dd->nnodes > 1)
    {
        MPI_Bcast(dest, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
    }
#endif
}
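
/* Usage sketch (illustrative): distributing a small, fixed-size record that
 * only the master rank has filled in; the type name is hypothetical.
 *
 *   GlobalSettings settings;   // contents valid on the master rank only
 *   GlobalSettings myCopy;
 *   dd_bcastc(dd, sizeof(settings), &settings, &myCopy);
 *   // afterwards every rank's myCopy holds the master's settings
 */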
void dd_scatter(const gmx_domdec_t gmx_unused* dd, int gmx_unused nbytes, const void gmx_unused* src, void* dest)
{
#if GMX_MPI
    if (dd->nnodes > 1)
    {
        /* Some MPI implementations don't specify const */
        MPI_Scatter(const_cast<void*>(src), nbytes, MPI_BYTE, dest, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
    }
    else
#endif
    {
        /* 1 rank, either we copy everything, or dest=src: nothing to do */
        if (dest != src)
        {
            memcpy(dest, src, nbytes);
        }
    }
}
void dd_gather(const gmx_domdec_t gmx_unused* dd,
               int gmx_unused nbytes,
               const void gmx_unused* src,
               void gmx_unused* dest)
{
#if GMX_MPI
    /* Some MPI implementations don't specify const */
    MPI_Gather(const_cast<void*>(src), nbytes, MPI_BYTE, dest, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
#endif
}
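
/* Usage sketch (illustrative): a fixed-size per-rank record scattered from
 * the master and gathered back after each rank updates its copy. Names are
 * hypothetical; allRecords needs room for dd->nnodes records and only has
 * to be valid on the master rank.
 *
 *   PerRankRecord myRecord;
 *   dd_scatter(dd, sizeof(myRecord), allRecords, &myRecord);
 *   // ... each rank modifies myRecord ...
 *   dd_gather(dd, sizeof(myRecord), &myRecord, allRecords);
 */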
void dd_scatterv(const gmx_domdec_t gmx_unused* dd,
                 int gmx_unused* scounts,
                 int gmx_unused* disps,
                 const void gmx_unused* sbuf,
                 int rcount,
                 void* rbuf)
{
#if GMX_MPI
    int dum;

    if (dd->nnodes > 1)
    {
        if (rcount == 0)
        {
            /* MPI does not allow NULL pointers */
            rbuf = &dum;
        }
        /* Some MPI implementations don't specify const */
        MPI_Scatterv(
                const_cast<void*>(sbuf), scounts, disps, MPI_BYTE, rbuf, rcount, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
    }
    else
#endif
    {
        /* 1 rank, either we copy everything, or rbuf=sbuf: nothing to do */
        if (rbuf != sbuf)
        {
            memcpy(rbuf, sbuf, rcount);
        }
    }
}
void dd_gatherv(const gmx_domdec_t gmx_unused* dd,
                int gmx_unused scount,
                const void gmx_unused* sbuf,
                int gmx_unused* rcounts,
                int gmx_unused* disps,
                void gmx_unused* rbuf)
{
#if GMX_MPI
    int dum;

    if (scount == 0)
    {
        /* MPI does not allow NULL pointers */
        sbuf = &dum;
    }
    /* Some MPI implementations don't specify const */
    MPI_Gatherv(
            const_cast<void*>(sbuf), scount, MPI_BYTE, rbuf, rcounts, disps, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
#endif
}
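
/* Usage sketch (illustrative): variable-sized per-rank blocks. The byte
 * counts and displacements are hypothetical and only need valid contents on
 * the master rank; myCount is each rank's own block size in bytes.
 *
 *   dd_scatterv(dd, scounts, disps, masterBuffer, myCount, myBlock);
 *   // ... each rank works on its myCount bytes in myBlock ...
 *   dd_gatherv(dd, myCount, myBlock, scounts, disps, masterBuffer);
 */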