src/gromacs/domdec/domdec_network.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2008,2009,2010,2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  *
  38  * \brief This file defines functions for (mostly) the domdec module
  39  * to use MPI functionality
  40  *
  41  * \author Berk Hess <hess@kth.se>
  42  * \ingroup module_domdec
  43  */
  44
  45 #include "gmxpre.h"
  46
  47 #include "domdec_network.h"
  48
  49 #include "config.h"
  50
  51 #include <string.h>
  52
  53 #include "gromacs/domdec/domdec_struct.h"
  54 #include "gromacs/utility/gmxmpi.h"
  55
  56
  57 /*! \brief Returns the MPI rank of the domain decomposition master rank */
  58 #define DDMASTERRANK(dd)   (dd->masterrank)
  59
  60
  61 /*! \brief Move data of type \p T in the communication region one cell along
  62  * the domain decomposition
  63  *
  64  * Moves in the dimension indexed by ddDimensionIndex, either forward
  65  * (direction=dddirFoward) or backward (direction=dddirBackward).
  66  */
  67 template <typename T>
  68 void
  69 ddSendrecv(const struct gmx_domdec_t *dd,
  70            int                        ddDimensionIndex,
  71            int                        direction,
  72            T                         *sendBuffer,
  73            int                        numElementsToSend,
  74            T                         *receiveBuffer,
  75            int                        numElementsToReceive)
  76 {
  77 #if GMX_MPI
  78     int           sendRank    = dd->neighbor[ddDimensionIndex][direction == dddirForward ? 0 : 1];
  79     int           receiveRank = dd->neighbor[ddDimensionIndex][direction == dddirForward ? 1 : 0];
  80
  81     constexpr int mpiTag      = 0;
  82     MPI_Status    mpiStatus;
  83     if (numElementsToSend > 0 && numElementsToReceive > 0)
  84     {
  85         MPI_Sendrecv(sendBuffer,    numElementsToSend*sizeof(T),    MPI_BYTE,
  86                      sendRank,      mpiTag,
  87                      receiveBuffer, numElementsToReceive*sizeof(T), MPI_BYTE,
  88                      receiveRank,   mpiTag,
  89                      dd->mpi_comm_all,
  90                      &mpiStatus);
  91     }
  92     else if (numElementsToSend > 0)
  93     {
  94         MPI_Send(    sendBuffer,    numElementsToSend*sizeof(T),    MPI_BYTE,
  95                      sendRank,      mpiTag,
  96                      dd->mpi_comm_all);
  97     }
  98     else if (numElementsToReceive > 0)
  99     {
 100         MPI_Recv(    receiveBuffer, numElementsToReceive*sizeof(T), MPI_BYTE,
 101                      receiveRank,   mpiTag,
 102                      dd->mpi_comm_all,
 103                      &mpiStatus);
 104     }
 105 #else // GMX_MPI
 106     GMX_UNUSED_VALUE(dd);
 107     GMX_UNUSED_VALUE(ddDimensionIndex);
 108     GMX_UNUSED_VALUE(direction);
 109     GMX_UNUSED_VALUE(sendBuffer);
 110     GMX_UNUSED_VALUE(numElementsToSend);
 111     GMX_UNUSED_VALUE(receiveBuffer);
 112     GMX_UNUSED_VALUE(numElementsToReceive);
 113 #endif // GMX_MPI
 114 }
 115
 116 //! Specialization of extern template for int
 117 template void ddSendrecv(const gmx_domdec_t *, int, int,
 118                          int *, int, int *, int);
 119 //! Specialization of extern template for real
 120 template void ddSendrecv(const gmx_domdec_t *, int, int,
 121                          real *, int, real *, int);
 122 //! Specialization of extern template for gmx::RVec
 123 template void ddSendrecv(const gmx_domdec_t *, int, int,
 124                          rvec *, int, rvec *, int);
 125
 126 template <typename T>
 127 void
 128 ddSendrecv(const gmx_domdec_t *dd,
 129            int                 ddDimensionIndex,
 130            int                 direction,
 131            gmx::ArrayRef<T>    sendBuffer,
 132            gmx::ArrayRef<T>    receiveBuffer)
 133 {
 134     ddSendrecv(dd, ddDimensionIndex, direction,
 135                sendBuffer.data(), sendBuffer.size(),
 136                receiveBuffer.data(), receiveBuffer.size());
 137 }
 138
 139 //! Specialization of extern template for int
 140 template void ddSendrecv(const gmx_domdec_t *, int, int,
 141                          gmx::ArrayRef<int>, gmx::ArrayRef<int>);
 142 //! Specialization of extern template for real
 143 template void ddSendrecv(const gmx_domdec_t *, int, int,
 144                          gmx::ArrayRef<real>, gmx::ArrayRef<real>);
 145 //! Specialization of extern template for gmx::RVec
 146 template void ddSendrecv(const gmx_domdec_t *, int, int,
 147                          gmx::ArrayRef<gmx::RVec>, gmx::ArrayRef<gmx::RVec>);
 148
 149 void dd_sendrecv2_rvec(const struct gmx_domdec_t gmx_unused *dd,
 150                        int gmx_unused ddimind,
 151                        rvec gmx_unused *buf_s_fw, int gmx_unused n_s_fw,
 152                        rvec gmx_unused *buf_r_fw, int gmx_unused n_r_fw,
 153                        rvec gmx_unused *buf_s_bw, int gmx_unused n_s_bw,
 154                        rvec gmx_unused *buf_r_bw, int gmx_unused n_r_bw)
 155 {
 156 #if GMX_MPI
 157     int         rank_fw, rank_bw, nreq;
 158     MPI_Request req[4];
 159     MPI_Status  stat[4];
 160
 161     rank_fw = dd->neighbor[ddimind][0];
 162     rank_bw = dd->neighbor[ddimind][1];
 163
 164     if (!dd->bSendRecv2)
 165     {
 166         /* Try to send and receive in two directions simultaneously.
 167          * Should be faster, especially on machines
 168          * with full 3D communication networks.
 169          * However, it could be that communication libraries are
 170          * optimized for MPI_Sendrecv and non-blocking MPI calls
 171          * are slower.
 172          * SendRecv2 can be turned on with the env.var. GMX_DD_SENDRECV2
 173          */
 174         nreq = 0;
 175         if (n_r_fw)
 176         {
 177             MPI_Irecv(buf_r_fw[0], n_r_fw*sizeof(rvec), MPI_BYTE,
 178                       rank_bw, 0, dd->mpi_comm_all, &req[nreq++]);
 179         }
 180         if (n_r_bw)
 181         {
 182             MPI_Irecv(buf_r_bw[0], n_r_bw*sizeof(rvec), MPI_BYTE,
 183                       rank_fw, 1, dd->mpi_comm_all, &req[nreq++]);
 184         }
 185         if (n_s_fw)
 186         {
 187             MPI_Isend(buf_s_fw[0], n_s_fw*sizeof(rvec), MPI_BYTE,
 188                       rank_fw, 0, dd->mpi_comm_all, &req[nreq++]);
 189         }
 190         if (n_s_bw)
 191         {
 192             MPI_Isend(buf_s_bw[0], n_s_bw*sizeof(rvec), MPI_BYTE,
 193                       rank_bw, 1, dd->mpi_comm_all, &req[nreq++]);
 194         }
 195         if (nreq)
 196         {
 197             MPI_Waitall(nreq, req, stat);
 198         }
 199     }
 200     else
 201     {
 202         /* Communicate in two ordered phases.
 203          * This is slower, even on a dual-core Opteron cluster
 204          * with a single full-duplex network connection per machine.
 205          */
 206         /* Forward */
 207         MPI_Sendrecv(buf_s_fw[0], n_s_fw*sizeof(rvec), MPI_BYTE, rank_fw, 0,
 208                      buf_r_fw[0], n_r_fw*sizeof(rvec), MPI_BYTE, rank_bw, 0,
 209                      dd->mpi_comm_all, &stat[0]);
 210         /* Backward */
 211         MPI_Sendrecv(buf_s_bw[0], n_s_bw*sizeof(rvec), MPI_BYTE, rank_bw, 0,
 212                      buf_r_bw[0], n_r_bw*sizeof(rvec), MPI_BYTE, rank_fw, 0,
 213                      dd->mpi_comm_all, &stat[0]);
 214     }
 215 #endif
 216 }
 217
 218 void dd_bcast(gmx_domdec_t gmx_unused *dd, int gmx_unused nbytes, void gmx_unused *data)
 219 {
 220 #if GMX_MPI
 221     if (dd->nnodes > 1)
 222     {
 223         MPI_Bcast(data, nbytes, MPI_BYTE,
 224                   DDMASTERRANK(dd), dd->mpi_comm_all);
 225     }
 226 #endif
 227 }
 228
 229 void dd_bcastc(gmx_domdec_t *dd, int nbytes, void *src, void *dest)
 230 {
 231     if (DDMASTER(dd) || dd->nnodes == 1)
 232     {
 233         memcpy(dest, src, nbytes);
 234     }
 235 #if GMX_MPI
 236     if (dd->nnodes > 1)
 237     {
 238         MPI_Bcast(dest, nbytes, MPI_BYTE,
 239                   DDMASTERRANK(dd), dd->mpi_comm_all);
 240     }
 241 #endif
 242 }
 243
 244 void dd_scatter(gmx_domdec_t gmx_unused *dd, int gmx_unused nbytes, const void gmx_unused *src, void *dest)
 245 {
 246 #if GMX_MPI
 247     if (dd->nnodes > 1)
 248     {
 249         /* Some MPI implementions don't specify const */
 250         MPI_Scatter(const_cast<void *>(src), nbytes, MPI_BYTE,
 251                     dest, nbytes, MPI_BYTE,
 252                     DDMASTERRANK(dd), dd->mpi_comm_all);
 253     }
 254     else
 255 #endif
 256     {
 257         /* 1 rank, either we copy everything, or dest=src: nothing to do */
 258         if (dest != src)
 259         {
 260             memcpy(dest, src, nbytes);
 261         }
 262     }
 263 }
 264
 265 void dd_gather(gmx_domdec_t gmx_unused *dd, int gmx_unused nbytes, const void gmx_unused *src, void gmx_unused *dest)
 266 {
 267 #if GMX_MPI
 268     /* Some MPI implementions don't specify const */
 269     MPI_Gather(const_cast<void *>(src), nbytes, MPI_BYTE,
 270                dest, nbytes, MPI_BYTE,
 271                DDMASTERRANK(dd), dd->mpi_comm_all);
 272 #endif
 273 }
 274
 275 void dd_scatterv(gmx_domdec_t gmx_unused *dd,
 276                  int gmx_unused *scounts, int gmx_unused *disps, const void *sbuf,
 277                  int rcount, void *rbuf)
 278 {
 279 #if GMX_MPI
 280     int dum;
 281
 282     if (dd->nnodes > 1)
 283     {
 284         if (rcount == 0)
 285         {
 286             /* MPI does not allow NULL pointers */
 287             rbuf = &dum;
 288         }
 289         /* Some MPI implementions don't specify const */
 290         MPI_Scatterv(const_cast<void *>(sbuf), scounts, disps, MPI_BYTE,
 291                      rbuf, rcount, MPI_BYTE,
 292                      DDMASTERRANK(dd), dd->mpi_comm_all);
 293     }
 294     else
 295 #endif
 296     {
 297         /* 1 rank, either we copy everything, or rbuf=sbuf: nothing to do */
 298         if (rbuf != sbuf)
 299         {
 300             memcpy(rbuf, sbuf, rcount);
 301         }
 302     }
 303 }
 304
 305 void dd_gatherv(gmx_domdec_t gmx_unused *dd,
 306                 int gmx_unused scount, const void gmx_unused *sbuf,
 307                 int gmx_unused *rcounts, int gmx_unused *disps, void gmx_unused *rbuf)
 308 {
 309 #if GMX_MPI
 310     int dum;
 311
 312     if (scount == 0)
 313     {
 314         /* MPI does not allow NULL pointers */
 315         sbuf = &dum;
 316     }
 317     /* Some MPI implementions don't specify const */
 318     MPI_Gatherv(const_cast<void *>(sbuf), scount, MPI_BYTE,
 319                 rbuf, rcounts, disps, MPI_BYTE,
 320                 DDMASTERRANK(dd), dd->mpi_comm_all);
 321 #endif
 322 }