src/gromacs/ewald/pme_redistribute.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
   7  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38
  39 /*! \internal \file
  40  *
  41  * \brief This file contains function definitions for redistributing
  42  * atoms over the PME domains
  43  *
  44  * \author Berk Hess <hess@kth.se>
  45  * \ingroup module_ewald
  46  */
  47
  48 #include "gmxpre.h"
  49
  50 #include "pme_redistribute.h"
  51
  52 #include "config.h"
  53
  54 #include <algorithm>
  55
  56 #include "gromacs/math/vec.h"
  57 #include "gromacs/mdtypes/commrec.h"
  58 #include "gromacs/utility/exceptions.h"
  59 #include "gromacs/utility/fatalerror.h"
  60 #include "gromacs/utility/gmxmpi.h"
  61 #include "gromacs/utility/smalloc.h"
  62
  63 #include "pme_internal.h"
  64
  65 //! Calculate the slab indices and store in \p atc, store counts in \p count
  66 static void pme_calc_pidx(int                            start,
  67                           int                            end,
  68                           const matrix                   recipbox,
  69                           gmx::ArrayRef<const gmx::RVec> x,
  70                           PmeAtomComm*                   atc,
  71                           int*                           count)
  72 {
  73     int         nslab, i;
  74     int         si;
  75     const real* xptr;
  76     real        s;
  77     real        rxx, ryx, rzx, ryy, rzy;
  78     int*        pd;
  79
  80     /* Calculate PME task index (pidx) for each grid index.
  81      * Here we always assign equally sized slabs to each node
  82      * for load balancing reasons (the PME grid spacing is not used).
  83      */
  84
  85     nslab = atc->nslab;
  86     pd    = atc->pd.data();
  87
  88     /* Reset the count */
  89     for (i = 0; i < nslab; i++)
  90     {
  91         count[i] = 0;
  92     }
  93
  94     if (atc->dimind == 0)
  95     {
  96         rxx = recipbox[XX][XX];
  97         ryx = recipbox[YY][XX];
  98         rzx = recipbox[ZZ][XX];
  99         /* Calculate the node index in x-dimension */
 100         for (i = start; i < end; i++)
 101         {
 102             xptr = x[i];
 103             /* Fractional coordinates along box vectors */
 104             s     = nslab * (xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx);
 105             si    = static_cast<int>(s + 2 * nslab) % nslab;
 106             pd[i] = si;
 107             count[si]++;
 108         }
 109     }
 110     else
 111     {
 112         ryy = recipbox[YY][YY];
 113         rzy = recipbox[ZZ][YY];
 114         /* Calculate the node index in y-dimension */
 115         for (i = start; i < end; i++)
 116         {
 117             xptr = x[i];
 118             /* Fractional coordinates along box vectors */
 119             s     = nslab * (xptr[YY] * ryy + xptr[ZZ] * rzy);
 120             si    = static_cast<int>(s + 2 * nslab) % nslab;
 121             pd[i] = si;
 122             count[si]++;
 123         }
 124     }
 125 }
 126
 127 //! Wrapper function for calculating slab indices, stored in \p atc
 128 static void pme_calc_pidx_wrapper(gmx::ArrayRef<const gmx::RVec> x, const matrix recipbox, PmeAtomComm* atc)
 129 {
 130     int nthread = atc->nthread;
 131
 132 #pragma omp parallel for num_threads(nthread) schedule(static)
 133     for (int thread = 0; thread < nthread; thread++)
 134     {
 135         try
 136         {
 137             const int natoms = x.ssize();
 138             pme_calc_pidx(natoms * thread / nthread,
 139                           natoms * (thread + 1) / nthread,
 140                           recipbox,
 141                           x,
 142                           atc,
 143                           atc->count_thread[thread].data());
 144         }
 145         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 146     }
 147     /* Non-parallel reduction, since nslab is small */
 148
 149     for (int thread = 1; thread < nthread; thread++)
 150     {
 151         for (int slab = 0; slab < atc->nslab; slab++)
 152         {
 153             atc->count_thread[0][slab] += atc->count_thread[thread][slab];
 154         }
 155     }
 156 }
 157
 158 #ifndef DOXYGEN
 159
 160 void SplineCoefficients::realloc(const int nalloc)
 161 {
 162     const int padding = 4;
 163
 164     bufferX_.resize(nalloc);
 165     coefficients[XX] = bufferX_.data();
 166     bufferY_.resize(nalloc);
 167     coefficients[YY] = bufferY_.data();
 168     /* In z we add padding, this is only required for the aligned 4-wide SIMD code */
 169     bufferZ_.resize(nalloc + 2 * padding);
 170     coefficients[ZZ] = bufferZ_.data() + padding;
 171 }
 172
 173 #endif // !DOXYGEN
 174
 175 //! Reallocates all buffers in \p spline to fit atoms in \p atc
 176 static void pme_realloc_splinedata(splinedata_t* spline, const PmeAtomComm* atc)
 177 {
 178     if (spline->nalloc >= atc->x.ssize() && spline->nalloc >= atc->numAtoms())
 179     {
 180         return;
 181     }
 182
 183     spline->nalloc = std::max(atc->x.capacity(), static_cast<size_t>(atc->numAtoms()));
 184     spline->ind.resize(spline->nalloc);
 185     /* Initialize the index to identity so it works without threads */
 186     for (int i = 0; i < spline->nalloc; i++)
 187     {
 188         spline->ind[i] = i;
 189     }
 190
 191     spline->theta.realloc(atc->pme_order * spline->nalloc);
 192     spline->dtheta.realloc(atc->pme_order * spline->nalloc);
 193 }
 194
 195 #ifndef DOXYGEN
 196
 197 void PmeAtomComm::setNumAtoms(const int numAtoms)
 198 {
 199     numAtoms_ = numAtoms;
 200
 201     if (nslab > 1)
 202     {
 203         /* We have to avoid a NULL pointer for atc->x to avoid
 204          * possible fatal errors in MPI routines.
 205          */
 206         xBuffer.resize(numAtoms_);
 207         if (xBuffer.capacity() == 0)
 208         {
 209             xBuffer.reserve(1);
 210         }
 211         x = xBuffer;
 212         coefficientBuffer.resize(numAtoms_);
 213         if (coefficientBuffer.capacity() == 0)
 214         {
 215             coefficientBuffer.reserve(1);
 216         }
 217         coefficient          = coefficientBuffer;
 218         const int nalloc_old = fBuffer.size();
 219         fBuffer.resize(numAtoms_);
 220         for (int i = nalloc_old; i < numAtoms_; i++)
 221         {
 222             clear_rvec(fBuffer[i]);
 223         }
 224         f = fBuffer;
 225     }
 226     if (bSpread)
 227     {
 228         fractx.resize(numAtoms_);
 229         idx.resize(numAtoms_);
 230
 231         if (nthread > 1)
 232         {
 233             thread_idx.resize(numAtoms_);
 234         }
 235
 236         for (int i = 0; i < nthread; i++)
 237         {
 238             pme_realloc_splinedata(&spline[i], this);
 239         }
 240     }
 241 }
 242
 243 #endif // !DOXYGEN
 244
 245 //! Communicates buffers between rank separated by \p shift slabs
 246 static void pme_dd_sendrecv(PmeAtomComm gmx_unused* atc,
 247                             gmx_bool gmx_unused     bBackward,
 248                             int gmx_unused          shift,
 249                             void gmx_unused* buf_s,
 250                             int gmx_unused   nbyte_s,
 251                             void gmx_unused* buf_r,
 252                             int gmx_unused   nbyte_r)
 253 {
 254 #if GMX_MPI
 255     int        dest, src;
 256     MPI_Status stat;
 257
 258     if (!bBackward)
 259     {
 260         dest = atc->slabCommSetup[shift].node_dest;
 261         src  = atc->slabCommSetup[shift].node_src;
 262     }
 263     else
 264     {
 265         dest = atc->slabCommSetup[shift].node_src;
 266         src  = atc->slabCommSetup[shift].node_dest;
 267     }
 268
 269     if (nbyte_s > 0 && nbyte_r > 0)
 270     {
 271         MPI_Sendrecv(
 272                 buf_s, nbyte_s, MPI_BYTE, dest, shift, buf_r, nbyte_r, MPI_BYTE, src, shift, atc->mpi_comm, &stat);
 273     }
 274     else if (nbyte_s > 0)
 275     {
 276         MPI_Send(buf_s, nbyte_s, MPI_BYTE, dest, shift, atc->mpi_comm);
 277     }
 278     else if (nbyte_r > 0)
 279     {
 280         MPI_Recv(buf_r, nbyte_r, MPI_BYTE, src, shift, atc->mpi_comm, &stat);
 281     }
 282 #endif
 283 }
 284
 285 //! Redistristributes \p data and optionally coordinates between MPI ranks
 286 static void dd_pmeredist_pos_coeffs(gmx_pme_t*                     pme,
 287                                     const gmx_bool                 bX,
 288                                     gmx::ArrayRef<const gmx::RVec> x,
 289                                     gmx::ArrayRef<const real>      data,
 290                                     PmeAtomComm*                   atc)
 291 {
 292     int nnodes_comm, i, local_pos, buf_pos, node;
 293
 294     nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);
 295
 296     auto sendCount = atc->sendCount();
 297     int  nsend     = 0;
 298     for (i = 0; i < nnodes_comm; i++)
 299     {
 300         const int commnode                     = atc->slabCommSetup[i].node_dest;
 301         atc->slabCommSetup[commnode].buf_index = nsend;
 302         nsend += sendCount[commnode];
 303     }
 304     if (bX)
 305     {
 306         if (sendCount[atc->nodeid] + nsend != x.ssize())
 307         {
 308             gmx_fatal(
 309                     FARGS,
 310                     "%zd particles communicated to PME rank %d are more than 2/3 times the cut-off "
 311                     "out of the domain decomposition cell of their charge group in dimension %c.\n"
 312                     "This usually means that your system is not well equilibrated.",
 313                     x.ssize() - (sendCount[atc->nodeid] + nsend),
 314                     pme->nodeid,
 315                     'x' + atc->dimind);
 316         }
 317
 318         if (nsend > pme->buf_nalloc)
 319         {
 320             pme->buf_nalloc = over_alloc_dd(nsend);
 321             srenew(pme->bufv, pme->buf_nalloc);
 322             srenew(pme->bufr, pme->buf_nalloc);
 323         }
 324
 325         int numAtoms = sendCount[atc->nodeid];
 326         for (i = 0; i < nnodes_comm; i++)
 327         {
 328             const int commnode = atc->slabCommSetup[i].node_dest;
 329             int       scount   = sendCount[commnode];
 330             /* Communicate the count */
 331             if (debug)
 332             {
 333                 fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n", atc->dimind, atc->nodeid, commnode, scount);
 334             }
 335             pme_dd_sendrecv(
 336                     atc, FALSE, i, &scount, sizeof(int), &atc->slabCommSetup[i].rcount, sizeof(int));
 337             numAtoms += atc->slabCommSetup[i].rcount;
 338         }
 339
 340         atc->setNumAtoms(numAtoms);
 341     }
 342
 343     local_pos = 0;
 344     for (gmx::index i = 0; i < x.ssize(); i++)
 345     {
 346         node = atc->pd[i];
 347         if (node == atc->nodeid)
 348         {
 349             /* Copy direct to the receive buffer */
 350             if (bX)
 351             {
 352                 copy_rvec(x[i], atc->xBuffer[local_pos]);
 353             }
 354             atc->coefficientBuffer[local_pos] = data[i];
 355             local_pos++;
 356         }
 357         else
 358         {
 359             /* Copy to the send buffer */
 360             int& buf_index = atc->slabCommSetup[node].buf_index;
 361             if (bX)
 362             {
 363                 copy_rvec(x[i], pme->bufv[buf_index]);
 364             }
 365             pme->bufr[buf_index] = data[i];
 366             buf_index++;
 367         }
 368     }
 369
 370     buf_pos = 0;
 371     for (i = 0; i < nnodes_comm; i++)
 372     {
 373         const int scount = atc->sendCount()[atc->slabCommSetup[i].node_dest];
 374         const int rcount = atc->slabCommSetup[i].rcount;
 375         if (scount > 0 || rcount > 0)
 376         {
 377             if (bX)
 378             {
 379                 /* Communicate the coordinates */
 380                 pme_dd_sendrecv(atc,
 381                                 FALSE,
 382                                 i,
 383                                 pme->bufv + buf_pos,
 384                                 scount * sizeof(rvec),
 385                                 atc->xBuffer.data() + local_pos,
 386                                 rcount * sizeof(rvec));
 387             }
 388             /* Communicate the coefficients */
 389             pme_dd_sendrecv(atc,
 390                             FALSE,
 391                             i,
 392                             pme->bufr + buf_pos,
 393                             scount * sizeof(real),
 394                             atc->coefficientBuffer.data() + local_pos,
 395                             rcount * sizeof(real));
 396             buf_pos += scount;
 397             local_pos += atc->slabCommSetup[i].rcount;
 398         }
 399     }
 400     GMX_ASSERT(local_pos == atc->numAtoms(), "After receiving we should have numAtoms coordinates");
 401 }
 402
 403 void dd_pmeredist_f(struct gmx_pme_t* pme, PmeAtomComm* atc, gmx::ArrayRef<gmx::RVec> f, gmx_bool bAddF)
 404 {
 405     int nnodes_comm, local_pos, buf_pos, i, node;
 406
 407     nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);
 408
 409     local_pos = atc->sendCount()[atc->nodeid];
 410     buf_pos   = 0;
 411     for (i = 0; i < nnodes_comm; i++)
 412     {
 413         const int commnode = atc->slabCommSetup[i].node_dest;
 414         const int scount   = atc->slabCommSetup[i].rcount;
 415         const int rcount   = atc->sendCount()[commnode];
 416         if (scount > 0 || rcount > 0)
 417         {
 418             /* Communicate the forces */
 419             pme_dd_sendrecv(atc,
 420                             TRUE,
 421                             i,
 422                             atc->f.data() + local_pos,
 423                             scount * sizeof(rvec),
 424                             pme->bufv + buf_pos,
 425                             rcount * sizeof(rvec));
 426             local_pos += scount;
 427         }
 428         atc->slabCommSetup[commnode].buf_index = buf_pos;
 429         buf_pos += rcount;
 430     }
 431
 432     local_pos = 0;
 433     if (bAddF)
 434     {
 435         for (gmx::index i = 0; i < f.ssize(); i++)
 436         {
 437             node = atc->pd[i];
 438             if (node == atc->nodeid)
 439             {
 440                 /* Add from the local force array */
 441                 rvec_inc(f[i], atc->f[local_pos]);
 442                 local_pos++;
 443             }
 444             else
 445             {
 446                 /* Add from the receive buffer */
 447                 rvec_inc(f[i], pme->bufv[atc->slabCommSetup[node].buf_index]);
 448                 atc->slabCommSetup[node].buf_index++;
 449             }
 450         }
 451     }
 452     else
 453     {
 454         for (gmx::index i = 0; i < f.ssize(); i++)
 455         {
 456             node = atc->pd[i];
 457             if (node == atc->nodeid)
 458             {
 459                 /* Copy from the local force array */
 460                 copy_rvec(atc->f[local_pos], f[i]);
 461                 local_pos++;
 462             }
 463             else
 464             {
 465                 /* Copy from the receive buffer */
 466                 copy_rvec(pme->bufv[atc->slabCommSetup[node].buf_index], f[i]);
 467                 atc->slabCommSetup[node].buf_index++;
 468             }
 469         }
 470     }
 471 }
 472
 473 void do_redist_pos_coeffs(struct gmx_pme_t*              pme,
 474                           const t_commrec*               cr,
 475                           gmx_bool                       bFirst,
 476                           gmx::ArrayRef<const gmx::RVec> x,
 477                           gmx::ArrayRef<const real>      data)
 478 {
 479     for (int d = pme->ndecompdim - 1; d >= 0; d--)
 480     {
 481         gmx::ArrayRef<const gmx::RVec> xRef;
 482         gmx::ArrayRef<const real>      param_d;
 483         if (d == pme->ndecompdim - 1)
 484         {
 485             /* Start out with the local coordinates and charges */
 486             xRef    = x;
 487             param_d = data;
 488         }
 489         else
 490         {
 491             /* Redistribute the data collected along the previous dimension */
 492             const PmeAtomComm& atc = pme->atc[d + 1];
 493             xRef                   = atc.x;
 494             param_d                = atc.coefficient;
 495         }
 496         PmeAtomComm& atc = pme->atc[d];
 497         atc.pd.resize(xRef.size());
 498         pme_calc_pidx_wrapper(xRef, pme->recipbox, &atc);
 499         /* Redistribute x (only once) and qA/c6A or qB/c6B */
 500         if (DOMAINDECOMP(cr))
 501         {
 502             dd_pmeredist_pos_coeffs(pme, bFirst, xRef, param_d, &atc);
 503         }
 504     }
 505 }