src/gromacs/ewald/pme_redistribute.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
   7  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38
  39 /*! \internal \file
  40  *
  41  * \brief This file contains function definitions for redistributing
  42  * atoms over the PME domains
  43  *
  44  * \author Berk Hess <hess@kth.se>
  45  * \ingroup module_ewald
  46  */
  47
  48 #include "gmxpre.h"
  49
  50 #include "pme_redistribute.h"
  51
  52 #include "config.h"
  53
  54 #include <algorithm>
  55
  56 #include "gromacs/math/vec.h"
  57 #include "gromacs/mdtypes/commrec.h"
  58 #include "gromacs/utility/exceptions.h"
  59 #include "gromacs/utility/fatalerror.h"
  60 #include "gromacs/utility/gmxmpi.h"
  61 #include "gromacs/utility/smalloc.h"
  62
  63 #include "pme_internal.h"
  64
  65 //! Calculate the slab indices and store in \p atc, store counts in \p count
  66 static void pme_calc_pidx(int                            start,
  67                           int                            end,
  68                           const matrix                   recipbox,
  69                           gmx::ArrayRef<const gmx::RVec> x,
  70                           PmeAtomComm*                   atc,
  71                           int*                           count)
  72 {
  73     int         nslab, i;
  74     int         si;
  75     const real* xptr;
  76     real        s;
  77     real        rxx, ryx, rzx, ryy, rzy;
  78     int*        pd;
  79
  80     /* Calculate PME task index (pidx) for each grid index.
  81      * Here we always assign equally sized slabs to each node
  82      * for load balancing reasons (the PME grid spacing is not used).
  83      */
  84
  85     nslab = atc->nslab;
  86     pd    = atc->pd.data();
  87
  88     /* Reset the count */
  89     for (i = 0; i < nslab; i++)
  90     {
  91         count[i] = 0;
  92     }
  93
  94     if (atc->dimind == 0)
  95     {
  96         rxx = recipbox[XX][XX];
  97         ryx = recipbox[YY][XX];
  98         rzx = recipbox[ZZ][XX];
  99         /* Calculate the node index in x-dimension */
 100         for (i = start; i < end; i++)
 101         {
 102             xptr = x[i];
 103             /* Fractional coordinates along box vectors */
 104             s     = nslab * (xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx);
 105             si    = static_cast<int>(s + 2 * nslab) % nslab;
 106             pd[i] = si;
 107             count[si]++;
 108         }
 109     }
 110     else
 111     {
 112         ryy = recipbox[YY][YY];
 113         rzy = recipbox[ZZ][YY];
 114         /* Calculate the node index in y-dimension */
 115         for (i = start; i < end; i++)
 116         {
 117             xptr = x[i];
 118             /* Fractional coordinates along box vectors */
 119             s     = nslab * (xptr[YY] * ryy + xptr[ZZ] * rzy);
 120             si    = static_cast<int>(s + 2 * nslab) % nslab;
 121             pd[i] = si;
 122             count[si]++;
 123         }
 124     }
 125 }
 126
 127 //! Wrapper function for calculating slab indices, stored in \p atc
 128 static void pme_calc_pidx_wrapper(gmx::ArrayRef<const gmx::RVec> x, const matrix recipbox, PmeAtomComm* atc)
 129 {
 130     int nthread = atc->nthread;
 131
 132 #pragma omp parallel for num_threads(nthread) schedule(static)
 133     for (int thread = 0; thread < nthread; thread++)
 134     {
 135         try
 136         {
 137             const int natoms = x.ssize();
 138             pme_calc_pidx(natoms * thread / nthread, natoms * (thread + 1) / nthread, recipbox, x,
 139                           atc, atc->count_thread[thread].data());
 140         }
 141         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 142     }
 143     /* Non-parallel reduction, since nslab is small */
 144
 145     for (int thread = 1; thread < nthread; thread++)
 146     {
 147         for (int slab = 0; slab < atc->nslab; slab++)
 148         {
 149             atc->count_thread[0][slab] += atc->count_thread[thread][slab];
 150         }
 151     }
 152 }
 153
 154 #ifndef DOXYGEN
 155
 156 void SplineCoefficients::realloc(const int nalloc)
 157 {
 158     const int padding = 4;
 159
 160     bufferX_.resize(nalloc);
 161     coefficients[XX] = bufferX_.data();
 162     bufferY_.resize(nalloc);
 163     coefficients[YY] = bufferY_.data();
 164     /* In z we add padding, this is only required for the aligned 4-wide SIMD code */
 165     bufferZ_.resize(nalloc + 2 * padding);
 166     coefficients[ZZ] = bufferZ_.data() + padding;
 167 }
 168
 169 #endif // !DOXYGEN
 170
 171 //! Reallocates all buffers in \p spline to fit atoms in \p atc
 172 static void pme_realloc_splinedata(splinedata_t* spline, const PmeAtomComm* atc)
 173 {
 174     if (spline->nalloc >= atc->x.ssize() && spline->nalloc >= atc->numAtoms())
 175     {
 176         return;
 177     }
 178
 179     spline->nalloc = std::max(atc->x.capacity(), static_cast<size_t>(atc->numAtoms()));
 180     spline->ind.resize(spline->nalloc);
 181     /* Initialize the index to identity so it works without threads */
 182     for (int i = 0; i < spline->nalloc; i++)
 183     {
 184         spline->ind[i] = i;
 185     }
 186
 187     spline->theta.realloc(atc->pme_order * spline->nalloc);
 188     spline->dtheta.realloc(atc->pme_order * spline->nalloc);
 189 }
 190
 191 #ifndef DOXYGEN
 192
 193 void PmeAtomComm::setNumAtoms(const int numAtoms)
 194 {
 195     numAtoms_ = numAtoms;
 196
 197     if (nslab > 1)
 198     {
 199         /* We have to avoid a NULL pointer for atc->x to avoid
 200          * possible fatal errors in MPI routines.
 201          */
 202         xBuffer.resize(numAtoms_);
 203         if (xBuffer.capacity() == 0)
 204         {
 205             xBuffer.reserve(1);
 206         }
 207         x = xBuffer;
 208         coefficientBuffer.resize(numAtoms_);
 209         if (coefficientBuffer.capacity() == 0)
 210         {
 211             coefficientBuffer.reserve(1);
 212         }
 213         coefficient          = coefficientBuffer;
 214         const int nalloc_old = fBuffer.size();
 215         fBuffer.resize(numAtoms_);
 216         for (int i = nalloc_old; i < numAtoms_; i++)
 217         {
 218             clear_rvec(fBuffer[i]);
 219         }
 220         f = fBuffer;
 221     }
 222     if (bSpread)
 223     {
 224         fractx.resize(numAtoms_);
 225         idx.resize(numAtoms_);
 226
 227         if (nthread > 1)
 228         {
 229             thread_idx.resize(numAtoms_);
 230         }
 231
 232         for (int i = 0; i < nthread; i++)
 233         {
 234             pme_realloc_splinedata(&spline[i], this);
 235         }
 236     }
 237 }
 238
 239 #endif // !DOXYGEN
 240
 241 //! Communicates buffers between rank separated by \p shift slabs
 242 static void pme_dd_sendrecv(PmeAtomComm gmx_unused* atc,
 243                             gmx_bool gmx_unused bBackward,
 244                             int gmx_unused shift,
 245                             void gmx_unused* buf_s,
 246                             int gmx_unused nbyte_s,
 247                             void gmx_unused* buf_r,
 248                             int gmx_unused nbyte_r)
 249 {
 250 #if GMX_MPI
 251     int        dest, src;
 252     MPI_Status stat;
 253
 254     if (!bBackward)
 255     {
 256         dest = atc->slabCommSetup[shift].node_dest;
 257         src  = atc->slabCommSetup[shift].node_src;
 258     }
 259     else
 260     {
 261         dest = atc->slabCommSetup[shift].node_src;
 262         src  = atc->slabCommSetup[shift].node_dest;
 263     }
 264
 265     if (nbyte_s > 0 && nbyte_r > 0)
 266     {
 267         MPI_Sendrecv(buf_s, nbyte_s, MPI_BYTE, dest, shift, buf_r, nbyte_r, MPI_BYTE, src, shift,
 268                      atc->mpi_comm, &stat);
 269     }
 270     else if (nbyte_s > 0)
 271     {
 272         MPI_Send(buf_s, nbyte_s, MPI_BYTE, dest, shift, atc->mpi_comm);
 273     }
 274     else if (nbyte_r > 0)
 275     {
 276         MPI_Recv(buf_r, nbyte_r, MPI_BYTE, src, shift, atc->mpi_comm, &stat);
 277     }
 278 #endif
 279 }
 280
 281 //! Redistristributes \p data and optionally coordinates between MPI ranks
 282 static void dd_pmeredist_pos_coeffs(gmx_pme_t*                     pme,
 283                                     const gmx_bool                 bX,
 284                                     gmx::ArrayRef<const gmx::RVec> x,
 285                                     const real*                    data,
 286                                     PmeAtomComm*                   atc)
 287 {
 288     int nnodes_comm, i, local_pos, buf_pos, node;
 289
 290     nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);
 291
 292     auto sendCount = atc->sendCount();
 293     int  nsend     = 0;
 294     for (i = 0; i < nnodes_comm; i++)
 295     {
 296         const int commnode                     = atc->slabCommSetup[i].node_dest;
 297         atc->slabCommSetup[commnode].buf_index = nsend;
 298         nsend += sendCount[commnode];
 299     }
 300     if (bX)
 301     {
 302         if (sendCount[atc->nodeid] + nsend != x.ssize())
 303         {
 304             gmx_fatal(
 305                     FARGS,
 306                     "%zd particles communicated to PME rank %d are more than 2/3 times the cut-off "
 307                     "out of the domain decomposition cell of their charge group in dimension %c.\n"
 308                     "This usually means that your system is not well equilibrated.",
 309                     x.ssize() - (sendCount[atc->nodeid] + nsend), pme->nodeid, 'x' + atc->dimind);
 310         }
 311
 312         if (nsend > pme->buf_nalloc)
 313         {
 314             pme->buf_nalloc = over_alloc_dd(nsend);
 315             srenew(pme->bufv, pme->buf_nalloc);
 316             srenew(pme->bufr, pme->buf_nalloc);
 317         }
 318
 319         int numAtoms = sendCount[atc->nodeid];
 320         for (i = 0; i < nnodes_comm; i++)
 321         {
 322             const int commnode = atc->slabCommSetup[i].node_dest;
 323             int       scount   = sendCount[commnode];
 324             /* Communicate the count */
 325             if (debug)
 326             {
 327                 fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n", atc->dimind,
 328                         atc->nodeid, commnode, scount);
 329             }
 330             pme_dd_sendrecv(atc, FALSE, i, &scount, sizeof(int), &atc->slabCommSetup[i].rcount,
 331                             sizeof(int));
 332             numAtoms += atc->slabCommSetup[i].rcount;
 333         }
 334
 335         atc->setNumAtoms(numAtoms);
 336     }
 337
 338     local_pos = 0;
 339     for (gmx::index i = 0; i < x.ssize(); i++)
 340     {
 341         node = atc->pd[i];
 342         if (node == atc->nodeid)
 343         {
 344             /* Copy direct to the receive buffer */
 345             if (bX)
 346             {
 347                 copy_rvec(x[i], atc->xBuffer[local_pos]);
 348             }
 349             atc->coefficientBuffer[local_pos] = data[i];
 350             local_pos++;
 351         }
 352         else
 353         {
 354             /* Copy to the send buffer */
 355             int& buf_index = atc->slabCommSetup[node].buf_index;
 356             if (bX)
 357             {
 358                 copy_rvec(x[i], pme->bufv[buf_index]);
 359             }
 360             pme->bufr[buf_index] = data[i];
 361             buf_index++;
 362         }
 363     }
 364
 365     buf_pos = 0;
 366     for (i = 0; i < nnodes_comm; i++)
 367     {
 368         const int scount = atc->sendCount()[atc->slabCommSetup[i].node_dest];
 369         const int rcount = atc->slabCommSetup[i].rcount;
 370         if (scount > 0 || rcount > 0)
 371         {
 372             if (bX)
 373             {
 374                 /* Communicate the coordinates */
 375                 pme_dd_sendrecv(atc, FALSE, i, pme->bufv[buf_pos], scount * sizeof(rvec),
 376                                 atc->xBuffer[local_pos], rcount * sizeof(rvec));
 377             }
 378             /* Communicate the coefficients */
 379             pme_dd_sendrecv(atc, FALSE, i, pme->bufr + buf_pos, scount * sizeof(real),
 380                             atc->coefficientBuffer.data() + local_pos, rcount * sizeof(real));
 381             buf_pos += scount;
 382             local_pos += atc->slabCommSetup[i].rcount;
 383         }
 384     }
 385 }
 386
 387 void dd_pmeredist_f(struct gmx_pme_t* pme, PmeAtomComm* atc, gmx::ArrayRef<gmx::RVec> f, gmx_bool bAddF)
 388 {
 389     int nnodes_comm, local_pos, buf_pos, i, node;
 390
 391     nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);
 392
 393     local_pos = atc->sendCount()[atc->nodeid];
 394     buf_pos   = 0;
 395     for (i = 0; i < nnodes_comm; i++)
 396     {
 397         const int commnode = atc->slabCommSetup[i].node_dest;
 398         const int scount   = atc->slabCommSetup[i].rcount;
 399         const int rcount   = atc->sendCount()[commnode];
 400         if (scount > 0 || rcount > 0)
 401         {
 402             /* Communicate the forces */
 403             pme_dd_sendrecv(atc, TRUE, i, atc->f[local_pos], scount * sizeof(rvec),
 404                             pme->bufv[buf_pos], rcount * sizeof(rvec));
 405             local_pos += scount;
 406         }
 407         atc->slabCommSetup[commnode].buf_index = buf_pos;
 408         buf_pos += rcount;
 409     }
 410
 411     local_pos = 0;
 412     if (bAddF)
 413     {
 414         for (gmx::index i = 0; i < f.ssize(); i++)
 415         {
 416             node = atc->pd[i];
 417             if (node == atc->nodeid)
 418             {
 419                 /* Add from the local force array */
 420                 rvec_inc(f[i], atc->f[local_pos]);
 421                 local_pos++;
 422             }
 423             else
 424             {
 425                 /* Add from the receive buffer */
 426                 rvec_inc(f[i], pme->bufv[atc->slabCommSetup[node].buf_index]);
 427                 atc->slabCommSetup[node].buf_index++;
 428             }
 429         }
 430     }
 431     else
 432     {
 433         for (gmx::index i = 0; i < f.ssize(); i++)
 434         {
 435             node = atc->pd[i];
 436             if (node == atc->nodeid)
 437             {
 438                 /* Copy from the local force array */
 439                 copy_rvec(atc->f[local_pos], f[i]);
 440                 local_pos++;
 441             }
 442             else
 443             {
 444                 /* Copy from the receive buffer */
 445                 copy_rvec(pme->bufv[atc->slabCommSetup[node].buf_index], f[i]);
 446                 atc->slabCommSetup[node].buf_index++;
 447             }
 448         }
 449     }
 450 }
 451
 452 void do_redist_pos_coeffs(struct gmx_pme_t*              pme,
 453                           const t_commrec*               cr,
 454                           gmx_bool                       bFirst,
 455                           gmx::ArrayRef<const gmx::RVec> x,
 456                           const real*                    data)
 457 {
 458     for (int d = pme->ndecompdim - 1; d >= 0; d--)
 459     {
 460         gmx::ArrayRef<const gmx::RVec> xRef;
 461         const real*                    param_d;
 462         if (d == pme->ndecompdim - 1)
 463         {
 464             /* Start out with the local coordinates and charges */
 465             xRef    = x;
 466             param_d = data;
 467         }
 468         else
 469         {
 470             /* Redistribute the data collected along the previous dimension */
 471             const PmeAtomComm& atc = pme->atc[d + 1];
 472             xRef                   = atc.x;
 473             param_d                = atc.coefficient.data();
 474         }
 475         PmeAtomComm& atc = pme->atc[d];
 476         atc.pd.resize(xRef.size());
 477         pme_calc_pidx_wrapper(xRef, pme->recipbox, &atc);
 478         /* Redistribute x (only once) and qA/c6A or qB/c6B */
 479         if (DOMAINDECOMP(cr))
 480         {
 481             dd_pmeredist_pos_coeffs(pme, bFirst, xRef, param_d, &atc);
 482         }
 483     }
 484 }