/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains function definitions necessary for
 * managing the offload of long-ranged PME work to a separate MPI rank,
 * for computing energies and forces (Coulomb and LJ).
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_ewald
 */

#include "gmxpre.h"

#include "config.h"

#include <cstdio>
#include <cstring>

#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

#include "pme-internal.h"
#include "pme-pp-communication.h"
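
/* For orientation: the PP-side message flow implemented by this file is,
 * schematically (a summary sketch of the functions defined below, not
 * normative documentation):
 *
 *   gmx_pme_send_parameters()      - charges and LJ coefficients, at each
 *                                    (re)partitioning
 *   for each MD step:
 *     gmx_pme_send_coordinates()   - box and coordinates
 *     ...                          - PP rank computes short-ranged forces
 *     gmx_pme_receive_f()          - long-ranged forces, plus virial and
 *                                    energy when requested
 *   gmx_pme_send_finish()          - tells the PME rank to stop
 */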

/*! \brief Whether to delay the block that waits for communication to PME ranks to complete
 *
 * Delaying the wait should be faster with a truly non-blocking MPI implementation.
 */
static constexpr bool c_useDelayedWait = false;
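
/* Concretely, the two strategies the flag selects (both implemented in
 * gmx_pme_send_coeffs_coords() below):
 *
 *   c_useDelayedWait == false: MPI_Waitall() on the send requests at the end
 *                              of gmx_pme_send_coeffs_coords(), so the send
 *                              buffers are always reusable on return;
 *   c_useDelayedWait == true:  return right after the MPI_Isend() calls and
 *                              wait only when the buffers or requests are
 *                              next needed (the start of the next send, or
 *                              gmx_pme_receive_f()).
 */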

/*! \brief Wait for the pending data send requests to PME ranks to complete */
static void gmx_pme_send_coeffs_coords_wait(gmx_domdec_t gmx_unused *dd)
{
#if GMX_MPI
    if (dd->nreq_pme)
    {
        MPI_Waitall(dd->nreq_pme, dd->req_pme, MPI_STATUSES_IGNORE);
        dd->nreq_pme = 0;
    }
#endif
}

/*! \brief Send data to PME ranks */
static void gmx_pme_send_coeffs_coords(const t_commrec *cr, unsigned int flags,
                                       real gmx_unused *chargeA, real gmx_unused *chargeB,
                                       real gmx_unused *c6A, real gmx_unused *c6B,
                                       real gmx_unused *sigmaA, real gmx_unused *sigmaB,
                                       matrix box, rvec gmx_unused *x,
                                       real lambda_q, real lambda_lj,
                                       int maxshift_x, int maxshift_y,
                                       int64_t step)
{
    gmx_domdec_t         *dd;
    gmx_pme_comm_n_box_t *cnb;
    int                   n;

    dd = cr->dd;
    n  = dd_numHomeAtoms(*dd);

    if (debug)
    {
        fprintf(debug, "PP rank %d sending to PME rank %d: %d%s%s%s%s\n",
                cr->sim_nodeid, dd->pme_nodeid, n,
                (flags & PP_PME_CHARGE) ? " charges" : "",
                (flags & PP_PME_SQRTC6) ? " sqrtC6" : "",
                (flags & PP_PME_SIGMA) ? " sigma" : "",
                (flags & PP_PME_COORD) ? " coordinates" : "");
    }

    if (c_useDelayedWait)
    {
        /* We can not use cnb until pending communication has finished */
        gmx_pme_send_coeffs_coords_wait(dd);
    }

    if (dd->pme_receive_vir_ener)
    {
        /* Peer PP node: communicate all data */
        if (dd->cnb == nullptr)
        {
            snew(dd->cnb, 1);
        }
        cnb = dd->cnb;

        cnb->flags      = flags;
        cnb->natoms     = n;
        cnb->maxshift_x = maxshift_x;
        cnb->maxshift_y = maxshift_y;
        cnb->lambda_q   = lambda_q;
        cnb->lambda_lj  = lambda_lj;
        cnb->step       = step;
        if (flags & PP_PME_COORD)
        {
            copy_mat(box, cnb->box);
        }
#if GMX_MPI
        MPI_Isend(cnb, sizeof(*cnb), MPI_BYTE,
                  dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
#endif
    }
    else if (flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
    {
#if GMX_MPI
        /* Communicate only the number of atoms */
        MPI_Isend(&n, sizeof(n), MPI_BYTE,
                  dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
#endif
    }

#if GMX_MPI
    if (n > 0)
    {
        if (flags & PP_PME_CHARGE)
        {
            MPI_Isend(chargeA, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_ChargeA, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_CHARGEB)
        {
            MPI_Isend(chargeB, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_ChargeB, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SQRTC6)
        {
            MPI_Isend(c6A, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SQRTC6A, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SQRTC6B)
        {
            MPI_Isend(c6B, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SQRTC6B, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SIGMA)
        {
            MPI_Isend(sigmaA, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SigmaA, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SIGMAB)
        {
            MPI_Isend(sigmaB, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SigmaB, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_COORD)
        {
            MPI_Isend(x[0], n*sizeof(rvec), MPI_BYTE,
                      dd->pme_nodeid, eCommType_COORD, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
    }
#endif

    if (!c_useDelayedWait)
    {
        /* Wait for the data to arrive */
        /* This wait could be skipped (c_useDelayedWait == true), since we are
         * sure x and q will not be modified before the next call to
         * gmx_pme_send_coeffs_coords() or gmx_pme_receive_f().
         */
        gmx_pme_send_coeffs_coords_wait(dd);
    }
}
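
/* For reference, the wire protocol used above: one small header message
 * (the gmx_pme_comm_n_box_t struct, or just the atom count when this rank
 * is not the virial/energy peer) is sent with tag eCommType_CNB, followed
 * by one payload message per requested quantity, each with its own
 * eCommType tag so that the PME rank can match buffers to contents.
 */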

void gmx_pme_send_parameters(const t_commrec *cr,
                             const interaction_const_t *ic,
                             gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
                             real *chargeA, real *chargeB,
                             real *sqrt_c6A, real *sqrt_c6B,
                             real *sigmaA, real *sigmaB,
                             int maxshift_x, int maxshift_y)
{
    unsigned int flags = 0;

    if (EEL_PME(ic->eeltype))
    {
        flags |= PP_PME_CHARGE;
    }
    if (EVDW_PME(ic->vdwtype))
    {
        flags |= (PP_PME_SQRTC6 | PP_PME_SIGMA);
    }
    if (bFreeEnergy_q || bFreeEnergy_lj)
    {
        /* Assumes that the B state flags are in the bits just above
         * the ones for the A state. */
        flags |= (flags << 1);
    }
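
    /* A compile-time check of that bit-layout assumption could look like
     * the sketch below (kept as a comment here; the actual flag values are
     * defined in pme-pp-communication.h):
     *
     *   static_assert(PP_PME_CHARGEB == (PP_PME_CHARGE << 1) &&
     *                 PP_PME_SQRTC6B == (PP_PME_SQRTC6 << 1) &&
     *                 PP_PME_SIGMAB  == (PP_PME_SIGMA  << 1),
     *                 "B-state flags must sit one bit above their A-state flags");
     */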

    gmx_pme_send_coeffs_coords(cr, flags,
                               chargeA, chargeB,
                               sqrt_c6A, sqrt_c6B, sigmaA, sigmaB,
                               nullptr, nullptr, 0, 0, maxshift_x, maxshift_y, -1);
}

void gmx_pme_send_coordinates(const t_commrec *cr, matrix box, rvec *x,
                              real lambda_q, real lambda_lj,
                              gmx_bool bEnerVir,
                              int64_t step, gmx_wallcycle *wcycle)
{
    wallcycle_start(wcycle, ewcPP_PMESENDX);

    unsigned int flags = PP_PME_COORD;
    if (bEnerVir)
    {
        flags |= PP_PME_ENER_VIR;
    }
    gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
                               box, x, lambda_q, lambda_lj, 0, 0, step);

    wallcycle_stop(wcycle, ewcPP_PMESENDX);
}

void gmx_pme_send_finish(const t_commrec *cr)
{
    unsigned int flags = PP_PME_FINISH;

    gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0, 0, -1);
}

void gmx_pme_send_switchgrid(const t_commrec *cr,
                             ivec grid_size,
                             real ewaldcoeff_q,
                             real ewaldcoeff_lj)
{
#if GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_SWITCHGRID;
        copy_ivec(grid_size, cnb.grid_size);
        cnb.ewaldcoeff_q  = ewaldcoeff_q;
        cnb.ewaldcoeff_lj = ewaldcoeff_lj;

        /* This message is uncommon, so we send it blocking to simplify the code */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
    }
#else
    GMX_UNUSED_VALUE(cr);
    GMX_UNUSED_VALUE(grid_size);
    GMX_UNUSED_VALUE(ewaldcoeff_q);
    GMX_UNUSED_VALUE(ewaldcoeff_lj);
#endif
}

void gmx_pme_send_resetcounters(const t_commrec gmx_unused *cr, int64_t gmx_unused step)
{
#if GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_RESETCOUNTERS;
        cnb.step  = step;

        /* This message is uncommon, so we send it blocking to simplify the code */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
    }
#endif
}

/*! \brief Receive virial and energy from PME rank */
static void receive_virial_energy(const t_commrec *cr,
                                  gmx::ForceWithVirial *forceWithVirial,
                                  real *energy_q, real *energy_lj,
                                  real *dvdlambda_q, real *dvdlambda_lj,
                                  float *pme_cycles)
{
    gmx_pme_comm_vir_ene_t cve;

    if (cr->dd->pme_receive_vir_ener)
    {
        if (debug)
        {
            fprintf(debug,
                    "PP rank %d receiving from PME rank %d: virial and energy\n",
                    cr->sim_nodeid, cr->dd->pme_nodeid);
        }
#if GMX_MPI
        MPI_Recv(&cve, sizeof(cve), MPI_BYTE, cr->dd->pme_nodeid, 1, cr->mpi_comm_mysim,
                 MPI_STATUS_IGNORE);
#else
        memset(&cve, 0, sizeof(cve));
#endif

        forceWithVirial->addVirialContribution(cve.vir_q);
        forceWithVirial->addVirialContribution(cve.vir_lj);
        *energy_q      = cve.energy_q;
        *energy_lj     = cve.energy_lj;
        *dvdlambda_q  += cve.dvdlambda_q;
        *dvdlambda_lj += cve.dvdlambda_lj;
        *pme_cycles    = cve.cycles;

        if (cve.stop_cond != gmx_stop_cond_none)
        {
            gmx_set_stop_condition(cve.stop_cond);
        }
    }
    else
    {
        *energy_q   = 0;
        *energy_lj  = 0;
        *pme_cycles = 0;
    }
}

void gmx_pme_receive_f(const t_commrec *cr,
                       gmx::ForceWithVirial *forceWithVirial,
                       real *energy_q, real *energy_lj,
                       real *dvdlambda_q, real *dvdlambda_lj,
                       float *pme_cycles)
{
    if (c_useDelayedWait)
    {
        /* Wait for the x request to finish */
        gmx_pme_send_coeffs_coords_wait(cr->dd);
    }

    int natoms = dd_numHomeAtoms(*cr->dd);

    if (natoms > cr->dd->pme_recv_f_alloc)
    {
        cr->dd->pme_recv_f_alloc = over_alloc_dd(natoms);
        srenew(cr->dd->pme_recv_f_buf, cr->dd->pme_recv_f_alloc);
    }

#if GMX_MPI
    MPI_Recv(cr->dd->pme_recv_f_buf[0],
             natoms*sizeof(rvec), MPI_BYTE,
             cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim,
             MPI_STATUS_IGNORE);
#endif

    int nt = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, natoms);

    gmx::ArrayRef<gmx::RVec> f = forceWithVirial->force_;

    /* Note that we would like to avoid this conditional by putting it
     * into the omp pragma instead, but then we still take the full
     * omp parallel for overhead (at least with gcc5).
     */
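    /* The avoided alternative would be a single annotated loop like the
     * sketch below; it is correct, but the parallel region is still set up
     * even when the if clause is false, which is the overhead referred to
     * above:
     *
     *   #pragma omp parallel for num_threads(nt) schedule(static) if (nt > 1)
     *   for (int i = 0; i < natoms; i++)
     *   {
     *       rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
     *   }
     */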
    if (nt == 1)
    {
        for (int i = 0; i < natoms; i++)
        {
            rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
        }
    }
    else
    {
#pragma omp parallel for num_threads(nt) schedule(static)
        for (int i = 0; i < natoms; i++)
        {
            rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
        }
    }

    receive_virial_energy(cr, forceWithVirial, energy_q, energy_lj, dvdlambda_q, dvdlambda_lj, pme_cycles);
}