src/gromacs/mdlib/force.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "force.h"
  41
  42 #include <cassert>
  43 #include <cmath>
  44 #include <cstring>
  45
  46 #include "gromacs/domdec/dlbtiming.h"
  47 #include "gromacs/domdec/domdec.h"
  48 #include "gromacs/domdec/domdec_struct.h"
  49 #include "gromacs/ewald/ewald.h"
  50 #include "gromacs/ewald/long_range_correction.h"
  51 #include "gromacs/ewald/pme.h"
  52 #include "gromacs/gmxlib/network.h"
  53 #include "gromacs/gmxlib/nrnb.h"
  54 #include "gromacs/listed_forces/listed_forces.h"
  55 #include "gromacs/math/vec.h"
  56 #include "gromacs/math/vecdump.h"
  57 #include "gromacs/mdlib/forcerec_threading.h"
  58 #include "gromacs/mdlib/qmmm.h"
  59 #include "gromacs/mdlib/rf_util.h"
  60 #include "gromacs/mdlib/wall.h"
  61 #include "gromacs/mdtypes/commrec.h"
  62 #include "gromacs/mdtypes/enerdata.h"
  63 #include "gromacs/mdtypes/forceoutput.h"
  64 #include "gromacs/mdtypes/forcerec.h"
  65 #include "gromacs/mdtypes/inputrec.h"
  66 #include "gromacs/mdtypes/interaction_const.h"
  67 #include "gromacs/mdtypes/md_enums.h"
  68 #include "gromacs/mdtypes/mdatom.h"
  69 #include "gromacs/mdtypes/simulation_workload.h"
  70 #include "gromacs/pbcutil/ishift.h"
  71 #include "gromacs/pbcutil/mshift.h"
  72 #include "gromacs/pbcutil/pbc.h"
  73 #include "gromacs/timing/wallcycle.h"
  74 #include "gromacs/utility/exceptions.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/smalloc.h"
  77
  78 static void clearEwaldThreadOutput(ewald_corr_thread_t* ewc_t)
  79 {
  80     ewc_t->Vcorr_q        = 0;
  81     ewc_t->Vcorr_lj       = 0;
  82     ewc_t->dvdl[efptCOUL] = 0;
  83     ewc_t->dvdl[efptVDW]  = 0;
  84     clear_mat(ewc_t->vir_q);
  85     clear_mat(ewc_t->vir_lj);
  86 }
  87
  88 static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t* ewc_t)
  89 {
  90     ewald_corr_thread_t& dest = ewc_t[0];
  91
  92     for (int t = 1; t < nthreads; t++)
  93     {
  94         dest.Vcorr_q += ewc_t[t].Vcorr_q;
  95         dest.Vcorr_lj += ewc_t[t].Vcorr_lj;
  96         dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
  97         dest.dvdl[efptVDW] += ewc_t[t].dvdl[efptVDW];
  98         m_add(dest.vir_q, ewc_t[t].vir_q, dest.vir_q);
  99         m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
 100     }
 101 }
 102
 103 void do_force_lowlevel(t_forcerec*                         fr,
 104                        const t_inputrec*                   ir,
 105                        const InteractionDefinitions&       idef,
 106                        const t_commrec*                    cr,
 107                        const gmx_multisim_t*               ms,
 108                        t_nrnb*                             nrnb,
 109                        gmx_wallcycle_t                     wcycle,
 110                        const t_mdatoms*                    md,
 111                        gmx::ArrayRefWithPadding<gmx::RVec> coordinates,
 112                        history_t*                          hist,
 113                        gmx::ForceOutputs*                  forceOutputs,
 114                        gmx_enerdata_t*                     enerd,
 115                        t_fcdata*                           fcd,
 116                        const matrix                        box,
 117                        const real*                         lambda,
 118                        const t_graph*                      graph,
 119                        const rvec*                         mu_tot,
 120                        const gmx::StepWorkload&            stepWork,
 121                        const DDBalanceRegionHandler&       ddBalanceRegionHandler)
 122 {
 123     // TODO: Replace all uses of x by const coordinates
 124     rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
 125
 126     auto& forceWithVirial = forceOutputs->forceWithVirial();
 127
 128     /* do QMMM first if requested */
 129     if (fr->bQMMM)
 130     {
 131         enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr->qr);
 132     }
 133
 134     /* Call the short range functions all in one go. */
 135
 136     if (ir->nwall)
 137     {
 138         /* foreign lambda component for walls */
 139         real dvdl_walls = do_walls(*ir, *fr, box, *md, x, &forceWithVirial, lambda[efptVDW],
 140                                    enerd->grpp.ener[egLJSR].data(), nrnb);
 141         enerd->dvdl_lin[efptVDW] += dvdl_walls;
 142
 143         for (auto& dhdl : enerd->dhdlLambda)
 144         {
 145             dhdl += dvdl_walls;
 146         }
 147     }
 148
 149     /* Shift the coordinates. Must be done before listed forces and PPPM,
 150      * but is also necessary for SHAKE and update, therefore it can NOT
 151      * go when no listed forces have to be evaluated.
 152      *
 153      * The shifting and PBC code is deliberately not timed, since with
 154      * the Verlet scheme it only takes non-zero time with triclinic
 155      * boxes, and even then the time is around a factor of 100 less
 156      * than the next smallest counter.
 157      */
 158
 159
 160     /* Here sometimes we would not need to shift with NBFonly,
 161      * but we do so anyhow for consistency of the returned coordinates.
 162      */
 163     if (graph)
 164     {
 165         shift_self(graph, box, x);
 166         if (TRICLINIC(box))
 167         {
 168             inc_nrnb(nrnb, eNR_SHIFTX, 2 * graph->nnodes);
 169         }
 170         else
 171         {
 172             inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
 173         }
 174     }
 175
 176     {
 177         t_pbc pbc;
 178
 179         /* Check whether we need to take into account PBC in listed interactions. */
 180         const auto needPbcForListedForces =
 181                 fr->bMolPBC && stepWork.computeListedForces && haveCpuListedForces(*fr, idef, *fcd);
 182         if (needPbcForListedForces)
 183         {
 184             /* Since all atoms are in the rectangular or triclinic unit-cell,
 185              * only single box vector shifts (2 in x) are required.
 186              */
 187             set_pbc_dd(&pbc, fr->pbcType, DOMAINDECOMP(cr) ? cr->dd->numCells : nullptr, TRUE, box);
 188         }
 189
 190         do_force_listed(wcycle, box, ir->fepvals, cr, ms, idef, x, hist, forceOutputs, fr, &pbc,
 191                         graph, enerd, nrnb, lambda, md, fcd,
 192                         DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, stepWork);
 193     }
 194
 195     const bool computePmeOnCpu = (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
 196                                  && thisRankHasDuty(cr, DUTY_PME)
 197                                  && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU);
 198
 199     const bool haveEwaldSurfaceTerm = haveEwaldSurfaceContribution(*ir);
 200
 201     /* Do long-range electrostatics and/or LJ-PME
 202      * and compute PME surface terms when necessary.
 203      */
 204     if (computePmeOnCpu || fr->ic->eeltype == eelEWALD || haveEwaldSurfaceTerm)
 205     {
 206         int  status = 0;
 207         real Vlr_q = 0, Vlr_lj = 0;
 208
 209         /* We reduce all virial, dV/dlambda and energy contributions, except
 210          * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
 211          */
 212         ewald_corr_thread_t& ewaldOutput = fr->ewc_t[0];
 213         clearEwaldThreadOutput(&ewaldOutput);
 214
 215         if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
 216         {
 217             /* Calculate the Ewald surface force and energy contributions, when necessary */
 218             if (haveEwaldSurfaceTerm)
 219             {
 220                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
 221
 222                 int nthreads = fr->nthread_ewc;
 223 #pragma omp parallel for num_threads(nthreads) schedule(static)
 224                 for (int t = 0; t < nthreads; t++)
 225                 {
 226                     try
 227                     {
 228                         ewald_corr_thread_t& ewc_t = fr->ewc_t[t];
 229                         if (t > 0)
 230                         {
 231                             clearEwaldThreadOutput(&ewc_t);
 232                         }
 233
 234                         /* Threading is only supported with the Verlet cut-off
 235                          * scheme and then only single particle forces (no
 236                          * exclusion forces) are calculated, so we can store
 237                          * the forces in the normal, single forceWithVirial->force_ array.
 238                          */
 239                         ewald_LRcorrection(md->homenr, cr, nthreads, t, *fr, *ir, md->chargeA,
 240                                            md->chargeB, (md->nChargePerturbed != 0), x, box, mu_tot,
 241                                            as_rvec_array(forceWithVirial.force_.data()),
 242                                            &ewc_t.Vcorr_q, lambda[efptCOUL], &ewc_t.dvdl[efptCOUL]);
 243                     }
 244                     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 245                 }
 246                 if (nthreads > 1)
 247                 {
 248                     reduceEwaldThreadOuput(nthreads, fr->ewc_t);
 249                 }
 250                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
 251             }
 252
 253             if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
 254             {
 255                 /* This is not in a subcounter because it takes a
 256                    negligible and constant-sized amount of time */
 257                 ewaldOutput.Vcorr_q += ewald_charge_correction(
 258                         cr, fr, lambda[efptCOUL], box, &ewaldOutput.dvdl[efptCOUL], ewaldOutput.vir_q);
 259             }
 260
 261             if (computePmeOnCpu)
 262             {
 263                 /* Do reciprocal PME for Coulomb and/or LJ. */
 264                 assert(fr->n_tpi >= 0);
 265                 if (fr->n_tpi == 0 || stepWork.stateChanged)
 266                 {
 267                     int pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
 268
 269                     if (stepWork.computeForces)
 270                     {
 271                         pme_flags |= GMX_PME_CALC_F;
 272                     }
 273                     if (stepWork.computeVirial)
 274                     {
 275                         pme_flags |= GMX_PME_CALC_ENER_VIR;
 276                     }
 277
 278                     /* With domain decomposition we close the CPU side load
 279                      * balancing region here, because PME does global
 280                      * communication that acts as a global barrier.
 281                      */
 282                     ddBalanceRegionHandler.closeAfterForceComputationCpu();
 283
 284                     wallcycle_start(wcycle, ewcPMEMESH);
 285                     status = gmx_pme_do(
 286                             fr->pmedata,
 287                             gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(),
 288                                                         md->homenr - fr->n_tpi),
 289                             forceWithVirial.force_, md->chargeA, md->chargeB, md->sqrt_c6A,
 290                             md->sqrt_c6B, md->sigmaA, md->sigmaB, box, cr,
 291                             DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
 292                             DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle,
 293                             ewaldOutput.vir_q, ewaldOutput.vir_lj, &Vlr_q, &Vlr_lj,
 294                             lambda[efptCOUL], lambda[efptVDW], &ewaldOutput.dvdl[efptCOUL],
 295                             &ewaldOutput.dvdl[efptVDW], pme_flags);
 296                     wallcycle_stop(wcycle, ewcPMEMESH);
 297                     if (status != 0)
 298                     {
 299                         gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
 300                     }
 301
 302                     /* We should try to do as little computation after
 303                      * this as possible, because parallel PME synchronizes
 304                      * the nodes, so we want all load imbalance of the
 305                      * rest of the force calculation to be before the PME
 306                      * call.  DD load balancing is done on the whole time
 307                      * of the force call (without PME).
 308                      */
 309                 }
 310                 if (fr->n_tpi > 0)
 311                 {
 312                     /* Determine the PME grid energy of the test molecule
 313                      * with the PME grid potential of the other charges.
 314                      */
 315                     gmx_pme_calc_energy(
 316                             fr->pmedata,
 317                             coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
 318                             gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
 319                             &Vlr_q);
 320                 }
 321             }
 322         }
 323
 324         if (fr->ic->eeltype == eelEWALD)
 325         {
 326             Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial.force_.data()), md->chargeA,
 327                              md->chargeB, box, cr, md->homenr, ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
 328                              lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL], fr->ewald_table);
 329         }
 330
 331         /* Note that with separate PME nodes we get the real energies later */
 332         // TODO it would be simpler if we just accumulated a single
 333         // long-range virial contribution.
 334         forceWithVirial.addVirialContribution(ewaldOutput.vir_q);
 335         forceWithVirial.addVirialContribution(ewaldOutput.vir_lj);
 336         enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
 337         enerd->dvdl_lin[efptVDW] += ewaldOutput.dvdl[efptVDW];
 338         enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q;
 339         enerd->term[F_LJ_RECIP]   = Vlr_lj + ewaldOutput.Vcorr_lj;
 340
 341         for (auto& dhdl : enerd->dhdlLambda)
 342         {
 343             dhdl += ewaldOutput.dvdl[efptVDW] + ewaldOutput.dvdl[efptCOUL];
 344         }
 345
 346         if (debug)
 347         {
 348             fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q,
 349                     ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
 350             pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
 351             rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
 352             pr_rvecs(debug, 0, "fshift after LR Corrections", fshift, SHIFTS);
 353             fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj,
 354                     ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
 355             pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
 356         }
 357     }
 358
 359     if (debug)
 360     {
 361         print_nrnb(debug, nrnb);
 362     }
 363
 364     if (debug)
 365     {
 366         rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
 367         pr_rvecs(debug, 0, "fshift after bondeds", fshift, SHIFTS);
 368     }
 369 }