0bc7b13c8cc9ebf7b6b027296549361db50dea37
[alexxy/gromacs.git] / src / gromacs / mdlib / force.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "force.h"
41
42 #include <cassert>
43 #include <cmath>
44 #include <cstring>
45
46 #include "gromacs/domdec/dlbtiming.h"
47 #include "gromacs/domdec/domdec.h"
48 #include "gromacs/domdec/domdec_struct.h"
49 #include "gromacs/ewald/ewald.h"
50 #include "gromacs/ewald/long_range_correction.h"
51 #include "gromacs/ewald/pme.h"
52 #include "gromacs/gmxlib/network.h"
53 #include "gromacs/gmxlib/nrnb.h"
54 #include "gromacs/listed_forces/listed_forces.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/math/vecdump.h"
57 #include "gromacs/mdlib/forcerec_threading.h"
58 #include "gromacs/mdlib/qmmm.h"
59 #include "gromacs/mdlib/rf_util.h"
60 #include "gromacs/mdlib/wall.h"
61 #include "gromacs/mdtypes/commrec.h"
62 #include "gromacs/mdtypes/enerdata.h"
63 #include "gromacs/mdtypes/forceoutput.h"
64 #include "gromacs/mdtypes/forcerec.h"
65 #include "gromacs/mdtypes/inputrec.h"
66 #include "gromacs/mdtypes/md_enums.h"
67 #include "gromacs/mdtypes/mdatom.h"
68 #include "gromacs/mdtypes/simulation_workload.h"
69 #include "gromacs/pbcutil/ishift.h"
70 #include "gromacs/pbcutil/mshift.h"
71 #include "gromacs/pbcutil/pbc.h"
72 #include "gromacs/timing/wallcycle.h"
73 #include "gromacs/utility/exceptions.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/smalloc.h"
76
77 static void clearEwaldThreadOutput(ewald_corr_thread_t* ewc_t)
78 {
79     ewc_t->Vcorr_q        = 0;
80     ewc_t->Vcorr_lj       = 0;
81     ewc_t->dvdl[efptCOUL] = 0;
82     ewc_t->dvdl[efptVDW]  = 0;
83     clear_mat(ewc_t->vir_q);
84     clear_mat(ewc_t->vir_lj);
85 }
86
87 static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t* ewc_t)
88 {
89     ewald_corr_thread_t& dest = ewc_t[0];
90
91     for (int t = 1; t < nthreads; t++)
92     {
93         dest.Vcorr_q += ewc_t[t].Vcorr_q;
94         dest.Vcorr_lj += ewc_t[t].Vcorr_lj;
95         dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
96         dest.dvdl[efptVDW] += ewc_t[t].dvdl[efptVDW];
97         m_add(dest.vir_q, ewc_t[t].vir_q, dest.vir_q);
98         m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
99     }
100 }
101
102 void do_force_lowlevel(t_forcerec*                         fr,
103                        const t_inputrec*                   ir,
104                        const t_idef*                       idef,
105                        const t_commrec*                    cr,
106                        const gmx_multisim_t*               ms,
107                        t_nrnb*                             nrnb,
108                        gmx_wallcycle_t                     wcycle,
109                        const t_mdatoms*                    md,
110                        gmx::ArrayRefWithPadding<gmx::RVec> coordinates,
111                        history_t*                          hist,
112                        gmx::ForceOutputs*                  forceOutputs,
113                        gmx_enerdata_t*                     enerd,
114                        t_fcdata*                           fcd,
115                        const matrix                        box,
116                        const real*                         lambda,
117                        const t_graph*                      graph,
118                        const rvec*                         mu_tot,
119                        const gmx::StepWorkload&            stepWork,
120                        const DDBalanceRegionHandler&       ddBalanceRegionHandler)
121 {
122     // TODO: Replace all uses of x by const coordinates
123     rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
124
125     auto& forceWithVirial = forceOutputs->forceWithVirial();
126
127     /* do QMMM first if requested */
128     if (fr->bQMMM)
129     {
130         enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr->qr);
131     }
132
133     /* Call the short range functions all in one go. */
134
135     if (ir->nwall)
136     {
137         /* foreign lambda component for walls */
138         real dvdl_walls = do_walls(*ir, *fr, box, *md, x, &forceWithVirial, lambda[efptVDW],
139                                    enerd->grpp.ener[egLJSR].data(), nrnb);
140         enerd->dvdl_lin[efptVDW] += dvdl_walls;
141     }
142
143     /* Shift the coordinates. Must be done before listed forces and PPPM,
144      * but is also necessary for SHAKE and update, therefore it can NOT
145      * go when no listed forces have to be evaluated.
146      *
147      * The shifting and PBC code is deliberately not timed, since with
148      * the Verlet scheme it only takes non-zero time with triclinic
149      * boxes, and even then the time is around a factor of 100 less
150      * than the next smallest counter.
151      */
152
153
154     /* Here sometimes we would not need to shift with NBFonly,
155      * but we do so anyhow for consistency of the returned coordinates.
156      */
157     if (graph)
158     {
159         shift_self(graph, box, x);
160         if (TRICLINIC(box))
161         {
162             inc_nrnb(nrnb, eNR_SHIFTX, 2 * graph->nnodes);
163         }
164         else
165         {
166             inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
167         }
168     }
169
170     {
171         t_pbc pbc;
172
173         /* Check whether we need to take into account PBC in listed interactions. */
174         const auto needPbcForListedForces =
175                 fr->bMolPBC && stepWork.computeListedForces && haveCpuListedForces(*fr, *idef, *fcd);
176         if (needPbcForListedForces)
177         {
178             /* Since all atoms are in the rectangular or triclinic unit-cell,
179              * only single box vector shifts (2 in x) are required.
180              */
181             set_pbc_dd(&pbc, fr->pbcType, DOMAINDECOMP(cr) ? cr->dd->numCells : nullptr, TRUE, box);
182         }
183
184         do_force_listed(wcycle, box, ir->fepvals, cr, ms, idef, x, hist, forceOutputs, fr, &pbc,
185                         graph, enerd, nrnb, lambda, md, fcd,
186                         DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, stepWork);
187     }
188
189     const bool computePmeOnCpu = (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
190                                  && thisRankHasDuty(cr, DUTY_PME)
191                                  && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU);
192
193     const bool haveEwaldSurfaceTerm = haveEwaldSurfaceContribution(*ir);
194
195     /* Do long-range electrostatics and/or LJ-PME
196      * and compute PME surface terms when necessary.
197      */
198     if (computePmeOnCpu || fr->ic->eeltype == eelEWALD || haveEwaldSurfaceTerm)
199     {
200         int  status = 0;
201         real Vlr_q = 0, Vlr_lj = 0;
202
203         /* We reduce all virial, dV/dlambda and energy contributions, except
204          * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
205          */
206         ewald_corr_thread_t& ewaldOutput = fr->ewc_t[0];
207         clearEwaldThreadOutput(&ewaldOutput);
208
209         if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
210         {
211             /* Calculate the Ewald surface force and energy contributions, when necessary */
212             if (haveEwaldSurfaceTerm)
213             {
214                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
215
216                 if (fr->n_tpi > 0)
217                 {
218                     gmx_fatal(FARGS,
219                               "TPI with PME currently only works in a 3D geometry with tin-foil "
220                               "boundary conditions");
221                 }
222
223                 int nthreads = fr->nthread_ewc;
224 #pragma omp parallel for num_threads(nthreads) schedule(static)
225                 for (int t = 0; t < nthreads; t++)
226                 {
227                     try
228                     {
229                         ewald_corr_thread_t& ewc_t = fr->ewc_t[t];
230                         if (t > 0)
231                         {
232                             clearEwaldThreadOutput(&ewc_t);
233                         }
234
235                         /* Threading is only supported with the Verlet cut-off
236                          * scheme and then only single particle forces (no
237                          * exclusion forces) are calculated, so we can store
238                          * the forces in the normal, single forceWithVirial->force_ array.
239                          */
240                         ewald_LRcorrection(md->homenr, cr, nthreads, t, *fr, *ir, md->chargeA,
241                                            md->chargeB, (md->nChargePerturbed != 0), x, box, mu_tot,
242                                            as_rvec_array(forceWithVirial.force_.data()),
243                                            &ewc_t.Vcorr_q, lambda[efptCOUL], &ewc_t.dvdl[efptCOUL]);
244                     }
245                     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
246                 }
247                 if (nthreads > 1)
248                 {
249                     reduceEwaldThreadOuput(nthreads, fr->ewc_t);
250                 }
251                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
252             }
253
254             if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
255             {
256                 /* This is not in a subcounter because it takes a
257                    negligible and constant-sized amount of time */
258                 ewaldOutput.Vcorr_q += ewald_charge_correction(
259                         cr, fr, lambda[efptCOUL], box, &ewaldOutput.dvdl[efptCOUL], ewaldOutput.vir_q);
260             }
261
262             if (computePmeOnCpu)
263             {
264                 /* Do reciprocal PME for Coulomb and/or LJ. */
265                 assert(fr->n_tpi >= 0);
266                 if (fr->n_tpi == 0 || stepWork.stateChanged)
267                 {
268                     int pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
269
270                     if (stepWork.computeForces)
271                     {
272                         pme_flags |= GMX_PME_CALC_F;
273                     }
274                     if (stepWork.computeVirial)
275                     {
276                         pme_flags |= GMX_PME_CALC_ENER_VIR;
277                     }
278                     if (fr->n_tpi > 0)
279                     {
280                         /* We don't calculate f, but we do want the potential */
281                         pme_flags |= GMX_PME_CALC_POT;
282                     }
283
284                     /* With domain decomposition we close the CPU side load
285                      * balancing region here, because PME does global
286                      * communication that acts as a global barrier.
287                      */
288                     ddBalanceRegionHandler.closeAfterForceComputationCpu();
289
290                     wallcycle_start(wcycle, ewcPMEMESH);
291                     status = gmx_pme_do(
292                             fr->pmedata,
293                             gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(),
294                                                         md->homenr - fr->n_tpi),
295                             forceWithVirial.force_, md->chargeA, md->chargeB, md->sqrt_c6A,
296                             md->sqrt_c6B, md->sigmaA, md->sigmaB, box, cr,
297                             DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
298                             DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle,
299                             ewaldOutput.vir_q, ewaldOutput.vir_lj, &Vlr_q, &Vlr_lj,
300                             lambda[efptCOUL], lambda[efptVDW], &ewaldOutput.dvdl[efptCOUL],
301                             &ewaldOutput.dvdl[efptVDW], pme_flags);
302                     wallcycle_stop(wcycle, ewcPMEMESH);
303                     if (status != 0)
304                     {
305                         gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
306                     }
307
308                     /* We should try to do as little computation after
309                      * this as possible, because parallel PME synchronizes
310                      * the nodes, so we want all load imbalance of the
311                      * rest of the force calculation to be before the PME
312                      * call.  DD load balancing is done on the whole time
313                      * of the force call (without PME).
314                      */
315                 }
316                 if (fr->n_tpi > 0)
317                 {
318                     if (EVDW_PME(ir->vdwtype))
319                     {
320
321                         gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
322                     }
323                     /* Determine the PME grid energy of the test molecule
324                      * with the PME grid potential of the other charges.
325                      */
326                     gmx_pme_calc_energy(
327                             fr->pmedata,
328                             coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
329                             gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
330                             &Vlr_q);
331                 }
332             }
333         }
334
335         if (fr->ic->eeltype == eelEWALD)
336         {
337             Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial.force_.data()), md->chargeA,
338                              md->chargeB, box, cr, md->homenr, ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
339                              lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL], fr->ewald_table);
340         }
341
342         /* Note that with separate PME nodes we get the real energies later */
343         // TODO it would be simpler if we just accumulated a single
344         // long-range virial contribution.
345         forceWithVirial.addVirialContribution(ewaldOutput.vir_q);
346         forceWithVirial.addVirialContribution(ewaldOutput.vir_lj);
347         enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
348         enerd->dvdl_lin[efptVDW] += ewaldOutput.dvdl[efptVDW];
349         enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q;
350         enerd->term[F_LJ_RECIP]   = Vlr_lj + ewaldOutput.Vcorr_lj;
351
352         if (debug)
353         {
354             fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q,
355                     ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
356             pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
357             rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
358             pr_rvecs(debug, 0, "fshift after LR Corrections", fshift, SHIFTS);
359             fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj,
360                     ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
361             pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
362         }
363     }
364
365     if (debug)
366     {
367         print_nrnb(debug, nrnb);
368     }
369
370     if (debug)
371     {
372         rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
373         pr_rvecs(debug, 0, "fshift after bondeds", fshift, SHIFTS);
374     }
375 }