Set up workload data structures
src/gromacs/mdlib/force.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "force.h"

#include <cassert>
#include <cmath>
#include <cstring>

#include "gromacs/domdec/dlbtiming.h"
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/ewald.h"
#include "gromacs/ewald/long_range_correction.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/listed_forces/listed_forces.h"
#include "gromacs/math/vec.h"
#include "gromacs/math/vecdump.h"
#include "gromacs/mdlib/forcerec_threading.h"
#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdlib/rf_util.h"
#include "gromacs/mdlib/wall.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/enerdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"

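/*! \brief Zero the accumulated energies, dV/dlambda terms and virials in a per-thread Ewald correction buffer */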
static void clearEwaldThreadOutput(ewald_corr_thread_t *ewc_t)
{
    ewc_t->Vcorr_q        = 0;
    ewc_t->Vcorr_lj       = 0;
    ewc_t->dvdl[efptCOUL] = 0;
    ewc_t->dvdl[efptVDW]  = 0;
    clear_mat(ewc_t->vir_q);
    clear_mat(ewc_t->vir_lj);
}

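/*! \brief Reduce the Ewald correction output of all threads into the buffer of thread 0 */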
static void reduceEwaldThreadOutput(int nthreads, ewald_corr_thread_t *ewc_t)
{
    ewald_corr_thread_t &dest = ewc_t[0];

    for (int t = 1; t < nthreads; t++)
    {
        dest.Vcorr_q        += ewc_t[t].Vcorr_q;
        dest.Vcorr_lj       += ewc_t[t].Vcorr_lj;
        dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
        dest.dvdl[efptVDW]  += ewc_t[t].dvdl[efptVDW];
        m_add(dest.vir_q,  ewc_t[t].vir_q,  dest.vir_q);
        m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
    }
}

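/*! \brief Compute the lower-level force contributions: QM/MM, walls, listed
 * (bonded) interactions and, when handled by this rank on the CPU, long-range
 * Ewald/PME electrostatics and LJ-PME, accumulating energies, dV/dlambda and
 * virial contributions into \p enerd and \p forceOutputs.
 */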
void
do_force_lowlevel(t_forcerec                               *fr,
                  const t_inputrec                         *ir,
                  const t_idef                             *idef,
                  const t_commrec                          *cr,
                  const gmx_multisim_t                     *ms,
                  t_nrnb                                   *nrnb,
                  gmx_wallcycle_t                           wcycle,
                  const t_mdatoms                          *md,
                  gmx::ArrayRefWithPadding<gmx::RVec>       coordinates,
                  history_t                                *hist,
                  gmx::ForceOutputs                        *forceOutputs,
                  gmx_enerdata_t                           *enerd,
                  t_fcdata                                 *fcd,
                  const matrix                              box,
                  const real                               *lambda,
                  const t_graph                            *graph,
                  const rvec                               *mu_tot,
                  const gmx::StepWorkload                  &stepWork,
                  const DDBalanceRegionHandler             &ddBalanceRegionHandler)
{
    // TODO: Replace all uses of x by const coordinates
    rvec *x               = as_rvec_array(coordinates.paddedArrayRef().data());

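    /* Force contributions computed here go into one of two buffers:
     * forceWithVirial for terms with a direct virial contribution
     * (walls, Ewald/PME) and forceWithShiftForces for terms whose
     * virial is obtained from shift forces (QM/MM, listed interactions).
     */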
    auto &forceWithVirial = forceOutputs->forceWithVirial();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr);
    }

    /* Call the short range functions all in one go. */

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        real dvdl_walls = do_walls(*ir, *fr, box, *md, x,
                                   &forceWithVirial, lambda[efptVDW],
                                   enerd->grpp.ener[egLJSR].data(), nrnb);
        enerd->dvdl_lin[efptVDW] += dvdl_walls;
    }

    /* Shift the coordinates. This must be done before listed forces and PPPM,
     * but is also needed for SHAKE and the update, so it cannot be skipped
     * even when no listed forces have to be evaluated.
     *
     * The shifting and PBC code is deliberately not timed, since with
     * the Verlet scheme it only takes non-zero time with triclinic
     * boxes, and even then the time is around a factor of 100 less
     * than the next smallest counter.
     */

    /* Sometimes the shift is not strictly needed here, e.g. when only
     * non-bonded forces are computed, but we do it anyway so that the
     * returned coordinates are consistent.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }

    {
        t_pbc      pbc;

        /* Check whether we need to take into account PBC in listed interactions. */
        const auto needPbcForListedForces = fr->bMolPBC && stepWork.computeListedForces && haveCpuListedForces(*fr, *idef, *fcd);
        if (needPbcForListedForces)
        {
            /* Since all atoms are in the rectangular or triclinic unit-cell,
             * only single box vector shifts (2 in x) are required.
             */
            set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr,
                       TRUE, box);
        }

        do_force_listed(wcycle, box, ir->fepvals, cr, ms,
                        idef, x, hist,
                        forceOutputs,
                        fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
                        DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr,
                        stepWork);
    }

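    /* Reciprocal-space PME is computed here only when this rank has PME duty
     * and PME runs on the CPU; with PME offloaded to a GPU or to separate PME
     * ranks, the mesh part is handled elsewhere. Ewald surface terms still
     * require the CPU correction loop below.
     */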
    const bool computePmeOnCpu =
        (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) &&
        thisRankHasDuty(cr, DUTY_PME) &&
        (pme_run_mode(fr->pmedata) == PmeRunMode::CPU);

    const bool haveEwaldSurfaceTerms =
        EEL_PME_EWALD(fr->ic->eeltype) &&
        (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0);

    /* Do long-range electrostatics and/or LJ-PME
     * and compute PME surface terms when necessary.
     */
    if (computePmeOnCpu ||
        fr->ic->eeltype == eelEWALD ||
        haveEwaldSurfaceTerms)
    {
        int  status            = 0;
        real Vlr_q             = 0, Vlr_lj = 0;

        /* We reduce all virial, dV/dlambda and energy contributions, except
         * for the reciprocal energies (Vlr_q, Vlr_lj), into the same struct.
         */
        ewald_corr_thread_t &ewaldOutput = fr->ewc_t[0];
        clearEwaldThreadOutput(&ewaldOutput);

        if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
        {
            /* Calculate Ewald surface terms, when necessary */
            if (haveEwaldSurfaceTerms)
            {
                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                int nthreads = fr->nthread_ewc;
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (int t = 0; t < nthreads; t++)
                {
                    try
                    {
                        ewald_corr_thread_t &ewc_t = fr->ewc_t[t];
                        if (t > 0)
                        {
                            clearEwaldThreadOutput(&ewc_t);
                        }

                        /* Threading is only supported with the Verlet cut-off
                         * scheme and then only single particle forces (no
                         * exclusion forces) are calculated, so we can store
                         * the forces in the normal, single forceWithVirial->force_ array.
                         */
                        ewald_LRcorrection(md->homenr, cr, nthreads, t, fr, ir,
                                           md->chargeA, md->chargeB,
                                           (md->nChargePerturbed != 0),
                                           x, box, mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           as_rvec_array(forceWithVirial.force_.data()),
                                           &ewc_t.Vcorr_q,
                                           lambda[efptCOUL],
                                           &ewc_t.dvdl[efptCOUL]);
                    }
                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
                }
                if (nthreads > 1)
                {
                    reduceEwaldThreadOutput(nthreads, fr->ewc_t);
                }
                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }

            if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
            {
                /* This is not in a subcounter because it takes a
                   negligible and constant-sized amount of time */
                ewaldOutput.Vcorr_q +=
                    ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
                                            &ewaldOutput.dvdl[efptCOUL],
                                            ewaldOutput.vir_q);
            }

            if (computePmeOnCpu)
            {
                /* Do reciprocal PME for Coulomb and/or LJ. */
                assert(fr->n_tpi >= 0);
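                /* With test-particle insertion (n_tpi > 0) the full mesh
                 * calculation is only redone when the state of the other
                 * atoms changed; otherwise only the test-particle energy
                 * is evaluated further below.
                 */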
                if (fr->n_tpi == 0 || stepWork.stateChanged)
                {
                    int pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;

                    if (stepWork.computeForces)
                    {
                        pme_flags |= GMX_PME_CALC_F;
                    }
                    if (stepWork.computeVirial)
                    {
                        pme_flags |= GMX_PME_CALC_ENER_VIR;
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* We don't calculate f, but we do want the potential */
                        pme_flags |= GMX_PME_CALC_POT;
                    }

                    /* With domain decomposition we close the CPU side load
                     * balancing region here, because PME does global
                     * communication that acts as a global barrier.
                     */
                    ddBalanceRegionHandler.closeAfterForceComputationCpu();

                    wallcycle_start(wcycle, ewcPMEMESH);
                    status = gmx_pme_do(fr->pmedata,
                                        gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(), md->homenr - fr->n_tpi),
                                        forceWithVirial.force_,
                                        md->chargeA, md->chargeB,
                                        md->sqrt_c6A, md->sqrt_c6B,
                                        md->sigmaA, md->sigmaB,
                                        box, cr,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                        nrnb, wcycle,
                                        ewaldOutput.vir_q, ewaldOutput.vir_lj,
                                        &Vlr_q, &Vlr_lj,
                                        lambda[efptCOUL], lambda[efptVDW],
                                        &ewaldOutput.dvdl[efptCOUL],
                                        &ewaldOutput.dvdl[efptVDW],
                                        pme_flags);
                    wallcycle_stop(wcycle, ewcPMEMESH);
                    if (status != 0)
                    {
                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
                    }

                    /* We should try to do as little computation after
                     * this as possible, because parallel PME synchronizes
                     * the nodes, so we want all load imbalance of the
                     * rest of the force calculation to be before the PME
                     * call.  DD load balancing is done on the whole time
                     * of the force call (without PME).
                     */
                }
                if (fr->n_tpi > 0)
                {
                    if (EVDW_PME(ir->vdwtype))
                    {
                        gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
                    }
                    /* Determine the PME grid energy of the test molecule
                     * with the PME grid potential of the other charges.
                     */
                    gmx_pme_calc_energy(fr->pmedata,
                                        coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
                                        gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
                                        &Vlr_q);
                }
            }
        }

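        /* Plain (non-PME) Ewald summation: the reciprocal-space part is
         * evaluated directly on the CPU.
         */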
        if (fr->ic->eeltype == eelEWALD)
        {
            Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial.force_.data()),
                             md->chargeA, md->chargeB,
                             box, cr, md->homenr,
                             ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
                             lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL],
                             fr->ewald_table);
        }

        /* Note that with separate PME nodes we get the real energies later */
        // TODO it would be simpler if we just accumulated a single
        // long-range virial contribution.
        forceWithVirial.addVirialContribution(ewaldOutput.vir_q);
        forceWithVirial.addVirialContribution(ewaldOutput.vir_lj);
        enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
        enerd->dvdl_lin[efptVDW]  += ewaldOutput.dvdl[efptVDW];
        enerd->term[F_COUL_RECIP]  = Vlr_q + ewaldOutput.Vcorr_q;
        enerd->term[F_LJ_RECIP]    = Vlr_lj + ewaldOutput.Vcorr_lj;

        if (debug)
        {
            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
                    Vlr_q, ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
            rvec *fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
            pr_rvecs(debug, 0, "fshift after LR Corrections", fshift, SHIFTS);
            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
                    Vlr_lj, ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
            pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
        }
    }

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }

    if (debug)
    {
        rvec *fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
        pr_rvecs(debug, 0, "fshift after bondeds", fshift, SHIFTS);
    }
}