Implement pull for modular simulator
[alexxy/gromacs.git] / src / gromacs / pulling / pullutil.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "config.h"
41
42 #include <cassert>
43 #include <cstdlib>
44
45 #include "gromacs/fileio/confio.h"
46 #include "gromacs/gmxlib/network.h"
47 #include "gromacs/math/functions.h"
48 #include "gromacs/math/units.h"
49 #include "gromacs/math/utilities.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/math/vectypes.h"
52 #include "gromacs/mdtypes/commrec.h"
53 #include "gromacs/mdtypes/inputrec.h"
54 #include "gromacs/mdtypes/md_enums.h"
55 #include "gromacs/mdtypes/state.h"
56 #include "gromacs/pbcutil/pbc.h"
57 #include "gromacs/pulling/pull.h"
58 #include "gromacs/utility/arrayref.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/futil.h"
61 #include "gromacs/utility/gmxassert.h"
62 #include "gromacs/utility/real.h"
63 #include "gromacs/utility/smalloc.h"
64
65 #include "pull_internal.h"
66
67 using gmx::ArrayRef;
68 using gmx::RVec;
69
70 #if GMX_MPI
71
72 // Helper function to deduce MPI datatype from the type of data
73 gmx_unused static MPI_Datatype mpiDatatype(const float gmx_unused* data)
74 {
75     return MPI_FLOAT;
76 }
77
78 // Helper function to deduce MPI datatype from the type of data
79 gmx_unused static MPI_Datatype mpiDatatype(const double gmx_unused* data)
80 {
81     return MPI_DOUBLE;
82 }
83
84 #endif // GMX_MPI
85
86 #if !GMX_DOUBLE
87 // Helper function; note that gmx_sum(d) should actually be templated
88 gmx_unused static void gmxAllReduce(int n, real* data, const t_commrec* cr)
89 {
90     gmx_sum(n, data, cr);
91 }
92 #endif
93
94 // Helper function; note that gmx_sum(d) should actually be templated
95 gmx_unused static void gmxAllReduce(int n, double* data, const t_commrec* cr)
96 {
97     gmx_sumd(n, data, cr);
98 }
99
100 // Reduce data of n elements over all ranks currently participating in pull
101 template<typename T>
102 static void pullAllReduce(const t_commrec* cr, pull_comm_t* comm, int n, T* data)
103 {
104     if (cr != nullptr && PAR(cr))
105     {
106         if (comm->bParticipateAll)
107         {
108             /* Sum the contributions over all DD ranks */
109             gmxAllReduce(n, data, cr);
110         }
111         else
112         {
113             /* Separate branch because gmx_sum uses cr->mpi_comm_mygroup */
114 #if GMX_MPI
115             MPI_Allreduce(MPI_IN_PLACE, data, n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
116 #else
117             gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
118 #endif
119         }
120     }
121 }
122
123 /* Copies the coordinates of the PBC atom of pgrp to x_pbc.
124  * When those coordinates are not available on this rank, clears x_pbc.
125  */
126 static void setPbcAtomCoords(const pull_group_work_t& pgrp, ArrayRef<const RVec> x, rvec x_pbc)
127 {
128     if (pgrp.pbcAtomSet != nullptr)
129     {
130         if (pgrp.pbcAtomSet->numAtomsLocal() > 0)
131         {
132             /* We have the atom locally, copy its coordinates */
133             copy_rvec(x[pgrp.pbcAtomSet->localIndex()[0]], x_pbc);
134         }
135         else
136         {
137             /* Another rank has it, clear the coordinates for MPI_Allreduce */
138             clear_rvec(x_pbc);
139         }
140     }
141     else
142     {
143         copy_rvec(x[pgrp.params.pbcatom], x_pbc);
144     }
145 }
146
147 static void pull_set_pbcatoms(const t_commrec* cr, struct pull_t* pull, ArrayRef<const RVec> x, ArrayRef<RVec> x_pbc)
148 {
149     int numPbcAtoms = 0;
150     for (size_t g = 0; g < pull->group.size(); g++)
151     {
152         const pull_group_work_t& group = pull->group[g];
153         if (group.needToCalcCom && (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM))
154         {
155             setPbcAtomCoords(pull->group[g], x, x_pbc[g]);
156             numPbcAtoms++;
157         }
158         else
159         {
160             clear_rvec(x_pbc[g]);
161         }
162     }
163
164     if (cr && PAR(cr) && numPbcAtoms > 0)
165     {
166         /* Sum over participating ranks to get x_pbc from the home ranks.
167          * This can be very expensive at high parallelization, so we only
168          * do this after each DD repartitioning.
169          */
170         pullAllReduce(cr, &pull->comm, pull->group.size() * DIM, static_cast<real*>(x_pbc[0]));
171     }
172 }
173
174 static void make_cyl_refgrps(const t_commrec*     cr,
175                              pull_t*              pull,
176                              ArrayRef<const real> masses,
177                              const t_pbc&         pbc,
178                              double               t,
179                              ArrayRef<const RVec> x)
180 {
181     pull_comm_t* comm = &pull->comm;
182
183     GMX_ASSERT(comm->cylinderBuffer.size() == pull->coord.size() * c_cylinderBufferStride,
184                "cylinderBuffer should have the correct size");
185
186     double inv_cyl_r2 = 1.0 / gmx::square(pull->params.cylinder_r);
187
188     /* loop over all groups to make a reference group for each*/
189     int bufferOffset = 0;
190     for (pull_coord_work_t& pcrd : pull->coord)
191     {
192         double sum_a, wmass, wwmass;
193         dvec   radf_fac0, radf_fac1;
194
195         sum_a  = 0;
196         wmass  = 0;
197         wwmass = 0;
198         clear_dvec(radf_fac0);
199         clear_dvec(radf_fac1);
200
201         if (pcrd.params.eGeom == PullGroupGeometry::Cylinder)
202         {
203             /* pref will be the same group for all pull coordinates */
204             const pull_group_work_t& pref  = pull->group[pcrd.params.group[0]];
205             const pull_group_work_t& pgrp  = pull->group[pcrd.params.group[1]];
206             pull_group_work_t&       pdyna = *pcrd.dynamicGroup0;
207             rvec                     direction;
208             copy_dvec_to_rvec(pcrd.spatialData.vec, direction);
209
210             /* Since we have not calculated the COM of the cylinder group yet,
211              * we calculate distances with respect to location of the pull
212              * group minus the reference position along the vector.
213              * here we already have the COM of the pull group. This resolves
214              * any PBC issues and we don't need to use a PBC-atom here.
215              */
216             if (pcrd.params.rate != 0)
217             {
218                 /* With rate=0, value_ref is set initially */
219                 pcrd.value_ref = pcrd.params.init + pcrd.params.rate * t;
220             }
221             rvec reference;
222             for (int m = 0; m < DIM; m++)
223             {
224                 reference[m] = pgrp.x[m] - pcrd.spatialData.vec[m] * pcrd.value_ref;
225             }
226
227             auto localAtomIndices = pref.atomSet.localIndex();
228
229             /* This actually only needs to be done at init or DD time,
230              * but resizing with the same size does not cause much overhead.
231              */
232             pdyna.localWeights.resize(localAtomIndices.size());
233             pdyna.mdw.resize(localAtomIndices.size());
234             pdyna.dv.resize(localAtomIndices.size());
235
236             /* loop over all atoms in the main ref group */
237             for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
238             {
239                 int  atomIndex = localAtomIndices[indexInSet];
240                 rvec dx;
241                 pbc_dx_aiuc(&pbc, x[atomIndex], reference, dx);
242                 double axialLocation = iprod(direction, dx);
243                 dvec   radialLocation;
244                 double dr2 = 0;
245                 for (int m = 0; m < DIM; m++)
246                 {
247                     /* Determine the radial components */
248                     radialLocation[m] = dx[m] - axialLocation * direction[m];
249                     dr2 += gmx::square(radialLocation[m]);
250                 }
251                 double dr2_rel = dr2 * inv_cyl_r2;
252
253                 if (dr2_rel < 1)
254                 {
255                     /* add atom to sum of COM and to weight array */
256
257                     double mass = masses[atomIndex];
258                     /* The radial weight function is 1-2x^2+x^4,
259                      * where x=r/cylinder_r. Since this function depends
260                      * on the radial component, we also get radial forces
261                      * on both groups.
262                      */
263                     double weight                  = 1 + (-2 + dr2_rel) * dr2_rel;
264                     double dweight_r               = (-4 + 4 * dr2_rel) * inv_cyl_r2;
265                     pdyna.localWeights[indexInSet] = weight;
266                     sum_a += mass * weight * axialLocation;
267                     wmass += mass * weight;
268                     wwmass += mass * weight * weight;
269                     dvec mdw;
270                     dsvmul(mass * dweight_r, radialLocation, mdw);
271                     copy_dvec(mdw, pdyna.mdw[indexInSet]);
272                     /* Currently we only have the axial component of the
273                      * offset from the cylinder COM up to an unkown offset.
274                      * We add this offset after the reduction needed
275                      * for determining the COM of the cylinder group.
276                      */
277                     pdyna.dv[indexInSet] = axialLocation;
278                     for (int m = 0; m < DIM; m++)
279                     {
280                         radf_fac0[m] += mdw[m];
281                         radf_fac1[m] += mdw[m] * axialLocation;
282                     }
283                 }
284                 else
285                 {
286                     pdyna.localWeights[indexInSet] = 0;
287                 }
288             }
289         }
290
291         auto buffer = gmx::arrayRefFromArray(comm->cylinderBuffer.data() + bufferOffset,
292                                              c_cylinderBufferStride);
293         bufferOffset += c_cylinderBufferStride;
294
295         buffer[0] = wmass;
296         buffer[1] = wwmass;
297         buffer[2] = sum_a;
298
299         buffer[3] = radf_fac0[XX];
300         buffer[4] = radf_fac0[YY];
301         buffer[5] = radf_fac0[ZZ];
302
303         buffer[6] = radf_fac1[XX];
304         buffer[7] = radf_fac1[YY];
305         buffer[8] = radf_fac1[ZZ];
306     }
307
308     if (cr != nullptr && PAR(cr))
309     {
310         /* Sum the contributions over the ranks */
311         pullAllReduce(cr, comm, pull->coord.size() * c_cylinderBufferStride, comm->cylinderBuffer.data());
312     }
313
314     bufferOffset = 0;
315     for (pull_coord_work_t& pcrd : pull->coord)
316     {
317         if (pcrd.params.eGeom == PullGroupGeometry::Cylinder)
318         {
319             pull_group_work_t&    dynamicGroup0 = *pcrd.dynamicGroup0;
320             pull_group_work_t&    group1        = pull->group[pcrd.params.group[1]];
321             PullCoordSpatialData& spatialData   = pcrd.spatialData;
322
323             auto buffer = gmx::constArrayRefFromArray(comm->cylinderBuffer.data() + bufferOffset,
324                                                       c_cylinderBufferStride);
325             bufferOffset += c_cylinderBufferStride;
326             double wmass          = buffer[0];
327             double wwmass         = buffer[1];
328             dynamicGroup0.mwscale = 1.0 / wmass;
329             /* Cylinder pulling can't be used with constraints, but we set
330              * wscale and invtm anyhow, in case someone would like to use them.
331              */
332             dynamicGroup0.wscale = wmass / wwmass;
333             dynamicGroup0.invtm  = wwmass / (wmass * wmass);
334
335             /* We store the deviation of the COM from the reference location
336              * used above, since we need it when we apply the radial forces
337              * to the atoms in the cylinder group.
338              */
339             spatialData.cyl_dev = 0;
340             for (int m = 0; m < DIM; m++)
341             {
342                 double reference   = group1.x[m] - spatialData.vec[m] * pcrd.value_ref;
343                 double dist        = -spatialData.vec[m] * buffer[2] * dynamicGroup0.mwscale;
344                 dynamicGroup0.x[m] = reference - dist;
345                 spatialData.cyl_dev += dist;
346             }
347             /* Now we know the exact COM of the cylinder reference group,
348              * we can determine the radial force factor (ffrad) that when
349              * multiplied with the axial pull force will give the radial
350              * force on the pulled (non-cylinder) group.
351              */
352             for (int m = 0; m < DIM; m++)
353             {
354                 spatialData.ffrad[m] = (buffer[6 + m] + buffer[3 + m] * spatialData.cyl_dev) / wmass;
355             }
356
357             if (debug)
358             {
359                 fprintf(debug,
360                         "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
361                         pcrd.params.coordIndex,
362                         dynamicGroup0.x[0],
363                         dynamicGroup0.x[1],
364                         dynamicGroup0.x[2],
365                         1.0 / dynamicGroup0.invtm);
366                 fprintf(debug,
367                         "ffrad %8.3f %8.3f %8.3f\n",
368                         spatialData.ffrad[XX],
369                         spatialData.ffrad[YY],
370                         spatialData.ffrad[ZZ]);
371             }
372         }
373     }
374 }
375
376 static double atan2_0_2pi(double y, double x)
377 {
378     double a;
379
380     a = atan2(y, x);
381     if (a < 0)
382     {
383         a += 2.0 * M_PI;
384     }
385     return a;
386 }
387
388 static void sum_com_part(const pull_group_work_t* pgrp,
389                          int                      ind_start,
390                          int                      ind_end,
391                          ArrayRef<const RVec>     x,
392                          ArrayRef<const RVec>     xp,
393                          ArrayRef<const real>     mass,
394                          const t_pbc&             pbc,
395                          const rvec               x_pbc,
396                          ComSums*                 sum_com)
397 {
398     double sum_wm   = 0;
399     double sum_wwm  = 0;
400     dvec   sum_wmx  = { 0, 0, 0 };
401     dvec   sum_wmxp = { 0, 0, 0 };
402
403     auto localAtomIndices = pgrp->atomSet.localIndex();
404     for (int i = ind_start; i < ind_end; i++)
405     {
406         int  ii = localAtomIndices[i];
407         real wm;
408         if (pgrp->localWeights.empty())
409         {
410             wm = mass[ii];
411             sum_wm += wm;
412         }
413         else
414         {
415             real w;
416
417             w  = pgrp->localWeights[i];
418             wm = w * mass[ii];
419             sum_wm += wm;
420             sum_wwm += wm * w;
421         }
422         if (pgrp->epgrppbc == epgrppbcNONE)
423         {
424             /* Plain COM: sum the coordinates */
425             for (int d = 0; d < DIM; d++)
426             {
427                 sum_wmx[d] += wm * x[ii][d];
428             }
429             if (!xp.empty())
430             {
431                 for (int d = 0; d < DIM; d++)
432                 {
433                     sum_wmxp[d] += wm * xp[ii][d];
434                 }
435             }
436         }
437         else
438         {
439             rvec dx;
440
441             /* Sum the difference with the reference atom */
442             pbc_dx(&pbc, x[ii], x_pbc, dx);
443             for (int d = 0; d < DIM; d++)
444             {
445                 sum_wmx[d] += wm * dx[d];
446             }
447             if (!xp.empty())
448             {
449                 /* For xp add the difference between xp and x to dx,
450                  * such that we use the same periodic image,
451                  * also when xp has a large displacement.
452                  */
453                 for (int d = 0; d < DIM; d++)
454                 {
455                     sum_wmxp[d] += wm * (dx[d] + xp[ii][d] - x[ii][d]);
456                 }
457             }
458         }
459     }
460
461     sum_com->sum_wm  = sum_wm;
462     sum_com->sum_wwm = sum_wwm;
463     copy_dvec(sum_wmx, sum_com->sum_wmx);
464     if (!xp.empty())
465     {
466         copy_dvec(sum_wmxp, sum_com->sum_wmxp);
467     }
468 }
469
470 static void sum_com_part_cosweight(const pull_group_work_t* pgrp,
471                                    int                      ind_start,
472                                    int                      ind_end,
473                                    int                      cosdim,
474                                    real                     twopi_box,
475                                    ArrayRef<const RVec>     x,
476                                    ArrayRef<const RVec>     xp,
477                                    ArrayRef<const real>     mass,
478                                    ComSums*                 sum_com)
479 {
480     /* Cosine weighting geometry */
481     double sum_cm  = 0;
482     double sum_sm  = 0;
483     double sum_ccm = 0;
484     double sum_csm = 0;
485     double sum_ssm = 0;
486     double sum_cmp = 0;
487     double sum_smp = 0;
488
489     auto localAtomIndices = pgrp->atomSet.localIndex();
490
491     for (int i = ind_start; i < ind_end; i++)
492     {
493         int  ii = localAtomIndices[i];
494         real m  = mass[ii];
495         /* Determine cos and sin sums */
496         real cw = std::cos(x[ii][cosdim] * twopi_box);
497         real sw = std::sin(x[ii][cosdim] * twopi_box);
498         sum_cm += static_cast<double>(cw * m);
499         sum_sm += static_cast<double>(sw * m);
500         sum_ccm += static_cast<double>(cw * cw * m);
501         sum_csm += static_cast<double>(cw * sw * m);
502         sum_ssm += static_cast<double>(sw * sw * m);
503
504         if (!xp.empty())
505         {
506             real cw = std::cos(xp[ii][cosdim] * twopi_box);
507             real sw = std::sin(xp[ii][cosdim] * twopi_box);
508             sum_cmp += static_cast<double>(cw * m);
509             sum_smp += static_cast<double>(sw * m);
510         }
511     }
512
513     sum_com->sum_cm  = sum_cm;
514     sum_com->sum_sm  = sum_sm;
515     sum_com->sum_ccm = sum_ccm;
516     sum_com->sum_csm = sum_csm;
517     sum_com->sum_ssm = sum_ssm;
518     sum_com->sum_cmp = sum_cmp;
519     sum_com->sum_smp = sum_smp;
520 }
521
522 /* calculates center of mass of selection index from all coordinates x */
523 void pull_calc_coms(const t_commrec*     cr,
524                     pull_t*              pull,
525                     ArrayRef<const real> masses,
526                     const t_pbc&         pbc,
527                     double               t,
528                     ArrayRef<const RVec> x,
529                     ArrayRef<RVec>       xp)
530 {
531     real         twopi_box = 0;
532     pull_comm_t* comm;
533
534     comm = &pull->comm;
535
536     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
537                "pbcAtomBuffer should have size number of groups");
538     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
539                "comBuffer should have size #group*c_comBufferStride");
540
541     if (pull->bRefAt && pull->bSetPBCatoms)
542     {
543         pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
544
545         if (cr != nullptr && haveDDAtomOrdering(*cr))
546         {
547             /* We can keep these PBC reference coordinates fixed for nstlist
548              * steps, since atoms won't jump over PBC.
549              * This avoids a global reduction at the next nstlist-1 steps.
550              * Note that the exact values of the pbc reference coordinates
551              * are irrelevant, as long all atoms in the group are within
552              * half a box distance of the reference coordinate.
553              */
554             pull->bSetPBCatoms = FALSE;
555         }
556     }
557
558     if (pull->cosdim >= 0)
559     {
560         int m;
561
562         assert(pull->npbcdim <= DIM);
563
564         for (m = pull->cosdim + 1; m < pull->npbcdim; m++)
565         {
566             if (pbc.box[m][pull->cosdim] != 0)
567             {
568                 gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
569             }
570         }
571         twopi_box = 2.0 * M_PI / pbc.box[pull->cosdim][pull->cosdim];
572     }
573
574     for (size_t g = 0; g < pull->group.size(); g++)
575     {
576         pull_group_work_t* pgrp = &pull->group[g];
577
578         /* Cosine-weighted COMs behave different from all other weighted COMs
579          * in the sense that the weights depend on instantaneous coordinates,
580          * not on pre-set weights. Thus we resize the local weight buffer here.
581          */
582         if (pgrp->epgrppbc == epgrppbcCOS)
583         {
584             pgrp->localWeights.resize(pgrp->atomSet.localIndex().size());
585         }
586
587         auto comBuffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
588                                                 c_comBufferStride);
589
590         if (pgrp->needToCalcCom)
591         {
592             if (pgrp->epgrppbc != epgrppbcCOS)
593             {
594                 rvec x_pbc = { 0, 0, 0 };
595
596                 switch (pgrp->epgrppbc)
597                 {
598                     case epgrppbcREFAT:
599                         /* Set the pbc atom */
600                         copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
601                         break;
602                     case epgrppbcPREVSTEPCOM:
603                         /* Set the pbc reference to the COM of the group of the last step */
604                         copy_dvec_to_rvec(pgrp->x_prev_step, comm->pbcAtomBuffer[g]);
605                         copy_dvec_to_rvec(pgrp->x_prev_step, x_pbc);
606                 }
607
608                 /* The final sums should end up in comSums[0] */
609                 ComSums& comSumsTotal = pull->comSums[0];
610
611                 /* If we have a single-atom group the mass is irrelevant, so
612                  * we can remove the mass factor to avoid division by zero.
613                  * Note that with constraint pulling the mass does matter, but
614                  * in that case a check group mass != 0 has been done before.
615                  */
616                 if (pgrp->params.ind.size() == 1 && pgrp->atomSet.numAtomsLocal() == 1
617                     && masses[pgrp->atomSet.localIndex()[0]] == 0)
618                 {
619                     GMX_ASSERT(xp.empty(),
620                                "We should not have groups with zero mass with constraints, i.e. "
621                                "xp not empty");
622
623                     /* Copy the single atom coordinate */
624                     for (int d = 0; d < DIM; d++)
625                     {
626                         comSumsTotal.sum_wmx[d] = x[pgrp->atomSet.localIndex()[0]][d];
627                     }
628                     /* Set all mass factors to 1 to get the correct COM */
629                     comSumsTotal.sum_wm  = 1;
630                     comSumsTotal.sum_wwm = 1;
631                 }
632                 else if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
633                 {
634                     sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, xp, masses, pbc, x_pbc, &comSumsTotal);
635                 }
636                 else
637                 {
638                     const int numThreads = pgrp->numThreads();
639 #pragma omp parallel for num_threads(numThreads) schedule(static)
640                     for (int t = 0; t < numThreads; t++)
641                     {
642                         int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / numThreads;
643                         int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / numThreads;
644                         sum_com_part(
645                                 pgrp, ind_start, ind_end, x, xp, masses, pbc, x_pbc, &pull->comSums[t]);
646                     }
647
648                     /* Reduce the thread contributions to sum_com[0] */
649                     for (int t = 1; t < numThreads; t++)
650                     {
651                         comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
652                         comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
653                         dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
654                         dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
655                     }
656                 }
657
658                 if (pgrp->localWeights.empty())
659                 {
660                     comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
661                 }
662
663                 /* Copy local sums to a buffer for global summing */
664                 copy_dvec(comSumsTotal.sum_wmx, comBuffer[0]);
665
666                 copy_dvec(comSumsTotal.sum_wmxp, comBuffer[1]);
667
668                 comBuffer[2][0] = comSumsTotal.sum_wm;
669                 comBuffer[2][1] = comSumsTotal.sum_wwm;
670                 comBuffer[2][2] = 0;
671             }
672             else
673             {
674                 /* Cosine weighting geometry.
675                  * This uses a slab of the system, thus we always have many
676                  * atoms in the pull groups. Therefore, always use threads.
677                  */
678                 const int numThreads = pgrp->numThreads();
679 #pragma omp parallel for num_threads(numThreads) schedule(static)
680                 for (int t = 0; t < numThreads; t++)
681                 {
682                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / numThreads;
683                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / numThreads;
684                     sum_com_part_cosweight(
685                             pgrp, ind_start, ind_end, pull->cosdim, twopi_box, x, xp, masses, &pull->comSums[t]);
686                 }
687
688                 /* Reduce the thread contributions to comSums[0] */
689                 ComSums& comSumsTotal = pull->comSums[0];
690                 for (int t = 1; t < numThreads; t++)
691                 {
692                     comSumsTotal.sum_cm += pull->comSums[t].sum_cm;
693                     comSumsTotal.sum_sm += pull->comSums[t].sum_sm;
694                     comSumsTotal.sum_ccm += pull->comSums[t].sum_ccm;
695                     comSumsTotal.sum_csm += pull->comSums[t].sum_csm;
696                     comSumsTotal.sum_ssm += pull->comSums[t].sum_ssm;
697                     comSumsTotal.sum_cmp += pull->comSums[t].sum_cmp;
698                     comSumsTotal.sum_smp += pull->comSums[t].sum_smp;
699                 }
700
701                 /* Copy local sums to a buffer for global summing */
702                 comBuffer[0][0] = comSumsTotal.sum_cm;
703                 comBuffer[0][1] = comSumsTotal.sum_sm;
704                 comBuffer[0][2] = 0;
705                 comBuffer[1][0] = comSumsTotal.sum_ccm;
706                 comBuffer[1][1] = comSumsTotal.sum_csm;
707                 comBuffer[1][2] = comSumsTotal.sum_ssm;
708                 comBuffer[2][0] = comSumsTotal.sum_cmp;
709                 comBuffer[2][1] = comSumsTotal.sum_smp;
710                 comBuffer[2][2] = 0;
711             }
712         }
713         else
714         {
715             clear_dvec(comBuffer[0]);
716             clear_dvec(comBuffer[1]);
717             clear_dvec(comBuffer[2]);
718         }
719     }
720
721     pullAllReduce(cr,
722                   comm,
723                   pull->group.size() * c_comBufferStride * DIM,
724                   static_cast<double*>(comm->comBuffer[0]));
725
726     for (size_t g = 0; g < pull->group.size(); g++)
727     {
728         pull_group_work_t* pgrp;
729
730         pgrp = &pull->group[g];
731         if (pgrp->needToCalcCom)
732         {
733             GMX_ASSERT(!pgrp->params.ind.empty(),
734                        "Normal pull groups should have atoms, only group 0, which should have "
735                        "bCalcCom=FALSE has nat=0");
736
737             const auto comBuffer = gmx::constArrayRefFromArray(
738                     comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
739
740             if (pgrp->epgrppbc != epgrppbcCOS)
741             {
742                 double wmass, wwmass;
743                 int    m;
744
745                 /* Determine the inverse mass */
746                 wmass         = comBuffer[2][0];
747                 wwmass        = comBuffer[2][1];
748                 pgrp->mwscale = 1.0 / wmass;
749                 /* invtm==0 signals a frozen group, so then we should keep it zero */
750                 if (pgrp->invtm != 0)
751                 {
752                     pgrp->wscale = wmass / wwmass;
753                     pgrp->invtm  = wwmass / (wmass * wmass);
754                 }
755                 /* Divide by the total mass */
756                 for (m = 0; m < DIM; m++)
757                 {
758                     pgrp->x[m] = comBuffer[0][m] * pgrp->mwscale;
759                     if (!xp.empty())
760                     {
761                         pgrp->xp[m] = comBuffer[1][m] * pgrp->mwscale;
762                     }
763                     if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
764                     {
765                         pgrp->x[m] += comm->pbcAtomBuffer[g][m];
766                         if (!xp.empty())
767                         {
768                             pgrp->xp[m] += comm->pbcAtomBuffer[g][m];
769                         }
770                     }
771                 }
772             }
773             else
774             {
775                 /* Cosine weighting geometry */
776                 double csw, snw, wmass, wwmass;
777
778                 /* Determine the optimal location of the cosine weight */
779                 csw                   = comBuffer[0][0];
780                 snw                   = comBuffer[0][1];
781                 pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
782                 /* Set the weights for the local atoms */
783                 wmass  = sqrt(csw * csw + snw * snw);
784                 wwmass = (comBuffer[1][0] * csw * csw + comBuffer[1][1] * csw * snw
785                           + comBuffer[1][2] * snw * snw)
786                          / (wmass * wmass);
787
788                 pgrp->mwscale = 1.0 / wmass;
789                 pgrp->wscale  = wmass / wwmass;
790                 pgrp->invtm   = wwmass / (wmass * wmass);
791                 /* Set the weights for the local atoms */
792                 csw *= pgrp->invtm;
793                 snw *= pgrp->invtm;
794                 for (size_t i = 0; i < pgrp->atomSet.numAtomsLocal(); i++)
795                 {
796                     int ii                = pgrp->atomSet.localIndex()[i];
797                     pgrp->localWeights[i] = csw * std::cos(twopi_box * x[ii][pull->cosdim])
798                                             + snw * std::sin(twopi_box * x[ii][pull->cosdim]);
799                 }
800                 if (!xp.empty())
801                 {
802                     csw                    = comBuffer[2][0];
803                     snw                    = comBuffer[2][1];
804                     pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
805                 }
806             }
807             if (debug)
808             {
809                 fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
810             }
811         }
812     }
813
814     if (pull->bCylinder)
815     {
816         /* Calculate the COMs for the cyclinder reference groups */
817         make_cyl_refgrps(cr, pull, masses, pbc, t, x);
818     }
819 }
820
821 using BoolVec = gmx::BasicVector<bool>;
822
823 /* Returns whether the pull group obeys the PBC restrictions */
824 static bool pullGroupObeysPbcRestrictions(const pull_group_work_t& group,
825                                           const BoolVec&           dimUsed,
826                                           ArrayRef<const RVec>     x,
827                                           const t_pbc&             pbc,
828                                           const RVec&              x_pbc,
829                                           const real               pbcMargin)
830 {
831     /* Determine which dimensions are relevant for PBC */
832     BoolVec dimUsesPbc       = { false, false, false };
833     bool    pbcIsRectangular = true;
834     for (int d = 0; d < pbc.ndim_ePBC; d++)
835     {
836         if (dimUsed[d])
837         {
838             dimUsesPbc[d] = true;
839             /* All non-zero dimensions of vector v are involved in PBC */
840             for (int d2 = d + 1; d2 < pbc.ndim_ePBC; d2++)
841             {
842                 assert(d2 < DIM);
843                 if (pbc.box[d2][d] != 0)
844                 {
845                     dimUsesPbc[d2]   = true;
846                     pbcIsRectangular = false;
847                 }
848             }
849         }
850     }
851
852     rvec marginPerDim    = {};
853     real marginDistance2 = 0;
854     if (pbcIsRectangular)
855     {
856         /* Use margins for dimensions independently */
857         for (int d = 0; d < pbc.ndim_ePBC; d++)
858         {
859             marginPerDim[d] = pbcMargin * pbc.hbox_diag[d];
860         }
861     }
862     else
863     {
864         /* Check the total distance along the relevant dimensions */
865         for (int d = 0; d < pbc.ndim_ePBC; d++)
866         {
867             if (dimUsesPbc[d])
868             {
869                 marginDistance2 += pbcMargin * gmx::square(0.5) * norm2(pbc.box[d]);
870             }
871         }
872     }
873
874     auto localAtomIndices = group.atomSet.localIndex();
875     for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
876     {
877         rvec dx;
878         pbc_dx(&pbc, x[localAtomIndices[indexInSet]], x_pbc, dx);
879
880         bool atomIsTooFar = false;
881         if (pbcIsRectangular)
882         {
883             for (int d = 0; d < pbc.ndim_ePBC; d++)
884             {
885                 if (dimUsesPbc[d] && (dx[d] < -marginPerDim[d] || dx[d] > marginPerDim[d]))
886                 {
887                     atomIsTooFar = true;
888                 }
889             }
890         }
891         else
892         {
893             real pbcDistance2 = 0;
894             for (int d = 0; d < pbc.ndim_ePBC; d++)
895             {
896                 if (dimUsesPbc[d])
897                 {
898                     pbcDistance2 += gmx::square(dx[d]);
899                 }
900             }
901             atomIsTooFar = (pbcDistance2 > marginDistance2);
902         }
903         if (atomIsTooFar)
904         {
905             return false;
906         }
907     }
908
909     return true;
910 }
911
912 int pullCheckPbcWithinGroups(const pull_t& pull, ArrayRef<const RVec> x, const t_pbc& pbc, real pbcMargin)
913 {
914     if (pbc.pbcType == PbcType::No)
915     {
916         return -1;
917     }
918
919     /* Determine what dimensions are used for each group by pull coordinates */
920     std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
921     for (const pull_coord_work_t& pcrd : pull.coord)
922     {
923         const t_pull_coord& coordParams = pcrd.params;
924         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
925         {
926             for (int d = 0; d < DIM; d++)
927             {
928                 if (coordParams.dim[d]
929                     && !(coordParams.eGeom == PullGroupGeometry::Cylinder && groupIndex == 0))
930                 {
931                     dimUsed[coordParams.group[groupIndex]][d] = true;
932                 }
933             }
934         }
935     }
936
937     /* Check PBC for every group that uses a PBC reference atom treatment */
938     for (size_t g = 0; g < pull.group.size(); g++)
939     {
940         const pull_group_work_t& group = pull.group[g];
941         if ((group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM)
942             && !pullGroupObeysPbcRestrictions(group, dimUsed[g], x, pbc, pull.comm.pbcAtomBuffer[g], pbcMargin))
943         {
944             return g;
945         }
946     }
947
948     return -1;
949 }
950
951 bool pullCheckPbcWithinGroup(const pull_t& pull, ArrayRef<const RVec> x, const t_pbc& pbc, int groupNr, real pbcMargin)
952 {
953     if (pbc.pbcType == PbcType::No)
954     {
955         return true;
956     }
957     GMX_ASSERT(groupNr < gmx::ssize(pull.group), "groupNr is out of range");
958
959     /* Check PBC if the group uses a PBC reference atom treatment. */
960     const pull_group_work_t& group = pull.group[groupNr];
961     if (group.epgrppbc != epgrppbcREFAT && group.epgrppbc != epgrppbcPREVSTEPCOM)
962     {
963         return true;
964     }
965
966     /* Determine what dimensions are used for each group by pull coordinates */
967     BoolVec dimUsed = { false, false, false };
968     for (const pull_coord_work_t& pcrd : pull.coord)
969     {
970         const t_pull_coord& coordParams = pcrd.params;
971         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
972         {
973             if (coordParams.group[groupIndex] == groupNr)
974             {
975                 for (int d = 0; d < DIM; d++)
976                 {
977                     if (coordParams.dim[d]
978                         && !(coordParams.eGeom == PullGroupGeometry::Cylinder && groupIndex == 0))
979                     {
980                         dimUsed[d] = true;
981                     }
982                 }
983             }
984         }
985     }
986
987     return (pullGroupObeysPbcRestrictions(
988             group, dimUsed, x, pbc, pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
989 }
990
991 void setPrevStepPullComFromState(struct pull_t* pull, const t_state* state)
992 {
993     for (size_t g = 0; g < pull->group.size(); g++)
994     {
995         for (int j = 0; j < DIM; j++)
996         {
997             pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g * DIM + j];
998         }
999     }
1000 }
1001
1002 /*! \brief Whether pull functions save a backup to the t_state object
1003  *
1004  * Saving to the state object is only used for checkpointing in the legacy simulator.
1005  * Modular simulator doesn't use the t_state object for checkpointing.
1006  */
1007 enum class PullBackupCOM
1008 {
1009     Yes, //<! Save a copy of the previous step COM to state
1010     No,  //<! Don't save a copy of the previous step COM to state
1011 };
1012
1013 /*! \brief Sets the previous step COM in pull to the current COM and optionally
1014  *         stores it in the provided ArrayRef
1015  *
1016  * \tparam     pullBackupToState  Whether we're storing the previous COM to state
1017  * \param[in]  pull  The COM pull force calculation data structure
1018  * \param[in]   comPreviousStep  The COM of the previous step of each pull group
1019  */
1020 template<PullBackupCOM pullBackupToState>
1021 static void updatePrevStepPullComImpl(pull_t* pull, gmx::ArrayRef<double> comPreviousStep)
1022 {
1023     for (gmx::index g = 0; g < gmx::ssize(pull->group); g++)
1024     {
1025         if (pull->group[g].needToCalcCom)
1026         {
1027             for (int j = 0; j < DIM; j++)
1028             {
1029                 pull->group[g].x_prev_step[j] = pull->group[g].x[j];
1030                 if (pullBackupToState == PullBackupCOM::Yes)
1031                 {
1032                     comPreviousStep[g * DIM + j] = pull->group[g].x[j];
1033                 }
1034             }
1035         }
1036     }
1037 }
1038
1039 void updatePrevStepPullCom(pull_t* pull, std::optional<gmx::ArrayRef<double>> comPreviousStep)
1040 {
1041     if (comPreviousStep.has_value())
1042     {
1043         updatePrevStepPullComImpl<PullBackupCOM::Yes>(pull, comPreviousStep.value());
1044     }
1045     else
1046     {
1047         updatePrevStepPullComImpl<PullBackupCOM::No>(pull, gmx::ArrayRef<double>());
1048     }
1049 }
1050
1051 std::vector<double> prevStepPullCom(const pull_t* pull)
1052 {
1053     std::vector<double> pullCom(pull->group.size() * DIM, 0.0);
1054     for (gmx::index g = 0; g < gmx::ssize(pull->group); g++)
1055     {
1056         for (int j = 0; j < DIM; j++)
1057         {
1058             pullCom[g * DIM + j] = pull->group[g].x_prev_step[j];
1059         }
1060     }
1061     return pullCom;
1062 }
1063
1064 void setPrevStepPullCom(pull_t* pull, gmx::ArrayRef<const double> prevStepPullCom)
1065 {
1066     GMX_RELEASE_ASSERT(prevStepPullCom.size() >= pull->group.size() * DIM,
1067                        "Pull COM vector size mismatch.");
1068     for (gmx::index g = 0; g < gmx::ssize(pull->group); g++)
1069     {
1070         for (int j = 0; j < DIM; j++)
1071         {
1072             pull->group[g].x_prev_step[j] = prevStepPullCom[g * DIM + j];
1073         }
1074     }
1075 }
1076
1077 void allocStatePrevStepPullCom(t_state* state, const pull_t* pull)
1078 {
1079     if (!pull)
1080     {
1081         state->pull_com_prev_step.clear();
1082         return;
1083     }
1084     size_t ngroup = pull->group.size();
1085     if (state->pull_com_prev_step.size() / DIM != ngroup)
1086     {
1087         state->pull_com_prev_step.resize(ngroup * DIM, NAN);
1088     }
1089 }
1090
1091 void initPullComFromPrevStep(const t_commrec*     cr,
1092                              pull_t*              pull,
1093                              ArrayRef<const real> masses,
1094                              const t_pbc&         pbc,
1095                              ArrayRef<const RVec> x)
1096 {
1097     pull_comm_t* comm   = &pull->comm;
1098     size_t       ngroup = pull->group.size();
1099
1100     if (!comm->bParticipate)
1101     {
1102         return;
1103     }
1104
1105     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
1106                "pbcAtomBuffer should have size number of groups");
1107     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
1108                "comBuffer should have size #group*c_comBufferStride");
1109
1110     pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
1111
1112     for (size_t g = 0; g < ngroup; g++)
1113     {
1114         pull_group_work_t* pgrp;
1115
1116         pgrp = &pull->group[g];
1117
1118         if (pgrp->needToCalcCom && pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1119         {
1120             GMX_ASSERT(pgrp->params.ind.size() > 1,
1121                        "Groups with no atoms, or only one atom, should not "
1122                        "use the COM from the previous step as reference.");
1123
1124             RVec x_pbc = { 0, 0, 0 };
1125             copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
1126
1127             if (debug)
1128             {
1129                 fprintf(debug, "Initialising prev step COM of pull group %zu. x_pbc =", g);
1130                 for (int m = 0; m < DIM; m++)
1131                 {
1132                     fprintf(debug, " %f", x_pbc[m]);
1133                 }
1134                 fprintf(debug, "\n");
1135             }
1136
1137             /* The following is to a large extent similar to pull_calc_coms() */
1138
1139             /* The final sums should end up in sum_com[0] */
1140             ComSums& comSumsTotal = pull->comSums[0];
1141
1142             if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
1143             {
1144                 sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, {}, masses, pbc, x_pbc, &comSumsTotal);
1145             }
1146             else
1147             {
1148                 const int numThreads = pgrp->numThreads();
1149 #pragma omp parallel for num_threads(numThreads) schedule(static)
1150                 for (int t = 0; t < numThreads; t++)
1151                 {
1152                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / numThreads;
1153                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / numThreads;
1154                     sum_com_part(pgrp, ind_start, ind_end, x, {}, masses, pbc, x_pbc, &pull->comSums[t]);
1155                 }
1156
1157                 /* Reduce the thread contributions to sum_com[0] */
1158                 for (int t = 1; t < numThreads; t++)
1159                 {
1160                     comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
1161                     comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
1162                     dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
1163                     dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
1164                 }
1165             }
1166
1167             if (pgrp->localWeights.empty())
1168             {
1169                 comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
1170             }
1171
1172             /* Copy local sums to a buffer for global summing */
1173             auto localSums = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
1174                                                     c_comBufferStride);
1175
1176             localSums[0]    = comSumsTotal.sum_wmx;
1177             localSums[1]    = comSumsTotal.sum_wmxp;
1178             localSums[2][0] = comSumsTotal.sum_wm;
1179             localSums[2][1] = comSumsTotal.sum_wwm;
1180             localSums[2][2] = 0;
1181         }
1182     }
1183
1184     pullAllReduce(cr, comm, ngroup * c_comBufferStride * DIM, static_cast<double*>(comm->comBuffer[0]));
1185
1186     for (size_t g = 0; g < ngroup; g++)
1187     {
1188         pull_group_work_t* pgrp;
1189
1190         pgrp = &pull->group[g];
1191         if (pgrp->needToCalcCom)
1192         {
1193             if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1194             {
1195                 auto localSums = gmx::arrayRefFromArray(
1196                         comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
1197                 double wmass, wwmass;
1198
1199                 /* Determine the inverse mass */
1200                 wmass         = localSums[2][0];
1201                 wwmass        = localSums[2][1];
1202                 pgrp->mwscale = 1.0 / wmass;
1203                 /* invtm==0 signals a frozen group, so then we should keep it zero */
1204                 if (pgrp->invtm != 0)
1205                 {
1206                     pgrp->wscale = wmass / wwmass;
1207                     pgrp->invtm  = wwmass / (wmass * wmass);
1208                 }
1209                 /* Divide by the total mass */
1210                 for (int m = 0; m < DIM; m++)
1211                 {
1212                     pgrp->x[m] = localSums[0][m] * pgrp->mwscale;
1213                     pgrp->x[m] += comm->pbcAtomBuffer[g][m];
1214                 }
1215                 if (debug)
1216                 {
1217                     fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
1218                     fprintf(debug, "Initialising prev step COM of pull group %zu to", g);
1219                     for (int m = 0; m < DIM; m++)
1220                     {
1221                         fprintf(debug, " %f", pgrp->x[m]);
1222                     }
1223                     fprintf(debug, "\n");
1224                 }
1225                 copy_dvec(pgrp->x, pgrp->x_prev_step);
1226             }
1227         }
1228     }
1229 }