src/gromacs/pulling/pullutil.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "config.h"

#include <cassert>
#include <cstdlib>

#include "gromacs/fileio/confio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/units.h"
#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"

#include "pull_internal.h"
#if GMX_MPI

// Helper function to deduce MPI datatype from the type of data
gmx_unused static MPI_Datatype mpiDatatype(const float gmx_unused* data)
{
    return MPI_FLOAT;
}

// Helper function to deduce MPI datatype from the type of data
gmx_unused static MPI_Datatype mpiDatatype(const double gmx_unused* data)
{
    return MPI_DOUBLE;
}

#endif // GMX_MPI

#if !GMX_DOUBLE
// Helper function; note that gmx_sum(d) should actually be templated
gmx_unused static void gmxAllReduce(int n, real* data, const t_commrec* cr)
{
    gmx_sum(n, data, cr);
}
#endif

// Helper function; note that gmx_sum(d) should actually be templated
gmx_unused static void gmxAllReduce(int n, double* data, const t_commrec* cr)
{
    gmx_sumd(n, data, cr);
}

// Reduce data of n elements over all ranks currently participating in pull
template<typename T>
static void pullAllReduce(const t_commrec* cr, pull_comm_t* comm, int n, T* data)
{
    if (cr != nullptr && PAR(cr))
    {
        if (comm->bParticipateAll)
        {
            /* Sum the contributions over all DD ranks */
            gmxAllReduce(n, data, cr);
        }
        else
        {
            /* Separate branch because gmx_sum uses cr->mpi_comm_mygroup */
#if GMX_MPI
            MPI_Allreduce(MPI_IN_PLACE, data, n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
#else
            gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
#endif
        }
    }
}
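
/*
 * Note on the technique above (added illustration): the overload pair
 * mpiDatatype(const float*) / mpiDatatype(const double*) lets the templated
 * pullAllReduce() select the matching MPI datatype at compile time via
 * overload resolution. A minimal standalone sketch of the same idiom, not
 * part of this file (the name allReduceInPlace is hypothetical):
 *
 *   #include <mpi.h>
 *
 *   static MPI_Datatype mpiTypeOf(const float*)  { return MPI_FLOAT; }
 *   static MPI_Datatype mpiTypeOf(const double*) { return MPI_DOUBLE; }
 *
 *   template<typename T>
 *   static void allReduceInPlace(T* data, int n, MPI_Comm comm)
 *   {
 *       // The static type of data picks the correct MPI datatype
 *       MPI_Allreduce(MPI_IN_PLACE, data, n, mpiTypeOf(data), MPI_SUM, comm);
 *   }
 */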

/* Copies the coordinates of the PBC atom of pgrp to x_pbc.
 * When those coordinates are not available on this rank, clears x_pbc.
 */
static void setPbcAtomCoords(const pull_group_work_t& pgrp, const rvec* x, rvec x_pbc)
{
    if (pgrp.pbcAtomSet != nullptr)
    {
        if (pgrp.pbcAtomSet->numAtomsLocal() > 0)
        {
            /* We have the atom locally, copy its coordinates */
            copy_rvec(x[pgrp.pbcAtomSet->localIndex()[0]], x_pbc);
        }
        else
        {
            /* Another rank has it, clear the coordinates for MPI_Allreduce */
            clear_rvec(x_pbc);
        }
    }
    else
    {
        copy_rvec(x[pgrp.params.pbcatom], x_pbc);
    }
}

static void pull_set_pbcatoms(const t_commrec* cr, struct pull_t* pull, const rvec* x, gmx::ArrayRef<gmx::RVec> x_pbc)
{
    int numPbcAtoms = 0;
    for (size_t g = 0; g < pull->group.size(); g++)
    {
        const pull_group_work_t& group = pull->group[g];
        if (group.needToCalcCom && (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM))
        {
            setPbcAtomCoords(pull->group[g], x, x_pbc[g]);
            numPbcAtoms++;
        }
        else
        {
            clear_rvec(x_pbc[g]);
        }
    }

    if (cr && PAR(cr) && numPbcAtoms > 0)
    {
        /* Sum over participating ranks to get x_pbc from the home ranks.
         * This can be very expensive at high parallelization, so we only
         * do this after each DD repartitioning.
         */
        pullAllReduce(cr, &pull->comm, pull->group.size() * DIM, static_cast<real*>(x_pbc[0]));
    }
}

static void
make_cyl_refgrps(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, double t, const rvec* x)
{
    pull_comm_t* comm = &pull->comm;

    GMX_ASSERT(comm->cylinderBuffer.size() == pull->coord.size() * c_cylinderBufferStride,
               "cylinderBuffer should have the correct size");

    double inv_cyl_r2 = 1.0 / gmx::square(pull->params.cylinder_r);

    /* Loop over all pull coordinates, making a dynamic reference group for each */
    for (size_t c = 0; c < pull->coord.size(); c++)
    {
        pull_coord_work_t* pcrd;
        double             sum_a, wmass, wwmass;
        dvec               radf_fac0, radf_fac1;

        pcrd = &pull->coord[c];

        sum_a  = 0;
        wmass  = 0;
        wwmass = 0;
        clear_dvec(radf_fac0);
        clear_dvec(radf_fac1);

        if (pcrd->params.eGeom == PullGroupGeometry::Cylinder)
        {
            /* pref will be the same group for all pull coordinates */
            const pull_group_work_t& pref  = pull->group[pcrd->params.group[0]];
            const pull_group_work_t& pgrp  = pull->group[pcrd->params.group[1]];
            pull_group_work_t&       pdyna = pull->dyna[c];
            rvec                     direction;
            copy_dvec_to_rvec(pcrd->spatialData.vec, direction);

            /* Since we have not calculated the COM of the cylinder group yet,
             * we calculate distances with respect to the location of the pull
             * group minus the reference position along the vector.
             * Here we already have the COM of the pull group. This resolves
             * any PBC issues and we don't need to use a PBC atom here.
             */
            if (pcrd->params.rate != 0)
            {
                /* With rate != 0, value_ref changes with time; with rate = 0
                 * it keeps the value set at initialization. */
                pcrd->value_ref = pcrd->params.init + pcrd->params.rate * t;
            }
            rvec reference;
            for (int m = 0; m < DIM; m++)
            {
                reference[m] = pgrp.x[m] - pcrd->spatialData.vec[m] * pcrd->value_ref;
            }

            auto localAtomIndices = pref.atomSet.localIndex();

            /* This actually only needs to be done at init or DD time,
             * but resizing with the same size does not cause much overhead.
             */
            pdyna.localWeights.resize(localAtomIndices.size());
            pdyna.mdw.resize(localAtomIndices.size());
            pdyna.dv.resize(localAtomIndices.size());

            /* Loop over all atoms in the main ref group */
            for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
            {
                int  atomIndex = localAtomIndices[indexInSet];
                rvec dx;
                pbc_dx_aiuc(pbc, x[atomIndex], reference, dx);
                double axialLocation = iprod(direction, dx);
                dvec   radialLocation;
                double dr2 = 0;
                for (int m = 0; m < DIM; m++)
                {
                    /* Determine the radial components */
                    radialLocation[m] = dx[m] - axialLocation * direction[m];
                    dr2 += gmx::square(radialLocation[m]);
                }
                double dr2_rel = dr2 * inv_cyl_r2;

                if (dr2_rel < 1)
                {
                    /* Add the atom to the COM sums and to the weight array */

                    double mass = masses[atomIndex];
                    /* The radial weight function is 1-2x^2+x^4,
                     * where x=r/cylinder_r. Since this function depends
                     * on the radial component, we also get radial forces
                     * on both groups.
                     */
                    double weight                  = 1 + (-2 + dr2_rel) * dr2_rel;
                    double dweight_r               = (-4 + 4 * dr2_rel) * inv_cyl_r2;
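                    /* Illustrative derivation (added note): weight factors as
                     * (1 - dr2_rel)^2. With u = dr2_rel = (r/cylinder_r)^2 the
                     * radial derivative is dw/dr = (4u - 4) * r / cylinder_r^2,
                     * so dweight_r = (dw/dr)/r; multiplying it by the radial
                     * offset vector below therefore yields the gradient of w.
                     * E.g. at r = cylinder_r/2: u = 1/4, w = (3/4)^2 = 0.5625.
                     */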
                    pdyna.localWeights[indexInSet] = weight;
                    sum_a += mass * weight * axialLocation;
                    wmass += mass * weight;
                    wwmass += mass * weight * weight;
                    dvec mdw;
                    dsvmul(mass * dweight_r, radialLocation, mdw);
                    copy_dvec(mdw, pdyna.mdw[indexInSet]);
                    /* Currently we only have the axial component of the
                     * offset from the cylinder COM up to an unknown offset.
                     * We add this offset after the reduction needed
                     * for determining the COM of the cylinder group.
                     */
                    pdyna.dv[indexInSet] = axialLocation;
                    for (int m = 0; m < DIM; m++)
                    {
                        radf_fac0[m] += mdw[m];
                        radf_fac1[m] += mdw[m] * axialLocation;
                    }
                }
                else
                {
                    pdyna.localWeights[indexInSet] = 0;
                }
            }
        }

        auto buffer = gmx::arrayRefFromArray(
                comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);

        buffer[0] = wmass;
        buffer[1] = wwmass;
        buffer[2] = sum_a;

        buffer[3] = radf_fac0[XX];
        buffer[4] = radf_fac0[YY];
        buffer[5] = radf_fac0[ZZ];

        buffer[6] = radf_fac1[XX];
        buffer[7] = radf_fac1[YY];
        buffer[8] = radf_fac1[ZZ];
    }

    if (cr != nullptr && PAR(cr))
    {
        /* Sum the contributions over the ranks */
        pullAllReduce(cr, comm, pull->coord.size() * c_cylinderBufferStride, comm->cylinderBuffer.data());
    }

    for (size_t c = 0; c < pull->coord.size(); c++)
    {
        pull_coord_work_t* pcrd;

        pcrd = &pull->coord[c];

        if (pcrd->params.eGeom == PullGroupGeometry::Cylinder)
        {
            pull_group_work_t*    pdyna       = &pull->dyna[c];
            pull_group_work_t*    pgrp        = &pull->group[pcrd->params.group[1]];
            PullCoordSpatialData& spatialData = pcrd->spatialData;

            auto buffer = gmx::constArrayRefFromArray(
                    comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
            double wmass   = buffer[0];
            double wwmass  = buffer[1];
            pdyna->mwscale = 1.0 / wmass;
            /* Cylinder pulling can't be used with constraints, but we set
             * wscale and invtm anyhow, in case someone would like to use them.
             */
            pdyna->wscale = wmass / wwmass;
            pdyna->invtm  = wwmass / (wmass * wmass);

            /* We store the deviation of the COM from the reference location
             * used above, since we need it when we apply the radial forces
             * to the atoms in the cylinder group.
             */
            spatialData.cyl_dev = 0;
            for (int m = 0; m < DIM; m++)
            {
                double reference = pgrp->x[m] - spatialData.vec[m] * pcrd->value_ref;
                double dist      = -spatialData.vec[m] * buffer[2] * pdyna->mwscale;
                pdyna->x[m]      = reference - dist;
                spatialData.cyl_dev += dist;
            }
            /* Now that we know the exact COM of the cylinder reference group,
             * we can determine the radial force factor (ffrad) that, when
             * multiplied with the axial pull force, gives the radial
             * force on the pulled (non-cylinder) group.
             */
            for (int m = 0; m < DIM; m++)
            {
                spatialData.ffrad[m] = (buffer[6 + m] + buffer[3 + m] * spatialData.cyl_dev) / wmass;
            }

            if (debug)
            {
                fprintf(debug,
                        "Pull cylinder group %zu:%8.3f%8.3f%8.3f m:%8.3f\n",
                        c,
                        pdyna->x[0],
                        pdyna->x[1],
                        pdyna->x[2],
                        1.0 / pdyna->invtm);
                fprintf(debug,
                        "ffrad %8.3f %8.3f %8.3f\n",
                        spatialData.ffrad[XX],
                        spatialData.ffrad[YY],
                        spatialData.ffrad[ZZ]);
            }
        }
    }
}

static double atan2_0_2pi(double y, double x)
{
    double a;

    a = atan2(y, x);
    if (a < 0)
    {
        a += 2.0 * M_PI;
    }
    return a;
}
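
/*
 * Illustrative note (added): atan2_0_2pi() maps the standard atan2 range
 * (-pi, pi] onto [0, 2*pi), matching the fractional box coordinate used by
 * the cosine-weighting code. For example, atan2_0_2pi(-1, 0) returns 3*pi/2
 * where atan2(-1, 0) would return -pi/2.
 */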

static void sum_com_part(const pull_group_work_t* pgrp,
                         int                      ind_start,
                         int                      ind_end,
                         const rvec*              x,
                         const rvec*              xp,
                         const real*              mass,
                         const t_pbc*             pbc,
                         const rvec               x_pbc,
                         ComSums*                 sum_com)
{
    double sum_wm   = 0;
    double sum_wwm  = 0;
    dvec   sum_wmx  = { 0, 0, 0 };
    dvec   sum_wmxp = { 0, 0, 0 };

    auto localAtomIndices = pgrp->atomSet.localIndex();
    for (int i = ind_start; i < ind_end; i++)
    {
        int  ii = localAtomIndices[i];
        real wm;
        if (pgrp->localWeights.empty())
        {
            wm = mass[ii];
            sum_wm += wm;
        }
        else
        {
            real w;

            w  = pgrp->localWeights[i];
            wm = w * mass[ii];
            sum_wm += wm;
            sum_wwm += wm * w;
        }
        if (pgrp->epgrppbc == epgrppbcNONE)
        {
            /* Plain COM: sum the coordinates */
            for (int d = 0; d < DIM; d++)
            {
                sum_wmx[d] += wm * x[ii][d];
            }
            if (xp)
            {
                for (int d = 0; d < DIM; d++)
                {
                    sum_wmxp[d] += wm * xp[ii][d];
                }
            }
        }
        else
        {
            rvec dx;

            /* Sum the difference with the reference atom */
            pbc_dx(pbc, x[ii], x_pbc, dx);
            for (int d = 0; d < DIM; d++)
            {
                sum_wmx[d] += wm * dx[d];
            }
            if (xp)
            {
                /* For xp add the difference between xp and x to dx,
                 * such that we use the same periodic image,
                 * also when xp has a large displacement.
                 */
                for (int d = 0; d < DIM; d++)
                {
                    sum_wmxp[d] += wm * (dx[d] + xp[ii][d] - x[ii][d]);
                }
            }
        }
    }

    sum_com->sum_wm  = sum_wm;
    sum_com->sum_wwm = sum_wwm;
    copy_dvec(sum_wmx, sum_com->sum_wmx);
    if (xp)
    {
        copy_dvec(sum_wmxp, sum_com->sum_wmxp);
    }
}

static void sum_com_part_cosweight(const pull_group_work_t* pgrp,
                                   int                      ind_start,
                                   int                      ind_end,
                                   int                      cosdim,
                                   real                     twopi_box,
                                   const rvec*              x,
                                   const rvec*              xp,
                                   const real*              mass,
                                   ComSums*                 sum_com)
{
    /* Cosine weighting geometry */
    double sum_cm  = 0;
    double sum_sm  = 0;
    double sum_ccm = 0;
    double sum_csm = 0;
    double sum_ssm = 0;
    double sum_cmp = 0;
    double sum_smp = 0;

    auto localAtomIndices = pgrp->atomSet.localIndex();

    for (int i = ind_start; i < ind_end; i++)
    {
        int  ii = localAtomIndices[i];
        real m  = mass[ii];
        /* Determine cos and sin sums */
        real cw = std::cos(x[ii][cosdim] * twopi_box);
        real sw = std::sin(x[ii][cosdim] * twopi_box);
        sum_cm += static_cast<double>(cw * m);
        sum_sm += static_cast<double>(sw * m);
        sum_ccm += static_cast<double>(cw * cw * m);
        sum_csm += static_cast<double>(cw * sw * m);
        sum_ssm += static_cast<double>(sw * sw * m);

        if (xp != nullptr)
        {
            real cw = std::cos(xp[ii][cosdim] * twopi_box);
            real sw = std::sin(xp[ii][cosdim] * twopi_box);
            sum_cmp += static_cast<double>(cw * m);
            sum_smp += static_cast<double>(sw * m);
        }
    }

    sum_com->sum_cm  = sum_cm;
    sum_com->sum_sm  = sum_sm;
    sum_com->sum_ccm = sum_ccm;
    sum_com->sum_csm = sum_csm;
    sum_com->sum_ssm = sum_ssm;
    sum_com->sum_cmp = sum_cmp;
    sum_com->sum_smp = sum_smp;
}
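
/*
 * Illustrative note (added): writing the box length along cosdim as L and
 * k = 2*pi/L (twopi_box above), the sums C = sum_i m_i*cos(k*x_i) and
 * S = sum_i m_i*sin(k*x_i) yield the cosine-weighted COM recovered later as
 * x_com = atan2_0_2pi(S, C) / k, with weighted mass wmass = sqrt(C^2 + S^2);
 * the quadratic sums (sum_ccm, sum_csm, sum_ssm) enter the effective
 * weight-mass wwmass used for wscale and invtm.
 */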

/* Calculates the centers of mass of the pull groups from the coordinates x */
void pull_calc_coms(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, double t, const rvec x[], rvec* xp)
{
    real         twopi_box = 0;
    pull_comm_t* comm;

    comm = &pull->comm;

    GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
               "pbcAtomBuffer should have size number of groups");
    GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
               "comBuffer should have size #group*c_comBufferStride");

    if (pull->bRefAt && pull->bSetPBCatoms)
    {
        pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);

        if (cr != nullptr && DOMAINDECOMP(cr))
        {
            /* We can keep these PBC reference coordinates fixed for nstlist
             * steps, since atoms won't jump over PBC.
             * This avoids a global reduction at the next nstlist-1 steps.
             * Note that the exact values of the pbc reference coordinates
             * are irrelevant, as long as all atoms in the group are within
             * half a box distance of the reference coordinate.
             */
            pull->bSetPBCatoms = FALSE;
        }
    }

    if (pull->cosdim >= 0)
    {
        int m;

        assert(pull->npbcdim <= DIM);

        for (m = pull->cosdim + 1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Cannot do cosine weighting for triclinic dimensions");
            }
        }
        twopi_box = 2.0 * M_PI / pbc->box[pull->cosdim][pull->cosdim];
    }

    for (size_t g = 0; g < pull->group.size(); g++)
    {
        pull_group_work_t* pgrp = &pull->group[g];

        /* Cosine-weighted COMs behave differently from all other weighted COMs
         * in the sense that the weights depend on instantaneous coordinates,
         * not on pre-set weights. Thus we resize the local weight buffer here.
         */
        if (pgrp->epgrppbc == epgrppbcCOS)
        {
            pgrp->localWeights.resize(pgrp->atomSet.localIndex().size());
        }

        auto comBuffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
                                                c_comBufferStride);

        if (pgrp->needToCalcCom)
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                rvec x_pbc = { 0, 0, 0 };

                switch (pgrp->epgrppbc)
                {
                    case epgrppbcREFAT:
                        /* Set the pbc atom */
                        copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
                        break;
                    case epgrppbcPREVSTEPCOM:
                        /* Set the pbc reference to the COM of the group of the last step */
                        copy_dvec_to_rvec(pgrp->x_prev_step, comm->pbcAtomBuffer[g]);
                        copy_dvec_to_rvec(pgrp->x_prev_step, x_pbc);
                }

                /* The final sums should end up in comSums[0] */
                ComSums& comSumsTotal = pull->comSums[0];

                /* If we have a single-atom group the mass is irrelevant, so
                 * we can remove the mass factor to avoid division by zero.
                 * Note that with constraint pulling the mass does matter, but
                 * in that case a check group mass != 0 has been done before.
                 */
                if (pgrp->params.ind.size() == 1 && pgrp->atomSet.numAtomsLocal() == 1
                    && masses[pgrp->atomSet.localIndex()[0]] == 0)
                {
                    GMX_ASSERT(xp == nullptr,
                               "We should not have groups with zero mass with constraints, i.e. "
                               "xp!=NULL");

                    /* Copy the single atom coordinate */
                    for (int d = 0; d < DIM; d++)
                    {
                        comSumsTotal.sum_wmx[d] = x[pgrp->atomSet.localIndex()[0]][d];
                    }
                    /* Set all mass factors to 1 to get the correct COM */
                    comSumsTotal.sum_wm  = 1;
                    comSumsTotal.sum_wwm = 1;
                }
                else if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
                {
                    sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, xp, masses, pbc, x_pbc, &comSumsTotal);
                }
                else
                {
#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
                    for (int t = 0; t < pull->nthreads; t++)
                    {
                        int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
                        int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
                        sum_com_part(
                                pgrp, ind_start, ind_end, x, xp, masses, pbc, x_pbc, &pull->comSums[t]);
                    }

                    /* Reduce the thread contributions to sum_com[0] */
                    for (int t = 1; t < pull->nthreads; t++)
                    {
                        comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
                        comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
                        dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
                        dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
                    }
                }

                if (pgrp->localWeights.empty())
                {
                    comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
                }

                /* Copy local sums to a buffer for global summing */
                copy_dvec(comSumsTotal.sum_wmx, comBuffer[0]);

                copy_dvec(comSumsTotal.sum_wmxp, comBuffer[1]);

                comBuffer[2][0] = comSumsTotal.sum_wm;
                comBuffer[2][1] = comSumsTotal.sum_wwm;
                comBuffer[2][2] = 0;
            }
            else
            {
                /* Cosine weighting geometry.
                 * This uses a slab of the system, thus we always have many
                 * atoms in the pull groups. Therefore, always use threads.
                 */
#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
                for (int t = 0; t < pull->nthreads; t++)
                {
                    int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
                    int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
                    sum_com_part_cosweight(
                            pgrp, ind_start, ind_end, pull->cosdim, twopi_box, x, xp, masses, &pull->comSums[t]);
                }

                /* Reduce the thread contributions to comSums[0] */
                ComSums& comSumsTotal = pull->comSums[0];
                for (int t = 1; t < pull->nthreads; t++)
                {
                    comSumsTotal.sum_cm += pull->comSums[t].sum_cm;
                    comSumsTotal.sum_sm += pull->comSums[t].sum_sm;
                    comSumsTotal.sum_ccm += pull->comSums[t].sum_ccm;
                    comSumsTotal.sum_csm += pull->comSums[t].sum_csm;
                    comSumsTotal.sum_ssm += pull->comSums[t].sum_ssm;
                    comSumsTotal.sum_cmp += pull->comSums[t].sum_cmp;
                    comSumsTotal.sum_smp += pull->comSums[t].sum_smp;
                }

                /* Copy local sums to a buffer for global summing */
                comBuffer[0][0] = comSumsTotal.sum_cm;
                comBuffer[0][1] = comSumsTotal.sum_sm;
                comBuffer[0][2] = 0;
                comBuffer[1][0] = comSumsTotal.sum_ccm;
                comBuffer[1][1] = comSumsTotal.sum_csm;
                comBuffer[1][2] = comSumsTotal.sum_ssm;
                comBuffer[2][0] = comSumsTotal.sum_cmp;
                comBuffer[2][1] = comSumsTotal.sum_smp;
                comBuffer[2][2] = 0;
            }
        }
        else
        {
            clear_dvec(comBuffer[0]);
            clear_dvec(comBuffer[1]);
            clear_dvec(comBuffer[2]);
        }
    }

    pullAllReduce(cr,
                  comm,
                  pull->group.size() * c_comBufferStride * DIM,
                  static_cast<double*>(comm->comBuffer[0]));

    for (size_t g = 0; g < pull->group.size(); g++)
    {
        pull_group_work_t* pgrp;

        pgrp = &pull->group[g];
        if (pgrp->needToCalcCom)
        {
            GMX_ASSERT(!pgrp->params.ind.empty(),
                       "Normal pull groups should have atoms; only group 0, which should have "
                       "bCalcCom=FALSE, has nat=0");

            const auto comBuffer = gmx::constArrayRefFromArray(
                    comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);

            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                double wmass, wwmass;
                int    m;

                /* Determine the inverse mass */
                wmass         = comBuffer[2][0];
                wwmass        = comBuffer[2][1];
                pgrp->mwscale = 1.0 / wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm != 0)
                {
                    pgrp->wscale = wmass / wwmass;
                    pgrp->invtm  = wwmass / (wmass * wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m] = comBuffer[0][m] * pgrp->mwscale;
                    if (xp)
                    {
                        pgrp->xp[m] = comBuffer[1][m] * pgrp->mwscale;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
                    {
                        pgrp->x[m] += comm->pbcAtomBuffer[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += comm->pbcAtomBuffer[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Cosine weighting geometry */
                double csw, snw, wmass, wwmass;

                /* Determine the optimal location of the cosine weight */
                csw                   = comBuffer[0][0];
                snw                   = comBuffer[0][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
                /* Determine the effective weighted masses */
                wmass  = sqrt(csw * csw + snw * snw);
                wwmass = (comBuffer[1][0] * csw * csw + comBuffer[1][1] * csw * snw
                          + comBuffer[1][2] * snw * snw)
                         / (wmass * wmass);

                pgrp->mwscale = 1.0 / wmass;
                pgrp->wscale  = wmass / wwmass;
                pgrp->invtm   = wwmass / (wmass * wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (size_t i = 0; i < pgrp->atomSet.numAtomsLocal(); i++)
                {
                    int ii                = pgrp->atomSet.localIndex()[i];
                    pgrp->localWeights[i] = csw * std::cos(twopi_box * x[ii][pull->cosdim])
                                            + snw * std::sin(twopi_box * x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = comBuffer[2][0];
                    snw                    = comBuffer[2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
            }
        }
    }

    if (pull->bCylinder)
    {
        /* Calculate the COMs for the cylinder reference groups */
        make_cyl_refgrps(cr, pull, masses, pbc, t, x);
    }
}
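
/*
 * Usage sketch (added illustration, not part of the build; surrounding
 * variable names are hypothetical): a caller holding a fully initialized
 * pull_t and a matching t_pbc would compute the group COMs for the current
 * step before evaluating pull potentials or constraints, roughly as:
 *
 *   set_pbc(&pbc, inputrec.pbcType, state.box);  // fill the t_pbc struct
 *   pull_calc_coms(cr, pull, masses, &pbc, step * dt,
 *                  as_rvec_array(state.x.data()),
 *                  nullptr);  // pass xp only for constraint pulling
 *   // pull->group[g].x now holds the COM of pull group g
 */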

using BoolVec = gmx::BasicVector<bool>;

/* Returns whether the pull group obeys the PBC restrictions */
static bool pullGroupObeysPbcRestrictions(const pull_group_work_t& group,
                                          const BoolVec&           dimUsed,
                                          const rvec*              x,
                                          const t_pbc&             pbc,
                                          const gmx::RVec&         x_pbc,
                                          const real               pbcMargin)
{
    /* Determine which dimensions are relevant for PBC */
    BoolVec dimUsesPbc       = { false, false, false };
    bool    pbcIsRectangular = true;
    for (int d = 0; d < pbc.ndim_ePBC; d++)
    {
        if (dimUsed[d])
        {
            dimUsesPbc[d] = true;
            /* All non-zero dimensions of vector v are involved in PBC */
            for (int d2 = d + 1; d2 < pbc.ndim_ePBC; d2++)
            {
                assert(d2 < DIM);
                if (pbc.box[d2][d] != 0)
                {
                    dimUsesPbc[d2]   = true;
                    pbcIsRectangular = false;
                }
            }
        }
    }

    rvec marginPerDim    = {};
    real marginDistance2 = 0;
    if (pbcIsRectangular)
    {
        /* Use margins for dimensions independently */
        for (int d = 0; d < pbc.ndim_ePBC; d++)
        {
            marginPerDim[d] = pbcMargin * pbc.hbox_diag[d];
        }
    }
    else
    {
        /* Check the total distance along the relevant dimensions */
        for (int d = 0; d < pbc.ndim_ePBC; d++)
        {
            if (dimUsesPbc[d])
            {
                marginDistance2 += pbcMargin * gmx::square(0.5) * norm2(pbc.box[d]);
            }
        }
    }

    auto localAtomIndices = group.atomSet.localIndex();
    for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
    {
        rvec dx;
        pbc_dx(&pbc, x[localAtomIndices[indexInSet]], x_pbc, dx);

        bool atomIsTooFar = false;
        if (pbcIsRectangular)
        {
            for (int d = 0; d < pbc.ndim_ePBC; d++)
            {
                if (dimUsesPbc[d] && (dx[d] < -marginPerDim[d] || dx[d] > marginPerDim[d]))
                {
                    atomIsTooFar = true;
                }
            }
        }
        else
        {
            real pbcDistance2 = 0;
            for (int d = 0; d < pbc.ndim_ePBC; d++)
            {
                if (dimUsesPbc[d])
                {
                    pbcDistance2 += gmx::square(dx[d]);
                }
            }
            atomIsTooFar = (pbcDistance2 > marginDistance2);
        }
        if (atomIsTooFar)
        {
            return false;
        }
    }

    return true;
}
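
/*
 * Illustrative note (added): with pbcMargin = 0.5 and a rectangular box, the
 * check above accepts a group only when every atom lies within
 * 0.5 * hbox_diag[d], i.e. a quarter of the box length, of the PBC reference
 * along each pulled dimension, leaving headroom before an atom could wrap to
 * the wrong periodic image.
 */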

int pullCheckPbcWithinGroups(const pull_t& pull, const rvec* x, const t_pbc& pbc, real pbcMargin)
{
    if (pbc.pbcType == PbcType::No)
    {
        return -1;
    }

    /* Determine what dimensions are used for each group by pull coordinates */
    std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
    for (size_t c = 0; c < pull.coord.size(); c++)
    {
        const t_pull_coord& coordParams = pull.coord[c].params;
        for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
        {
            for (int d = 0; d < DIM; d++)
            {
                if (coordParams.dim[d]
                    && !(coordParams.eGeom == PullGroupGeometry::Cylinder && groupIndex == 0))
                {
                    dimUsed[coordParams.group[groupIndex]][d] = true;
                }
            }
        }
    }

    /* Check PBC for every group that uses a PBC reference atom treatment */
    for (size_t g = 0; g < pull.group.size(); g++)
    {
        const pull_group_work_t& group = pull.group[g];
        if ((group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM)
            && !pullGroupObeysPbcRestrictions(group, dimUsed[g], x, pbc, pull.comm.pbcAtomBuffer[g], pbcMargin))
        {
            return g;
        }
    }

    return -1;
}

bool pullCheckPbcWithinGroup(const pull_t&                  pull,
                             gmx::ArrayRef<const gmx::RVec> x,
                             const t_pbc&                   pbc,
                             int                            groupNr,
                             real                           pbcMargin)
{
    if (pbc.pbcType == PbcType::No)
    {
        return true;
    }
    GMX_ASSERT(groupNr < gmx::ssize(pull.group), "groupNr is out of range");

    /* Check PBC if the group uses a PBC reference atom treatment. */
    const pull_group_work_t& group = pull.group[groupNr];
    if (group.epgrppbc != epgrppbcREFAT && group.epgrppbc != epgrppbcPREVSTEPCOM)
    {
        return true;
    }

    /* Determine what dimensions are used for each group by pull coordinates */
    BoolVec dimUsed = { false, false, false };
    for (size_t c = 0; c < pull.coord.size(); c++)
    {
        const t_pull_coord& coordParams = pull.coord[c].params;
        for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
        {
            if (coordParams.group[groupIndex] == groupNr)
            {
                for (int d = 0; d < DIM; d++)
                {
                    if (coordParams.dim[d]
                        && !(coordParams.eGeom == PullGroupGeometry::Cylinder && groupIndex == 0))
                    {
                        dimUsed[d] = true;
                    }
                }
            }
        }
    }

    return (pullGroupObeysPbcRestrictions(
            group, dimUsed, as_rvec_array(x.data()), pbc, pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
}

void setPrevStepPullComFromState(struct pull_t* pull, const t_state* state)
{
    for (size_t g = 0; g < pull->group.size(); g++)
    {
        for (int j = 0; j < DIM; j++)
        {
            pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g * DIM + j];
        }
    }
}

void updatePrevStepPullCom(struct pull_t* pull, t_state* state)
{
    for (size_t g = 0; g < pull->group.size(); g++)
    {
        if (pull->group[g].needToCalcCom)
        {
            for (int j = 0; j < DIM; j++)
            {
                pull->group[g].x_prev_step[j]          = pull->group[g].x[j];
                state->pull_com_prev_step[g * DIM + j] = pull->group[g].x[j];
            }
        }
    }
}

void allocStatePrevStepPullCom(t_state* state, const pull_t* pull)
{
    if (!pull)
    {
        state->pull_com_prev_step.clear();
        return;
    }
    size_t ngroup = pull->group.size();
    if (state->pull_com_prev_step.size() / DIM != ngroup)
    {
        state->pull_com_prev_step.resize(ngroup * DIM, NAN);
    }
}
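
/*
 * Note (added): the NAN fill value above marks previous-step COM entries that
 * have not been computed yet; they are filled by updatePrevStepPullCom() once
 * the group COMs are available, or restored from a checkpointed state via
 * setPrevStepPullComFromState().
 */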

void initPullComFromPrevStep(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, const rvec x[])
{
    pull_comm_t* comm   = &pull->comm;
    size_t       ngroup = pull->group.size();

    if (!comm->bParticipate)
    {
        return;
    }

    GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
               "pbcAtomBuffer should have size number of groups");
    GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
               "comBuffer should have size #group*c_comBufferStride");

    pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);

    for (size_t g = 0; g < ngroup; g++)
    {
        pull_group_work_t* pgrp;

        pgrp = &pull->group[g];

        if (pgrp->needToCalcCom && pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
        {
            GMX_ASSERT(pgrp->params.ind.size() > 1,
                       "Groups with no atoms, or only one atom, should not "
                       "use the COM from the previous step as reference.");

            rvec x_pbc = { 0, 0, 0 };
            copy_rvec(comm->pbcAtomBuffer[g], x_pbc);

            if (debug)
            {
                fprintf(debug, "Initialising prev step COM of pull group %zu. x_pbc =", g);
                for (int m = 0; m < DIM; m++)
                {
                    fprintf(debug, " %f", x_pbc[m]);
                }
                fprintf(debug, "\n");
            }

            /* The following is to a large extent similar to pull_calc_coms() */

            /* The final sums should end up in sum_com[0] */
            ComSums& comSumsTotal = pull->comSums[0];

            if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
            {
                sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, nullptr, masses, pbc, x_pbc, &comSumsTotal);
            }
            else
            {
#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
                for (int t = 0; t < pull->nthreads; t++)
                {
                    int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
                    int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
                    sum_com_part(
                            pgrp, ind_start, ind_end, x, nullptr, masses, pbc, x_pbc, &pull->comSums[t]);
                }

                /* Reduce the thread contributions to sum_com[0] */
                for (int t = 1; t < pull->nthreads; t++)
                {
                    comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
                    comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
                    dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
                    dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
                }
            }

            if (pgrp->localWeights.empty())
            {
                comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
            }

            /* Copy local sums to a buffer for global summing */
            auto localSums = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
                                                    c_comBufferStride);

            localSums[0]    = comSumsTotal.sum_wmx;
            localSums[1]    = comSumsTotal.sum_wmxp;
            localSums[2][0] = comSumsTotal.sum_wm;
            localSums[2][1] = comSumsTotal.sum_wwm;
            localSums[2][2] = 0;
        }
    }

    pullAllReduce(cr, comm, ngroup * c_comBufferStride * DIM, static_cast<double*>(comm->comBuffer[0]));

    for (size_t g = 0; g < ngroup; g++)
    {
        pull_group_work_t* pgrp;

        pgrp = &pull->group[g];
        if (pgrp->needToCalcCom)
        {
            if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
            {
                auto localSums = gmx::arrayRefFromArray(
                        comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
                double wmass, wwmass;

                /* Determine the inverse mass */
                wmass         = localSums[2][0];
                wwmass        = localSums[2][1];
                pgrp->mwscale = 1.0 / wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm != 0)
                {
                    pgrp->wscale = wmass / wwmass;
                    pgrp->invtm  = wwmass / (wmass * wmass);
                }
                /* Divide by the total mass */
                for (int m = 0; m < DIM; m++)
                {
                    pgrp->x[m] = localSums[0][m] * pgrp->mwscale;
                    pgrp->x[m] += comm->pbcAtomBuffer[g][m];
                }
                if (debug)
                {
                    fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
                    fprintf(debug, "Initialising prev step COM of pull group %zu to", g);
                    for (int m = 0; m < DIM; m++)
                    {
                        fprintf(debug, " %f", pgrp->x[m]);
                    }
                    fprintf(debug, "\n");
                }
                copy_dvec(pgrp->x, pgrp->x_prev_step);
            }
        }
    }
}