ec15584183734e56c41747955a56d39e97b921ba
[alexxy/gromacs.git] / src / gromacs / pulling / pullutil.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "config.h"
41
42 #include <cassert>
43 #include <cstdlib>
44
45 #include "gromacs/fileio/confio.h"
46 #include "gromacs/gmxlib/network.h"
47 #include "gromacs/math/functions.h"
48 #include "gromacs/math/utilities.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/commrec.h"
51 #include "gromacs/mdtypes/inputrec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/mdtypes/mdatom.h"
54 #include "gromacs/mdtypes/state.h"
55 #include "gromacs/pbcutil/pbc.h"
56 #include "gromacs/pulling/pull.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/gmxassert.h"
60 #include "gromacs/utility/real.h"
61 #include "gromacs/utility/smalloc.h"
62
63 #include "pull_internal.h"
64
65 #if GMX_MPI
66
67 // Helper function to deduce MPI datatype from the type of data
68 gmx_unused static MPI_Datatype mpiDatatype(const float gmx_unused* data)
69 {
70     return MPI_FLOAT;
71 }
72
73 // Helper function to deduce MPI datatype from the type of data
74 gmx_unused static MPI_Datatype mpiDatatype(const double gmx_unused* data)
75 {
76     return MPI_DOUBLE;
77 }
78
79 #endif // GMX_MPI
80
81 #if !GMX_DOUBLE
82 // Helper function; note that gmx_sum(d) should actually be templated
83 gmx_unused static void gmxAllReduce(int n, real* data, const t_commrec* cr)
84 {
85     gmx_sum(n, data, cr);
86 }
87 #endif
88
89 // Helper function; note that gmx_sum(d) should actually be templated
90 gmx_unused static void gmxAllReduce(int n, double* data, const t_commrec* cr)
91 {
92     gmx_sumd(n, data, cr);
93 }
94
95 // Reduce data of n elements over all ranks currently participating in pull
96 template<typename T>
97 static void pullAllReduce(const t_commrec* cr, pull_comm_t* comm, int n, T* data)
98 {
99     if (cr != nullptr && PAR(cr))
100     {
101         if (comm->bParticipateAll)
102         {
103             /* Sum the contributions over all DD ranks */
104             gmxAllReduce(n, data, cr);
105         }
106         else
107         {
108             /* Separate branch because gmx_sum uses cr->mpi_comm_mygroup */
109 #if GMX_MPI
110             MPI_Allreduce(MPI_IN_PLACE, data, n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
111 #else
112             gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
113 #endif
114         }
115     }
116 }
117
118 /* Copies the coordinates of the PBC atom of pgrp to x_pbc.
119  * When those coordinates are not available on this rank, clears x_pbc.
120  */
121 static void setPbcAtomCoords(const pull_group_work_t& pgrp, const rvec* x, rvec x_pbc)
122 {
123     if (pgrp.pbcAtomSet != nullptr)
124     {
125         if (pgrp.pbcAtomSet->numAtomsLocal() > 0)
126         {
127             /* We have the atom locally, copy its coordinates */
128             copy_rvec(x[pgrp.pbcAtomSet->localIndex()[0]], x_pbc);
129         }
130         else
131         {
132             /* Another rank has it, clear the coordinates for MPI_Allreduce */
133             clear_rvec(x_pbc);
134         }
135     }
136     else
137     {
138         copy_rvec(x[pgrp.params.pbcatom], x_pbc);
139     }
140 }
141
142 static void pull_set_pbcatoms(const t_commrec* cr, struct pull_t* pull, const rvec* x, gmx::ArrayRef<gmx::RVec> x_pbc)
143 {
144     int numPbcAtoms = 0;
145     for (size_t g = 0; g < pull->group.size(); g++)
146     {
147         const pull_group_work_t& group = pull->group[g];
148         if (group.needToCalcCom && (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM))
149         {
150             setPbcAtomCoords(pull->group[g], x, x_pbc[g]);
151             numPbcAtoms++;
152         }
153         else
154         {
155             clear_rvec(x_pbc[g]);
156         }
157     }
158
159     if (cr && PAR(cr) && numPbcAtoms > 0)
160     {
161         /* Sum over participating ranks to get x_pbc from the home ranks.
162          * This can be very expensive at high parallelization, so we only
163          * do this after each DD repartitioning.
164          */
165         pullAllReduce(cr, &pull->comm, pull->group.size() * DIM, static_cast<real*>(x_pbc[0]));
166     }
167 }
168
169 static void
170 make_cyl_refgrps(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, double t, const rvec* x)
171 {
172     pull_comm_t* comm = &pull->comm;
173
174     GMX_ASSERT(comm->cylinderBuffer.size() == pull->coord.size() * c_cylinderBufferStride,
175                "cylinderBuffer should have the correct size");
176
177     double inv_cyl_r2 = 1.0 / gmx::square(pull->params.cylinder_r);
178
179     /* loop over all groups to make a reference group for each*/
180     for (size_t c = 0; c < pull->coord.size(); c++)
181     {
182         pull_coord_work_t* pcrd;
183         double             sum_a, wmass, wwmass;
184         dvec               radf_fac0, radf_fac1;
185
186         pcrd = &pull->coord[c];
187
188         sum_a  = 0;
189         wmass  = 0;
190         wwmass = 0;
191         clear_dvec(radf_fac0);
192         clear_dvec(radf_fac1);
193
194         if (pcrd->params.eGeom == PullGroupGeometry::Cylinder)
195         {
196             /* pref will be the same group for all pull coordinates */
197             const pull_group_work_t& pref  = pull->group[pcrd->params.group[0]];
198             const pull_group_work_t& pgrp  = pull->group[pcrd->params.group[1]];
199             pull_group_work_t&       pdyna = pull->dyna[c];
200             rvec                     direction;
201             copy_dvec_to_rvec(pcrd->spatialData.vec, direction);
202
203             /* Since we have not calculated the COM of the cylinder group yet,
204              * we calculate distances with respect to location of the pull
205              * group minus the reference position along the vector.
206              * here we already have the COM of the pull group. This resolves
207              * any PBC issues and we don't need to use a PBC-atom here.
208              */
209             if (pcrd->params.rate != 0)
210             {
211                 /* With rate=0, value_ref is set initially */
212                 pcrd->value_ref = pcrd->params.init + pcrd->params.rate * t;
213             }
214             rvec reference;
215             for (int m = 0; m < DIM; m++)
216             {
217                 reference[m] = pgrp.x[m] - pcrd->spatialData.vec[m] * pcrd->value_ref;
218             }
219
220             auto localAtomIndices = pref.atomSet.localIndex();
221
222             /* This actually only needs to be done at init or DD time,
223              * but resizing with the same size does not cause much overhead.
224              */
225             pdyna.localWeights.resize(localAtomIndices.size());
226             pdyna.mdw.resize(localAtomIndices.size());
227             pdyna.dv.resize(localAtomIndices.size());
228
229             /* loop over all atoms in the main ref group */
230             for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
231             {
232                 int  atomIndex = localAtomIndices[indexInSet];
233                 rvec dx;
234                 pbc_dx_aiuc(pbc, x[atomIndex], reference, dx);
235                 double axialLocation = iprod(direction, dx);
236                 dvec   radialLocation;
237                 double dr2 = 0;
238                 for (int m = 0; m < DIM; m++)
239                 {
240                     /* Determine the radial components */
241                     radialLocation[m] = dx[m] - axialLocation * direction[m];
242                     dr2 += gmx::square(radialLocation[m]);
243                 }
244                 double dr2_rel = dr2 * inv_cyl_r2;
245
246                 if (dr2_rel < 1)
247                 {
248                     /* add atom to sum of COM and to weight array */
249
250                     double mass = masses[atomIndex];
251                     /* The radial weight function is 1-2x^2+x^4,
252                      * where x=r/cylinder_r. Since this function depends
253                      * on the radial component, we also get radial forces
254                      * on both groups.
255                      */
256                     double weight                  = 1 + (-2 + dr2_rel) * dr2_rel;
257                     double dweight_r               = (-4 + 4 * dr2_rel) * inv_cyl_r2;
258                     pdyna.localWeights[indexInSet] = weight;
259                     sum_a += mass * weight * axialLocation;
260                     wmass += mass * weight;
261                     wwmass += mass * weight * weight;
262                     dvec mdw;
263                     dsvmul(mass * dweight_r, radialLocation, mdw);
264                     copy_dvec(mdw, pdyna.mdw[indexInSet]);
265                     /* Currently we only have the axial component of the
266                      * offset from the cylinder COM up to an unkown offset.
267                      * We add this offset after the reduction needed
268                      * for determining the COM of the cylinder group.
269                      */
270                     pdyna.dv[indexInSet] = axialLocation;
271                     for (int m = 0; m < DIM; m++)
272                     {
273                         radf_fac0[m] += mdw[m];
274                         radf_fac1[m] += mdw[m] * axialLocation;
275                     }
276                 }
277                 else
278                 {
279                     pdyna.localWeights[indexInSet] = 0;
280                 }
281             }
282         }
283
284         auto buffer = gmx::arrayRefFromArray(
285                 comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
286
287         buffer[0] = wmass;
288         buffer[1] = wwmass;
289         buffer[2] = sum_a;
290
291         buffer[3] = radf_fac0[XX];
292         buffer[4] = radf_fac0[YY];
293         buffer[5] = radf_fac0[ZZ];
294
295         buffer[6] = radf_fac1[XX];
296         buffer[7] = radf_fac1[YY];
297         buffer[8] = radf_fac1[ZZ];
298     }
299
300     if (cr != nullptr && PAR(cr))
301     {
302         /* Sum the contributions over the ranks */
303         pullAllReduce(cr, comm, pull->coord.size() * c_cylinderBufferStride, comm->cylinderBuffer.data());
304     }
305
306     for (size_t c = 0; c < pull->coord.size(); c++)
307     {
308         pull_coord_work_t* pcrd;
309
310         pcrd = &pull->coord[c];
311
312         if (pcrd->params.eGeom == PullGroupGeometry::Cylinder)
313         {
314             pull_group_work_t*    pdyna       = &pull->dyna[c];
315             pull_group_work_t*    pgrp        = &pull->group[pcrd->params.group[1]];
316             PullCoordSpatialData& spatialData = pcrd->spatialData;
317
318             auto buffer = gmx::constArrayRefFromArray(
319                     comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
320             double wmass   = buffer[0];
321             double wwmass  = buffer[1];
322             pdyna->mwscale = 1.0 / wmass;
323             /* Cylinder pulling can't be used with constraints, but we set
324              * wscale and invtm anyhow, in case someone would like to use them.
325              */
326             pdyna->wscale = wmass / wwmass;
327             pdyna->invtm  = wwmass / (wmass * wmass);
328
329             /* We store the deviation of the COM from the reference location
330              * used above, since we need it when we apply the radial forces
331              * to the atoms in the cylinder group.
332              */
333             spatialData.cyl_dev = 0;
334             for (int m = 0; m < DIM; m++)
335             {
336                 double reference = pgrp->x[m] - spatialData.vec[m] * pcrd->value_ref;
337                 double dist      = -spatialData.vec[m] * buffer[2] * pdyna->mwscale;
338                 pdyna->x[m]      = reference - dist;
339                 spatialData.cyl_dev += dist;
340             }
341             /* Now we know the exact COM of the cylinder reference group,
342              * we can determine the radial force factor (ffrad) that when
343              * multiplied with the axial pull force will give the radial
344              * force on the pulled (non-cylinder) group.
345              */
346             for (int m = 0; m < DIM; m++)
347             {
348                 spatialData.ffrad[m] = (buffer[6 + m] + buffer[3 + m] * spatialData.cyl_dev) / wmass;
349             }
350
351             if (debug)
352             {
353                 fprintf(debug,
354                         "Pull cylinder group %zu:%8.3f%8.3f%8.3f m:%8.3f\n",
355                         c,
356                         pdyna->x[0],
357                         pdyna->x[1],
358                         pdyna->x[2],
359                         1.0 / pdyna->invtm);
360                 fprintf(debug,
361                         "ffrad %8.3f %8.3f %8.3f\n",
362                         spatialData.ffrad[XX],
363                         spatialData.ffrad[YY],
364                         spatialData.ffrad[ZZ]);
365             }
366         }
367     }
368 }
369
370 static double atan2_0_2pi(double y, double x)
371 {
372     double a;
373
374     a = atan2(y, x);
375     if (a < 0)
376     {
377         a += 2.0 * M_PI;
378     }
379     return a;
380 }
381
382 static void sum_com_part(const pull_group_work_t* pgrp,
383                          int                      ind_start,
384                          int                      ind_end,
385                          const rvec*              x,
386                          const rvec*              xp,
387                          const real*              mass,
388                          const t_pbc*             pbc,
389                          const rvec               x_pbc,
390                          ComSums*                 sum_com)
391 {
392     double sum_wm   = 0;
393     double sum_wwm  = 0;
394     dvec   sum_wmx  = { 0, 0, 0 };
395     dvec   sum_wmxp = { 0, 0, 0 };
396
397     auto localAtomIndices = pgrp->atomSet.localIndex();
398     for (int i = ind_start; i < ind_end; i++)
399     {
400         int  ii = localAtomIndices[i];
401         real wm;
402         if (pgrp->localWeights.empty())
403         {
404             wm = mass[ii];
405             sum_wm += wm;
406         }
407         else
408         {
409             real w;
410
411             w  = pgrp->localWeights[i];
412             wm = w * mass[ii];
413             sum_wm += wm;
414             sum_wwm += wm * w;
415         }
416         if (pgrp->epgrppbc == epgrppbcNONE)
417         {
418             /* Plain COM: sum the coordinates */
419             for (int d = 0; d < DIM; d++)
420             {
421                 sum_wmx[d] += wm * x[ii][d];
422             }
423             if (xp)
424             {
425                 for (int d = 0; d < DIM; d++)
426                 {
427                     sum_wmxp[d] += wm * xp[ii][d];
428                 }
429             }
430         }
431         else
432         {
433             rvec dx;
434
435             /* Sum the difference with the reference atom */
436             pbc_dx(pbc, x[ii], x_pbc, dx);
437             for (int d = 0; d < DIM; d++)
438             {
439                 sum_wmx[d] += wm * dx[d];
440             }
441             if (xp)
442             {
443                 /* For xp add the difference between xp and x to dx,
444                  * such that we use the same periodic image,
445                  * also when xp has a large displacement.
446                  */
447                 for (int d = 0; d < DIM; d++)
448                 {
449                     sum_wmxp[d] += wm * (dx[d] + xp[ii][d] - x[ii][d]);
450                 }
451             }
452         }
453     }
454
455     sum_com->sum_wm  = sum_wm;
456     sum_com->sum_wwm = sum_wwm;
457     copy_dvec(sum_wmx, sum_com->sum_wmx);
458     if (xp)
459     {
460         copy_dvec(sum_wmxp, sum_com->sum_wmxp);
461     }
462 }
463
464 static void sum_com_part_cosweight(const pull_group_work_t* pgrp,
465                                    int                      ind_start,
466                                    int                      ind_end,
467                                    int                      cosdim,
468                                    real                     twopi_box,
469                                    const rvec*              x,
470                                    const rvec*              xp,
471                                    const real*              mass,
472                                    ComSums*                 sum_com)
473 {
474     /* Cosine weighting geometry */
475     double sum_cm  = 0;
476     double sum_sm  = 0;
477     double sum_ccm = 0;
478     double sum_csm = 0;
479     double sum_ssm = 0;
480     double sum_cmp = 0;
481     double sum_smp = 0;
482
483     auto localAtomIndices = pgrp->atomSet.localIndex();
484
485     for (int i = ind_start; i < ind_end; i++)
486     {
487         int  ii = localAtomIndices[i];
488         real m  = mass[ii];
489         /* Determine cos and sin sums */
490         real cw = std::cos(x[ii][cosdim] * twopi_box);
491         real sw = std::sin(x[ii][cosdim] * twopi_box);
492         sum_cm += static_cast<double>(cw * m);
493         sum_sm += static_cast<double>(sw * m);
494         sum_ccm += static_cast<double>(cw * cw * m);
495         sum_csm += static_cast<double>(cw * sw * m);
496         sum_ssm += static_cast<double>(sw * sw * m);
497
498         if (xp != nullptr)
499         {
500             real cw = std::cos(xp[ii][cosdim] * twopi_box);
501             real sw = std::sin(xp[ii][cosdim] * twopi_box);
502             sum_cmp += static_cast<double>(cw * m);
503             sum_smp += static_cast<double>(sw * m);
504         }
505     }
506
507     sum_com->sum_cm  = sum_cm;
508     sum_com->sum_sm  = sum_sm;
509     sum_com->sum_ccm = sum_ccm;
510     sum_com->sum_csm = sum_csm;
511     sum_com->sum_ssm = sum_ssm;
512     sum_com->sum_cmp = sum_cmp;
513     sum_com->sum_smp = sum_smp;
514 }
515
516 /* calculates center of mass of selection index from all coordinates x */
517 void pull_calc_coms(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, double t, const rvec x[], rvec* xp)
518 {
519     real         twopi_box = 0;
520     pull_comm_t* comm;
521
522     comm = &pull->comm;
523
524     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
525                "pbcAtomBuffer should have size number of groups");
526     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
527                "comBuffer should have size #group*c_comBufferStride");
528
529     if (pull->bRefAt && pull->bSetPBCatoms)
530     {
531         pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
532
533         if (cr != nullptr && DOMAINDECOMP(cr))
534         {
535             /* We can keep these PBC reference coordinates fixed for nstlist
536              * steps, since atoms won't jump over PBC.
537              * This avoids a global reduction at the next nstlist-1 steps.
538              * Note that the exact values of the pbc reference coordinates
539              * are irrelevant, as long all atoms in the group are within
540              * half a box distance of the reference coordinate.
541              */
542             pull->bSetPBCatoms = FALSE;
543         }
544     }
545
546     if (pull->cosdim >= 0)
547     {
548         int m;
549
550         assert(pull->npbcdim <= DIM);
551
552         for (m = pull->cosdim + 1; m < pull->npbcdim; m++)
553         {
554             if (pbc->box[m][pull->cosdim] != 0)
555             {
556                 gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
557             }
558         }
559         twopi_box = 2.0 * M_PI / pbc->box[pull->cosdim][pull->cosdim];
560     }
561
562     for (size_t g = 0; g < pull->group.size(); g++)
563     {
564         pull_group_work_t* pgrp = &pull->group[g];
565
566         /* Cosine-weighted COMs behave different from all other weighted COMs
567          * in the sense that the weights depend on instantaneous coordinates,
568          * not on pre-set weights. Thus we resize the local weight buffer here.
569          */
570         if (pgrp->epgrppbc == epgrppbcCOS)
571         {
572             pgrp->localWeights.resize(pgrp->atomSet.localIndex().size());
573         }
574
575         auto comBuffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
576                                                 c_comBufferStride);
577
578         if (pgrp->needToCalcCom)
579         {
580             if (pgrp->epgrppbc != epgrppbcCOS)
581             {
582                 rvec x_pbc = { 0, 0, 0 };
583
584                 switch (pgrp->epgrppbc)
585                 {
586                     case epgrppbcREFAT:
587                         /* Set the pbc atom */
588                         copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
589                         break;
590                     case epgrppbcPREVSTEPCOM:
591                         /* Set the pbc reference to the COM of the group of the last step */
592                         copy_dvec_to_rvec(pgrp->x_prev_step, comm->pbcAtomBuffer[g]);
593                         copy_dvec_to_rvec(pgrp->x_prev_step, x_pbc);
594                 }
595
596                 /* The final sums should end up in comSums[0] */
597                 ComSums& comSumsTotal = pull->comSums[0];
598
599                 /* If we have a single-atom group the mass is irrelevant, so
600                  * we can remove the mass factor to avoid division by zero.
601                  * Note that with constraint pulling the mass does matter, but
602                  * in that case a check group mass != 0 has been done before.
603                  */
604                 if (pgrp->params.ind.size() == 1 && pgrp->atomSet.numAtomsLocal() == 1
605                     && masses[pgrp->atomSet.localIndex()[0]] == 0)
606                 {
607                     GMX_ASSERT(xp == nullptr,
608                                "We should not have groups with zero mass with constraints, i.e. "
609                                "xp!=NULL");
610
611                     /* Copy the single atom coordinate */
612                     for (int d = 0; d < DIM; d++)
613                     {
614                         comSumsTotal.sum_wmx[d] = x[pgrp->atomSet.localIndex()[0]][d];
615                     }
616                     /* Set all mass factors to 1 to get the correct COM */
617                     comSumsTotal.sum_wm  = 1;
618                     comSumsTotal.sum_wwm = 1;
619                 }
620                 else if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
621                 {
622                     sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, xp, masses, pbc, x_pbc, &comSumsTotal);
623                 }
624                 else
625                 {
626 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
627                     for (int t = 0; t < pull->nthreads; t++)
628                     {
629                         int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
630                         int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
631                         sum_com_part(
632                                 pgrp, ind_start, ind_end, x, xp, masses, pbc, x_pbc, &pull->comSums[t]);
633                     }
634
635                     /* Reduce the thread contributions to sum_com[0] */
636                     for (int t = 1; t < pull->nthreads; t++)
637                     {
638                         comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
639                         comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
640                         dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
641                         dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
642                     }
643                 }
644
645                 if (pgrp->localWeights.empty())
646                 {
647                     comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
648                 }
649
650                 /* Copy local sums to a buffer for global summing */
651                 copy_dvec(comSumsTotal.sum_wmx, comBuffer[0]);
652
653                 copy_dvec(comSumsTotal.sum_wmxp, comBuffer[1]);
654
655                 comBuffer[2][0] = comSumsTotal.sum_wm;
656                 comBuffer[2][1] = comSumsTotal.sum_wwm;
657                 comBuffer[2][2] = 0;
658             }
659             else
660             {
661                 /* Cosine weighting geometry.
662                  * This uses a slab of the system, thus we always have many
663                  * atoms in the pull groups. Therefore, always use threads.
664                  */
665 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
666                 for (int t = 0; t < pull->nthreads; t++)
667                 {
668                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
669                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
670                     sum_com_part_cosweight(
671                             pgrp, ind_start, ind_end, pull->cosdim, twopi_box, x, xp, masses, &pull->comSums[t]);
672                 }
673
674                 /* Reduce the thread contributions to comSums[0] */
675                 ComSums& comSumsTotal = pull->comSums[0];
676                 for (int t = 1; t < pull->nthreads; t++)
677                 {
678                     comSumsTotal.sum_cm += pull->comSums[t].sum_cm;
679                     comSumsTotal.sum_sm += pull->comSums[t].sum_sm;
680                     comSumsTotal.sum_ccm += pull->comSums[t].sum_ccm;
681                     comSumsTotal.sum_csm += pull->comSums[t].sum_csm;
682                     comSumsTotal.sum_ssm += pull->comSums[t].sum_ssm;
683                     comSumsTotal.sum_cmp += pull->comSums[t].sum_cmp;
684                     comSumsTotal.sum_smp += pull->comSums[t].sum_smp;
685                 }
686
687                 /* Copy local sums to a buffer for global summing */
688                 comBuffer[0][0] = comSumsTotal.sum_cm;
689                 comBuffer[0][1] = comSumsTotal.sum_sm;
690                 comBuffer[0][2] = 0;
691                 comBuffer[1][0] = comSumsTotal.sum_ccm;
692                 comBuffer[1][1] = comSumsTotal.sum_csm;
693                 comBuffer[1][2] = comSumsTotal.sum_ssm;
694                 comBuffer[2][0] = comSumsTotal.sum_cmp;
695                 comBuffer[2][1] = comSumsTotal.sum_smp;
696                 comBuffer[2][2] = 0;
697             }
698         }
699         else
700         {
701             clear_dvec(comBuffer[0]);
702             clear_dvec(comBuffer[1]);
703             clear_dvec(comBuffer[2]);
704         }
705     }
706
707     pullAllReduce(cr,
708                   comm,
709                   pull->group.size() * c_comBufferStride * DIM,
710                   static_cast<double*>(comm->comBuffer[0]));
711
712     for (size_t g = 0; g < pull->group.size(); g++)
713     {
714         pull_group_work_t* pgrp;
715
716         pgrp = &pull->group[g];
717         if (pgrp->needToCalcCom)
718         {
719             GMX_ASSERT(!pgrp->params.ind.empty(),
720                        "Normal pull groups should have atoms, only group 0, which should have "
721                        "bCalcCom=FALSE has nat=0");
722
723             const auto comBuffer = gmx::constArrayRefFromArray(
724                     comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
725
726             if (pgrp->epgrppbc != epgrppbcCOS)
727             {
728                 double wmass, wwmass;
729                 int    m;
730
731                 /* Determine the inverse mass */
732                 wmass         = comBuffer[2][0];
733                 wwmass        = comBuffer[2][1];
734                 pgrp->mwscale = 1.0 / wmass;
735                 /* invtm==0 signals a frozen group, so then we should keep it zero */
736                 if (pgrp->invtm != 0)
737                 {
738                     pgrp->wscale = wmass / wwmass;
739                     pgrp->invtm  = wwmass / (wmass * wmass);
740                 }
741                 /* Divide by the total mass */
742                 for (m = 0; m < DIM; m++)
743                 {
744                     pgrp->x[m] = comBuffer[0][m] * pgrp->mwscale;
745                     if (xp)
746                     {
747                         pgrp->xp[m] = comBuffer[1][m] * pgrp->mwscale;
748                     }
749                     if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
750                     {
751                         pgrp->x[m] += comm->pbcAtomBuffer[g][m];
752                         if (xp)
753                         {
754                             pgrp->xp[m] += comm->pbcAtomBuffer[g][m];
755                         }
756                     }
757                 }
758             }
759             else
760             {
761                 /* Cosine weighting geometry */
762                 double csw, snw, wmass, wwmass;
763
764                 /* Determine the optimal location of the cosine weight */
765                 csw                   = comBuffer[0][0];
766                 snw                   = comBuffer[0][1];
767                 pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
768                 /* Set the weights for the local atoms */
769                 wmass  = sqrt(csw * csw + snw * snw);
770                 wwmass = (comBuffer[1][0] * csw * csw + comBuffer[1][1] * csw * snw
771                           + comBuffer[1][2] * snw * snw)
772                          / (wmass * wmass);
773
774                 pgrp->mwscale = 1.0 / wmass;
775                 pgrp->wscale  = wmass / wwmass;
776                 pgrp->invtm   = wwmass / (wmass * wmass);
777                 /* Set the weights for the local atoms */
778                 csw *= pgrp->invtm;
779                 snw *= pgrp->invtm;
780                 for (size_t i = 0; i < pgrp->atomSet.numAtomsLocal(); i++)
781                 {
782                     int ii                = pgrp->atomSet.localIndex()[i];
783                     pgrp->localWeights[i] = csw * std::cos(twopi_box * x[ii][pull->cosdim])
784                                             + snw * std::sin(twopi_box * x[ii][pull->cosdim]);
785                 }
786                 if (xp)
787                 {
788                     csw                    = comBuffer[2][0];
789                     snw                    = comBuffer[2][1];
790                     pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
791                 }
792             }
793             if (debug)
794             {
795                 fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
796             }
797         }
798     }
799
800     if (pull->bCylinder)
801     {
802         /* Calculate the COMs for the cyclinder reference groups */
803         make_cyl_refgrps(cr, pull, masses, pbc, t, x);
804     }
805 }
806
807 using BoolVec = gmx::BasicVector<bool>;
808
809 /* Returns whether the pull group obeys the PBC restrictions */
810 static bool pullGroupObeysPbcRestrictions(const pull_group_work_t& group,
811                                           const BoolVec&           dimUsed,
812                                           const rvec*              x,
813                                           const t_pbc&             pbc,
814                                           const gmx::RVec&         x_pbc,
815                                           const real               pbcMargin)
816 {
817     /* Determine which dimensions are relevant for PBC */
818     BoolVec dimUsesPbc       = { false, false, false };
819     bool    pbcIsRectangular = true;
820     for (int d = 0; d < pbc.ndim_ePBC; d++)
821     {
822         if (dimUsed[d])
823         {
824             dimUsesPbc[d] = true;
825             /* All non-zero dimensions of vector v are involved in PBC */
826             for (int d2 = d + 1; d2 < pbc.ndim_ePBC; d2++)
827             {
828                 assert(d2 < DIM);
829                 if (pbc.box[d2][d] != 0)
830                 {
831                     dimUsesPbc[d2]   = true;
832                     pbcIsRectangular = false;
833                 }
834             }
835         }
836     }
837
838     rvec marginPerDim    = {};
839     real marginDistance2 = 0;
840     if (pbcIsRectangular)
841     {
842         /* Use margins for dimensions independently */
843         for (int d = 0; d < pbc.ndim_ePBC; d++)
844         {
845             marginPerDim[d] = pbcMargin * pbc.hbox_diag[d];
846         }
847     }
848     else
849     {
850         /* Check the total distance along the relevant dimensions */
851         for (int d = 0; d < pbc.ndim_ePBC; d++)
852         {
853             if (dimUsesPbc[d])
854             {
855                 marginDistance2 += pbcMargin * gmx::square(0.5) * norm2(pbc.box[d]);
856             }
857         }
858     }
859
860     auto localAtomIndices = group.atomSet.localIndex();
861     for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
862     {
863         rvec dx;
864         pbc_dx(&pbc, x[localAtomIndices[indexInSet]], x_pbc, dx);
865
866         bool atomIsTooFar = false;
867         if (pbcIsRectangular)
868         {
869             for (int d = 0; d < pbc.ndim_ePBC; d++)
870             {
871                 if (dimUsesPbc[d] && (dx[d] < -marginPerDim[d] || dx[d] > marginPerDim[d]))
872                 {
873                     atomIsTooFar = true;
874                 }
875             }
876         }
877         else
878         {
879             real pbcDistance2 = 0;
880             for (int d = 0; d < pbc.ndim_ePBC; d++)
881             {
882                 if (dimUsesPbc[d])
883                 {
884                     pbcDistance2 += gmx::square(dx[d]);
885                 }
886             }
887             atomIsTooFar = (pbcDistance2 > marginDistance2);
888         }
889         if (atomIsTooFar)
890         {
891             return false;
892         }
893     }
894
895     return true;
896 }
897
898 int pullCheckPbcWithinGroups(const pull_t& pull, const rvec* x, const t_pbc& pbc, real pbcMargin)
899 {
900     if (pbc.pbcType == PbcType::No)
901     {
902         return -1;
903     }
904
905     /* Determine what dimensions are used for each group by pull coordinates */
906     std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
907     for (size_t c = 0; c < pull.coord.size(); c++)
908     {
909         const t_pull_coord& coordParams = pull.coord[c].params;
910         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
911         {
912             for (int d = 0; d < DIM; d++)
913             {
914                 if (coordParams.dim[d]
915                     && !(coordParams.eGeom == PullGroupGeometry::Cylinder && groupIndex == 0))
916                 {
917                     dimUsed[coordParams.group[groupIndex]][d] = true;
918                 }
919             }
920         }
921     }
922
923     /* Check PBC for every group that uses a PBC reference atom treatment */
924     for (size_t g = 0; g < pull.group.size(); g++)
925     {
926         const pull_group_work_t& group = pull.group[g];
927         if ((group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM)
928             && !pullGroupObeysPbcRestrictions(group, dimUsed[g], x, pbc, pull.comm.pbcAtomBuffer[g], pbcMargin))
929         {
930             return g;
931         }
932     }
933
934     return -1;
935 }
936
937 bool pullCheckPbcWithinGroup(const pull_t&                  pull,
938                              gmx::ArrayRef<const gmx::RVec> x,
939                              const t_pbc&                   pbc,
940                              int                            groupNr,
941                              real                           pbcMargin)
942 {
943     if (pbc.pbcType == PbcType::No)
944     {
945         return true;
946     }
947     GMX_ASSERT(groupNr < gmx::ssize(pull.group), "groupNr is out of range");
948
949     /* Check PBC if the group uses a PBC reference atom treatment. */
950     const pull_group_work_t& group = pull.group[groupNr];
951     if (group.epgrppbc != epgrppbcREFAT && group.epgrppbc != epgrppbcPREVSTEPCOM)
952     {
953         return true;
954     }
955
956     /* Determine what dimensions are used for each group by pull coordinates */
957     BoolVec dimUsed = { false, false, false };
958     for (size_t c = 0; c < pull.coord.size(); c++)
959     {
960         const t_pull_coord& coordParams = pull.coord[c].params;
961         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
962         {
963             if (coordParams.group[groupIndex] == groupNr)
964             {
965                 for (int d = 0; d < DIM; d++)
966                 {
967                     if (coordParams.dim[d]
968                         && !(coordParams.eGeom == PullGroupGeometry::Cylinder && groupIndex == 0))
969                     {
970                         dimUsed[d] = true;
971                     }
972                 }
973             }
974         }
975     }
976
977     return (pullGroupObeysPbcRestrictions(
978             group, dimUsed, as_rvec_array(x.data()), pbc, pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
979 }
980
981 void setPrevStepPullComFromState(struct pull_t* pull, const t_state* state)
982 {
983     for (size_t g = 0; g < pull->group.size(); g++)
984     {
985         for (int j = 0; j < DIM; j++)
986         {
987             pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g * DIM + j];
988         }
989     }
990 }
991
992 void updatePrevStepPullCom(struct pull_t* pull, t_state* state)
993 {
994     for (size_t g = 0; g < pull->group.size(); g++)
995     {
996         if (pull->group[g].needToCalcCom)
997         {
998             for (int j = 0; j < DIM; j++)
999             {
1000                 pull->group[g].x_prev_step[j]          = pull->group[g].x[j];
1001                 state->pull_com_prev_step[g * DIM + j] = pull->group[g].x[j];
1002             }
1003         }
1004     }
1005 }
1006
1007 void allocStatePrevStepPullCom(t_state* state, const pull_t* pull)
1008 {
1009     if (!pull)
1010     {
1011         state->pull_com_prev_step.clear();
1012         return;
1013     }
1014     size_t ngroup = pull->group.size();
1015     if (state->pull_com_prev_step.size() / DIM != ngroup)
1016     {
1017         state->pull_com_prev_step.resize(ngroup * DIM, NAN);
1018     }
1019 }
1020
1021 void initPullComFromPrevStep(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, const rvec x[])
1022 {
1023     pull_comm_t* comm   = &pull->comm;
1024     size_t       ngroup = pull->group.size();
1025
1026     if (!comm->bParticipate)
1027     {
1028         return;
1029     }
1030
1031     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
1032                "pbcAtomBuffer should have size number of groups");
1033     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
1034                "comBuffer should have size #group*c_comBufferStride");
1035
1036     pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
1037
1038     for (size_t g = 0; g < ngroup; g++)
1039     {
1040         pull_group_work_t* pgrp;
1041
1042         pgrp = &pull->group[g];
1043
1044         if (pgrp->needToCalcCom && pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1045         {
1046             GMX_ASSERT(pgrp->params.ind.size() > 1,
1047                        "Groups with no atoms, or only one atom, should not "
1048                        "use the COM from the previous step as reference.");
1049
1050             rvec x_pbc = { 0, 0, 0 };
1051             copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
1052
1053             if (debug)
1054             {
1055                 fprintf(debug, "Initialising prev step COM of pull group %zu. x_pbc =", g);
1056                 for (int m = 0; m < DIM; m++)
1057                 {
1058                     fprintf(debug, " %f", x_pbc[m]);
1059                 }
1060                 fprintf(debug, "\n");
1061             }
1062
1063             /* The following is to a large extent similar to pull_calc_coms() */
1064
1065             /* The final sums should end up in sum_com[0] */
1066             ComSums& comSumsTotal = pull->comSums[0];
1067
1068             if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
1069             {
1070                 sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, nullptr, masses, pbc, x_pbc, &comSumsTotal);
1071             }
1072             else
1073             {
1074 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
1075                 for (int t = 0; t < pull->nthreads; t++)
1076                 {
1077                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
1078                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
1079                     sum_com_part(
1080                             pgrp, ind_start, ind_end, x, nullptr, masses, pbc, x_pbc, &pull->comSums[t]);
1081                 }
1082
1083                 /* Reduce the thread contributions to sum_com[0] */
1084                 for (int t = 1; t < pull->nthreads; t++)
1085                 {
1086                     comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
1087                     comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
1088                     dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
1089                     dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
1090                 }
1091             }
1092
1093             if (pgrp->localWeights.empty())
1094             {
1095                 comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
1096             }
1097
1098             /* Copy local sums to a buffer for global summing */
1099             auto localSums = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
1100                                                     c_comBufferStride);
1101
1102             localSums[0]    = comSumsTotal.sum_wmx;
1103             localSums[1]    = comSumsTotal.sum_wmxp;
1104             localSums[2][0] = comSumsTotal.sum_wm;
1105             localSums[2][1] = comSumsTotal.sum_wwm;
1106             localSums[2][2] = 0;
1107         }
1108     }
1109
1110     pullAllReduce(cr, comm, ngroup * c_comBufferStride * DIM, static_cast<double*>(comm->comBuffer[0]));
1111
1112     for (size_t g = 0; g < ngroup; g++)
1113     {
1114         pull_group_work_t* pgrp;
1115
1116         pgrp = &pull->group[g];
1117         if (pgrp->needToCalcCom)
1118         {
1119             if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1120             {
1121                 auto localSums = gmx::arrayRefFromArray(
1122                         comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
1123                 double wmass, wwmass;
1124
1125                 /* Determine the inverse mass */
1126                 wmass         = localSums[2][0];
1127                 wwmass        = localSums[2][1];
1128                 pgrp->mwscale = 1.0 / wmass;
1129                 /* invtm==0 signals a frozen group, so then we should keep it zero */
1130                 if (pgrp->invtm != 0)
1131                 {
1132                     pgrp->wscale = wmass / wwmass;
1133                     pgrp->invtm  = wwmass / (wmass * wmass);
1134                 }
1135                 /* Divide by the total mass */
1136                 for (int m = 0; m < DIM; m++)
1137                 {
1138                     pgrp->x[m] = localSums[0][m] * pgrp->mwscale;
1139                     pgrp->x[m] += comm->pbcAtomBuffer[g][m];
1140                 }
1141                 if (debug)
1142                 {
1143                     fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
1144                     fprintf(debug, "Initialising prev step COM of pull group %zu to", g);
1145                     for (int m = 0; m < DIM; m++)
1146                     {
1147                         fprintf(debug, " %f", pgrp->x[m]);
1148                     }
1149                     fprintf(debug, "\n");
1150                 }
1151                 copy_dvec(pgrp->x, pgrp->x_prev_step);
1152             }
1153         }
1154     }
1155 }