Apply clang-format to source tree
[alexxy/gromacs.git] / src / gromacs / pulling / pullutil.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "config.h"
40
41 #include <cassert>
42 #include <cstdlib>
43
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/gmxlib/network.h"
46 #include "gromacs/math/functions.h"
47 #include "gromacs/math/utilities.h"
48 #include "gromacs/math/vec.h"
49 #include "gromacs/mdtypes/commrec.h"
50 #include "gromacs/mdtypes/inputrec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/mdtypes/mdatom.h"
53 #include "gromacs/mdtypes/state.h"
54 #include "gromacs/pbcutil/pbc.h"
55 #include "gromacs/pulling/pull.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/futil.h"
58 #include "gromacs/utility/gmxassert.h"
59 #include "gromacs/utility/real.h"
60 #include "gromacs/utility/smalloc.h"
61
62 #include "pull_internal.h"
63
64 #if GMX_MPI
65
66 // Helper function to deduce MPI datatype from the type of data
67 gmx_unused static MPI_Datatype mpiDatatype(const float gmx_unused* data)
68 {
69     return MPI_FLOAT;
70 }
71
72 // Helper function to deduce MPI datatype from the type of data
73 gmx_unused static MPI_Datatype mpiDatatype(const double gmx_unused* data)
74 {
75     return MPI_DOUBLE;
76 }
77
78 #endif // GMX_MPI
79
80 #if !GMX_DOUBLE
81 // Helper function; note that gmx_sum(d) should actually be templated
82 gmx_unused static void gmxAllReduce(int n, real* data, const t_commrec* cr)
83 {
84     gmx_sum(n, data, cr);
85 }
86 #endif
87
88 // Helper function; note that gmx_sum(d) should actually be templated
89 gmx_unused static void gmxAllReduce(int n, double* data, const t_commrec* cr)
90 {
91     gmx_sumd(n, data, cr);
92 }
93
94 // Reduce data of n elements over all ranks currently participating in pull
95 template<typename T>
96 static void pullAllReduce(const t_commrec* cr, pull_comm_t* comm, int n, T* data)
97 {
98     if (cr != nullptr && PAR(cr))
99     {
100         if (comm->bParticipateAll)
101         {
102             /* Sum the contributions over all DD ranks */
103             gmxAllReduce(n, data, cr);
104         }
105         else
106         {
107             /* Separate branch because gmx_sum uses cr->mpi_comm_mygroup */
108 #if GMX_MPI
109 #    if MPI_IN_PLACE_EXISTS
110             MPI_Allreduce(MPI_IN_PLACE, data, n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
111 #    else
112             std::vector<T> buf(n);
113
114             MPI_Allreduce(data, buf.data(), n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
115
116             /* Copy the result from the buffer to the input/output data */
117             for (int i = 0; i < n; i++)
118             {
119                 data[i] = buf[i];
120             }
121 #    endif
122 #else
123             gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
124 #endif
125         }
126     }
127 }
128
129 /* Copies the coordinates of the PBC atom of pgrp to x_pbc.
130  * When those coordinates are not available on this rank, clears x_pbc.
131  */
132 static void setPbcAtomCoords(const pull_group_work_t& pgrp, const rvec* x, rvec x_pbc)
133 {
134     if (pgrp.pbcAtomSet != nullptr)
135     {
136         if (pgrp.pbcAtomSet->numAtomsLocal() > 0)
137         {
138             /* We have the atom locally, copy its coordinates */
139             copy_rvec(x[pgrp.pbcAtomSet->localIndex()[0]], x_pbc);
140         }
141         else
142         {
143             /* Another rank has it, clear the coordinates for MPI_Allreduce */
144             clear_rvec(x_pbc);
145         }
146     }
147     else
148     {
149         copy_rvec(x[pgrp.params.pbcatom], x_pbc);
150     }
151 }
152
153 static void pull_set_pbcatoms(const t_commrec* cr, struct pull_t* pull, const rvec* x, gmx::ArrayRef<gmx::RVec> x_pbc)
154 {
155     int numPbcAtoms = 0;
156     for (size_t g = 0; g < pull->group.size(); g++)
157     {
158         const pull_group_work_t& group = pull->group[g];
159         if (group.needToCalcCom && (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM))
160         {
161             setPbcAtomCoords(pull->group[g], x, x_pbc[g]);
162             numPbcAtoms++;
163         }
164         else
165         {
166             clear_rvec(x_pbc[g]);
167         }
168     }
169
170     if (cr && PAR(cr) && numPbcAtoms > 0)
171     {
172         /* Sum over participating ranks to get x_pbc from the home ranks.
173          * This can be very expensive at high parallelization, so we only
174          * do this after each DD repartitioning.
175          */
176         pullAllReduce(cr, &pull->comm, pull->group.size() * DIM, static_cast<real*>(x_pbc[0]));
177     }
178 }
179
180 static void
181 make_cyl_refgrps(const t_commrec* cr, pull_t* pull, const t_mdatoms* md, t_pbc* pbc, double t, const rvec* x)
182 {
183     pull_comm_t* comm = &pull->comm;
184
185     GMX_ASSERT(comm->cylinderBuffer.size() == pull->coord.size() * c_cylinderBufferStride,
186                "cylinderBuffer should have the correct size");
187
188     double inv_cyl_r2 = 1.0 / gmx::square(pull->params.cylinder_r);
189
190     /* loop over all groups to make a reference group for each*/
191     for (size_t c = 0; c < pull->coord.size(); c++)
192     {
193         pull_coord_work_t* pcrd;
194         double             sum_a, wmass, wwmass;
195         dvec               radf_fac0, radf_fac1;
196
197         pcrd = &pull->coord[c];
198
199         sum_a  = 0;
200         wmass  = 0;
201         wwmass = 0;
202         clear_dvec(radf_fac0);
203         clear_dvec(radf_fac1);
204
205         if (pcrd->params.eGeom == epullgCYL)
206         {
207             /* pref will be the same group for all pull coordinates */
208             const pull_group_work_t& pref  = pull->group[pcrd->params.group[0]];
209             const pull_group_work_t& pgrp  = pull->group[pcrd->params.group[1]];
210             pull_group_work_t&       pdyna = pull->dyna[c];
211             rvec                     direction;
212             copy_dvec_to_rvec(pcrd->spatialData.vec, direction);
213
214             /* Since we have not calculated the COM of the cylinder group yet,
215              * we calculate distances with respect to location of the pull
216              * group minus the reference position along the vector.
217              * here we already have the COM of the pull group. This resolves
218              * any PBC issues and we don't need to use a PBC-atom here.
219              */
220             if (pcrd->params.rate != 0)
221             {
222                 /* With rate=0, value_ref is set initially */
223                 pcrd->value_ref = pcrd->params.init + pcrd->params.rate * t;
224             }
225             rvec reference;
226             for (int m = 0; m < DIM; m++)
227             {
228                 reference[m] = pgrp.x[m] - pcrd->spatialData.vec[m] * pcrd->value_ref;
229             }
230
231             auto localAtomIndices = pref.atomSet.localIndex();
232
233             /* This actually only needs to be done at init or DD time,
234              * but resizing with the same size does not cause much overhead.
235              */
236             pdyna.localWeights.resize(localAtomIndices.size());
237             pdyna.mdw.resize(localAtomIndices.size());
238             pdyna.dv.resize(localAtomIndices.size());
239
240             /* loop over all atoms in the main ref group */
241             for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
242             {
243                 int  atomIndex = localAtomIndices[indexInSet];
244                 rvec dx;
245                 pbc_dx_aiuc(pbc, x[atomIndex], reference, dx);
246                 double axialLocation = iprod(direction, dx);
247                 dvec   radialLocation;
248                 double dr2 = 0;
249                 for (int m = 0; m < DIM; m++)
250                 {
251                     /* Determine the radial components */
252                     radialLocation[m] = dx[m] - axialLocation * direction[m];
253                     dr2 += gmx::square(radialLocation[m]);
254                 }
255                 double dr2_rel = dr2 * inv_cyl_r2;
256
257                 if (dr2_rel < 1)
258                 {
259                     /* add atom to sum of COM and to weight array */
260
261                     double mass = md->massT[atomIndex];
262                     /* The radial weight function is 1-2x^2+x^4,
263                      * where x=r/cylinder_r. Since this function depends
264                      * on the radial component, we also get radial forces
265                      * on both groups.
266                      */
267                     double weight                  = 1 + (-2 + dr2_rel) * dr2_rel;
268                     double dweight_r               = (-4 + 4 * dr2_rel) * inv_cyl_r2;
269                     pdyna.localWeights[indexInSet] = weight;
270                     sum_a += mass * weight * axialLocation;
271                     wmass += mass * weight;
272                     wwmass += mass * weight * weight;
273                     dvec mdw;
274                     dsvmul(mass * dweight_r, radialLocation, mdw);
275                     copy_dvec(mdw, pdyna.mdw[indexInSet]);
276                     /* Currently we only have the axial component of the
277                      * offset from the cylinder COM up to an unkown offset.
278                      * We add this offset after the reduction needed
279                      * for determining the COM of the cylinder group.
280                      */
281                     pdyna.dv[indexInSet] = axialLocation;
282                     for (int m = 0; m < DIM; m++)
283                     {
284                         radf_fac0[m] += mdw[m];
285                         radf_fac1[m] += mdw[m] * axialLocation;
286                     }
287                 }
288                 else
289                 {
290                     pdyna.localWeights[indexInSet] = 0;
291                 }
292             }
293         }
294
295         auto buffer = gmx::arrayRefFromArray(
296                 comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
297
298         buffer[0] = wmass;
299         buffer[1] = wwmass;
300         buffer[2] = sum_a;
301
302         buffer[3] = radf_fac0[XX];
303         buffer[4] = radf_fac0[YY];
304         buffer[5] = radf_fac0[ZZ];
305
306         buffer[6] = radf_fac1[XX];
307         buffer[7] = radf_fac1[YY];
308         buffer[8] = radf_fac1[ZZ];
309     }
310
311     if (cr != nullptr && PAR(cr))
312     {
313         /* Sum the contributions over the ranks */
314         pullAllReduce(cr, comm, pull->coord.size() * c_cylinderBufferStride, comm->cylinderBuffer.data());
315     }
316
317     for (size_t c = 0; c < pull->coord.size(); c++)
318     {
319         pull_coord_work_t* pcrd;
320
321         pcrd = &pull->coord[c];
322
323         if (pcrd->params.eGeom == epullgCYL)
324         {
325             pull_group_work_t*    pdyna       = &pull->dyna[c];
326             pull_group_work_t*    pgrp        = &pull->group[pcrd->params.group[1]];
327             PullCoordSpatialData& spatialData = pcrd->spatialData;
328
329             auto buffer = gmx::constArrayRefFromArray(
330                     comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
331             double wmass   = buffer[0];
332             double wwmass  = buffer[1];
333             pdyna->mwscale = 1.0 / wmass;
334             /* Cylinder pulling can't be used with constraints, but we set
335              * wscale and invtm anyhow, in case someone would like to use them.
336              */
337             pdyna->wscale = wmass / wwmass;
338             pdyna->invtm  = wwmass / (wmass * wmass);
339
340             /* We store the deviation of the COM from the reference location
341              * used above, since we need it when we apply the radial forces
342              * to the atoms in the cylinder group.
343              */
344             spatialData.cyl_dev = 0;
345             for (int m = 0; m < DIM; m++)
346             {
347                 double reference = pgrp->x[m] - spatialData.vec[m] * pcrd->value_ref;
348                 double dist      = -spatialData.vec[m] * buffer[2] * pdyna->mwscale;
349                 pdyna->x[m]      = reference - dist;
350                 spatialData.cyl_dev += dist;
351             }
352             /* Now we know the exact COM of the cylinder reference group,
353              * we can determine the radial force factor (ffrad) that when
354              * multiplied with the axial pull force will give the radial
355              * force on the pulled (non-cylinder) group.
356              */
357             for (int m = 0; m < DIM; m++)
358             {
359                 spatialData.ffrad[m] = (buffer[6 + m] + buffer[3 + m] * spatialData.cyl_dev) / wmass;
360             }
361
362             if (debug)
363             {
364                 fprintf(debug, "Pull cylinder group %zu:%8.3f%8.3f%8.3f m:%8.3f\n", c, pdyna->x[0],
365                         pdyna->x[1], pdyna->x[2], 1.0 / pdyna->invtm);
366                 fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n", spatialData.ffrad[XX],
367                         spatialData.ffrad[YY], spatialData.ffrad[ZZ]);
368             }
369         }
370     }
371 }
372
373 static double atan2_0_2pi(double y, double x)
374 {
375     double a;
376
377     a = atan2(y, x);
378     if (a < 0)
379     {
380         a += 2.0 * M_PI;
381     }
382     return a;
383 }
384
385 static void sum_com_part(const pull_group_work_t* pgrp,
386                          int                      ind_start,
387                          int                      ind_end,
388                          const rvec*              x,
389                          const rvec*              xp,
390                          const real*              mass,
391                          const t_pbc*             pbc,
392                          const rvec               x_pbc,
393                          ComSums*                 sum_com)
394 {
395     double sum_wm   = 0;
396     double sum_wwm  = 0;
397     dvec   sum_wmx  = { 0, 0, 0 };
398     dvec   sum_wmxp = { 0, 0, 0 };
399
400     auto localAtomIndices = pgrp->atomSet.localIndex();
401     for (int i = ind_start; i < ind_end; i++)
402     {
403         int  ii = localAtomIndices[i];
404         real wm;
405         if (pgrp->localWeights.empty())
406         {
407             wm = mass[ii];
408             sum_wm += wm;
409         }
410         else
411         {
412             real w;
413
414             w  = pgrp->localWeights[i];
415             wm = w * mass[ii];
416             sum_wm += wm;
417             sum_wwm += wm * w;
418         }
419         if (pgrp->epgrppbc == epgrppbcNONE)
420         {
421             /* Plain COM: sum the coordinates */
422             for (int d = 0; d < DIM; d++)
423             {
424                 sum_wmx[d] += wm * x[ii][d];
425             }
426             if (xp)
427             {
428                 for (int d = 0; d < DIM; d++)
429                 {
430                     sum_wmxp[d] += wm * xp[ii][d];
431                 }
432             }
433         }
434         else
435         {
436             rvec dx;
437
438             /* Sum the difference with the reference atom */
439             pbc_dx(pbc, x[ii], x_pbc, dx);
440             for (int d = 0; d < DIM; d++)
441             {
442                 sum_wmx[d] += wm * dx[d];
443             }
444             if (xp)
445             {
446                 /* For xp add the difference between xp and x to dx,
447                  * such that we use the same periodic image,
448                  * also when xp has a large displacement.
449                  */
450                 for (int d = 0; d < DIM; d++)
451                 {
452                     sum_wmxp[d] += wm * (dx[d] + xp[ii][d] - x[ii][d]);
453                 }
454             }
455         }
456     }
457
458     sum_com->sum_wm  = sum_wm;
459     sum_com->sum_wwm = sum_wwm;
460     copy_dvec(sum_wmx, sum_com->sum_wmx);
461     if (xp)
462     {
463         copy_dvec(sum_wmxp, sum_com->sum_wmxp);
464     }
465 }
466
467 static void sum_com_part_cosweight(const pull_group_work_t* pgrp,
468                                    int                      ind_start,
469                                    int                      ind_end,
470                                    int                      cosdim,
471                                    real                     twopi_box,
472                                    const rvec*              x,
473                                    const rvec*              xp,
474                                    const real*              mass,
475                                    ComSums*                 sum_com)
476 {
477     /* Cosine weighting geometry */
478     double sum_cm  = 0;
479     double sum_sm  = 0;
480     double sum_ccm = 0;
481     double sum_csm = 0;
482     double sum_ssm = 0;
483     double sum_cmp = 0;
484     double sum_smp = 0;
485
486     auto localAtomIndices = pgrp->atomSet.localIndex();
487
488     for (int i = ind_start; i < ind_end; i++)
489     {
490         int  ii = localAtomIndices[i];
491         real m  = mass[ii];
492         /* Determine cos and sin sums */
493         real cw = std::cos(x[ii][cosdim] * twopi_box);
494         real sw = std::sin(x[ii][cosdim] * twopi_box);
495         sum_cm += static_cast<double>(cw * m);
496         sum_sm += static_cast<double>(sw * m);
497         sum_ccm += static_cast<double>(cw * cw * m);
498         sum_csm += static_cast<double>(cw * sw * m);
499         sum_ssm += static_cast<double>(sw * sw * m);
500
501         if (xp != nullptr)
502         {
503             real cw = std::cos(xp[ii][cosdim] * twopi_box);
504             real sw = std::sin(xp[ii][cosdim] * twopi_box);
505             sum_cmp += static_cast<double>(cw * m);
506             sum_smp += static_cast<double>(sw * m);
507         }
508     }
509
510     sum_com->sum_cm  = sum_cm;
511     sum_com->sum_sm  = sum_sm;
512     sum_com->sum_ccm = sum_ccm;
513     sum_com->sum_csm = sum_csm;
514     sum_com->sum_ssm = sum_ssm;
515     sum_com->sum_cmp = sum_cmp;
516     sum_com->sum_smp = sum_smp;
517 }
518
519 /* calculates center of mass of selection index from all coordinates x */
520 void pull_calc_coms(const t_commrec* cr,
521                     pull_t*          pull,
522                     const t_mdatoms* md,
523                     t_pbc*           pbc,
524                     double           t,
525                     const rvec       x[],
526                     rvec*            xp)
527 {
528     real         twopi_box = 0;
529     pull_comm_t* comm;
530
531     comm = &pull->comm;
532
533     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
534                "pbcAtomBuffer should have size number of groups");
535     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
536                "comBuffer should have size #group*c_comBufferStride");
537
538     if (pull->bRefAt && pull->bSetPBCatoms)
539     {
540         pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
541
542         if (cr != nullptr && DOMAINDECOMP(cr))
543         {
544             /* We can keep these PBC reference coordinates fixed for nstlist
545              * steps, since atoms won't jump over PBC.
546              * This avoids a global reduction at the next nstlist-1 steps.
547              * Note that the exact values of the pbc reference coordinates
548              * are irrelevant, as long all atoms in the group are within
549              * half a box distance of the reference coordinate.
550              */
551             pull->bSetPBCatoms = FALSE;
552         }
553     }
554
555     if (pull->cosdim >= 0)
556     {
557         int m;
558
559         assert(pull->npbcdim <= DIM);
560
561         for (m = pull->cosdim + 1; m < pull->npbcdim; m++)
562         {
563             if (pbc->box[m][pull->cosdim] != 0)
564             {
565                 gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
566             }
567         }
568         twopi_box = 2.0 * M_PI / pbc->box[pull->cosdim][pull->cosdim];
569     }
570
571     for (size_t g = 0; g < pull->group.size(); g++)
572     {
573         pull_group_work_t* pgrp = &pull->group[g];
574
575         /* Cosine-weighted COMs behave different from all other weighted COMs
576          * in the sense that the weights depend on instantaneous coordinates,
577          * not on pre-set weights. Thus we resize the local weight buffer here.
578          */
579         if (pgrp->epgrppbc == epgrppbcCOS)
580         {
581             pgrp->localWeights.resize(pgrp->atomSet.localIndex().size());
582         }
583
584         auto comBuffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
585                                                 c_comBufferStride);
586
587         if (pgrp->needToCalcCom)
588         {
589             if (pgrp->epgrppbc != epgrppbcCOS)
590             {
591                 rvec x_pbc = { 0, 0, 0 };
592
593                 switch (pgrp->epgrppbc)
594                 {
595                     case epgrppbcREFAT:
596                         /* Set the pbc atom */
597                         copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
598                         break;
599                     case epgrppbcPREVSTEPCOM:
600                         /* Set the pbc reference to the COM of the group of the last step */
601                         copy_dvec_to_rvec(pgrp->x_prev_step, comm->pbcAtomBuffer[g]);
602                         copy_dvec_to_rvec(pgrp->x_prev_step, x_pbc);
603                 }
604
605                 /* The final sums should end up in comSums[0] */
606                 ComSums& comSumsTotal = pull->comSums[0];
607
608                 /* If we have a single-atom group the mass is irrelevant, so
609                  * we can remove the mass factor to avoid division by zero.
610                  * Note that with constraint pulling the mass does matter, but
611                  * in that case a check group mass != 0 has been done before.
612                  */
613                 if (pgrp->params.nat == 1 && pgrp->atomSet.numAtomsLocal() == 1
614                     && md->massT[pgrp->atomSet.localIndex()[0]] == 0)
615                 {
616                     GMX_ASSERT(xp == nullptr,
617                                "We should not have groups with zero mass with constraints, i.e. "
618                                "xp!=NULL");
619
620                     /* Copy the single atom coordinate */
621                     for (int d = 0; d < DIM; d++)
622                     {
623                         comSumsTotal.sum_wmx[d] = x[pgrp->atomSet.localIndex()[0]][d];
624                     }
625                     /* Set all mass factors to 1 to get the correct COM */
626                     comSumsTotal.sum_wm  = 1;
627                     comSumsTotal.sum_wwm = 1;
628                 }
629                 else if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
630                 {
631                     sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, xp, md->massT, pbc,
632                                  x_pbc, &comSumsTotal);
633                 }
634                 else
635                 {
636 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
637                     for (int t = 0; t < pull->nthreads; t++)
638                     {
639                         int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
640                         int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
641                         sum_com_part(pgrp, ind_start, ind_end, x, xp, md->massT, pbc, x_pbc,
642                                      &pull->comSums[t]);
643                     }
644
645                     /* Reduce the thread contributions to sum_com[0] */
646                     for (int t = 1; t < pull->nthreads; t++)
647                     {
648                         comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
649                         comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
650                         dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
651                         dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
652                     }
653                 }
654
655                 if (pgrp->localWeights.empty())
656                 {
657                     comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
658                 }
659
660                 /* Copy local sums to a buffer for global summing */
661                 copy_dvec(comSumsTotal.sum_wmx, comBuffer[0]);
662
663                 copy_dvec(comSumsTotal.sum_wmxp, comBuffer[1]);
664
665                 comBuffer[2][0] = comSumsTotal.sum_wm;
666                 comBuffer[2][1] = comSumsTotal.sum_wwm;
667                 comBuffer[2][2] = 0;
668             }
669             else
670             {
671                 /* Cosine weighting geometry.
672                  * This uses a slab of the system, thus we always have many
673                  * atoms in the pull groups. Therefore, always use threads.
674                  */
675 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
676                 for (int t = 0; t < pull->nthreads; t++)
677                 {
678                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
679                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
680                     sum_com_part_cosweight(pgrp, ind_start, ind_end, pull->cosdim, twopi_box, x, xp,
681                                            md->massT, &pull->comSums[t]);
682                 }
683
684                 /* Reduce the thread contributions to comSums[0] */
685                 ComSums& comSumsTotal = pull->comSums[0];
686                 for (int t = 1; t < pull->nthreads; t++)
687                 {
688                     comSumsTotal.sum_cm += pull->comSums[t].sum_cm;
689                     comSumsTotal.sum_sm += pull->comSums[t].sum_sm;
690                     comSumsTotal.sum_ccm += pull->comSums[t].sum_ccm;
691                     comSumsTotal.sum_csm += pull->comSums[t].sum_csm;
692                     comSumsTotal.sum_ssm += pull->comSums[t].sum_ssm;
693                     comSumsTotal.sum_cmp += pull->comSums[t].sum_cmp;
694                     comSumsTotal.sum_smp += pull->comSums[t].sum_smp;
695                 }
696
697                 /* Copy local sums to a buffer for global summing */
698                 comBuffer[0][0] = comSumsTotal.sum_cm;
699                 comBuffer[0][1] = comSumsTotal.sum_sm;
700                 comBuffer[0][2] = 0;
701                 comBuffer[1][0] = comSumsTotal.sum_ccm;
702                 comBuffer[1][1] = comSumsTotal.sum_csm;
703                 comBuffer[1][2] = comSumsTotal.sum_ssm;
704                 comBuffer[2][0] = comSumsTotal.sum_cmp;
705                 comBuffer[2][1] = comSumsTotal.sum_smp;
706                 comBuffer[2][2] = 0;
707             }
708         }
709         else
710         {
711             clear_dvec(comBuffer[0]);
712             clear_dvec(comBuffer[1]);
713             clear_dvec(comBuffer[2]);
714         }
715     }
716
717     pullAllReduce(cr, comm, pull->group.size() * c_comBufferStride * DIM,
718                   static_cast<double*>(comm->comBuffer[0]));
719
720     for (size_t g = 0; g < pull->group.size(); g++)
721     {
722         pull_group_work_t* pgrp;
723
724         pgrp = &pull->group[g];
725         if (pgrp->needToCalcCom)
726         {
727             GMX_ASSERT(pgrp->params.nat > 0,
728                        "Normal pull groups should have atoms, only group 0, which should have "
729                        "bCalcCom=FALSE has nat=0");
730
731             const auto comBuffer = gmx::constArrayRefFromArray(
732                     comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
733
734             if (pgrp->epgrppbc != epgrppbcCOS)
735             {
736                 double wmass, wwmass;
737                 int    m;
738
739                 /* Determine the inverse mass */
740                 wmass         = comBuffer[2][0];
741                 wwmass        = comBuffer[2][1];
742                 pgrp->mwscale = 1.0 / wmass;
743                 /* invtm==0 signals a frozen group, so then we should keep it zero */
744                 if (pgrp->invtm != 0)
745                 {
746                     pgrp->wscale = wmass / wwmass;
747                     pgrp->invtm  = wwmass / (wmass * wmass);
748                 }
749                 /* Divide by the total mass */
750                 for (m = 0; m < DIM; m++)
751                 {
752                     pgrp->x[m] = comBuffer[0][m] * pgrp->mwscale;
753                     if (xp)
754                     {
755                         pgrp->xp[m] = comBuffer[1][m] * pgrp->mwscale;
756                     }
757                     if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
758                     {
759                         pgrp->x[m] += comm->pbcAtomBuffer[g][m];
760                         if (xp)
761                         {
762                             pgrp->xp[m] += comm->pbcAtomBuffer[g][m];
763                         }
764                     }
765                 }
766             }
767             else
768             {
769                 /* Cosine weighting geometry */
770                 double csw, snw, wmass, wwmass;
771
772                 /* Determine the optimal location of the cosine weight */
773                 csw                   = comBuffer[0][0];
774                 snw                   = comBuffer[0][1];
775                 pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
776                 /* Set the weights for the local atoms */
777                 wmass  = sqrt(csw * csw + snw * snw);
778                 wwmass = (comBuffer[1][0] * csw * csw + comBuffer[1][1] * csw * snw
779                           + comBuffer[1][2] * snw * snw)
780                          / (wmass * wmass);
781
782                 pgrp->mwscale = 1.0 / wmass;
783                 pgrp->wscale  = wmass / wwmass;
784                 pgrp->invtm   = wwmass / (wmass * wmass);
785                 /* Set the weights for the local atoms */
786                 csw *= pgrp->invtm;
787                 snw *= pgrp->invtm;
788                 for (size_t i = 0; i < pgrp->atomSet.numAtomsLocal(); i++)
789                 {
790                     int ii                = pgrp->atomSet.localIndex()[i];
791                     pgrp->localWeights[i] = csw * std::cos(twopi_box * x[ii][pull->cosdim])
792                                             + snw * std::sin(twopi_box * x[ii][pull->cosdim]);
793                 }
794                 if (xp)
795                 {
796                     csw                    = comBuffer[2][0];
797                     snw                    = comBuffer[2][1];
798                     pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
799                 }
800             }
801             if (debug)
802             {
803                 fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
804             }
805         }
806     }
807
808     if (pull->bCylinder)
809     {
810         /* Calculate the COMs for the cyclinder reference groups */
811         make_cyl_refgrps(cr, pull, md, pbc, t, x);
812     }
813 }
814
815 using BoolVec = gmx::BasicVector<bool>;
816
817 /* Returns whether the pull group obeys the PBC restrictions */
818 static bool pullGroupObeysPbcRestrictions(const pull_group_work_t& group,
819                                           const BoolVec&           dimUsed,
820                                           const rvec*              x,
821                                           const t_pbc&             pbc,
822                                           const gmx::RVec&         x_pbc,
823                                           const real               pbcMargin)
824 {
825     /* Determine which dimensions are relevant for PBC */
826     BoolVec dimUsesPbc       = { false, false, false };
827     bool    pbcIsRectangular = true;
828     for (int d = 0; d < pbc.ndim_ePBC; d++)
829     {
830         if (dimUsed[d])
831         {
832             dimUsesPbc[d] = true;
833             /* All non-zero dimensions of vector v are involved in PBC */
834             for (int d2 = d + 1; d2 < pbc.ndim_ePBC; d2++)
835             {
836                 assert(d2 < DIM);
837                 if (pbc.box[d2][d] != 0)
838                 {
839                     dimUsesPbc[d2]   = true;
840                     pbcIsRectangular = false;
841                 }
842             }
843         }
844     }
845
846     rvec marginPerDim    = {};
847     real marginDistance2 = 0;
848     if (pbcIsRectangular)
849     {
850         /* Use margins for dimensions independently */
851         for (int d = 0; d < pbc.ndim_ePBC; d++)
852         {
853             marginPerDim[d] = pbcMargin * pbc.hbox_diag[d];
854         }
855     }
856     else
857     {
858         /* Check the total distance along the relevant dimensions */
859         for (int d = 0; d < pbc.ndim_ePBC; d++)
860         {
861             if (dimUsesPbc[d])
862             {
863                 marginDistance2 += pbcMargin * gmx::square(0.5) * norm2(pbc.box[d]);
864             }
865         }
866     }
867
868     auto localAtomIndices = group.atomSet.localIndex();
869     for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
870     {
871         rvec dx;
872         pbc_dx(&pbc, x[localAtomIndices[indexInSet]], x_pbc, dx);
873
874         bool atomIsTooFar = false;
875         if (pbcIsRectangular)
876         {
877             for (int d = 0; d < pbc.ndim_ePBC; d++)
878             {
879                 if (dimUsesPbc[d] && (dx[d] < -marginPerDim[d] || dx[d] > marginPerDim[d]))
880                 {
881                     atomIsTooFar = true;
882                 }
883             }
884         }
885         else
886         {
887             real pbcDistance2 = 0;
888             for (int d = 0; d < pbc.ndim_ePBC; d++)
889             {
890                 if (dimUsesPbc[d])
891                 {
892                     pbcDistance2 += gmx::square(dx[d]);
893                 }
894             }
895             atomIsTooFar = (pbcDistance2 > marginDistance2);
896         }
897         if (atomIsTooFar)
898         {
899             return false;
900         }
901     }
902
903     return true;
904 }
905
906 int pullCheckPbcWithinGroups(const pull_t& pull, const rvec* x, const t_pbc& pbc, real pbcMargin)
907 {
908     if (pbc.ePBC == epbcNONE)
909     {
910         return -1;
911     }
912
913     /* Determine what dimensions are used for each group by pull coordinates */
914     std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
915     for (size_t c = 0; c < pull.coord.size(); c++)
916     {
917         const t_pull_coord& coordParams = pull.coord[c].params;
918         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
919         {
920             for (int d = 0; d < DIM; d++)
921             {
922                 if (coordParams.dim[d] && !(coordParams.eGeom == epullgCYL && groupIndex == 0))
923                 {
924                     dimUsed[coordParams.group[groupIndex]][d] = true;
925                 }
926             }
927         }
928     }
929
930     /* Check PBC for every group that uses a PBC reference atom treatment */
931     for (size_t g = 0; g < pull.group.size(); g++)
932     {
933         const pull_group_work_t& group = pull.group[g];
934         if ((group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM)
935             && !pullGroupObeysPbcRestrictions(group, dimUsed[g], x, pbc, pull.comm.pbcAtomBuffer[g], pbcMargin))
936         {
937             return g;
938         }
939     }
940
941     return -1;
942 }
943
944 bool pullCheckPbcWithinGroup(const pull_t&                  pull,
945                              gmx::ArrayRef<const gmx::RVec> x,
946                              const t_pbc&                   pbc,
947                              int                            groupNr,
948                              real                           pbcMargin)
949 {
950     if (pbc.ePBC == epbcNONE)
951     {
952         return true;
953     }
954     GMX_ASSERT(groupNr < gmx::ssize(pull.group), "groupNr is out of range");
955
956     /* Check PBC if the group uses a PBC reference atom treatment. */
957     const pull_group_work_t& group = pull.group[groupNr];
958     if (group.epgrppbc != epgrppbcREFAT && group.epgrppbc != epgrppbcPREVSTEPCOM)
959     {
960         return true;
961     }
962
963     /* Determine what dimensions are used for each group by pull coordinates */
964     BoolVec dimUsed = { false, false, false };
965     for (size_t c = 0; c < pull.coord.size(); c++)
966     {
967         const t_pull_coord& coordParams = pull.coord[c].params;
968         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
969         {
970             if (coordParams.group[groupIndex] == groupNr)
971             {
972                 for (int d = 0; d < DIM; d++)
973                 {
974                     if (coordParams.dim[d] && !(coordParams.eGeom == epullgCYL && groupIndex == 0))
975                     {
976                         dimUsed[d] = true;
977                     }
978                 }
979             }
980         }
981     }
982
983     return (pullGroupObeysPbcRestrictions(group, dimUsed, as_rvec_array(x.data()), pbc,
984                                           pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
985 }
986
987 void setPrevStepPullComFromState(struct pull_t* pull, const t_state* state)
988 {
989     for (size_t g = 0; g < pull->group.size(); g++)
990     {
991         for (int j = 0; j < DIM; j++)
992         {
993             pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g * DIM + j];
994         }
995     }
996 }
997
998 void updatePrevStepPullCom(struct pull_t* pull, t_state* state)
999 {
1000     for (size_t g = 0; g < pull->group.size(); g++)
1001     {
1002         if (pull->group[g].needToCalcCom)
1003         {
1004             for (int j = 0; j < DIM; j++)
1005             {
1006                 pull->group[g].x_prev_step[j]          = pull->group[g].x[j];
1007                 state->pull_com_prev_step[g * DIM + j] = pull->group[g].x[j];
1008             }
1009         }
1010     }
1011 }
1012
1013 void allocStatePrevStepPullCom(t_state* state, const pull_t* pull)
1014 {
1015     if (!pull)
1016     {
1017         state->pull_com_prev_step.clear();
1018         return;
1019     }
1020     size_t ngroup = pull->group.size();
1021     if (state->pull_com_prev_step.size() / DIM != ngroup)
1022     {
1023         state->pull_com_prev_step.resize(ngroup * DIM, NAN);
1024     }
1025 }
1026
1027 void initPullComFromPrevStep(const t_commrec* cr, pull_t* pull, const t_mdatoms* md, t_pbc* pbc, const rvec x[])
1028 {
1029     pull_comm_t* comm   = &pull->comm;
1030     size_t       ngroup = pull->group.size();
1031
1032     if (!comm->bParticipate)
1033     {
1034         return;
1035     }
1036
1037     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
1038                "pbcAtomBuffer should have size number of groups");
1039     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
1040                "comBuffer should have size #group*c_comBufferStride");
1041
1042     pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
1043
1044     for (size_t g = 0; g < ngroup; g++)
1045     {
1046         pull_group_work_t* pgrp;
1047
1048         pgrp = &pull->group[g];
1049
1050         if (pgrp->needToCalcCom && pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1051         {
1052             GMX_ASSERT(pgrp->params.nat > 1,
1053                        "Groups with no atoms, or only one atom, should not "
1054                        "use the COM from the previous step as reference.");
1055
1056             rvec x_pbc = { 0, 0, 0 };
1057             copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
1058
1059             if (debug)
1060             {
1061                 fprintf(debug, "Initialising prev step COM of pull group %zu. x_pbc =", g);
1062                 for (int m = 0; m < DIM; m++)
1063                 {
1064                     fprintf(debug, " %f", x_pbc[m]);
1065                 }
1066                 fprintf(debug, "\n");
1067             }
1068
1069             /* The following is to a large extent similar to pull_calc_coms() */
1070
1071             /* The final sums should end up in sum_com[0] */
1072             ComSums& comSumsTotal = pull->comSums[0];
1073
1074             if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
1075             {
1076                 sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, nullptr, md->massT, pbc,
1077                              x_pbc, &comSumsTotal);
1078             }
1079             else
1080             {
1081 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
1082                 for (int t = 0; t < pull->nthreads; t++)
1083                 {
1084                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
1085                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
1086                     sum_com_part(pgrp, ind_start, ind_end, x, nullptr, md->massT, pbc, x_pbc,
1087                                  &pull->comSums[t]);
1088                 }
1089
1090                 /* Reduce the thread contributions to sum_com[0] */
1091                 for (int t = 1; t < pull->nthreads; t++)
1092                 {
1093                     comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
1094                     comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
1095                     dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
1096                     dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
1097                 }
1098             }
1099
1100             if (pgrp->localWeights.empty())
1101             {
1102                 comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
1103             }
1104
1105             /* Copy local sums to a buffer for global summing */
1106             auto localSums = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
1107                                                     c_comBufferStride);
1108
1109             localSums[0]    = comSumsTotal.sum_wmx;
1110             localSums[1]    = comSumsTotal.sum_wmxp;
1111             localSums[2][0] = comSumsTotal.sum_wm;
1112             localSums[2][1] = comSumsTotal.sum_wwm;
1113             localSums[2][2] = 0;
1114         }
1115     }
1116
1117     pullAllReduce(cr, comm, ngroup * c_comBufferStride * DIM, static_cast<double*>(comm->comBuffer[0]));
1118
1119     for (size_t g = 0; g < ngroup; g++)
1120     {
1121         pull_group_work_t* pgrp;
1122
1123         pgrp = &pull->group[g];
1124         if (pgrp->needToCalcCom)
1125         {
1126             if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1127             {
1128                 auto localSums = gmx::arrayRefFromArray(
1129                         comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
1130                 double wmass, wwmass;
1131
1132                 /* Determine the inverse mass */
1133                 wmass         = localSums[2][0];
1134                 wwmass        = localSums[2][1];
1135                 pgrp->mwscale = 1.0 / wmass;
1136                 /* invtm==0 signals a frozen group, so then we should keep it zero */
1137                 if (pgrp->invtm != 0)
1138                 {
1139                     pgrp->wscale = wmass / wwmass;
1140                     pgrp->invtm  = wwmass / (wmass * wmass);
1141                 }
1142                 /* Divide by the total mass */
1143                 for (int m = 0; m < DIM; m++)
1144                 {
1145                     pgrp->x[m] = localSums[0][m] * pgrp->mwscale;
1146                     pgrp->x[m] += comm->pbcAtomBuffer[g][m];
1147                 }
1148                 if (debug)
1149                 {
1150                     fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale,
1151                             pgrp->invtm);
1152                     fprintf(debug, "Initialising prev step COM of pull group %zu to", g);
1153                     for (int m = 0; m < DIM; m++)
1154                     {
1155                         fprintf(debug, " %f", pgrp->x[m]);
1156                     }
1157                     fprintf(debug, "\n");
1158                 }
1159                 copy_dvec(pgrp->x, pgrp->x_prev_step);
1160             }
1161         }
1162     }
1163 }