Narrow-down Intel-specific #ifdef's
[alexxy/gromacs.git] / src / gromacs / pulling / pullutil.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "config.h"
41
42 #include <cassert>
43 #include <cstdlib>
44
45 #include "gromacs/fileio/confio.h"
46 #include "gromacs/gmxlib/network.h"
47 #include "gromacs/math/functions.h"
48 #include "gromacs/math/utilities.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/commrec.h"
51 #include "gromacs/mdtypes/inputrec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/mdtypes/mdatom.h"
54 #include "gromacs/mdtypes/state.h"
55 #include "gromacs/pbcutil/pbc.h"
56 #include "gromacs/pulling/pull.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/gmxassert.h"
60 #include "gromacs/utility/real.h"
61 #include "gromacs/utility/smalloc.h"
62
63 #include "pull_internal.h"
64
65 #if GMX_MPI
66
67 // Helper function to deduce MPI datatype from the type of data
68 gmx_unused static MPI_Datatype mpiDatatype(const float gmx_unused* data)
69 {
70     return MPI_FLOAT;
71 }
72
73 // Helper function to deduce MPI datatype from the type of data
74 gmx_unused static MPI_Datatype mpiDatatype(const double gmx_unused* data)
75 {
76     return MPI_DOUBLE;
77 }
78
79 #endif // GMX_MPI
80
81 #if !GMX_DOUBLE
82 // Helper function; note that gmx_sum(d) should actually be templated
83 gmx_unused static void gmxAllReduce(int n, real* data, const t_commrec* cr)
84 {
85     gmx_sum(n, data, cr);
86 }
87 #endif
88
89 // Helper function; note that gmx_sum(d) should actually be templated
90 gmx_unused static void gmxAllReduce(int n, double* data, const t_commrec* cr)
91 {
92     gmx_sumd(n, data, cr);
93 }
94
95 // Reduce data of n elements over all ranks currently participating in pull
96 template<typename T>
97 static void pullAllReduce(const t_commrec* cr, pull_comm_t* comm, int n, T* data)
98 {
99     if (cr != nullptr && PAR(cr))
100     {
101         if (comm->bParticipateAll)
102         {
103             /* Sum the contributions over all DD ranks */
104             gmxAllReduce(n, data, cr);
105         }
106         else
107         {
108             /* Separate branch because gmx_sum uses cr->mpi_comm_mygroup */
109 #if GMX_MPI
110 #    if MPI_IN_PLACE_EXISTS
111             MPI_Allreduce(MPI_IN_PLACE, data, n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
112 #    else
113             std::vector<T> buf(n);
114
115             MPI_Allreduce(data, buf.data(), n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
116
117             /* Copy the result from the buffer to the input/output data */
118             for (int i = 0; i < n; i++)
119             {
120                 data[i] = buf[i];
121             }
122 #    endif
123 #else
124             gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
125 #endif
126         }
127     }
128 }
129
130 /* Copies the coordinates of the PBC atom of pgrp to x_pbc.
131  * When those coordinates are not available on this rank, clears x_pbc.
132  */
133 static void setPbcAtomCoords(const pull_group_work_t& pgrp, const rvec* x, rvec x_pbc)
134 {
135     if (pgrp.pbcAtomSet != nullptr)
136     {
137         if (pgrp.pbcAtomSet->numAtomsLocal() > 0)
138         {
139             /* We have the atom locally, copy its coordinates */
140             copy_rvec(x[pgrp.pbcAtomSet->localIndex()[0]], x_pbc);
141         }
142         else
143         {
144             /* Another rank has it, clear the coordinates for MPI_Allreduce */
145             clear_rvec(x_pbc);
146         }
147     }
148     else
149     {
150         copy_rvec(x[pgrp.params.pbcatom], x_pbc);
151     }
152 }
153
154 static void pull_set_pbcatoms(const t_commrec* cr, struct pull_t* pull, const rvec* x, gmx::ArrayRef<gmx::RVec> x_pbc)
155 {
156     int numPbcAtoms = 0;
157     for (size_t g = 0; g < pull->group.size(); g++)
158     {
159         const pull_group_work_t& group = pull->group[g];
160         if (group.needToCalcCom && (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM))
161         {
162             setPbcAtomCoords(pull->group[g], x, x_pbc[g]);
163             numPbcAtoms++;
164         }
165         else
166         {
167             clear_rvec(x_pbc[g]);
168         }
169     }
170
171     if (cr && PAR(cr) && numPbcAtoms > 0)
172     {
173         /* Sum over participating ranks to get x_pbc from the home ranks.
174          * This can be very expensive at high parallelization, so we only
175          * do this after each DD repartitioning.
176          */
177         pullAllReduce(cr, &pull->comm, pull->group.size() * DIM, static_cast<real*>(x_pbc[0]));
178     }
179 }
180
181 static void
182 make_cyl_refgrps(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, double t, const rvec* x)
183 {
184     pull_comm_t* comm = &pull->comm;
185
186     GMX_ASSERT(comm->cylinderBuffer.size() == pull->coord.size() * c_cylinderBufferStride,
187                "cylinderBuffer should have the correct size");
188
189     double inv_cyl_r2 = 1.0 / gmx::square(pull->params.cylinder_r);
190
191     /* loop over all groups to make a reference group for each*/
192     for (size_t c = 0; c < pull->coord.size(); c++)
193     {
194         pull_coord_work_t* pcrd;
195         double             sum_a, wmass, wwmass;
196         dvec               radf_fac0, radf_fac1;
197
198         pcrd = &pull->coord[c];
199
200         sum_a  = 0;
201         wmass  = 0;
202         wwmass = 0;
203         clear_dvec(radf_fac0);
204         clear_dvec(radf_fac1);
205
206         if (pcrd->params.eGeom == epullgCYL)
207         {
208             /* pref will be the same group for all pull coordinates */
209             const pull_group_work_t& pref  = pull->group[pcrd->params.group[0]];
210             const pull_group_work_t& pgrp  = pull->group[pcrd->params.group[1]];
211             pull_group_work_t&       pdyna = pull->dyna[c];
212             rvec                     direction;
213             copy_dvec_to_rvec(pcrd->spatialData.vec, direction);
214
215             /* Since we have not calculated the COM of the cylinder group yet,
216              * we calculate distances with respect to location of the pull
217              * group minus the reference position along the vector.
218              * here we already have the COM of the pull group. This resolves
219              * any PBC issues and we don't need to use a PBC-atom here.
220              */
221             if (pcrd->params.rate != 0)
222             {
223                 /* With rate=0, value_ref is set initially */
224                 pcrd->value_ref = pcrd->params.init + pcrd->params.rate * t;
225             }
226             rvec reference;
227             for (int m = 0; m < DIM; m++)
228             {
229                 reference[m] = pgrp.x[m] - pcrd->spatialData.vec[m] * pcrd->value_ref;
230             }
231
232             auto localAtomIndices = pref.atomSet.localIndex();
233
234             /* This actually only needs to be done at init or DD time,
235              * but resizing with the same size does not cause much overhead.
236              */
237             pdyna.localWeights.resize(localAtomIndices.size());
238             pdyna.mdw.resize(localAtomIndices.size());
239             pdyna.dv.resize(localAtomIndices.size());
240
241             /* loop over all atoms in the main ref group */
242             for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
243             {
244                 int  atomIndex = localAtomIndices[indexInSet];
245                 rvec dx;
246                 pbc_dx_aiuc(pbc, x[atomIndex], reference, dx);
247                 double axialLocation = iprod(direction, dx);
248                 dvec   radialLocation;
249                 double dr2 = 0;
250                 for (int m = 0; m < DIM; m++)
251                 {
252                     /* Determine the radial components */
253                     radialLocation[m] = dx[m] - axialLocation * direction[m];
254                     dr2 += gmx::square(radialLocation[m]);
255                 }
256                 double dr2_rel = dr2 * inv_cyl_r2;
257
258                 if (dr2_rel < 1)
259                 {
260                     /* add atom to sum of COM and to weight array */
261
262                     double mass = masses[atomIndex];
263                     /* The radial weight function is 1-2x^2+x^4,
264                      * where x=r/cylinder_r. Since this function depends
265                      * on the radial component, we also get radial forces
266                      * on both groups.
267                      */
268                     double weight                  = 1 + (-2 + dr2_rel) * dr2_rel;
269                     double dweight_r               = (-4 + 4 * dr2_rel) * inv_cyl_r2;
270                     pdyna.localWeights[indexInSet] = weight;
271                     sum_a += mass * weight * axialLocation;
272                     wmass += mass * weight;
273                     wwmass += mass * weight * weight;
274                     dvec mdw;
275                     dsvmul(mass * dweight_r, radialLocation, mdw);
276                     copy_dvec(mdw, pdyna.mdw[indexInSet]);
277                     /* Currently we only have the axial component of the
278                      * offset from the cylinder COM up to an unkown offset.
279                      * We add this offset after the reduction needed
280                      * for determining the COM of the cylinder group.
281                      */
282                     pdyna.dv[indexInSet] = axialLocation;
283                     for (int m = 0; m < DIM; m++)
284                     {
285                         radf_fac0[m] += mdw[m];
286                         radf_fac1[m] += mdw[m] * axialLocation;
287                     }
288                 }
289                 else
290                 {
291                     pdyna.localWeights[indexInSet] = 0;
292                 }
293             }
294         }
295
296         auto buffer = gmx::arrayRefFromArray(
297                 comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
298
299         buffer[0] = wmass;
300         buffer[1] = wwmass;
301         buffer[2] = sum_a;
302
303         buffer[3] = radf_fac0[XX];
304         buffer[4] = radf_fac0[YY];
305         buffer[5] = radf_fac0[ZZ];
306
307         buffer[6] = radf_fac1[XX];
308         buffer[7] = radf_fac1[YY];
309         buffer[8] = radf_fac1[ZZ];
310     }
311
312     if (cr != nullptr && PAR(cr))
313     {
314         /* Sum the contributions over the ranks */
315         pullAllReduce(cr, comm, pull->coord.size() * c_cylinderBufferStride, comm->cylinderBuffer.data());
316     }
317
318     for (size_t c = 0; c < pull->coord.size(); c++)
319     {
320         pull_coord_work_t* pcrd;
321
322         pcrd = &pull->coord[c];
323
324         if (pcrd->params.eGeom == epullgCYL)
325         {
326             pull_group_work_t*    pdyna       = &pull->dyna[c];
327             pull_group_work_t*    pgrp        = &pull->group[pcrd->params.group[1]];
328             PullCoordSpatialData& spatialData = pcrd->spatialData;
329
330             auto buffer = gmx::constArrayRefFromArray(
331                     comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
332             double wmass   = buffer[0];
333             double wwmass  = buffer[1];
334             pdyna->mwscale = 1.0 / wmass;
335             /* Cylinder pulling can't be used with constraints, but we set
336              * wscale and invtm anyhow, in case someone would like to use them.
337              */
338             pdyna->wscale = wmass / wwmass;
339             pdyna->invtm  = wwmass / (wmass * wmass);
340
341             /* We store the deviation of the COM from the reference location
342              * used above, since we need it when we apply the radial forces
343              * to the atoms in the cylinder group.
344              */
345             spatialData.cyl_dev = 0;
346             for (int m = 0; m < DIM; m++)
347             {
348                 double reference = pgrp->x[m] - spatialData.vec[m] * pcrd->value_ref;
349                 double dist      = -spatialData.vec[m] * buffer[2] * pdyna->mwscale;
350                 pdyna->x[m]      = reference - dist;
351                 spatialData.cyl_dev += dist;
352             }
353             /* Now we know the exact COM of the cylinder reference group,
354              * we can determine the radial force factor (ffrad) that when
355              * multiplied with the axial pull force will give the radial
356              * force on the pulled (non-cylinder) group.
357              */
358             for (int m = 0; m < DIM; m++)
359             {
360                 spatialData.ffrad[m] = (buffer[6 + m] + buffer[3 + m] * spatialData.cyl_dev) / wmass;
361             }
362
363             if (debug)
364             {
365                 fprintf(debug, "Pull cylinder group %zu:%8.3f%8.3f%8.3f m:%8.3f\n", c, pdyna->x[0],
366                         pdyna->x[1], pdyna->x[2], 1.0 / pdyna->invtm);
367                 fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n", spatialData.ffrad[XX],
368                         spatialData.ffrad[YY], spatialData.ffrad[ZZ]);
369             }
370         }
371     }
372 }
373
374 static double atan2_0_2pi(double y, double x)
375 {
376     double a;
377
378     a = atan2(y, x);
379     if (a < 0)
380     {
381         a += 2.0 * M_PI;
382     }
383     return a;
384 }
385
386 static void sum_com_part(const pull_group_work_t* pgrp,
387                          int                      ind_start,
388                          int                      ind_end,
389                          const rvec*              x,
390                          const rvec*              xp,
391                          const real*              mass,
392                          const t_pbc*             pbc,
393                          const rvec               x_pbc,
394                          ComSums*                 sum_com)
395 {
396     double sum_wm   = 0;
397     double sum_wwm  = 0;
398     dvec   sum_wmx  = { 0, 0, 0 };
399     dvec   sum_wmxp = { 0, 0, 0 };
400
401     auto localAtomIndices = pgrp->atomSet.localIndex();
402     for (int i = ind_start; i < ind_end; i++)
403     {
404         int  ii = localAtomIndices[i];
405         real wm;
406         if (pgrp->localWeights.empty())
407         {
408             wm = mass[ii];
409             sum_wm += wm;
410         }
411         else
412         {
413             real w;
414
415             w  = pgrp->localWeights[i];
416             wm = w * mass[ii];
417             sum_wm += wm;
418             sum_wwm += wm * w;
419         }
420         if (pgrp->epgrppbc == epgrppbcNONE)
421         {
422             /* Plain COM: sum the coordinates */
423             for (int d = 0; d < DIM; d++)
424             {
425                 sum_wmx[d] += wm * x[ii][d];
426             }
427             if (xp)
428             {
429                 for (int d = 0; d < DIM; d++)
430                 {
431                     sum_wmxp[d] += wm * xp[ii][d];
432                 }
433             }
434         }
435         else
436         {
437             rvec dx;
438
439             /* Sum the difference with the reference atom */
440             pbc_dx(pbc, x[ii], x_pbc, dx);
441             for (int d = 0; d < DIM; d++)
442             {
443                 sum_wmx[d] += wm * dx[d];
444             }
445             if (xp)
446             {
447                 /* For xp add the difference between xp and x to dx,
448                  * such that we use the same periodic image,
449                  * also when xp has a large displacement.
450                  */
451                 for (int d = 0; d < DIM; d++)
452                 {
453                     sum_wmxp[d] += wm * (dx[d] + xp[ii][d] - x[ii][d]);
454                 }
455             }
456         }
457     }
458
459     sum_com->sum_wm  = sum_wm;
460     sum_com->sum_wwm = sum_wwm;
461     copy_dvec(sum_wmx, sum_com->sum_wmx);
462     if (xp)
463     {
464         copy_dvec(sum_wmxp, sum_com->sum_wmxp);
465     }
466 }
467
468 static void sum_com_part_cosweight(const pull_group_work_t* pgrp,
469                                    int                      ind_start,
470                                    int                      ind_end,
471                                    int                      cosdim,
472                                    real                     twopi_box,
473                                    const rvec*              x,
474                                    const rvec*              xp,
475                                    const real*              mass,
476                                    ComSums*                 sum_com)
477 {
478     /* Cosine weighting geometry */
479     double sum_cm  = 0;
480     double sum_sm  = 0;
481     double sum_ccm = 0;
482     double sum_csm = 0;
483     double sum_ssm = 0;
484     double sum_cmp = 0;
485     double sum_smp = 0;
486
487     auto localAtomIndices = pgrp->atomSet.localIndex();
488
489     for (int i = ind_start; i < ind_end; i++)
490     {
491         int  ii = localAtomIndices[i];
492         real m  = mass[ii];
493         /* Determine cos and sin sums */
494         real cw = std::cos(x[ii][cosdim] * twopi_box);
495         real sw = std::sin(x[ii][cosdim] * twopi_box);
496         sum_cm += static_cast<double>(cw * m);
497         sum_sm += static_cast<double>(sw * m);
498         sum_ccm += static_cast<double>(cw * cw * m);
499         sum_csm += static_cast<double>(cw * sw * m);
500         sum_ssm += static_cast<double>(sw * sw * m);
501
502         if (xp != nullptr)
503         {
504             real cw = std::cos(xp[ii][cosdim] * twopi_box);
505             real sw = std::sin(xp[ii][cosdim] * twopi_box);
506             sum_cmp += static_cast<double>(cw * m);
507             sum_smp += static_cast<double>(sw * m);
508         }
509     }
510
511     sum_com->sum_cm  = sum_cm;
512     sum_com->sum_sm  = sum_sm;
513     sum_com->sum_ccm = sum_ccm;
514     sum_com->sum_csm = sum_csm;
515     sum_com->sum_ssm = sum_ssm;
516     sum_com->sum_cmp = sum_cmp;
517     sum_com->sum_smp = sum_smp;
518 }
519
520 /* calculates center of mass of selection index from all coordinates x */
521 // Compiler segfault with 2019_update_5 and 2020_initial
522 #if defined(__INTEL_COMPILER)                                          \
523         && ((__INTEL_COMPILER == 1900 && __INTEL_COMPILER_UPDATE >= 5) \
524             || (__INTEL_COMPILER >= 1910 && __INTEL_COMPILER < 2021))
525 #    pragma intel optimization_level 2
526 #endif
527 void pull_calc_coms(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, double t, const rvec x[], rvec* xp)
528 {
529     real         twopi_box = 0;
530     pull_comm_t* comm;
531
532     comm = &pull->comm;
533
534     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
535                "pbcAtomBuffer should have size number of groups");
536     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
537                "comBuffer should have size #group*c_comBufferStride");
538
539     if (pull->bRefAt && pull->bSetPBCatoms)
540     {
541         pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
542
543         if (cr != nullptr && DOMAINDECOMP(cr))
544         {
545             /* We can keep these PBC reference coordinates fixed for nstlist
546              * steps, since atoms won't jump over PBC.
547              * This avoids a global reduction at the next nstlist-1 steps.
548              * Note that the exact values of the pbc reference coordinates
549              * are irrelevant, as long all atoms in the group are within
550              * half a box distance of the reference coordinate.
551              */
552             pull->bSetPBCatoms = FALSE;
553         }
554     }
555
556     if (pull->cosdim >= 0)
557     {
558         int m;
559
560         assert(pull->npbcdim <= DIM);
561
562         for (m = pull->cosdim + 1; m < pull->npbcdim; m++)
563         {
564             if (pbc->box[m][pull->cosdim] != 0)
565             {
566                 gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
567             }
568         }
569         twopi_box = 2.0 * M_PI / pbc->box[pull->cosdim][pull->cosdim];
570     }
571
572     for (size_t g = 0; g < pull->group.size(); g++)
573     {
574         pull_group_work_t* pgrp = &pull->group[g];
575
576         /* Cosine-weighted COMs behave different from all other weighted COMs
577          * in the sense that the weights depend on instantaneous coordinates,
578          * not on pre-set weights. Thus we resize the local weight buffer here.
579          */
580         if (pgrp->epgrppbc == epgrppbcCOS)
581         {
582             pgrp->localWeights.resize(pgrp->atomSet.localIndex().size());
583         }
584
585         auto comBuffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
586                                                 c_comBufferStride);
587
588         if (pgrp->needToCalcCom)
589         {
590             if (pgrp->epgrppbc != epgrppbcCOS)
591             {
592                 rvec x_pbc = { 0, 0, 0 };
593
594                 switch (pgrp->epgrppbc)
595                 {
596                     case epgrppbcREFAT:
597                         /* Set the pbc atom */
598                         copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
599                         break;
600                     case epgrppbcPREVSTEPCOM:
601                         /* Set the pbc reference to the COM of the group of the last step */
602                         copy_dvec_to_rvec(pgrp->x_prev_step, comm->pbcAtomBuffer[g]);
603                         copy_dvec_to_rvec(pgrp->x_prev_step, x_pbc);
604                 }
605
606                 /* The final sums should end up in comSums[0] */
607                 ComSums& comSumsTotal = pull->comSums[0];
608
609                 /* If we have a single-atom group the mass is irrelevant, so
610                  * we can remove the mass factor to avoid division by zero.
611                  * Note that with constraint pulling the mass does matter, but
612                  * in that case a check group mass != 0 has been done before.
613                  */
614                 if (pgrp->params.ind.size() == 1 && pgrp->atomSet.numAtomsLocal() == 1
615                     && masses[pgrp->atomSet.localIndex()[0]] == 0)
616                 {
617                     GMX_ASSERT(xp == nullptr,
618                                "We should not have groups with zero mass with constraints, i.e. "
619                                "xp!=NULL");
620
621                     /* Copy the single atom coordinate */
622                     for (int d = 0; d < DIM; d++)
623                     {
624                         comSumsTotal.sum_wmx[d] = x[pgrp->atomSet.localIndex()[0]][d];
625                     }
626                     /* Set all mass factors to 1 to get the correct COM */
627                     comSumsTotal.sum_wm  = 1;
628                     comSumsTotal.sum_wwm = 1;
629                 }
630                 else if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
631                 {
632                     sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, xp, masses, pbc, x_pbc,
633                                  &comSumsTotal);
634                 }
635                 else
636                 {
637 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
638                     for (int t = 0; t < pull->nthreads; t++)
639                     {
640                         int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
641                         int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
642                         sum_com_part(pgrp, ind_start, ind_end, x, xp, masses, pbc, x_pbc,
643                                      &pull->comSums[t]);
644                     }
645
646                     /* Reduce the thread contributions to sum_com[0] */
647                     for (int t = 1; t < pull->nthreads; t++)
648                     {
649                         comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
650                         comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
651                         dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
652                         dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
653                     }
654                 }
655
656                 if (pgrp->localWeights.empty())
657                 {
658                     comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
659                 }
660
661                 /* Copy local sums to a buffer for global summing */
662                 copy_dvec(comSumsTotal.sum_wmx, comBuffer[0]);
663
664                 copy_dvec(comSumsTotal.sum_wmxp, comBuffer[1]);
665
666                 comBuffer[2][0] = comSumsTotal.sum_wm;
667                 comBuffer[2][1] = comSumsTotal.sum_wwm;
668                 comBuffer[2][2] = 0;
669             }
670             else
671             {
672                 /* Cosine weighting geometry.
673                  * This uses a slab of the system, thus we always have many
674                  * atoms in the pull groups. Therefore, always use threads.
675                  */
676 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
677                 for (int t = 0; t < pull->nthreads; t++)
678                 {
679                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
680                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
681                     sum_com_part_cosweight(pgrp, ind_start, ind_end, pull->cosdim, twopi_box, x, xp,
682                                            masses, &pull->comSums[t]);
683                 }
684
685                 /* Reduce the thread contributions to comSums[0] */
686                 ComSums& comSumsTotal = pull->comSums[0];
687                 for (int t = 1; t < pull->nthreads; t++)
688                 {
689                     comSumsTotal.sum_cm += pull->comSums[t].sum_cm;
690                     comSumsTotal.sum_sm += pull->comSums[t].sum_sm;
691                     comSumsTotal.sum_ccm += pull->comSums[t].sum_ccm;
692                     comSumsTotal.sum_csm += pull->comSums[t].sum_csm;
693                     comSumsTotal.sum_ssm += pull->comSums[t].sum_ssm;
694                     comSumsTotal.sum_cmp += pull->comSums[t].sum_cmp;
695                     comSumsTotal.sum_smp += pull->comSums[t].sum_smp;
696                 }
697
698                 /* Copy local sums to a buffer for global summing */
699                 comBuffer[0][0] = comSumsTotal.sum_cm;
700                 comBuffer[0][1] = comSumsTotal.sum_sm;
701                 comBuffer[0][2] = 0;
702                 comBuffer[1][0] = comSumsTotal.sum_ccm;
703                 comBuffer[1][1] = comSumsTotal.sum_csm;
704                 comBuffer[1][2] = comSumsTotal.sum_ssm;
705                 comBuffer[2][0] = comSumsTotal.sum_cmp;
706                 comBuffer[2][1] = comSumsTotal.sum_smp;
707                 comBuffer[2][2] = 0;
708             }
709         }
710         else
711         {
712             clear_dvec(comBuffer[0]);
713             clear_dvec(comBuffer[1]);
714             clear_dvec(comBuffer[2]);
715         }
716     }
717
718     pullAllReduce(cr, comm, pull->group.size() * c_comBufferStride * DIM,
719                   static_cast<double*>(comm->comBuffer[0]));
720
721     for (size_t g = 0; g < pull->group.size(); g++)
722     {
723         pull_group_work_t* pgrp;
724
725         pgrp = &pull->group[g];
726         if (pgrp->needToCalcCom)
727         {
728             GMX_ASSERT(!pgrp->params.ind.empty(),
729                        "Normal pull groups should have atoms, only group 0, which should have "
730                        "bCalcCom=FALSE has nat=0");
731
732             const auto comBuffer = gmx::constArrayRefFromArray(
733                     comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
734
735             if (pgrp->epgrppbc != epgrppbcCOS)
736             {
737                 double wmass, wwmass;
738                 int    m;
739
740                 /* Determine the inverse mass */
741                 wmass         = comBuffer[2][0];
742                 wwmass        = comBuffer[2][1];
743                 pgrp->mwscale = 1.0 / wmass;
744                 /* invtm==0 signals a frozen group, so then we should keep it zero */
745                 if (pgrp->invtm != 0)
746                 {
747                     pgrp->wscale = wmass / wwmass;
748                     pgrp->invtm  = wwmass / (wmass * wmass);
749                 }
750                 /* Divide by the total mass */
751                 for (m = 0; m < DIM; m++)
752                 {
753                     pgrp->x[m] = comBuffer[0][m] * pgrp->mwscale;
754                     if (xp)
755                     {
756                         pgrp->xp[m] = comBuffer[1][m] * pgrp->mwscale;
757                     }
758                     if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
759                     {
760                         pgrp->x[m] += comm->pbcAtomBuffer[g][m];
761                         if (xp)
762                         {
763                             pgrp->xp[m] += comm->pbcAtomBuffer[g][m];
764                         }
765                     }
766                 }
767             }
768             else
769             {
770                 /* Cosine weighting geometry */
771                 double csw, snw, wmass, wwmass;
772
773                 /* Determine the optimal location of the cosine weight */
774                 csw                   = comBuffer[0][0];
775                 snw                   = comBuffer[0][1];
776                 pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
777                 /* Set the weights for the local atoms */
778                 wmass  = sqrt(csw * csw + snw * snw);
779                 wwmass = (comBuffer[1][0] * csw * csw + comBuffer[1][1] * csw * snw
780                           + comBuffer[1][2] * snw * snw)
781                          / (wmass * wmass);
782
783                 pgrp->mwscale = 1.0 / wmass;
784                 pgrp->wscale  = wmass / wwmass;
785                 pgrp->invtm   = wwmass / (wmass * wmass);
786                 /* Set the weights for the local atoms */
787                 csw *= pgrp->invtm;
788                 snw *= pgrp->invtm;
789                 for (size_t i = 0; i < pgrp->atomSet.numAtomsLocal(); i++)
790                 {
791                     int ii                = pgrp->atomSet.localIndex()[i];
792                     pgrp->localWeights[i] = csw * std::cos(twopi_box * x[ii][pull->cosdim])
793                                             + snw * std::sin(twopi_box * x[ii][pull->cosdim]);
794                 }
795                 if (xp)
796                 {
797                     csw                    = comBuffer[2][0];
798                     snw                    = comBuffer[2][1];
799                     pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
800                 }
801             }
802             if (debug)
803             {
804                 fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
805             }
806         }
807     }
808
809     if (pull->bCylinder)
810     {
811         /* Calculate the COMs for the cyclinder reference groups */
812         make_cyl_refgrps(cr, pull, masses, pbc, t, x);
813     }
814 }
815
816 using BoolVec = gmx::BasicVector<bool>;
817
818 /* Returns whether the pull group obeys the PBC restrictions */
819 static bool pullGroupObeysPbcRestrictions(const pull_group_work_t& group,
820                                           const BoolVec&           dimUsed,
821                                           const rvec*              x,
822                                           const t_pbc&             pbc,
823                                           const gmx::RVec&         x_pbc,
824                                           const real               pbcMargin)
825 {
826     /* Determine which dimensions are relevant for PBC */
827     BoolVec dimUsesPbc       = { false, false, false };
828     bool    pbcIsRectangular = true;
829     for (int d = 0; d < pbc.ndim_ePBC; d++)
830     {
831         if (dimUsed[d])
832         {
833             dimUsesPbc[d] = true;
834             /* All non-zero dimensions of vector v are involved in PBC */
835             for (int d2 = d + 1; d2 < pbc.ndim_ePBC; d2++)
836             {
837                 assert(d2 < DIM);
838                 if (pbc.box[d2][d] != 0)
839                 {
840                     dimUsesPbc[d2]   = true;
841                     pbcIsRectangular = false;
842                 }
843             }
844         }
845     }
846
847     rvec marginPerDim    = {};
848     real marginDistance2 = 0;
849     if (pbcIsRectangular)
850     {
851         /* Use margins for dimensions independently */
852         for (int d = 0; d < pbc.ndim_ePBC; d++)
853         {
854             marginPerDim[d] = pbcMargin * pbc.hbox_diag[d];
855         }
856     }
857     else
858     {
859         /* Check the total distance along the relevant dimensions */
860         for (int d = 0; d < pbc.ndim_ePBC; d++)
861         {
862             if (dimUsesPbc[d])
863             {
864                 marginDistance2 += pbcMargin * gmx::square(0.5) * norm2(pbc.box[d]);
865             }
866         }
867     }
868
869     auto localAtomIndices = group.atomSet.localIndex();
870     for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
871     {
872         rvec dx;
873         pbc_dx(&pbc, x[localAtomIndices[indexInSet]], x_pbc, dx);
874
875         bool atomIsTooFar = false;
876         if (pbcIsRectangular)
877         {
878             for (int d = 0; d < pbc.ndim_ePBC; d++)
879             {
880                 if (dimUsesPbc[d] && (dx[d] < -marginPerDim[d] || dx[d] > marginPerDim[d]))
881                 {
882                     atomIsTooFar = true;
883                 }
884             }
885         }
886         else
887         {
888             real pbcDistance2 = 0;
889             for (int d = 0; d < pbc.ndim_ePBC; d++)
890             {
891                 if (dimUsesPbc[d])
892                 {
893                     pbcDistance2 += gmx::square(dx[d]);
894                 }
895             }
896             atomIsTooFar = (pbcDistance2 > marginDistance2);
897         }
898         if (atomIsTooFar)
899         {
900             return false;
901         }
902     }
903
904     return true;
905 }
906
907 int pullCheckPbcWithinGroups(const pull_t& pull, const rvec* x, const t_pbc& pbc, real pbcMargin)
908 {
909     if (pbc.pbcType == PbcType::No)
910     {
911         return -1;
912     }
913
914     /* Determine what dimensions are used for each group by pull coordinates */
915     std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
916     for (size_t c = 0; c < pull.coord.size(); c++)
917     {
918         const t_pull_coord& coordParams = pull.coord[c].params;
919         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
920         {
921             for (int d = 0; d < DIM; d++)
922             {
923                 if (coordParams.dim[d] && !(coordParams.eGeom == epullgCYL && groupIndex == 0))
924                 {
925                     dimUsed[coordParams.group[groupIndex]][d] = true;
926                 }
927             }
928         }
929     }
930
931     /* Check PBC for every group that uses a PBC reference atom treatment */
932     for (size_t g = 0; g < pull.group.size(); g++)
933     {
934         const pull_group_work_t& group = pull.group[g];
935         if ((group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM)
936             && !pullGroupObeysPbcRestrictions(group, dimUsed[g], x, pbc, pull.comm.pbcAtomBuffer[g], pbcMargin))
937         {
938             return g;
939         }
940     }
941
942     return -1;
943 }
944
945 bool pullCheckPbcWithinGroup(const pull_t&                  pull,
946                              gmx::ArrayRef<const gmx::RVec> x,
947                              const t_pbc&                   pbc,
948                              int                            groupNr,
949                              real                           pbcMargin)
950 {
951     if (pbc.pbcType == PbcType::No)
952     {
953         return true;
954     }
955     GMX_ASSERT(groupNr < gmx::ssize(pull.group), "groupNr is out of range");
956
957     /* Check PBC if the group uses a PBC reference atom treatment. */
958     const pull_group_work_t& group = pull.group[groupNr];
959     if (group.epgrppbc != epgrppbcREFAT && group.epgrppbc != epgrppbcPREVSTEPCOM)
960     {
961         return true;
962     }
963
964     /* Determine what dimensions are used for each group by pull coordinates */
965     BoolVec dimUsed = { false, false, false };
966     for (size_t c = 0; c < pull.coord.size(); c++)
967     {
968         const t_pull_coord& coordParams = pull.coord[c].params;
969         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
970         {
971             if (coordParams.group[groupIndex] == groupNr)
972             {
973                 for (int d = 0; d < DIM; d++)
974                 {
975                     if (coordParams.dim[d] && !(coordParams.eGeom == epullgCYL && groupIndex == 0))
976                     {
977                         dimUsed[d] = true;
978                     }
979                 }
980             }
981         }
982     }
983
984     return (pullGroupObeysPbcRestrictions(group, dimUsed, as_rvec_array(x.data()), pbc,
985                                           pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
986 }
987
988 void setPrevStepPullComFromState(struct pull_t* pull, const t_state* state)
989 {
990     for (size_t g = 0; g < pull->group.size(); g++)
991     {
992         for (int j = 0; j < DIM; j++)
993         {
994             pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g * DIM + j];
995         }
996     }
997 }
998
999 void updatePrevStepPullCom(struct pull_t* pull, t_state* state)
1000 {
1001     for (size_t g = 0; g < pull->group.size(); g++)
1002     {
1003         if (pull->group[g].needToCalcCom)
1004         {
1005             for (int j = 0; j < DIM; j++)
1006             {
1007                 pull->group[g].x_prev_step[j]          = pull->group[g].x[j];
1008                 state->pull_com_prev_step[g * DIM + j] = pull->group[g].x[j];
1009             }
1010         }
1011     }
1012 }
1013
1014 void allocStatePrevStepPullCom(t_state* state, const pull_t* pull)
1015 {
1016     if (!pull)
1017     {
1018         state->pull_com_prev_step.clear();
1019         return;
1020     }
1021     size_t ngroup = pull->group.size();
1022     if (state->pull_com_prev_step.size() / DIM != ngroup)
1023     {
1024         state->pull_com_prev_step.resize(ngroup * DIM, NAN);
1025     }
1026 }
1027
1028 void initPullComFromPrevStep(const t_commrec* cr, pull_t* pull, const real* masses, t_pbc* pbc, const rvec x[])
1029 {
1030     pull_comm_t* comm   = &pull->comm;
1031     size_t       ngroup = pull->group.size();
1032
1033     if (!comm->bParticipate)
1034     {
1035         return;
1036     }
1037
1038     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
1039                "pbcAtomBuffer should have size number of groups");
1040     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
1041                "comBuffer should have size #group*c_comBufferStride");
1042
1043     pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
1044
1045     for (size_t g = 0; g < ngroup; g++)
1046     {
1047         pull_group_work_t* pgrp;
1048
1049         pgrp = &pull->group[g];
1050
1051         if (pgrp->needToCalcCom && pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1052         {
1053             GMX_ASSERT(pgrp->params.ind.size() > 1,
1054                        "Groups with no atoms, or only one atom, should not "
1055                        "use the COM from the previous step as reference.");
1056
1057             rvec x_pbc = { 0, 0, 0 };
1058             copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
1059
1060             if (debug)
1061             {
1062                 fprintf(debug, "Initialising prev step COM of pull group %zu. x_pbc =", g);
1063                 for (int m = 0; m < DIM; m++)
1064                 {
1065                     fprintf(debug, " %f", x_pbc[m]);
1066                 }
1067                 fprintf(debug, "\n");
1068             }
1069
1070             /* The following is to a large extent similar to pull_calc_coms() */
1071
1072             /* The final sums should end up in sum_com[0] */
1073             ComSums& comSumsTotal = pull->comSums[0];
1074
1075             if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
1076             {
1077                 sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, nullptr, masses, pbc, x_pbc,
1078                              &comSumsTotal);
1079             }
1080             else
1081             {
1082 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
1083                 for (int t = 0; t < pull->nthreads; t++)
1084                 {
1085                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
1086                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
1087                     sum_com_part(pgrp, ind_start, ind_end, x, nullptr, masses, pbc, x_pbc,
1088                                  &pull->comSums[t]);
1089                 }
1090
1091                 /* Reduce the thread contributions to sum_com[0] */
1092                 for (int t = 1; t < pull->nthreads; t++)
1093                 {
1094                     comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
1095                     comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
1096                     dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
1097                     dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
1098                 }
1099             }
1100
1101             if (pgrp->localWeights.empty())
1102             {
1103                 comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
1104             }
1105
1106             /* Copy local sums to a buffer for global summing */
1107             auto localSums = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
1108                                                     c_comBufferStride);
1109
1110             localSums[0]    = comSumsTotal.sum_wmx;
1111             localSums[1]    = comSumsTotal.sum_wmxp;
1112             localSums[2][0] = comSumsTotal.sum_wm;
1113             localSums[2][1] = comSumsTotal.sum_wwm;
1114             localSums[2][2] = 0;
1115         }
1116     }
1117
1118     pullAllReduce(cr, comm, ngroup * c_comBufferStride * DIM, static_cast<double*>(comm->comBuffer[0]));
1119
1120     for (size_t g = 0; g < ngroup; g++)
1121     {
1122         pull_group_work_t* pgrp;
1123
1124         pgrp = &pull->group[g];
1125         if (pgrp->needToCalcCom)
1126         {
1127             if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1128             {
1129                 auto localSums = gmx::arrayRefFromArray(
1130                         comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
1131                 double wmass, wwmass;
1132
1133                 /* Determine the inverse mass */
1134                 wmass         = localSums[2][0];
1135                 wwmass        = localSums[2][1];
1136                 pgrp->mwscale = 1.0 / wmass;
1137                 /* invtm==0 signals a frozen group, so then we should keep it zero */
1138                 if (pgrp->invtm != 0)
1139                 {
1140                     pgrp->wscale = wmass / wwmass;
1141                     pgrp->invtm  = wwmass / (wmass * wmass);
1142                 }
1143                 /* Divide by the total mass */
1144                 for (int m = 0; m < DIM; m++)
1145                 {
1146                     pgrp->x[m] = localSums[0][m] * pgrp->mwscale;
1147                     pgrp->x[m] += comm->pbcAtomBuffer[g][m];
1148                 }
1149                 if (debug)
1150                 {
1151                     fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale,
1152                             pgrp->invtm);
1153                     fprintf(debug, "Initialising prev step COM of pull group %zu to", g);
1154                     for (int m = 0; m < DIM; m++)
1155                     {
1156                         fprintf(debug, " %f", pgrp->x[m]);
1157                     }
1158                     fprintf(debug, "\n");
1159                 }
1160                 copy_dvec(pgrp->x, pgrp->x_prev_step);
1161             }
1162         }
1163     }
1164 }