Allow using COM of previous step as PBC reference
[alexxy/gromacs.git] / src / gromacs / pulling / pullutil.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "config.h"
40
41 #include <cassert>
42 #include <cstdlib>
43
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/gmxlib/network.h"
46 #include "gromacs/math/functions.h"
47 #include "gromacs/math/utilities.h"
48 #include "gromacs/math/vec.h"
49 #include "gromacs/mdtypes/commrec.h"
50 #include "gromacs/mdtypes/inputrec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/mdtypes/mdatom.h"
53 #include "gromacs/mdtypes/state.h"
54 #include "gromacs/pbcutil/pbc.h"
55 #include "gromacs/pulling/pull.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/futil.h"
58 #include "gromacs/utility/gmxassert.h"
59 #include "gromacs/utility/real.h"
60 #include "gromacs/utility/smalloc.h"
61
62 #include "pull_internal.h"
63
64 #if GMX_MPI
65
66 // Helper function to deduce MPI datatype from the type of data
67 gmx_unused static MPI_Datatype mpiDatatype(const float gmx_unused *data)
68 {
69     return MPI_FLOAT;
70 }
71
72 // Helper function to deduce MPI datatype from the type of data
73 gmx_unused static MPI_Datatype mpiDatatype(const double gmx_unused *data)
74 {
75     return MPI_DOUBLE;
76 }
77
78 #endif // GMX_MPI
79
80 #if !GMX_DOUBLE
81 // Helper function; note that gmx_sum(d) should actually be templated
82 gmx_unused static void gmxAllReduce(int n, real *data, const t_commrec *cr)
83 {
84     gmx_sum(n, data, cr);
85 }
86 #endif
87
88 // Helper function; note that gmx_sum(d) should actually be templated
89 gmx_unused static void gmxAllReduce(int n, double *data, const t_commrec *cr)
90 {
91     gmx_sumd(n, data, cr);
92 }
93
94 // Reduce data of n elements over all ranks currently participating in pull
95 template <typename T>
96 static void pullAllReduce(const t_commrec *cr,
97                           pull_comm_t     *comm,
98                           int              n,
99                           T               *data)
100 {
101     if (cr != nullptr && PAR(cr))
102     {
103         if (comm->bParticipateAll)
104         {
105             /* Sum the contributions over all DD ranks */
106             gmxAllReduce(n, data, cr);
107         }
108         else
109         {
110             /* Separate branch because gmx_sum uses cr->mpi_comm_mygroup */
111 #if GMX_MPI
112 #if MPI_IN_PLACE_EXISTS
113             MPI_Allreduce(MPI_IN_PLACE, data, n, mpiDatatype(data), MPI_SUM,
114                           comm->mpi_comm_com);
115 #else
116             std::vector<T> buf(n);
117
118             MPI_Allreduce(data, buf, n, mpiDatatype(data), MPI_SUM,
119                           comm->mpi_comm_com);
120
121             /* Copy the result from the buffer to the input/output data */
122             for (int i = 0; i < n; i++)
123             {
124                 data[i] = buf[i];
125             }
126 #endif
127 #else
128             gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
129 #endif
130         }
131     }
132 }
133
134 /* Copies the coordinates of the PBC atom of pgrp to x_pbc.
135  * When those coordinates are not available on this rank, clears x_pbc.
136  */
137 static void setPbcAtomCoords(const pull_group_work_t &pgrp,
138                              const rvec              *x,
139                              rvec                     x_pbc)
140 {
141     if (pgrp.pbcAtomSet != nullptr)
142     {
143         if (pgrp.pbcAtomSet->numAtomsLocal() > 0)
144         {
145             /* We have the atom locally, copy its coordinates */
146             copy_rvec(x[pgrp.pbcAtomSet->localIndex()[0]], x_pbc);
147         }
148         else
149         {
150             /* Another rank has it, clear the coordinates for MPI_Allreduce */
151             clear_rvec(x_pbc);
152         }
153     }
154     else
155     {
156         copy_rvec(x[pgrp.params.pbcatom], x_pbc);
157     }
158 }
159
160 static void pull_set_pbcatoms(const t_commrec *cr, struct pull_t *pull,
161                               const rvec *x,
162                               gmx::ArrayRef<gmx::RVec> x_pbc)
163 {
164     int numPbcAtoms = 0;
165     for (size_t g = 0; g < pull->group.size(); g++)
166     {
167         const pull_group_work_t &group = pull->group[g];
168         if (group.needToCalcCom && (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM))
169         {
170             setPbcAtomCoords(pull->group[g], x, x_pbc[g]);
171             numPbcAtoms++;
172         }
173         else
174         {
175             clear_rvec(x_pbc[g]);
176         }
177     }
178
179     if (cr && PAR(cr) && numPbcAtoms > 0)
180     {
181         /* Sum over participating ranks to get x_pbc from the home ranks.
182          * This can be very expensive at high parallelization, so we only
183          * do this after each DD repartitioning.
184          */
185         pullAllReduce(cr, &pull->comm, pull->group.size()*DIM,
186                       static_cast<real *>(x_pbc[0]));
187     }
188 }
189
190 static void make_cyl_refgrps(const t_commrec *cr,
191                              pull_t          *pull,
192                              const t_mdatoms *md,
193                              t_pbc           *pbc,
194                              double           t,
195                              const rvec      *x)
196 {
197     pull_comm_t *comm = &pull->comm;
198
199     GMX_ASSERT(comm->cylinderBuffer.size() == pull->coord.size()*c_cylinderBufferStride, "cylinderBuffer should have the correct size");
200
201     double inv_cyl_r2 = 1.0/gmx::square(pull->params.cylinder_r);
202
203     /* loop over all groups to make a reference group for each*/
204     for (size_t c = 0; c < pull->coord.size(); c++)
205     {
206         pull_coord_work_t *pcrd;
207         double             sum_a, wmass, wwmass;
208         dvec               radf_fac0, radf_fac1;
209
210         pcrd   = &pull->coord[c];
211
212         sum_a  = 0;
213         wmass  = 0;
214         wwmass = 0;
215         clear_dvec(radf_fac0);
216         clear_dvec(radf_fac1);
217
218         if (pcrd->params.eGeom == epullgCYL)
219         {
220             /* pref will be the same group for all pull coordinates */
221             const pull_group_work_t &pref  = pull->group[pcrd->params.group[0]];
222             const pull_group_work_t &pgrp  = pull->group[pcrd->params.group[1]];
223             pull_group_work_t       &pdyna = pull->dyna[c];
224             rvec                     direction;
225             copy_dvec_to_rvec(pcrd->spatialData.vec, direction);
226
227             /* Since we have not calculated the COM of the cylinder group yet,
228              * we calculate distances with respect to location of the pull
229              * group minus the reference position along the vector.
230              * here we already have the COM of the pull group. This resolves
231              * any PBC issues and we don't need to use a PBC-atom here.
232              */
233             if (pcrd->params.rate != 0)
234             {
235                 /* With rate=0, value_ref is set initially */
236                 pcrd->value_ref = pcrd->params.init + pcrd->params.rate*t;
237             }
238             rvec reference;
239             for (int m = 0; m < DIM; m++)
240             {
241                 reference[m] = pgrp.x[m] - pcrd->spatialData.vec[m]*pcrd->value_ref;
242             }
243
244             auto localAtomIndices = pref.atomSet.localIndex();
245
246             /* This actually only needs to be done at init or DD time,
247              * but resizing with the same size does not cause much overhead.
248              */
249             pdyna.localWeights.resize(localAtomIndices.size());
250             pdyna.mdw.resize(localAtomIndices.size());
251             pdyna.dv.resize(localAtomIndices.size());
252
253             /* loop over all atoms in the main ref group */
254             for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.size(); indexInSet++)
255             {
256                 int    atomIndex = localAtomIndices[indexInSet];
257                 rvec   dx;
258                 pbc_dx_aiuc(pbc, x[atomIndex], reference, dx);
259                 double axialLocation = iprod(direction, dx);
260                 dvec   radialLocation;
261                 double dr2 = 0;
262                 for (int m = 0; m < DIM; m++)
263                 {
264                     /* Determine the radial components */
265                     radialLocation[m]  = dx[m] - axialLocation*direction[m];
266                     dr2               += gmx::square(radialLocation[m]);
267                 }
268                 double dr2_rel = dr2*inv_cyl_r2;
269
270                 if (dr2_rel < 1)
271                 {
272                     /* add atom to sum of COM and to weight array */
273
274                     double mass                     = md->massT[atomIndex];
275                     /* The radial weight function is 1-2x^2+x^4,
276                      * where x=r/cylinder_r. Since this function depends
277                      * on the radial component, we also get radial forces
278                      * on both groups.
279                      */
280                     double weight                   =  1 + (-2 + dr2_rel)*dr2_rel;
281                     double dweight_r                = (-4 + 4*dr2_rel)*inv_cyl_r2;
282                     pdyna.localWeights[indexInSet]  = weight;
283                     sum_a                          += mass*weight*axialLocation;
284                     wmass                          += mass*weight;
285                     wwmass                         += mass*weight*weight;
286                     dvec mdw;
287                     dsvmul(mass*dweight_r, radialLocation, mdw);
288                     copy_dvec(mdw, pdyna.mdw[indexInSet]);
289                     /* Currently we only have the axial component of the
290                      * offset from the cylinder COM up to an unkown offset.
291                      * We add this offset after the reduction needed
292                      * for determining the COM of the cylinder group.
293                      */
294                     pdyna.dv[indexInSet] = axialLocation;
295                     for (int m = 0; m < DIM; m++)
296                     {
297                         radf_fac0[m] += mdw[m];
298                         radf_fac1[m] += mdw[m]*axialLocation;
299                     }
300                 }
301                 else
302                 {
303                     pdyna.localWeights[indexInSet] = 0;
304                 }
305             }
306         }
307
308         auto buffer = gmx::arrayRefFromArray(comm->cylinderBuffer.data() + c*c_cylinderBufferStride, c_cylinderBufferStride);
309
310         buffer[0] = wmass;
311         buffer[1] = wwmass;
312         buffer[2] = sum_a;
313
314         buffer[3] = radf_fac0[XX];
315         buffer[4] = radf_fac0[YY];
316         buffer[5] = radf_fac0[ZZ];
317
318         buffer[6] = radf_fac1[XX];
319         buffer[7] = radf_fac1[YY];
320         buffer[8] = radf_fac1[ZZ];
321     }
322
323     if (cr != nullptr && PAR(cr))
324     {
325         /* Sum the contributions over the ranks */
326         pullAllReduce(cr, comm, pull->coord.size()*c_cylinderBufferStride,
327                       comm->cylinderBuffer.data());
328     }
329
330     for (size_t c = 0; c < pull->coord.size(); c++)
331     {
332         pull_coord_work_t *pcrd;
333
334         pcrd  = &pull->coord[c];
335
336         if (pcrd->params.eGeom == epullgCYL)
337         {
338             pull_group_work_t    *pdyna       = &pull->dyna[c];
339             pull_group_work_t    *pgrp        = &pull->group[pcrd->params.group[1]];
340             PullCoordSpatialData &spatialData = pcrd->spatialData;
341
342             auto                  buffer      = gmx::constArrayRefFromArray(comm->cylinderBuffer.data() + c*c_cylinderBufferStride, c_cylinderBufferStride);
343             double                wmass       = buffer[0];
344             double                wwmass      = buffer[1];
345             pdyna->mwscale                    = 1.0/wmass;
346             /* Cylinder pulling can't be used with constraints, but we set
347              * wscale and invtm anyhow, in case someone would like to use them.
348              */
349             pdyna->wscale  = wmass/wwmass;
350             pdyna->invtm   = wwmass/(wmass*wmass);
351
352             /* We store the deviation of the COM from the reference location
353              * used above, since we need it when we apply the radial forces
354              * to the atoms in the cylinder group.
355              */
356             spatialData.cyl_dev = 0;
357             for (int m = 0; m < DIM; m++)
358             {
359                 double reference     = pgrp->x[m] - spatialData.vec[m]*pcrd->value_ref;
360                 double dist          = -spatialData.vec[m]*buffer[2]*pdyna->mwscale;
361                 pdyna->x[m]          = reference - dist;
362                 spatialData.cyl_dev += dist;
363             }
364             /* Now we know the exact COM of the cylinder reference group,
365              * we can determine the radial force factor (ffrad) that when
366              * multiplied with the axial pull force will give the radial
367              * force on the pulled (non-cylinder) group.
368              */
369             for (int m = 0; m < DIM; m++)
370             {
371                 spatialData.ffrad[m] = (buffer[6 + m] +
372                                         buffer[3 + m]*spatialData.cyl_dev)/wmass;
373             }
374
375             if (debug)
376             {
377                 fprintf(debug, "Pull cylinder group %zu:%8.3f%8.3f%8.3f m:%8.3f\n",
378                         c, pdyna->x[0], pdyna->x[1],
379                         pdyna->x[2], 1.0/pdyna->invtm);
380                 fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n",
381                         spatialData.ffrad[XX], spatialData.ffrad[YY], spatialData.ffrad[ZZ]);
382             }
383         }
384     }
385 }
386
/* Returns atan2(y, x), with negative results shifted up by 2*pi so that
 * they become non-negative.
 */
static double atan2_0_2pi(double y, double x)
{
    const double angle = atan2(y, x);

    return (angle < 0) ? angle + 2.0*M_PI : angle;
}
398
399 static void sum_com_part(const pull_group_work_t *pgrp,
400                          int ind_start, int ind_end,
401                          const rvec *x, const rvec *xp,
402                          const real *mass,
403                          const t_pbc *pbc,
404                          const rvec x_pbc,
405                          ComSums *sum_com)
406 {
407     double sum_wm   = 0;
408     double sum_wwm  = 0;
409     dvec   sum_wmx  = { 0, 0, 0 };
410     dvec   sum_wmxp = { 0, 0, 0 };
411
412     auto   localAtomIndices = pgrp->atomSet.localIndex();
413     for (int i = ind_start; i < ind_end; i++)
414     {
415         int  ii = localAtomIndices[i];
416         real wm;
417         if (pgrp->localWeights.empty())
418         {
419             wm      = mass[ii];
420             sum_wm += wm;
421         }
422         else
423         {
424             real w;
425
426             w        = pgrp->localWeights[i];
427             wm       = w*mass[ii];
428             sum_wm  += wm;
429             sum_wwm += wm*w;
430         }
431         if (pgrp->epgrppbc == epgrppbcNONE)
432         {
433             /* Plain COM: sum the coordinates */
434             for (int d = 0; d < DIM; d++)
435             {
436                 sum_wmx[d]      += wm*x[ii][d];
437             }
438             if (xp)
439             {
440                 for (int d = 0; d < DIM; d++)
441                 {
442                     sum_wmxp[d] += wm*xp[ii][d];
443                 }
444             }
445         }
446         else
447         {
448             rvec dx;
449
450             /* Sum the difference with the reference atom */
451             pbc_dx(pbc, x[ii], x_pbc, dx);
452             for (int d = 0; d < DIM; d++)
453             {
454                 sum_wmx[d]     += wm*dx[d];
455             }
456             if (xp)
457             {
458                 /* For xp add the difference between xp and x to dx,
459                  * such that we use the same periodic image,
460                  * also when xp has a large displacement.
461                  */
462                 for (int d = 0; d < DIM; d++)
463                 {
464                     sum_wmxp[d] += wm*(dx[d] + xp[ii][d] - x[ii][d]);
465                 }
466             }
467         }
468     }
469
470     sum_com->sum_wm  = sum_wm;
471     sum_com->sum_wwm = sum_wwm;
472     copy_dvec(sum_wmx, sum_com->sum_wmx);
473     if (xp)
474     {
475         copy_dvec(sum_wmxp, sum_com->sum_wmxp);
476     }
477 }
478
479 static void sum_com_part_cosweight(const pull_group_work_t *pgrp,
480                                    int ind_start, int ind_end,
481                                    int cosdim, real twopi_box,
482                                    const rvec *x, const rvec *xp,
483                                    const real *mass,
484                                    ComSums *sum_com)
485 {
486     /* Cosine weighting geometry */
487     double sum_cm  = 0;
488     double sum_sm  = 0;
489     double sum_ccm = 0;
490     double sum_csm = 0;
491     double sum_ssm = 0;
492     double sum_cmp = 0;
493     double sum_smp = 0;
494
495     auto   localAtomIndices = pgrp->atomSet.localIndex();
496
497     for (int i = ind_start; i < ind_end; i++)
498     {
499         int  ii  = localAtomIndices[i];
500         real m   = mass[ii];
501         /* Determine cos and sin sums */
502         real cw  = std::cos(x[ii][cosdim]*twopi_box);
503         real sw  = std::sin(x[ii][cosdim]*twopi_box);
504         sum_cm  += static_cast<double>(cw*m);
505         sum_sm  += static_cast<double>(sw*m);
506         sum_ccm += static_cast<double>(cw*cw*m);
507         sum_csm += static_cast<double>(cw*sw*m);
508         sum_ssm += static_cast<double>(sw*sw*m);
509
510         if (xp != nullptr)
511         {
512             real cw  = std::cos(xp[ii][cosdim]*twopi_box);
513             real sw  = std::sin(xp[ii][cosdim]*twopi_box);
514             sum_cmp += static_cast<double>(cw*m);
515             sum_smp += static_cast<double>(sw*m);
516         }
517     }
518
519     sum_com->sum_cm  = sum_cm;
520     sum_com->sum_sm  = sum_sm;
521     sum_com->sum_ccm = sum_ccm;
522     sum_com->sum_csm = sum_csm;
523     sum_com->sum_ssm = sum_ssm;
524     sum_com->sum_cmp = sum_cmp;
525     sum_com->sum_smp = sum_smp;
526 }
527
528 /* calculates center of mass of selection index from all coordinates x */
529 void pull_calc_coms(const t_commrec *cr,
530                     pull_t *pull,
531                     const t_mdatoms *md,
532                     t_pbc *pbc,
533                     double t,
534                     const rvec x[], rvec *xp)
535 {
536     real         twopi_box = 0;
537     pull_comm_t *comm;
538
539     comm = &pull->comm;
540
541     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(), "pbcAtomBuffer should have size number of groups");
542     GMX_ASSERT(comm->comBuffer.size() == pull->group.size()*DIM, "comBuffer should have size #group*DIM");
543
544     if (pull->bRefAt && pull->bSetPBCatoms)
545     {
546         pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
547
548         if (cr != nullptr && DOMAINDECOMP(cr))
549         {
550             /* We can keep these PBC reference coordinates fixed for nstlist
551              * steps, since atoms won't jump over PBC.
552              * This avoids a global reduction at the next nstlist-1 steps.
553              * Note that the exact values of the pbc reference coordinates
554              * are irrelevant, as long all atoms in the group are within
555              * half a box distance of the reference coordinate.
556              */
557             pull->bSetPBCatoms = FALSE;
558         }
559     }
560
561     if (pull->cosdim >= 0)
562     {
563         int m;
564
565         assert(pull->npbcdim <= DIM);
566
567         for (m = pull->cosdim+1; m < pull->npbcdim; m++)
568         {
569             if (pbc->box[m][pull->cosdim] != 0)
570             {
571                 gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
572             }
573         }
574         twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim];
575     }
576
577     for (size_t g = 0; g < pull->group.size(); g++)
578     {
579         pull_group_work_t *pgrp;
580
581         pgrp = &pull->group[g];
582
583         if (pgrp->needToCalcCom)
584         {
585             if (pgrp->epgrppbc != epgrppbcCOS)
586             {
587                 rvec x_pbc = { 0, 0, 0 };
588
589                 switch (pgrp->epgrppbc)
590                 {
591                     case epgrppbcREFAT:
592                         /* Set the pbc atom */
593                         copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
594                         break;
595                     case epgrppbcPREVSTEPCOM:
596                         /* Set the pbc reference to the COM of the group of the last step */
597                         copy_dvec_to_rvec(pgrp->x_prev_step, comm->pbcAtomBuffer[g]);
598                         copy_dvec_to_rvec(pgrp->x_prev_step, x_pbc);
599                 }
600
601                 /* The final sums should end up in comSums[0] */
602                 ComSums &comSumsTotal = pull->comSums[0];
603
604                 /* If we have a single-atom group the mass is irrelevant, so
605                  * we can remove the mass factor to avoid division by zero.
606                  * Note that with constraint pulling the mass does matter, but
607                  * in that case a check group mass != 0 has been done before.
608                  */
609                 if (pgrp->params.nat == 1 &&
610                     pgrp->atomSet.numAtomsLocal() == 1 &&
611                     md->massT[pgrp->atomSet.localIndex()[0]] == 0)
612                 {
613                     GMX_ASSERT(xp == nullptr, "We should not have groups with zero mass with constraints, i.e. xp!=NULL");
614
615                     /* Copy the single atom coordinate */
616                     for (int d = 0; d < DIM; d++)
617                     {
618                         comSumsTotal.sum_wmx[d] = x[pgrp->atomSet.localIndex()[0]][d];
619                     }
620                     /* Set all mass factors to 1 to get the correct COM */
621                     comSumsTotal.sum_wm  = 1;
622                     comSumsTotal.sum_wwm = 1;
623                 }
624                 else if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
625                 {
626                     sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(),
627                                  x, xp, md->massT,
628                                  pbc, x_pbc,
629                                  &comSumsTotal);
630                 }
631                 else
632                 {
633 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
634                     for (int t = 0; t < pull->nthreads; t++)
635                     {
636                         int ind_start = (pgrp->atomSet.numAtomsLocal()*(t + 0))/pull->nthreads;
637                         int ind_end   = (pgrp->atomSet.numAtomsLocal()*(t + 1))/pull->nthreads;
638                         sum_com_part(pgrp, ind_start, ind_end,
639                                      x, xp, md->massT,
640                                      pbc, x_pbc,
641                                      &pull->comSums[t]);
642                     }
643
644                     /* Reduce the thread contributions to sum_com[0] */
645                     for (int t = 1; t < pull->nthreads; t++)
646                     {
647                         comSumsTotal.sum_wm  += pull->comSums[t].sum_wm;
648                         comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
649                         dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
650                         dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
651                     }
652                 }
653
654                 if (pgrp->localWeights.empty())
655                 {
656                     comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
657                 }
658
659                 /* Copy local sums to a buffer for global summing */
660                 auto buffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g*DIM, DIM);
661
662                 copy_dvec(comSumsTotal.sum_wmx,  buffer[0]);
663
664                 copy_dvec(comSumsTotal.sum_wmxp, buffer[1]);
665
666                 buffer[2][0] = comSumsTotal.sum_wm;
667                 buffer[2][1] = comSumsTotal.sum_wwm;
668                 buffer[2][2] = 0;
669             }
670             else
671             {
672                 /* Cosine weighting geometry.
673                  * This uses a slab of the system, thus we always have many
674                  * atoms in the pull groups. Therefore, always use threads.
675                  */
676 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
677                 for (int t = 0; t < pull->nthreads; t++)
678                 {
679                     int ind_start = (pgrp->atomSet.numAtomsLocal()*(t + 0))/pull->nthreads;
680                     int ind_end   = (pgrp->atomSet.numAtomsLocal()*(t + 1))/pull->nthreads;
681                     sum_com_part_cosweight(pgrp, ind_start, ind_end,
682                                            pull->cosdim, twopi_box,
683                                            x, xp, md->massT,
684                                            &pull->comSums[t]);
685                 }
686
687                 /* Reduce the thread contributions to comSums[0] */
688                 ComSums &comSumsTotal = pull->comSums[0];
689                 for (int t = 1; t < pull->nthreads; t++)
690                 {
691                     comSumsTotal.sum_cm  += pull->comSums[t].sum_cm;
692                     comSumsTotal.sum_sm  += pull->comSums[t].sum_sm;
693                     comSumsTotal.sum_ccm += pull->comSums[t].sum_ccm;
694                     comSumsTotal.sum_csm += pull->comSums[t].sum_csm;
695                     comSumsTotal.sum_ssm += pull->comSums[t].sum_ssm;
696                     comSumsTotal.sum_cmp += pull->comSums[t].sum_cmp;
697                     comSumsTotal.sum_smp += pull->comSums[t].sum_smp;
698                 }
699
700                 /* Copy local sums to a buffer for global summing */
701                 auto buffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g*DIM, DIM);
702
703                 buffer[0][0] = comSumsTotal.sum_cm;
704                 buffer[0][1] = comSumsTotal.sum_sm;
705                 buffer[0][2] = 0;
706                 buffer[1][0] = comSumsTotal.sum_ccm;
707                 buffer[1][1] = comSumsTotal.sum_csm;
708                 buffer[1][2] = comSumsTotal.sum_ssm;
709                 buffer[2][0] = comSumsTotal.sum_cmp;
710                 buffer[2][1] = comSumsTotal.sum_smp;
711                 buffer[2][2] = 0;
712             }
713         }
714     }
715
716     pullAllReduce(cr, comm, pull->group.size()*3*DIM,
717                   static_cast<double *>(comm->comBuffer[0]));
718
719     for (size_t g = 0; g < pull->group.size(); g++)
720     {
721         pull_group_work_t *pgrp;
722
723         pgrp = &pull->group[g];
724         if (pgrp->needToCalcCom)
725         {
726             GMX_ASSERT(pgrp->params.nat > 0, "Normal pull groups should have atoms, only group 0, which should have bCalcCom=FALSE has nat=0");
727
728             auto dvecBuffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g*DIM, DIM);
729
730             if (pgrp->epgrppbc != epgrppbcCOS)
731             {
732                 double wmass, wwmass;
733                 int    m;
734
735                 /* Determine the inverse mass */
736                 wmass             = dvecBuffer[2][0];
737                 wwmass            = dvecBuffer[2][1];
738                 pgrp->mwscale     = 1.0/wmass;
739                 /* invtm==0 signals a frozen group, so then we should keep it zero */
740                 if (pgrp->invtm != 0)
741                 {
742                     pgrp->wscale  = wmass/wwmass;
743                     pgrp->invtm   = wwmass/(wmass*wmass);
744                 }
745                 /* Divide by the total mass */
746                 for (m = 0; m < DIM; m++)
747                 {
748                     pgrp->x[m]      = dvecBuffer[0][m]*pgrp->mwscale;
749                     if (xp)
750                     {
751                         pgrp->xp[m] = dvecBuffer[1][m]*pgrp->mwscale;
752                     }
753                     if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
754                     {
755                         pgrp->x[m]      += comm->pbcAtomBuffer[g][m];
756                         if (xp)
757                         {
758                             pgrp->xp[m] += comm->pbcAtomBuffer[g][m];
759                         }
760                     }
761                 }
762             }
763             else
764             {
765                 /* Cosine weighting geometry */
766                 double csw, snw, wmass, wwmass;
767
768                 /* Determine the optimal location of the cosine weight */
769                 csw                   = dvecBuffer[0][0];
770                 snw                   = dvecBuffer[0][1];
771                 pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
772                 /* Set the weights for the local atoms */
773                 wmass  = sqrt(csw*csw + snw*snw);
774                 wwmass = (dvecBuffer[1][0]*csw*csw +
775                           dvecBuffer[1][1]*csw*snw +
776                           dvecBuffer[1][2]*snw*snw)/(wmass*wmass);
777
778                 pgrp->mwscale = 1.0/wmass;
779                 pgrp->wscale  = wmass/wwmass;
780                 pgrp->invtm   = wwmass/(wmass*wmass);
781                 /* Set the weights for the local atoms */
782                 csw *= pgrp->invtm;
783                 snw *= pgrp->invtm;
784                 for (size_t i = 0; i < pgrp->atomSet.numAtomsLocal(); i++)
785                 {
786                     int ii = pgrp->atomSet.localIndex()[i];
787                     pgrp->localWeights[i] = csw*std::cos(twopi_box*x[ii][pull->cosdim]) +
788                         snw*std::sin(twopi_box*x[ii][pull->cosdim]);
789                 }
790                 if (xp)
791                 {
792                     csw                    = dvecBuffer[2][0];
793                     snw                    = dvecBuffer[2][1];
794                     pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
795                 }
796             }
797             if (debug)
798             {
799                 fprintf(debug, "Pull group %zu wmass %f invtm %f\n",
800                         g, 1.0/pgrp->mwscale, pgrp->invtm);
801             }
802         }
803     }
804
805     if (pull->bCylinder)
806     {
807         /* Calculate the COMs for the cyclinder reference groups */
808         make_cyl_refgrps(cr, pull, md, pbc, t, x);
809     }
810 }
811
812 using BoolVec = gmx::BasicVector<bool>;
813
814 /* Returns whether the pull group obeys the PBC restrictions */
815 static bool pullGroupObeysPbcRestrictions(const pull_group_work_t &group,
816                                           const BoolVec           &dimUsed,
817                                           const rvec              *x,
818                                           const t_pbc             &pbc,
819                                           const gmx::RVec         &x_pbc,
820                                           const real               pbcMargin)
821 {
822     /* Determine which dimensions are relevant for PBC */
823     BoolVec dimUsesPbc       = { false, false, false };
824     bool    pbcIsRectangular = true;
825     for (int d = 0; d < pbc.ndim_ePBC; d++)
826     {
827         if (dimUsed[d])
828         {
829             dimUsesPbc[d] = true;
830             /* All non-zero dimensions of vector v are involved in PBC */
831             for (int d2 = d + 1; d2 < pbc.ndim_ePBC; d2++)
832             {
833                 assert(d2 < DIM);
834                 if (pbc.box[d2][d] != 0)
835                 {
836                     dimUsesPbc[d2]   = true;
837                     pbcIsRectangular = false;
838                 }
839             }
840         }
841     }
842
843     rvec marginPerDim = {};
844     real marginDistance2 = 0;
845     if (pbcIsRectangular)
846     {
847         /* Use margins for dimensions independently */
848         for (int d = 0; d < pbc.ndim_ePBC; d++)
849         {
850             marginPerDim[d] = pbcMargin*pbc.hbox_diag[d];
851         }
852     }
853     else
854     {
855         /* Check the total distance along the relevant dimensions */
856         for (int d = 0; d < pbc.ndim_ePBC; d++)
857         {
858             if (dimUsesPbc[d])
859             {
860                 marginDistance2 += pbcMargin*gmx::square(0.5)*norm2(pbc.box[d]);
861             }
862         }
863     }
864
865     auto localAtomIndices = group.atomSet.localIndex();
866     for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.size(); indexInSet++)
867     {
868         rvec dx;
869         pbc_dx(&pbc, x[localAtomIndices[indexInSet]], x_pbc, dx);
870
871         bool atomIsTooFar = false;
872         if (pbcIsRectangular)
873         {
874             for (int d = 0; d < pbc.ndim_ePBC; d++)
875             {
876                 if (dimUsesPbc[d] && (dx[d] < -marginPerDim[d] ||
877                                       dx[d] >  marginPerDim[d]))
878                 {
879                     atomIsTooFar = true;
880                 }
881             }
882         }
883         else
884         {
885             real pbcDistance2 = 0;
886             for (int d = 0; d < pbc.ndim_ePBC; d++)
887             {
888                 if (dimUsesPbc[d])
889                 {
890                     pbcDistance2 += gmx::square(dx[d]);
891                 }
892             }
893             atomIsTooFar = (pbcDistance2 > marginDistance2);
894         }
895         if (atomIsTooFar)
896         {
897             return false;
898         }
899     }
900
901     return true;
902 }
903
904 int pullCheckPbcWithinGroups(const pull_t &pull,
905                              const rvec   *x,
906                              const t_pbc  &pbc,
907                              real          pbcMargin)
908 {
909     if (pbc.ePBC == epbcNONE)
910     {
911         return -1;
912     }
913
914     /* Determine what dimensions are used for each group by pull coordinates */
915     std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
916     for (size_t c = 0; c < pull.coord.size(); c++)
917     {
918         const t_pull_coord &coordParams = pull.coord[c].params;
919         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
920         {
921             for (int d = 0; d < DIM; d++)
922             {
923                 if (coordParams.dim[d] &&
924                     !(coordParams.eGeom == epullgCYL && groupIndex == 0))
925                 {
926                     dimUsed[coordParams.group[groupIndex]][d] = true;
927                 }
928             }
929         }
930     }
931
932     /* Check PBC for every group that uses a PBC reference atom treatment */
933     for (size_t g = 0; g < pull.group.size(); g++)
934     {
935         const pull_group_work_t &group = pull.group[g];
936         if ((group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM) &&
937             !pullGroupObeysPbcRestrictions(group, dimUsed[g], x, pbc, pull.comm.pbcAtomBuffer[g], pbcMargin))
938         {
939             return g;
940         }
941     }
942
943     return -1;
944 }
945
946 bool pullCheckPbcWithinGroup(const pull_t                  &pull,
947                              gmx::ArrayRef<const gmx::RVec> x,
948                              const t_pbc                   &pbc,
949                              int                            groupNr,
950                              real                           pbcMargin)
951 {
952     if (pbc.ePBC == epbcNONE)
953     {
954         return true;
955     }
956     GMX_ASSERT(groupNr < static_cast<int>(pull.group.size()), "groupNr is out of range");
957
958     /* Check PBC if the group uses a PBC reference atom treatment. */
959     const pull_group_work_t &group = pull.group[groupNr];
960     if (group.epgrppbc != epgrppbcREFAT && group.epgrppbc != epgrppbcPREVSTEPCOM)
961     {
962         return true;
963     }
964
965     /* Determine what dimensions are used for each group by pull coordinates */
966     BoolVec dimUsed = { false, false, false };
967     for (size_t c = 0; c < pull.coord.size(); c++)
968     {
969         const t_pull_coord &coordParams = pull.coord[c].params;
970         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
971         {
972             if (coordParams.group[groupIndex] == groupNr)
973             {
974                 for (int d = 0; d < DIM; d++)
975                 {
976                     if (coordParams.dim[d] &&
977                         !(coordParams.eGeom == epullgCYL && groupIndex == 0))
978                     {
979                         dimUsed[d] = true;
980                     }
981                 }
982             }
983         }
984     }
985
986     return (pullGroupObeysPbcRestrictions(group, dimUsed, as_rvec_array(x.data()), pbc, pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
987 }
988
989 void setStatePrevStepPullCom(const struct pull_t *pull, t_state *state)
990 {
991     for (size_t i = 0; i < state->com_prev_step.size()/DIM; i++)
992     {
993         for (int j = 0; j < DIM; j++)
994         {
995             state->com_prev_step[i*DIM+j] = pull->group[i].x_prev_step[j];
996         }
997     }
998 }
999
1000 void setPrevStepPullComFromState(struct pull_t *pull, const t_state *state)
1001 {
1002     for (size_t i = 0; i < state->com_prev_step.size()/DIM; i++)
1003     {
1004         for (int j = 0; j < DIM; j++)
1005         {
1006             pull->group[i].x_prev_step[j] = state->com_prev_step[i*DIM+j];
1007         }
1008     }
1009 }
1010
1011 void updatePrevStepCom(struct pull_t *pull)
1012 {
1013     for (size_t g = 0; g < pull->group.size(); g++)
1014     {
1015         if (pull->group[g].needToCalcCom)
1016         {
1017             for (int j = 0; j < DIM; j++)
1018             {
1019                 pull->group[g].x_prev_step[j] = pull->group[g].x[j];
1020             }
1021         }
1022     }
1023 }
1024
1025 void allocStatePrevStepPullCom(t_state *state, pull_t *pull)
1026 {
1027     if (!pull)
1028     {
1029         state->com_prev_step.clear();
1030         return;
1031     }
1032     size_t ngroup = pull->group.size();
1033     if (state->com_prev_step.size()/DIM != ngroup)
1034     {
1035         state->com_prev_step.resize(ngroup * DIM, NAN);
1036     }
1037 }
1038
1039 void initPullComFromPrevStep(const t_commrec *cr,
1040                              pull_t          *pull,
1041                              const t_mdatoms *md,
1042                              t_pbc           *pbc,
1043                              const rvec       x[])
1044 {
1045     pull_comm_t *comm   = &pull->comm;
1046     size_t       ngroup = pull->group.size();
1047
1048     comm->pbcAtomBuffer.resize(ngroup);
1049     comm->comBuffer.resize(ngroup*DIM);
1050
1051     for (size_t g = 0; g < ngroup; g++)
1052     {
1053         pull_group_work_t *pgrp;
1054
1055         pgrp = &pull->group[g];
1056
1057         if (pgrp->needToCalcCom && pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1058         {
1059             GMX_ASSERT(pgrp->params.nat > 1, "Groups with no atoms, or only one atom, should not "
1060                        "use the COM from the previous step as reference.");
1061
1062             rvec x_pbc = { 0, 0, 0 };
1063             pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
1064             copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
1065
1066             if (debug)
1067             {
1068                 fprintf(debug, "Initialising prev step COM of pull group %zu. x_pbc =", g);
1069                 for (int m = 0; m < DIM; m++)
1070                 {
1071                     fprintf(debug, " %f", x_pbc[m]);
1072                 }
1073                 fprintf(debug, "\n");
1074             }
1075
1076             /* The following is to a large extent similar to pull_calc_coms() */
1077
1078             /* The final sums should end up in sum_com[0] */
1079             ComSums &comSumsTotal = pull->comSums[0];
1080
1081             if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
1082             {
1083                 sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(),
1084                              x, nullptr, md->massT,
1085                              pbc, x_pbc,
1086                              &comSumsTotal);
1087             }
1088             else
1089             {
1090 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
1091                 for (int t = 0; t < pull->nthreads; t++)
1092                 {
1093                     int ind_start = (pgrp->atomSet.numAtomsLocal()*(t + 0))/pull->nthreads;
1094                     int ind_end   = (pgrp->atomSet.numAtomsLocal()*(t + 1))/pull->nthreads;
1095                     sum_com_part(pgrp, ind_start, ind_end,
1096                                  x, nullptr, md->massT,
1097                                  pbc, x_pbc,
1098                                  &pull->comSums[t]);
1099                 }
1100
1101                 /* Reduce the thread contributions to sum_com[0] */
1102                 for (int t = 1; t < pull->nthreads; t++)
1103                 {
1104                     comSumsTotal.sum_wm  += pull->comSums[t].sum_wm;
1105                     comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
1106                     dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
1107                     dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
1108                 }
1109             }
1110
1111             if (pgrp->localWeights.empty())
1112             {
1113                 comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
1114             }
1115
1116             /* Copy local sums to a buffer for global summing */
1117             copy_dvec(comSumsTotal.sum_wmx,  comm->comBuffer[g*3]);
1118             copy_dvec(comSumsTotal.sum_wmxp, comm->comBuffer[g*3 + 1]);
1119             comm->comBuffer[g*3 + 2][0] = comSumsTotal.sum_wm;
1120             comm->comBuffer[g*3 + 2][1] = comSumsTotal.sum_wwm;
1121             comm->comBuffer[g*3 + 2][2] = 0;
1122         }
1123     }
1124
1125     pullAllReduce(cr, comm, ngroup*3*DIM, static_cast<double *>(comm->comBuffer[0]));
1126
1127     for (size_t g = 0; g < ngroup; g++)
1128     {
1129         pull_group_work_t *pgrp;
1130
1131         pgrp = &pull->group[g];
1132         if (pgrp->needToCalcCom)
1133         {
1134             if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1135             {
1136                 double wmass, wwmass;
1137
1138                 /* Determine the inverse mass */
1139                 wmass             = comm->comBuffer[g*3+2][0];
1140                 wwmass            = comm->comBuffer[g*3+2][1];
1141                 pgrp->mwscale     = 1.0/wmass;
1142                 /* invtm==0 signals a frozen group, so then we should keep it zero */
1143                 if (pgrp->invtm != 0)
1144                 {
1145                     pgrp->wscale  = wmass/wwmass;
1146                     pgrp->invtm   = wwmass/(wmass*wmass);
1147                 }
1148                 /* Divide by the total mass */
1149                 for (int m = 0; m < DIM; m++)
1150                 {
1151                     pgrp->x[m]    = comm->comBuffer[g*3  ][m]*pgrp->mwscale;
1152                     if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1153                     {
1154                         pgrp->x[m]     += comm->pbcAtomBuffer[g][m];
1155                     }
1156                 }
1157                 if (debug)
1158                 {
1159                     fprintf(debug, "Pull group %zu wmass %f invtm %f\n",
1160                             g, 1.0/pgrp->mwscale, pgrp->invtm);
1161                     fprintf(debug, "Initialising prev step COM of pull group %zu to", g);
1162                     for (int m = 0; m < DIM; m++)
1163                     {
1164                         fprintf(debug, " %f", pgrp->x[m]);
1165                     }
1166                     fprintf(debug, "\n");
1167                 }
1168                 copy_dvec(pgrp->x, pgrp->x_prev_step);
1169             }
1170         }
1171     }
1172 }