/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "config.h"

#include <cassert>
#include <cmath>
#include <cstdlib>

#include <vector>

#include "gromacs/fileio/confio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"

#include "pull_internal.h"

#if GMX_MPI

// Helper function to deduce MPI datatype from the type of data
gmx_unused static MPI_Datatype mpiDatatype(const float gmx_unused* data)
{
    return MPI_FLOAT;
}

// Helper function to deduce MPI datatype from the type of data
gmx_unused static MPI_Datatype mpiDatatype(const double gmx_unused* data)
{
    return MPI_DOUBLE;
}

#endif // GMX_MPI

#if !GMX_DOUBLE
// Helper function; note that gmx_sum/gmx_sumd should really be a single templated function
gmx_unused static void gmxAllReduce(int n, real* data, const t_commrec* cr)
{
    gmx_sum(n, data, cr);
}
#endif

// Helper function; note that gmx_sum/gmx_sumd should really be a single templated function
gmx_unused static void gmxAllReduce(int n, double* data, const t_commrec* cr)
{
    gmx_sumd(n, data, cr);
}

// Reduce data of n elements over all ranks currently participating in pull
template<typename T>
static void pullAllReduce(const t_commrec* cr, pull_comm_t* comm, int n, T* data)
{
    if (cr != nullptr && PAR(cr))
    {
        if (comm->bParticipateAll)
        {
            /* Sum the contributions over all DD ranks */
            gmxAllReduce(n, data, cr);
        }
        else
        {
            /* Separate branch because gmx_sum uses cr->mpi_comm_mygroup */
#if GMX_MPI
#    if MPI_IN_PLACE_EXISTS
            MPI_Allreduce(MPI_IN_PLACE, data, n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
#    else
            std::vector<T> buf(n);

            MPI_Allreduce(data, buf.data(), n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);

            /* Copy the result from the buffer to the input/output data */
            for (int i = 0; i < n; i++)
            {
                data[i] = buf[i];
            }
#    endif
#else
            gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
#endif
        }
    }
}

/* Copies the coordinates of the PBC atom of pgrp to x_pbc.
 * When those coordinates are not available on this rank, clears x_pbc.
 */
static void setPbcAtomCoords(const pull_group_work_t& pgrp, const rvec* x, rvec x_pbc)
{
    if (pgrp.pbcAtomSet != nullptr)
    {
        if (pgrp.pbcAtomSet->numAtomsLocal() > 0)
        {
            /* We have the atom locally, copy its coordinates */
            copy_rvec(x[pgrp.pbcAtomSet->localIndex()[0]], x_pbc);
        }
        else
        {
            /* Another rank has it, clear the coordinates for MPI_Allreduce */
            clear_rvec(x_pbc);
        }
    }
    else
    {
        copy_rvec(x[pgrp.params.pbcatom], x_pbc);
    }
}

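/* Gathers the PBC reference coordinates into x_pbc for all groups that use a
 * reference atom or the previous-step COM. With parallel runs, ranks that do
 * not hold a reference atom contribute zeros, so a sum over the participating
 * ranks yields the coordinates from the home ranks.
 */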
static void pull_set_pbcatoms(const t_commrec* cr, struct pull_t* pull, const rvec* x, gmx::ArrayRef<gmx::RVec> x_pbc)
{
    int numPbcAtoms = 0;
    for (size_t g = 0; g < pull->group.size(); g++)
    {
        const pull_group_work_t& group = pull->group[g];
        if (group.needToCalcCom && (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM))
        {
            setPbcAtomCoords(group, x, x_pbc[g]);
            numPbcAtoms++;
        }
        else
        {
            clear_rvec(x_pbc[g]);
        }
    }

    if (cr && PAR(cr) && numPbcAtoms > 0)
    {
        /* Sum over participating ranks to get x_pbc from the home ranks.
         * This can be very expensive at high parallelization, so we only
         * do this after each DD repartitioning.
         */
        pullAllReduce(cr, &pull->comm, pull->group.size() * DIM, static_cast<real*>(x_pbc[0]));
    }
}

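/* Builds the dynamic reference group for each cylinder pull coordinate:
 * atoms of the reference group within cylinder_r of the pull axis get the
 * radial weight w = 1 - 2(r/rc)^2 + (r/rc)^4, and the weighted COM sums and
 * radial-force factors are accumulated for reduction over the ranks.
 */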
static void
make_cyl_refgrps(const t_commrec* cr, pull_t* pull, const t_mdatoms* md, t_pbc* pbc, double t, const rvec* x)
{
    pull_comm_t* comm = &pull->comm;

    GMX_ASSERT(comm->cylinderBuffer.size() == pull->coord.size() * c_cylinderBufferStride,
               "cylinderBuffer should have the correct size");

    double inv_cyl_r2 = 1.0 / gmx::square(pull->params.cylinder_r);

    /* Loop over all pull coordinates to make a dynamic reference group for each cylinder coordinate */
    for (size_t c = 0; c < pull->coord.size(); c++)
    {
        pull_coord_work_t* pcrd;
        double             sum_a, wmass, wwmass;
        dvec               radf_fac0, radf_fac1;

        pcrd = &pull->coord[c];

        sum_a  = 0;
        wmass  = 0;
        wwmass = 0;
        clear_dvec(radf_fac0);
        clear_dvec(radf_fac1);

        if (pcrd->params.eGeom == epullgCYL)
        {
            /* pref will be the same group for all pull coordinates */
            const pull_group_work_t& pref  = pull->group[pcrd->params.group[0]];
            const pull_group_work_t& pgrp  = pull->group[pcrd->params.group[1]];
            pull_group_work_t&       pdyna = pull->dyna[c];
            rvec                     direction;
            copy_dvec_to_rvec(pcrd->spatialData.vec, direction);

            /* Since we have not calculated the COM of the cylinder group yet,
             * we calculate distances with respect to the location of the pull
             * group minus the reference position along the vector.
             * Here we already have the COM of the pull group. This resolves
             * any PBC issues and we don't need to use a PBC atom here.
             */
            if (pcrd->params.rate != 0)
            {
                /* With rate=0, value_ref is set initially */
                pcrd->value_ref = pcrd->params.init + pcrd->params.rate * t;
            }
            rvec reference;
            for (int m = 0; m < DIM; m++)
            {
                reference[m] = pgrp.x[m] - pcrd->spatialData.vec[m] * pcrd->value_ref;
            }

            auto localAtomIndices = pref.atomSet.localIndex();

            /* This actually only needs to be done at init or DD time,
             * but resizing with the same size does not cause much overhead.
             */
            pdyna.localWeights.resize(localAtomIndices.size());
            pdyna.mdw.resize(localAtomIndices.size());
            pdyna.dv.resize(localAtomIndices.size());

            /* Loop over all atoms in the main ref group */
            for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
            {
                int  atomIndex = localAtomIndices[indexInSet];
                rvec dx;
                pbc_dx_aiuc(pbc, x[atomIndex], reference, dx);
                double axialLocation = iprod(direction, dx);
                dvec   radialLocation;
                double dr2 = 0;
                for (int m = 0; m < DIM; m++)
                {
                    /* Determine the radial components */
                    radialLocation[m] = dx[m] - axialLocation * direction[m];
                    dr2 += gmx::square(radialLocation[m]);
                }
                double dr2_rel = dr2 * inv_cyl_r2;

                if (dr2_rel < 1)
                {
                    /* Add the atom to the COM sums and to the weight array */

                    double mass = md->massT[atomIndex];
                    /* The radial weight function is 1-2x^2+x^4,
                     * where x=r/cylinder_r. Since this function depends
                     * on the radial component, we also get radial forces
                     * on both groups.
                     */
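                    /* Derivation: with u = dr2_rel = (r/rc)^2 we have
                     * w(u) = 1 - 2u + u^2, so
                     * dweight_r = (1/r) dw/dr = 2 dw/d(r^2) = (-4 + 4u)/rc^2.
                     */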
                    double weight                  = 1 + (-2 + dr2_rel) * dr2_rel;
                    double dweight_r               = (-4 + 4 * dr2_rel) * inv_cyl_r2;
                    pdyna.localWeights[indexInSet] = weight;
                    sum_a += mass * weight * axialLocation;
                    wmass += mass * weight;
                    wwmass += mass * weight * weight;
                    dvec mdw;
                    dsvmul(mass * dweight_r, radialLocation, mdw);
                    copy_dvec(mdw, pdyna.mdw[indexInSet]);
                    /* Currently we only have the axial component of the
                     * offset from the cylinder COM up to an unknown offset.
                     * We add this offset after the reduction needed
                     * for determining the COM of the cylinder group.
                     */
                    pdyna.dv[indexInSet] = axialLocation;
                    for (int m = 0; m < DIM; m++)
                    {
                        radf_fac0[m] += mdw[m];
                        radf_fac1[m] += mdw[m] * axialLocation;
                    }
                }
                else
                {
                    pdyna.localWeights[indexInSet] = 0;
                }
            }
        }

        auto buffer = gmx::arrayRefFromArray(
                comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);

        buffer[0] = wmass;
        buffer[1] = wwmass;
        buffer[2] = sum_a;

        buffer[3] = radf_fac0[XX];
        buffer[4] = radf_fac0[YY];
        buffer[5] = radf_fac0[ZZ];

        buffer[6] = radf_fac1[XX];
        buffer[7] = radf_fac1[YY];
        buffer[8] = radf_fac1[ZZ];
    }

    if (cr != nullptr && PAR(cr))
    {
        /* Sum the contributions over the ranks */
        pullAllReduce(cr, comm, pull->coord.size() * c_cylinderBufferStride, comm->cylinderBuffer.data());
    }

    for (size_t c = 0; c < pull->coord.size(); c++)
    {
        pull_coord_work_t* pcrd;

        pcrd = &pull->coord[c];

        if (pcrd->params.eGeom == epullgCYL)
        {
            pull_group_work_t*    pdyna       = &pull->dyna[c];
            pull_group_work_t*    pgrp        = &pull->group[pcrd->params.group[1]];
            PullCoordSpatialData& spatialData = pcrd->spatialData;

            auto buffer = gmx::constArrayRefFromArray(
                    comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
            double wmass   = buffer[0];
            double wwmass  = buffer[1];
            pdyna->mwscale = 1.0 / wmass;
            /* Cylinder pulling can't be used with constraints, but we set
             * wscale and invtm anyhow, in case someone would like to use them.
             */
            pdyna->wscale = wmass / wwmass;
            pdyna->invtm  = wwmass / (wmass * wmass);

            /* We store the deviation of the COM from the reference location
             * used above, since we need it when we apply the radial forces
             * to the atoms in the cylinder group.
             */
            spatialData.cyl_dev = 0;
            for (int m = 0; m < DIM; m++)
            {
                double reference = pgrp->x[m] - spatialData.vec[m] * pcrd->value_ref;
                double dist      = -spatialData.vec[m] * buffer[2] * pdyna->mwscale;
                pdyna->x[m]      = reference - dist;
                spatialData.cyl_dev += dist;
            }
            /* Now that we know the exact COM of the cylinder reference group,
             * we can determine the radial force factor (ffrad) that, when
             * multiplied with the axial pull force, gives the radial
             * force on the pulled (non-cylinder) group.
             */
            for (int m = 0; m < DIM; m++)
            {
                spatialData.ffrad[m] = (buffer[6 + m] + buffer[3 + m] * spatialData.cyl_dev) / wmass;
            }

            if (debug)
            {
                fprintf(debug, "Pull cylinder group %zu:%8.3f%8.3f%8.3f m:%8.3f\n", c, pdyna->x[0],
                        pdyna->x[1], pdyna->x[2], 1.0 / pdyna->invtm);
                fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n", spatialData.ffrad[XX],
                        spatialData.ffrad[YY], spatialData.ffrad[ZZ]);
            }
        }
    }
}

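/* Returns the angle atan2(y, x) mapped onto the interval [0, 2*pi) */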
static double atan2_0_2pi(double y, double x)
{
    double a;

    a = atan2(y, x);
    if (a < 0)
    {
        a += 2.0 * M_PI;
    }
    return a;
}

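/* Accumulates the (weighted) mass and mass-weighted coordinate sums for the
 * atoms with local indices [ind_start, ind_end) of a pull group into sum_com.
 * With a PBC reference, coordinates are summed as distances to x_pbc;
 * xp may be nullptr when no constraint coordinates are needed.
 */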
static void sum_com_part(const pull_group_work_t* pgrp,
                         int                      ind_start,
                         int                      ind_end,
                         const rvec*              x,
                         const rvec*              xp,
                         const real*              mass,
                         const t_pbc*             pbc,
                         const rvec               x_pbc,
                         ComSums*                 sum_com)
{
    double sum_wm   = 0;
    double sum_wwm  = 0;
    dvec   sum_wmx  = { 0, 0, 0 };
    dvec   sum_wmxp = { 0, 0, 0 };

    auto localAtomIndices = pgrp->atomSet.localIndex();
    for (int i = ind_start; i < ind_end; i++)
    {
        int  ii = localAtomIndices[i];
        real wm;
        if (pgrp->localWeights.empty())
        {
            wm = mass[ii];
            sum_wm += wm;
        }
        else
        {
            real w;

            w  = pgrp->localWeights[i];
            wm = w * mass[ii];
            sum_wm += wm;
            sum_wwm += wm * w;
        }
        if (pgrp->epgrppbc == epgrppbcNONE)
        {
            /* Plain COM: sum the coordinates */
            for (int d = 0; d < DIM; d++)
            {
                sum_wmx[d] += wm * x[ii][d];
            }
            if (xp)
            {
                for (int d = 0; d < DIM; d++)
                {
                    sum_wmxp[d] += wm * xp[ii][d];
                }
            }
        }
        else
        {
            rvec dx;

            /* Sum the difference with the reference atom */
            pbc_dx(pbc, x[ii], x_pbc, dx);
            for (int d = 0; d < DIM; d++)
            {
                sum_wmx[d] += wm * dx[d];
            }
            if (xp)
            {
                /* For xp add the difference between xp and x to dx,
                 * such that we use the same periodic image,
                 * also when xp has a large displacement.
                 */
                for (int d = 0; d < DIM; d++)
                {
                    sum_wmxp[d] += wm * (dx[d] + xp[ii][d] - x[ii][d]);
                }
            }
        }
    }

    sum_com->sum_wm  = sum_wm;
    sum_com->sum_wwm = sum_wwm;
    copy_dvec(sum_wmx, sum_com->sum_wmx);
    if (xp)
    {
        copy_dvec(sum_wmxp, sum_com->sum_wmxp);
    }
}

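/* Accumulates the cosine-weighting sums for the atoms with local indices
 * [ind_start, ind_end) of a pull group into sum_com. The weights are
 * cos(2*pi*x/L) and sin(2*pi*x/L) along dimension cosdim, with L the box
 * length along that dimension (twopi_box = 2*pi/L).
 */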
static void sum_com_part_cosweight(const pull_group_work_t* pgrp,
                                   int                      ind_start,
                                   int                      ind_end,
                                   int                      cosdim,
                                   real                     twopi_box,
                                   const rvec*              x,
                                   const rvec*              xp,
                                   const real*              mass,
                                   ComSums*                 sum_com)
{
    /* Cosine weighting geometry */
    double sum_cm  = 0;
    double sum_sm  = 0;
    double sum_ccm = 0;
    double sum_csm = 0;
    double sum_ssm = 0;
    double sum_cmp = 0;
    double sum_smp = 0;

    auto localAtomIndices = pgrp->atomSet.localIndex();

    for (int i = ind_start; i < ind_end; i++)
    {
        int  ii = localAtomIndices[i];
        real m  = mass[ii];
        /* Determine cos and sin sums */
        real cw = std::cos(x[ii][cosdim] * twopi_box);
        real sw = std::sin(x[ii][cosdim] * twopi_box);
        sum_cm += static_cast<double>(cw * m);
        sum_sm += static_cast<double>(sw * m);
        sum_ccm += static_cast<double>(cw * cw * m);
        sum_csm += static_cast<double>(cw * sw * m);
        sum_ssm += static_cast<double>(sw * sw * m);

        if (xp != nullptr)
        {
            real cw = std::cos(xp[ii][cosdim] * twopi_box);
            real sw = std::sin(xp[ii][cosdim] * twopi_box);
            sum_cmp += static_cast<double>(cw * m);
            sum_smp += static_cast<double>(sw * m);
        }
    }

    sum_com->sum_cm  = sum_cm;
    sum_com->sum_sm  = sum_sm;
    sum_com->sum_ccm = sum_ccm;
    sum_com->sum_csm = sum_csm;
    sum_com->sum_ssm = sum_ssm;
    sum_com->sum_cmp = sum_cmp;
    sum_com->sum_smp = sum_smp;
}

/* Calculates the centers of mass of all pull groups from the coordinates x */
// Work around a compiler segfault with icc 2019 update 5 and the 2020 initial release
#if defined(__INTEL_COMPILER) \
        && ((__INTEL_COMPILER == 1900 && __INTEL_COMPILER_UPDATE >= 5) || __INTEL_COMPILER >= 1910)
#    pragma intel optimization_level 2
#endif
void pull_calc_coms(const t_commrec* cr,
                    pull_t*          pull,
                    const t_mdatoms* md,
                    t_pbc*           pbc,
                    double           t,
                    const rvec       x[],
                    rvec*            xp)
{
    real         twopi_box = 0;
    pull_comm_t* comm;

    comm = &pull->comm;

    GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
               "pbcAtomBuffer should have size number of groups");
    GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
               "comBuffer should have size #group*c_comBufferStride");

    if (pull->bRefAt && pull->bSetPBCatoms)
    {
        pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);

        if (cr != nullptr && DOMAINDECOMP(cr))
        {
            /* We can keep these PBC reference coordinates fixed for nstlist
             * steps, since atoms won't jump over PBC.
             * This avoids a global reduction at the next nstlist-1 steps.
             * Note that the exact values of the pbc reference coordinates
             * are irrelevant, as long as all atoms in the group are within
             * half a box distance of the reference coordinate.
             */
            pull->bSetPBCatoms = FALSE;
        }
    }

    if (pull->cosdim >= 0)
    {
        int m;

        assert(pull->npbcdim <= DIM);

        for (m = pull->cosdim + 1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Can not do cosine weighting for triclinic dimensions");
            }
        }
        twopi_box = 2.0 * M_PI / pbc->box[pull->cosdim][pull->cosdim];
    }

    for (size_t g = 0; g < pull->group.size(); g++)
    {
        pull_group_work_t* pgrp = &pull->group[g];

        /* Cosine-weighted COMs behave differently from all other weighted COMs
         * in the sense that the weights depend on instantaneous coordinates,
         * not on pre-set weights. Thus we resize the local weight buffer here.
         */
        if (pgrp->epgrppbc == epgrppbcCOS)
        {
            pgrp->localWeights.resize(pgrp->atomSet.localIndex().size());
        }

        auto comBuffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
                                                c_comBufferStride);

        if (pgrp->needToCalcCom)
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                rvec x_pbc = { 0, 0, 0 };

                switch (pgrp->epgrppbc)
                {
                    case epgrppbcREFAT:
                        /* Set the pbc atom */
                        copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
                        break;
                    case epgrppbcPREVSTEPCOM:
                        /* Set the pbc reference to the COM of the group of the last step */
                        copy_dvec_to_rvec(pgrp->x_prev_step, comm->pbcAtomBuffer[g]);
                        copy_dvec_to_rvec(pgrp->x_prev_step, x_pbc);
                }

                /* The final sums should end up in comSums[0] */
                ComSums& comSumsTotal = pull->comSums[0];

                /* If we have a single-atom group the mass is irrelevant, so
                 * we can remove the mass factor to avoid division by zero.
                 * Note that with constraint pulling the mass does matter, but
                 * in that case a check that the group mass != 0 has been done
                 * before.
                 */
                if (pgrp->params.nat == 1 && pgrp->atomSet.numAtomsLocal() == 1
                    && md->massT[pgrp->atomSet.localIndex()[0]] == 0)
                {
                    GMX_ASSERT(xp == nullptr,
                               "We should not have groups with zero mass with constraints, i.e. "
                               "xp!=NULL");

                    /* Copy the single atom coordinate */
                    for (int d = 0; d < DIM; d++)
                    {
                        comSumsTotal.sum_wmx[d] = x[pgrp->atomSet.localIndex()[0]][d];
                    }
                    /* Set all mass factors to 1 to get the correct COM */
                    comSumsTotal.sum_wm  = 1;
                    comSumsTotal.sum_wwm = 1;
                }
                else if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
                {
                    sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, xp, md->massT, pbc,
                                 x_pbc, &comSumsTotal);
                }
                else
                {
#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
                    for (int t = 0; t < pull->nthreads; t++)
                    {
                        int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
                        int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
                        sum_com_part(pgrp, ind_start, ind_end, x, xp, md->massT, pbc, x_pbc,
                                     &pull->comSums[t]);
                    }

                    /* Reduce the thread contributions to sum_com[0] */
                    for (int t = 1; t < pull->nthreads; t++)
                    {
                        comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
                        comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
                        dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
                        dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
                    }
                }

                if (pgrp->localWeights.empty())
                {
                    comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
                }

                /* Copy local sums to a buffer for global summing */
                copy_dvec(comSumsTotal.sum_wmx, comBuffer[0]);

                copy_dvec(comSumsTotal.sum_wmxp, comBuffer[1]);

                comBuffer[2][0] = comSumsTotal.sum_wm;
                comBuffer[2][1] = comSumsTotal.sum_wwm;
                comBuffer[2][2] = 0;
            }
            else
            {
                /* Cosine weighting geometry.
                 * This uses a slab of the system, thus we always have many
                 * atoms in the pull groups. Therefore, always use threads.
                 */
#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
                for (int t = 0; t < pull->nthreads; t++)
                {
                    int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
                    int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
                    sum_com_part_cosweight(pgrp, ind_start, ind_end, pull->cosdim, twopi_box, x, xp,
                                           md->massT, &pull->comSums[t]);
                }

                /* Reduce the thread contributions to comSums[0] */
                ComSums& comSumsTotal = pull->comSums[0];
                for (int t = 1; t < pull->nthreads; t++)
                {
                    comSumsTotal.sum_cm += pull->comSums[t].sum_cm;
                    comSumsTotal.sum_sm += pull->comSums[t].sum_sm;
                    comSumsTotal.sum_ccm += pull->comSums[t].sum_ccm;
                    comSumsTotal.sum_csm += pull->comSums[t].sum_csm;
                    comSumsTotal.sum_ssm += pull->comSums[t].sum_ssm;
                    comSumsTotal.sum_cmp += pull->comSums[t].sum_cmp;
                    comSumsTotal.sum_smp += pull->comSums[t].sum_smp;
                }

                /* Copy local sums to a buffer for global summing */
                comBuffer[0][0] = comSumsTotal.sum_cm;
                comBuffer[0][1] = comSumsTotal.sum_sm;
                comBuffer[0][2] = 0;
                comBuffer[1][0] = comSumsTotal.sum_ccm;
                comBuffer[1][1] = comSumsTotal.sum_csm;
                comBuffer[1][2] = comSumsTotal.sum_ssm;
                comBuffer[2][0] = comSumsTotal.sum_cmp;
                comBuffer[2][1] = comSumsTotal.sum_smp;
                comBuffer[2][2] = 0;
            }
        }
        else
        {
            clear_dvec(comBuffer[0]);
            clear_dvec(comBuffer[1]);
            clear_dvec(comBuffer[2]);
        }
    }

    pullAllReduce(cr, comm, pull->group.size() * c_comBufferStride * DIM,
                  static_cast<double*>(comm->comBuffer[0]));

    for (size_t g = 0; g < pull->group.size(); g++)
    {
        pull_group_work_t* pgrp;

        pgrp = &pull->group[g];
        if (pgrp->needToCalcCom)
        {
            GMX_ASSERT(pgrp->params.nat > 0,
                       "Normal pull groups should have atoms; only group 0, which should have "
                       "bCalcCom=FALSE, has nat=0");

            const auto comBuffer = gmx::constArrayRefFromArray(
                    comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);

            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                double wmass, wwmass;
                int    m;

                /* Determine the inverse mass */
                wmass         = comBuffer[2][0];
                wwmass        = comBuffer[2][1];
                pgrp->mwscale = 1.0 / wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm != 0)
                {
                    pgrp->wscale = wmass / wwmass;
                    pgrp->invtm  = wwmass / (wmass * wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m] = comBuffer[0][m] * pgrp->mwscale;
                    if (xp)
                    {
                        pgrp->xp[m] = comBuffer[1][m] * pgrp->mwscale;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
                    {
                        pgrp->x[m] += comm->pbcAtomBuffer[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += comm->pbcAtomBuffer[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Cosine weighting geometry */
                double csw, snw, wmass, wwmass;

                /* Determine the optimal location of the cosine weight */
                csw                   = comBuffer[0][0];
                snw                   = comBuffer[0][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
                /* Set the weights for the local atoms */
                wmass  = sqrt(csw * csw + snw * snw);
                wwmass = (comBuffer[1][0] * csw * csw + comBuffer[1][1] * csw * snw
                          + comBuffer[1][2] * snw * snw)
                         / (wmass * wmass);

                pgrp->mwscale = 1.0 / wmass;
                pgrp->wscale  = wmass / wwmass;
                pgrp->invtm   = wwmass / (wmass * wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (size_t i = 0; i < pgrp->atomSet.numAtomsLocal(); i++)
                {
                    int ii                = pgrp->atomSet.localIndex()[i];
                    pgrp->localWeights[i] = csw * std::cos(twopi_box * x[ii][pull->cosdim])
                                            + snw * std::sin(twopi_box * x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = comBuffer[2][0];
                    snw                    = comBuffer[2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
            }
        }
    }

    if (pull->bCylinder)
    {
        /* Calculate the COMs for the cylinder reference groups */
        make_cyl_refgrps(cr, pull, md, pbc, t, x);
    }
}

using BoolVec = gmx::BasicVector<bool>;

/* Returns whether the pull group obeys the PBC restrictions */
static bool pullGroupObeysPbcRestrictions(const pull_group_work_t& group,
                                          const BoolVec&           dimUsed,
                                          const rvec*              x,
                                          const t_pbc&             pbc,
                                          const gmx::RVec&         x_pbc,
                                          const real               pbcMargin)
{
    /* Determine which dimensions are relevant for PBC */
    BoolVec dimUsesPbc       = { false, false, false };
    bool    pbcIsRectangular = true;
    for (int d = 0; d < pbc.ndim_ePBC; d++)
    {
        if (dimUsed[d])
        {
            dimUsesPbc[d] = true;
            /* Any dimension whose box vector has a component along d
             * is also involved in PBC
             */
            for (int d2 = d + 1; d2 < pbc.ndim_ePBC; d2++)
            {
                assert(d2 < DIM);
                if (pbc.box[d2][d] != 0)
                {
                    dimUsesPbc[d2]   = true;
                    pbcIsRectangular = false;
                }
            }
        }
    }

    rvec marginPerDim    = {};
    real marginDistance2 = 0;
    if (pbcIsRectangular)
    {
        /* Use margins for dimensions independently */
        for (int d = 0; d < pbc.ndim_ePBC; d++)
        {
            marginPerDim[d] = pbcMargin * pbc.hbox_diag[d];
        }
    }
    else
    {
        /* Check the total distance along the relevant dimensions */
        for (int d = 0; d < pbc.ndim_ePBC; d++)
        {
            if (dimUsesPbc[d])
            {
                marginDistance2 += pbcMargin * gmx::square(0.5) * norm2(pbc.box[d]);
            }
        }
    }

    auto localAtomIndices = group.atomSet.localIndex();
    for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
    {
        rvec dx;
        pbc_dx(&pbc, x[localAtomIndices[indexInSet]], x_pbc, dx);

        bool atomIsTooFar = false;
        if (pbcIsRectangular)
        {
            for (int d = 0; d < pbc.ndim_ePBC; d++)
            {
                if (dimUsesPbc[d] && (dx[d] < -marginPerDim[d] || dx[d] > marginPerDim[d]))
                {
                    atomIsTooFar = true;
                }
            }
        }
        else
        {
            real pbcDistance2 = 0;
            for (int d = 0; d < pbc.ndim_ePBC; d++)
            {
                if (dimUsesPbc[d])
                {
                    pbcDistance2 += gmx::square(dx[d]);
                }
            }
            atomIsTooFar = (pbcDistance2 > marginDistance2);
        }
        if (atomIsTooFar)
        {
            return false;
        }
    }

    return true;
}

int pullCheckPbcWithinGroups(const pull_t& pull, const rvec* x, const t_pbc& pbc, real pbcMargin)
{
    if (pbc.pbcType == PbcType::No)
    {
        return -1;
    }

    /* Determine what dimensions are used for each group by pull coordinates */
    std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
    for (size_t c = 0; c < pull.coord.size(); c++)
    {
        const t_pull_coord& coordParams = pull.coord[c].params;
        for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
        {
            for (int d = 0; d < DIM; d++)
            {
                if (coordParams.dim[d] && !(coordParams.eGeom == epullgCYL && groupIndex == 0))
                {
                    dimUsed[coordParams.group[groupIndex]][d] = true;
                }
            }
        }
    }

    /* Check PBC for every group that uses a PBC reference atom treatment */
    for (size_t g = 0; g < pull.group.size(); g++)
    {
        const pull_group_work_t& group = pull.group[g];
        if ((group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM)
            && !pullGroupObeysPbcRestrictions(group, dimUsed[g], x, pbc, pull.comm.pbcAtomBuffer[g], pbcMargin))
        {
            return g;
        }
    }

    return -1;
}

bool pullCheckPbcWithinGroup(const pull_t&                  pull,
                             gmx::ArrayRef<const gmx::RVec> x,
                             const t_pbc&                   pbc,
                             int                            groupNr,
                             real                           pbcMargin)
{
    if (pbc.pbcType == PbcType::No)
    {
        return true;
    }
    GMX_ASSERT(groupNr < gmx::ssize(pull.group), "groupNr is out of range");

    /* Check PBC if the group uses a PBC reference atom treatment. */
    const pull_group_work_t& group = pull.group[groupNr];
    if (group.epgrppbc != epgrppbcREFAT && group.epgrppbc != epgrppbcPREVSTEPCOM)
    {
        return true;
    }

    /* Determine what dimensions are used for each group by pull coordinates */
    BoolVec dimUsed = { false, false, false };
    for (size_t c = 0; c < pull.coord.size(); c++)
    {
        const t_pull_coord& coordParams = pull.coord[c].params;
        for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
        {
            if (coordParams.group[groupIndex] == groupNr)
            {
                for (int d = 0; d < DIM; d++)
                {
                    if (coordParams.dim[d] && !(coordParams.eGeom == epullgCYL && groupIndex == 0))
                    {
                        dimUsed[d] = true;
                    }
                }
            }
        }
    }

    return (pullGroupObeysPbcRestrictions(group, dimUsed, as_rvec_array(x.data()), pbc,
                                          pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
}

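/* Copies the previous-step pull group COMs from the (checkpointed) global
 * state into the pull work struct.
 */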
void setPrevStepPullComFromState(struct pull_t* pull, const t_state* state)
{
    for (size_t g = 0; g < pull->group.size(); g++)
    {
        for (int j = 0; j < DIM; j++)
        {
            pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g * DIM + j];
        }
    }
}

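/* Stores the current pull group COMs as the previous-step COMs, both in the
 * pull work struct and in the global state for checkpointing.
 */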
void updatePrevStepPullCom(struct pull_t* pull, t_state* state)
{
    for (size_t g = 0; g < pull->group.size(); g++)
    {
        if (pull->group[g].needToCalcCom)
        {
            for (int j = 0; j < DIM; j++)
            {
                pull->group[g].x_prev_step[j]          = pull->group[g].x[j];
                state->pull_com_prev_step[g * DIM + j] = pull->group[g].x[j];
            }
        }
    }
}

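/* Allocates the previous-step COM buffer in the state, NaN-initialized so
 * that use of unset values is detectable; clears the buffer when pulling
 * is not active.
 */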
void allocStatePrevStepPullCom(t_state* state, const pull_t* pull)
{
    if (!pull)
    {
        state->pull_com_prev_step.clear();
        return;
    }
    size_t ngroup = pull->group.size();
    if (state->pull_com_prev_step.size() / DIM != ngroup)
    {
        state->pull_com_prev_step.resize(ngroup * DIM, NAN);
    }
}

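/* Computes the COMs of the pull groups that use the previous-step COM as PBC
 * reference and stores them as the previous-step COMs. This is used at
 * (re)start, when no previous-step COM is available yet; the PBC reference
 * atom provides the initial reference position.
 */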
void initPullComFromPrevStep(const t_commrec* cr, pull_t* pull, const t_mdatoms* md, t_pbc* pbc, const rvec x[])
{
    pull_comm_t* comm   = &pull->comm;
    size_t       ngroup = pull->group.size();

    if (!comm->bParticipate)
    {
        return;
    }

    GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
               "pbcAtomBuffer should have size number of groups");
    GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
               "comBuffer should have size #group*c_comBufferStride");

    pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);

    for (size_t g = 0; g < ngroup; g++)
    {
        pull_group_work_t* pgrp;

        pgrp = &pull->group[g];

        if (pgrp->needToCalcCom && pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
        {
            GMX_ASSERT(pgrp->params.nat > 1,
                       "Groups with no atoms, or only one atom, should not "
                       "use the COM from the previous step as reference.");

            rvec x_pbc = { 0, 0, 0 };
            copy_rvec(comm->pbcAtomBuffer[g], x_pbc);

            if (debug)
            {
                fprintf(debug, "Initialising prev step COM of pull group %zu. x_pbc =", g);
                for (int m = 0; m < DIM; m++)
                {
                    fprintf(debug, " %f", x_pbc[m]);
                }
                fprintf(debug, "\n");
            }

            /* The following is to a large extent similar to pull_calc_coms() */

            /* The final sums should end up in sum_com[0] */
            ComSums& comSumsTotal = pull->comSums[0];

            if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
            {
                sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, nullptr, md->massT, pbc,
                             x_pbc, &comSumsTotal);
            }
            else
            {
#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
                for (int t = 0; t < pull->nthreads; t++)
                {
                    int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
                    int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
                    sum_com_part(pgrp, ind_start, ind_end, x, nullptr, md->massT, pbc, x_pbc,
                                 &pull->comSums[t]);
                }

                /* Reduce the thread contributions to sum_com[0] */
                for (int t = 1; t < pull->nthreads; t++)
                {
                    comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
                    comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
                    dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
                    dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
                }
            }

            if (pgrp->localWeights.empty())
            {
                comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
            }

            /* Copy local sums to a buffer for global summing */
            auto localSums = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
                                                    c_comBufferStride);

            localSums[0]    = comSumsTotal.sum_wmx;
            localSums[1]    = comSumsTotal.sum_wmxp;
            localSums[2][0] = comSumsTotal.sum_wm;
            localSums[2][1] = comSumsTotal.sum_wwm;
            localSums[2][2] = 0;
        }
    }

    pullAllReduce(cr, comm, ngroup * c_comBufferStride * DIM, static_cast<double*>(comm->comBuffer[0]));

    for (size_t g = 0; g < ngroup; g++)
    {
        pull_group_work_t* pgrp;

        pgrp = &pull->group[g];
        if (pgrp->needToCalcCom)
        {
            if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
            {
                auto localSums = gmx::arrayRefFromArray(
                        comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
                double wmass, wwmass;

                /* Determine the inverse mass */
                wmass         = localSums[2][0];
                wwmass        = localSums[2][1];
                pgrp->mwscale = 1.0 / wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm != 0)
                {
                    pgrp->wscale = wmass / wwmass;
                    pgrp->invtm  = wwmass / (wmass * wmass);
                }
                /* Divide by the total mass */
                for (int m = 0; m < DIM; m++)
                {
                    pgrp->x[m] = localSums[0][m] * pgrp->mwscale;
                    pgrp->x[m] += comm->pbcAtomBuffer[g][m];
                }
                if (debug)
                {
                    fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale,
                            pgrp->invtm);
                    fprintf(debug, "Initialising prev step COM of pull group %zu to", g);
                    for (int m = 0; m < DIM; m++)
                    {
                        fprintf(debug, " %f", pgrp->x[m]);
                    }
                    fprintf(debug, "\n");
                }
                copy_dvec(pgrp->x, pgrp->x_prev_step);
            }
        }
    }
}