src/gromacs/pulling/pullutil.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "config.h"
  40
  41 #include <cassert>
  42 #include <cstdlib>
  43
  44 #include "gromacs/fileio/confio.h"
  45 #include "gromacs/gmxlib/network.h"
  46 #include "gromacs/math/functions.h"
  47 #include "gromacs/math/utilities.h"
  48 #include "gromacs/math/vec.h"
  49 #include "gromacs/mdtypes/commrec.h"
  50 #include "gromacs/mdtypes/inputrec.h"
  51 #include "gromacs/mdtypes/md_enums.h"
  52 #include "gromacs/mdtypes/mdatom.h"
  53 #include "gromacs/mdtypes/state.h"
  54 #include "gromacs/pbcutil/pbc.h"
  55 #include "gromacs/pulling/pull.h"
  56 #include "gromacs/utility/fatalerror.h"
  57 #include "gromacs/utility/futil.h"
  58 #include "gromacs/utility/gmxassert.h"
  59 #include "gromacs/utility/real.h"
  60 #include "gromacs/utility/smalloc.h"
  61
  62 #include "pull_internal.h"
  63
  64 #if GMX_MPI
  65
  66 // Helper function to deduce MPI datatype from the type of data
  67 gmx_unused static MPI_Datatype mpiDatatype(const float gmx_unused* data)
  68 {
  69     return MPI_FLOAT;
  70 }
  71
  72 // Helper function to deduce MPI datatype from the type of data
  73 gmx_unused static MPI_Datatype mpiDatatype(const double gmx_unused* data)
  74 {
  75     return MPI_DOUBLE;
  76 }
  77
  78 #endif // GMX_MPI
  79
  80 #if !GMX_DOUBLE
  81 // Helper function; note that gmx_sum(d) should actually be templated
  82 gmx_unused static void gmxAllReduce(int n, real* data, const t_commrec* cr)
  83 {
  84     gmx_sum(n, data, cr);
  85 }
  86 #endif
  87
  88 // Helper function; note that gmx_sum(d) should actually be templated
  89 gmx_unused static void gmxAllReduce(int n, double* data, const t_commrec* cr)
  90 {
  91     gmx_sumd(n, data, cr);
  92 }
  93
  94 // Reduce data of n elements over all ranks currently participating in pull
  95 template<typename T>
  96 static void pullAllReduce(const t_commrec* cr, pull_comm_t* comm, int n, T* data)
  97 {
  98     if (cr != nullptr && PAR(cr))
  99     {
 100         if (comm->bParticipateAll)
 101         {
 102             /* Sum the contributions over all DD ranks */
 103             gmxAllReduce(n, data, cr);
 104         }
 105         else
 106         {
 107             /* Separate branch because gmx_sum uses cr->mpi_comm_mygroup */
 108 #if GMX_MPI
 109 #    if MPI_IN_PLACE_EXISTS
 110             MPI_Allreduce(MPI_IN_PLACE, data, n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
 111 #    else
 112             std::vector<T> buf(n);
 113
 114             MPI_Allreduce(data, buf.data(), n, mpiDatatype(data), MPI_SUM, comm->mpi_comm_com);
 115
 116             /* Copy the result from the buffer to the input/output data */
 117             for (int i = 0; i < n; i++)
 118             {
 119                 data[i] = buf[i];
 120             }
 121 #    endif
 122 #else
 123             gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
 124 #endif
 125         }
 126     }
 127 }
 128
 129 /* Copies the coordinates of the PBC atom of pgrp to x_pbc.
 130  * When those coordinates are not available on this rank, clears x_pbc.
 131  */
 132 static void setPbcAtomCoords(const pull_group_work_t& pgrp, const rvec* x, rvec x_pbc)
 133 {
 134     if (pgrp.pbcAtomSet != nullptr)
 135     {
 136         if (pgrp.pbcAtomSet->numAtomsLocal() > 0)
 137         {
 138             /* We have the atom locally, copy its coordinates */
 139             copy_rvec(x[pgrp.pbcAtomSet->localIndex()[0]], x_pbc);
 140         }
 141         else
 142         {
 143             /* Another rank has it, clear the coordinates for MPI_Allreduce */
 144             clear_rvec(x_pbc);
 145         }
 146     }
 147     else
 148     {
 149         copy_rvec(x[pgrp.params.pbcatom], x_pbc);
 150     }
 151 }
 152
 153 static void pull_set_pbcatoms(const t_commrec* cr, struct pull_t* pull, const rvec* x, gmx::ArrayRef<gmx::RVec> x_pbc)
 154 {
 155     int numPbcAtoms = 0;
 156     for (size_t g = 0; g < pull->group.size(); g++)
 157     {
 158         const pull_group_work_t& group = pull->group[g];
 159         if (group.needToCalcCom && (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM))
 160         {
 161             setPbcAtomCoords(pull->group[g], x, x_pbc[g]);
 162             numPbcAtoms++;
 163         }
 164         else
 165         {
 166             clear_rvec(x_pbc[g]);
 167         }
 168     }
 169
 170     if (cr && PAR(cr) && numPbcAtoms > 0)
 171     {
 172         /* Sum over participating ranks to get x_pbc from the home ranks.
 173          * This can be very expensive at high parallelization, so we only
 174          * do this after each DD repartitioning.
 175          */
 176         pullAllReduce(cr, &pull->comm, pull->group.size() * DIM, static_cast<real*>(x_pbc[0]));
 177     }
 178 }
 179
 180 static void
 181 make_cyl_refgrps(const t_commrec* cr, pull_t* pull, const t_mdatoms* md, t_pbc* pbc, double t, const rvec* x)
 182 {
 183     pull_comm_t* comm = &pull->comm;
 184
 185     GMX_ASSERT(comm->cylinderBuffer.size() == pull->coord.size() * c_cylinderBufferStride,
 186                "cylinderBuffer should have the correct size");
 187
 188     double inv_cyl_r2 = 1.0 / gmx::square(pull->params.cylinder_r);
 189
 190     /* loop over all groups to make a reference group for each*/
 191     for (size_t c = 0; c < pull->coord.size(); c++)
 192     {
 193         pull_coord_work_t* pcrd;
 194         double             sum_a, wmass, wwmass;
 195         dvec               radf_fac0, radf_fac1;
 196
 197         pcrd = &pull->coord[c];
 198
 199         sum_a  = 0;
 200         wmass  = 0;
 201         wwmass = 0;
 202         clear_dvec(radf_fac0);
 203         clear_dvec(radf_fac1);
 204
 205         if (pcrd->params.eGeom == epullgCYL)
 206         {
 207             /* pref will be the same group for all pull coordinates */
 208             const pull_group_work_t& pref  = pull->group[pcrd->params.group[0]];
 209             const pull_group_work_t& pgrp  = pull->group[pcrd->params.group[1]];
 210             pull_group_work_t&       pdyna = pull->dyna[c];
 211             rvec                     direction;
 212             copy_dvec_to_rvec(pcrd->spatialData.vec, direction);
 213
 214             /* Since we have not calculated the COM of the cylinder group yet,
 215              * we calculate distances with respect to location of the pull
 216              * group minus the reference position along the vector.
 217              * here we already have the COM of the pull group. This resolves
 218              * any PBC issues and we don't need to use a PBC-atom here.
 219              */
 220             if (pcrd->params.rate != 0)
 221             {
 222                 /* With rate=0, value_ref is set initially */
 223                 pcrd->value_ref = pcrd->params.init + pcrd->params.rate * t;
 224             }
 225             rvec reference;
 226             for (int m = 0; m < DIM; m++)
 227             {
 228                 reference[m] = pgrp.x[m] - pcrd->spatialData.vec[m] * pcrd->value_ref;
 229             }
 230
 231             auto localAtomIndices = pref.atomSet.localIndex();
 232
 233             /* This actually only needs to be done at init or DD time,
 234              * but resizing with the same size does not cause much overhead.
 235              */
 236             pdyna.localWeights.resize(localAtomIndices.size());
 237             pdyna.mdw.resize(localAtomIndices.size());
 238             pdyna.dv.resize(localAtomIndices.size());
 239
 240             /* loop over all atoms in the main ref group */
 241             for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
 242             {
 243                 int  atomIndex = localAtomIndices[indexInSet];
 244                 rvec dx;
 245                 pbc_dx_aiuc(pbc, x[atomIndex], reference, dx);
 246                 double axialLocation = iprod(direction, dx);
 247                 dvec   radialLocation;
 248                 double dr2 = 0;
 249                 for (int m = 0; m < DIM; m++)
 250                 {
 251                     /* Determine the radial components */
 252                     radialLocation[m] = dx[m] - axialLocation * direction[m];
 253                     dr2 += gmx::square(radialLocation[m]);
 254                 }
 255                 double dr2_rel = dr2 * inv_cyl_r2;
 256
 257                 if (dr2_rel < 1)
 258                 {
 259                     /* add atom to sum of COM and to weight array */
 260
 261                     double mass = md->massT[atomIndex];
 262                     /* The radial weight function is 1-2x^2+x^4,
 263                      * where x=r/cylinder_r. Since this function depends
 264                      * on the radial component, we also get radial forces
 265                      * on both groups.
 266                      */
 267                     double weight                  = 1 + (-2 + dr2_rel) * dr2_rel;
 268                     double dweight_r               = (-4 + 4 * dr2_rel) * inv_cyl_r2;
 269                     pdyna.localWeights[indexInSet] = weight;
 270                     sum_a += mass * weight * axialLocation;
 271                     wmass += mass * weight;
 272                     wwmass += mass * weight * weight;
 273                     dvec mdw;
 274                     dsvmul(mass * dweight_r, radialLocation, mdw);
 275                     copy_dvec(mdw, pdyna.mdw[indexInSet]);
 276                     /* Currently we only have the axial component of the
 277                      * offset from the cylinder COM up to an unkown offset.
 278                      * We add this offset after the reduction needed
 279                      * for determining the COM of the cylinder group.
 280                      */
 281                     pdyna.dv[indexInSet] = axialLocation;
 282                     for (int m = 0; m < DIM; m++)
 283                     {
 284                         radf_fac0[m] += mdw[m];
 285                         radf_fac1[m] += mdw[m] * axialLocation;
 286                     }
 287                 }
 288                 else
 289                 {
 290                     pdyna.localWeights[indexInSet] = 0;
 291                 }
 292             }
 293         }
 294
 295         auto buffer = gmx::arrayRefFromArray(
 296                 comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
 297
 298         buffer[0] = wmass;
 299         buffer[1] = wwmass;
 300         buffer[2] = sum_a;
 301
 302         buffer[3] = radf_fac0[XX];
 303         buffer[4] = radf_fac0[YY];
 304         buffer[5] = radf_fac0[ZZ];
 305
 306         buffer[6] = radf_fac1[XX];
 307         buffer[7] = radf_fac1[YY];
 308         buffer[8] = radf_fac1[ZZ];
 309     }
 310
 311     if (cr != nullptr && PAR(cr))
 312     {
 313         /* Sum the contributions over the ranks */
 314         pullAllReduce(cr, comm, pull->coord.size() * c_cylinderBufferStride, comm->cylinderBuffer.data());
 315     }
 316
 317     for (size_t c = 0; c < pull->coord.size(); c++)
 318     {
 319         pull_coord_work_t* pcrd;
 320
 321         pcrd = &pull->coord[c];
 322
 323         if (pcrd->params.eGeom == epullgCYL)
 324         {
 325             pull_group_work_t*    pdyna       = &pull->dyna[c];
 326             pull_group_work_t*    pgrp        = &pull->group[pcrd->params.group[1]];
 327             PullCoordSpatialData& spatialData = pcrd->spatialData;
 328
 329             auto buffer = gmx::constArrayRefFromArray(
 330                     comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
 331             double wmass   = buffer[0];
 332             double wwmass  = buffer[1];
 333             pdyna->mwscale = 1.0 / wmass;
 334             /* Cylinder pulling can't be used with constraints, but we set
 335              * wscale and invtm anyhow, in case someone would like to use them.
 336              */
 337             pdyna->wscale = wmass / wwmass;
 338             pdyna->invtm  = wwmass / (wmass * wmass);
 339
 340             /* We store the deviation of the COM from the reference location
 341              * used above, since we need it when we apply the radial forces
 342              * to the atoms in the cylinder group.
 343              */
 344             spatialData.cyl_dev = 0;
 345             for (int m = 0; m < DIM; m++)
 346             {
 347                 double reference = pgrp->x[m] - spatialData.vec[m] * pcrd->value_ref;
 348                 double dist      = -spatialData.vec[m] * buffer[2] * pdyna->mwscale;
 349                 pdyna->x[m]      = reference - dist;
 350                 spatialData.cyl_dev += dist;
 351             }
 352             /* Now we know the exact COM of the cylinder reference group,
 353              * we can determine the radial force factor (ffrad) that when
 354              * multiplied with the axial pull force will give the radial
 355              * force on the pulled (non-cylinder) group.
 356              */
 357             for (int m = 0; m < DIM; m++)
 358             {
 359                 spatialData.ffrad[m] = (buffer[6 + m] + buffer[3 + m] * spatialData.cyl_dev) / wmass;
 360             }
 361
 362             if (debug)
 363             {
 364                 fprintf(debug, "Pull cylinder group %zu:%8.3f%8.3f%8.3f m:%8.3f\n", c, pdyna->x[0],
 365                         pdyna->x[1], pdyna->x[2], 1.0 / pdyna->invtm);
 366                 fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n", spatialData.ffrad[XX],
 367                         spatialData.ffrad[YY], spatialData.ffrad[ZZ]);
 368             }
 369         }
 370     }
 371 }
 372
 373 static double atan2_0_2pi(double y, double x)
 374 {
 375     double a;
 376
 377     a = atan2(y, x);
 378     if (a < 0)
 379     {
 380         a += 2.0 * M_PI;
 381     }
 382     return a;
 383 }
 384
 385 static void sum_com_part(const pull_group_work_t* pgrp,
 386                          int                      ind_start,
 387                          int                      ind_end,
 388                          const rvec*              x,
 389                          const rvec*              xp,
 390                          const real*              mass,
 391                          const t_pbc*             pbc,
 392                          const rvec               x_pbc,
 393                          ComSums*                 sum_com)
 394 {
 395     double sum_wm   = 0;
 396     double sum_wwm  = 0;
 397     dvec   sum_wmx  = { 0, 0, 0 };
 398     dvec   sum_wmxp = { 0, 0, 0 };
 399
 400     auto localAtomIndices = pgrp->atomSet.localIndex();
 401     for (int i = ind_start; i < ind_end; i++)
 402     {
 403         int  ii = localAtomIndices[i];
 404         real wm;
 405         if (pgrp->localWeights.empty())
 406         {
 407             wm = mass[ii];
 408             sum_wm += wm;
 409         }
 410         else
 411         {
 412             real w;
 413
 414             w  = pgrp->localWeights[i];
 415             wm = w * mass[ii];
 416             sum_wm += wm;
 417             sum_wwm += wm * w;
 418         }
 419         if (pgrp->epgrppbc == epgrppbcNONE)
 420         {
 421             /* Plain COM: sum the coordinates */
 422             for (int d = 0; d < DIM; d++)
 423             {
 424                 sum_wmx[d] += wm * x[ii][d];
 425             }
 426             if (xp)
 427             {
 428                 for (int d = 0; d < DIM; d++)
 429                 {
 430                     sum_wmxp[d] += wm * xp[ii][d];
 431                 }
 432             }
 433         }
 434         else
 435         {
 436             rvec dx;
 437
 438             /* Sum the difference with the reference atom */
 439             pbc_dx(pbc, x[ii], x_pbc, dx);
 440             for (int d = 0; d < DIM; d++)
 441             {
 442                 sum_wmx[d] += wm * dx[d];
 443             }
 444             if (xp)
 445             {
 446                 /* For xp add the difference between xp and x to dx,
 447                  * such that we use the same periodic image,
 448                  * also when xp has a large displacement.
 449                  */
 450                 for (int d = 0; d < DIM; d++)
 451                 {
 452                     sum_wmxp[d] += wm * (dx[d] + xp[ii][d] - x[ii][d]);
 453                 }
 454             }
 455         }
 456     }
 457
 458     sum_com->sum_wm  = sum_wm;
 459     sum_com->sum_wwm = sum_wwm;
 460     copy_dvec(sum_wmx, sum_com->sum_wmx);
 461     if (xp)
 462     {
 463         copy_dvec(sum_wmxp, sum_com->sum_wmxp);
 464     }
 465 }
 466
 467 static void sum_com_part_cosweight(const pull_group_work_t* pgrp,
 468                                    int                      ind_start,
 469                                    int                      ind_end,
 470                                    int                      cosdim,
 471                                    real                     twopi_box,
 472                                    const rvec*              x,
 473                                    const rvec*              xp,
 474                                    const real*              mass,
 475                                    ComSums*                 sum_com)
 476 {
 477     /* Cosine weighting geometry */
 478     double sum_cm  = 0;
 479     double sum_sm  = 0;
 480     double sum_ccm = 0;
 481     double sum_csm = 0;
 482     double sum_ssm = 0;
 483     double sum_cmp = 0;
 484     double sum_smp = 0;
 485
 486     auto localAtomIndices = pgrp->atomSet.localIndex();
 487
 488     for (int i = ind_start; i < ind_end; i++)
 489     {
 490         int  ii = localAtomIndices[i];
 491         real m  = mass[ii];
 492         /* Determine cos and sin sums */
 493         real cw = std::cos(x[ii][cosdim] * twopi_box);
 494         real sw = std::sin(x[ii][cosdim] * twopi_box);
 495         sum_cm += static_cast<double>(cw * m);
 496         sum_sm += static_cast<double>(sw * m);
 497         sum_ccm += static_cast<double>(cw * cw * m);
 498         sum_csm += static_cast<double>(cw * sw * m);
 499         sum_ssm += static_cast<double>(sw * sw * m);
 500
 501         if (xp != nullptr)
 502         {
 503             real cw = std::cos(xp[ii][cosdim] * twopi_box);
 504             real sw = std::sin(xp[ii][cosdim] * twopi_box);
 505             sum_cmp += static_cast<double>(cw * m);
 506             sum_smp += static_cast<double>(sw * m);
 507         }
 508     }
 509
 510     sum_com->sum_cm  = sum_cm;
 511     sum_com->sum_sm  = sum_sm;
 512     sum_com->sum_ccm = sum_ccm;
 513     sum_com->sum_csm = sum_csm;
 514     sum_com->sum_ssm = sum_ssm;
 515     sum_com->sum_cmp = sum_cmp;
 516     sum_com->sum_smp = sum_smp;
 517 }
 518
 519 /* calculates center of mass of selection index from all coordinates x */
 520 // Compiler segfault with 2019_update_5 and 2020_initial
 521 #if defined(__INTEL_COMPILER) \
 522         && ((__INTEL_COMPILER == 1900 && __INTEL_COMPILER_UPDATE >= 5) || __INTEL_COMPILER >= 1910)
 523 #    pragma intel optimization_level 2
 524 #endif
 525 void pull_calc_coms(const t_commrec* cr,
 526                     pull_t*          pull,
 527                     const t_mdatoms* md,
 528                     t_pbc*           pbc,
 529                     double           t,
 530                     const rvec       x[],
 531                     rvec*            xp)
 532 {
 533     real         twopi_box = 0;
 534     pull_comm_t* comm;
 535
 536     comm = &pull->comm;
 537
 538     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
 539                "pbcAtomBuffer should have size number of groups");
 540     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
 541                "comBuffer should have size #group*c_comBufferStride");
 542
 543     if (pull->bRefAt && pull->bSetPBCatoms)
 544     {
 545         pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
 546
 547         if (cr != nullptr && DOMAINDECOMP(cr))
 548         {
 549             /* We can keep these PBC reference coordinates fixed for nstlist
 550              * steps, since atoms won't jump over PBC.
 551              * This avoids a global reduction at the next nstlist-1 steps.
 552              * Note that the exact values of the pbc reference coordinates
 553              * are irrelevant, as long all atoms in the group are within
 554              * half a box distance of the reference coordinate.
 555              */
 556             pull->bSetPBCatoms = FALSE;
 557         }
 558     }
 559
 560     if (pull->cosdim >= 0)
 561     {
 562         int m;
 563
 564         assert(pull->npbcdim <= DIM);
 565
 566         for (m = pull->cosdim + 1; m < pull->npbcdim; m++)
 567         {
 568             if (pbc->box[m][pull->cosdim] != 0)
 569             {
 570                 gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
 571             }
 572         }
 573         twopi_box = 2.0 * M_PI / pbc->box[pull->cosdim][pull->cosdim];
 574     }
 575
 576     for (size_t g = 0; g < pull->group.size(); g++)
 577     {
 578         pull_group_work_t* pgrp = &pull->group[g];
 579
 580         /* Cosine-weighted COMs behave different from all other weighted COMs
 581          * in the sense that the weights depend on instantaneous coordinates,
 582          * not on pre-set weights. Thus we resize the local weight buffer here.
 583          */
 584         if (pgrp->epgrppbc == epgrppbcCOS)
 585         {
 586             pgrp->localWeights.resize(pgrp->atomSet.localIndex().size());
 587         }
 588
 589         auto comBuffer = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
 590                                                 c_comBufferStride);
 591
 592         if (pgrp->needToCalcCom)
 593         {
 594             if (pgrp->epgrppbc != epgrppbcCOS)
 595             {
 596                 rvec x_pbc = { 0, 0, 0 };
 597
 598                 switch (pgrp->epgrppbc)
 599                 {
 600                     case epgrppbcREFAT:
 601                         /* Set the pbc atom */
 602                         copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
 603                         break;
 604                     case epgrppbcPREVSTEPCOM:
 605                         /* Set the pbc reference to the COM of the group of the last step */
 606                         copy_dvec_to_rvec(pgrp->x_prev_step, comm->pbcAtomBuffer[g]);
 607                         copy_dvec_to_rvec(pgrp->x_prev_step, x_pbc);
 608                 }
 609
 610                 /* The final sums should end up in comSums[0] */
 611                 ComSums& comSumsTotal = pull->comSums[0];
 612
 613                 /* If we have a single-atom group the mass is irrelevant, so
 614                  * we can remove the mass factor to avoid division by zero.
 615                  * Note that with constraint pulling the mass does matter, but
 616                  * in that case a check group mass != 0 has been done before.
 617                  */
 618                 if (pgrp->params.nat == 1 && pgrp->atomSet.numAtomsLocal() == 1
 619                     && md->massT[pgrp->atomSet.localIndex()[0]] == 0)
 620                 {
 621                     GMX_ASSERT(xp == nullptr,
 622                                "We should not have groups with zero mass with constraints, i.e. "
 623                                "xp!=NULL");
 624
 625                     /* Copy the single atom coordinate */
 626                     for (int d = 0; d < DIM; d++)
 627                     {
 628                         comSumsTotal.sum_wmx[d] = x[pgrp->atomSet.localIndex()[0]][d];
 629                     }
 630                     /* Set all mass factors to 1 to get the correct COM */
 631                     comSumsTotal.sum_wm  = 1;
 632                     comSumsTotal.sum_wwm = 1;
 633                 }
 634                 else if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
 635                 {
 636                     sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, xp, md->massT, pbc,
 637                                  x_pbc, &comSumsTotal);
 638                 }
 639                 else
 640                 {
 641 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
 642                     for (int t = 0; t < pull->nthreads; t++)
 643                     {
 644                         int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
 645                         int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
 646                         sum_com_part(pgrp, ind_start, ind_end, x, xp, md->massT, pbc, x_pbc,
 647                                      &pull->comSums[t]);
 648                     }
 649
 650                     /* Reduce the thread contributions to sum_com[0] */
 651                     for (int t = 1; t < pull->nthreads; t++)
 652                     {
 653                         comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
 654                         comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
 655                         dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
 656                         dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
 657                     }
 658                 }
 659
 660                 if (pgrp->localWeights.empty())
 661                 {
 662                     comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
 663                 }
 664
 665                 /* Copy local sums to a buffer for global summing */
 666                 copy_dvec(comSumsTotal.sum_wmx, comBuffer[0]);
 667
 668                 copy_dvec(comSumsTotal.sum_wmxp, comBuffer[1]);
 669
 670                 comBuffer[2][0] = comSumsTotal.sum_wm;
 671                 comBuffer[2][1] = comSumsTotal.sum_wwm;
 672                 comBuffer[2][2] = 0;
 673             }
 674             else
 675             {
 676                 /* Cosine weighting geometry.
 677                  * This uses a slab of the system, thus we always have many
 678                  * atoms in the pull groups. Therefore, always use threads.
 679                  */
 680 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
 681                 for (int t = 0; t < pull->nthreads; t++)
 682                 {
 683                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
 684                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
 685                     sum_com_part_cosweight(pgrp, ind_start, ind_end, pull->cosdim, twopi_box, x, xp,
 686                                            md->massT, &pull->comSums[t]);
 687                 }
 688
 689                 /* Reduce the thread contributions to comSums[0] */
 690                 ComSums& comSumsTotal = pull->comSums[0];
 691                 for (int t = 1; t < pull->nthreads; t++)
 692                 {
 693                     comSumsTotal.sum_cm += pull->comSums[t].sum_cm;
 694                     comSumsTotal.sum_sm += pull->comSums[t].sum_sm;
 695                     comSumsTotal.sum_ccm += pull->comSums[t].sum_ccm;
 696                     comSumsTotal.sum_csm += pull->comSums[t].sum_csm;
 697                     comSumsTotal.sum_ssm += pull->comSums[t].sum_ssm;
 698                     comSumsTotal.sum_cmp += pull->comSums[t].sum_cmp;
 699                     comSumsTotal.sum_smp += pull->comSums[t].sum_smp;
 700                 }
 701
 702                 /* Copy local sums to a buffer for global summing */
 703                 comBuffer[0][0] = comSumsTotal.sum_cm;
 704                 comBuffer[0][1] = comSumsTotal.sum_sm;
 705                 comBuffer[0][2] = 0;
 706                 comBuffer[1][0] = comSumsTotal.sum_ccm;
 707                 comBuffer[1][1] = comSumsTotal.sum_csm;
 708                 comBuffer[1][2] = comSumsTotal.sum_ssm;
 709                 comBuffer[2][0] = comSumsTotal.sum_cmp;
 710                 comBuffer[2][1] = comSumsTotal.sum_smp;
 711                 comBuffer[2][2] = 0;
 712             }
 713         }
 714         else
 715         {
 716             clear_dvec(comBuffer[0]);
 717             clear_dvec(comBuffer[1]);
 718             clear_dvec(comBuffer[2]);
 719         }
 720     }
 721
 722     pullAllReduce(cr, comm, pull->group.size() * c_comBufferStride * DIM,
 723                   static_cast<double*>(comm->comBuffer[0]));
 724
 725     for (size_t g = 0; g < pull->group.size(); g++)
 726     {
 727         pull_group_work_t* pgrp;
 728
 729         pgrp = &pull->group[g];
 730         if (pgrp->needToCalcCom)
 731         {
 732             GMX_ASSERT(pgrp->params.nat > 0,
 733                        "Normal pull groups should have atoms, only group 0, which should have "
 734                        "bCalcCom=FALSE has nat=0");
 735
 736             const auto comBuffer = gmx::constArrayRefFromArray(
 737                     comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
 738
 739             if (pgrp->epgrppbc != epgrppbcCOS)
 740             {
 741                 double wmass, wwmass;
 742                 int    m;
 743
 744                 /* Determine the inverse mass */
 745                 wmass         = comBuffer[2][0];
 746                 wwmass        = comBuffer[2][1];
 747                 pgrp->mwscale = 1.0 / wmass;
 748                 /* invtm==0 signals a frozen group, so then we should keep it zero */
 749                 if (pgrp->invtm != 0)
 750                 {
 751                     pgrp->wscale = wmass / wwmass;
 752                     pgrp->invtm  = wwmass / (wmass * wmass);
 753                 }
 754                 /* Divide by the total mass */
 755                 for (m = 0; m < DIM; m++)
 756                 {
 757                     pgrp->x[m] = comBuffer[0][m] * pgrp->mwscale;
 758                     if (xp)
 759                     {
 760                         pgrp->xp[m] = comBuffer[1][m] * pgrp->mwscale;
 761                     }
 762                     if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
 763                     {
 764                         pgrp->x[m] += comm->pbcAtomBuffer[g][m];
 765                         if (xp)
 766                         {
 767                             pgrp->xp[m] += comm->pbcAtomBuffer[g][m];
 768                         }
 769                     }
 770                 }
 771             }
 772             else
 773             {
 774                 /* Cosine weighting geometry */
 775                 double csw, snw, wmass, wwmass;
 776
 777                 /* Determine the optimal location of the cosine weight */
 778                 csw                   = comBuffer[0][0];
 779                 snw                   = comBuffer[0][1];
 780                 pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
 781                 /* Set the weights for the local atoms */
 782                 wmass  = sqrt(csw * csw + snw * snw);
 783                 wwmass = (comBuffer[1][0] * csw * csw + comBuffer[1][1] * csw * snw
 784                           + comBuffer[1][2] * snw * snw)
 785                          / (wmass * wmass);
 786
 787                 pgrp->mwscale = 1.0 / wmass;
 788                 pgrp->wscale  = wmass / wwmass;
 789                 pgrp->invtm   = wwmass / (wmass * wmass);
 790                 /* Set the weights for the local atoms */
 791                 csw *= pgrp->invtm;
 792                 snw *= pgrp->invtm;
 793                 for (size_t i = 0; i < pgrp->atomSet.numAtomsLocal(); i++)
 794                 {
 795                     int ii                = pgrp->atomSet.localIndex()[i];
 796                     pgrp->localWeights[i] = csw * std::cos(twopi_box * x[ii][pull->cosdim])
 797                                             + snw * std::sin(twopi_box * x[ii][pull->cosdim]);
 798                 }
 799                 if (xp)
 800                 {
 801                     csw                    = comBuffer[2][0];
 802                     snw                    = comBuffer[2][1];
 803                     pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw) / twopi_box;
 804                 }
 805             }
 806             if (debug)
 807             {
 808                 fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale, pgrp->invtm);
 809             }
 810         }
 811     }
 812
 813     if (pull->bCylinder)
 814     {
 815         /* Calculate the COMs for the cyclinder reference groups */
 816         make_cyl_refgrps(cr, pull, md, pbc, t, x);
 817     }
 818 }
 819
 820 using BoolVec = gmx::BasicVector<bool>;
 821
 822 /* Returns whether the pull group obeys the PBC restrictions */
 823 static bool pullGroupObeysPbcRestrictions(const pull_group_work_t& group,
 824                                           const BoolVec&           dimUsed,
 825                                           const rvec*              x,
 826                                           const t_pbc&             pbc,
 827                                           const gmx::RVec&         x_pbc,
 828                                           const real               pbcMargin)
 829 {
 830     /* Determine which dimensions are relevant for PBC */
 831     BoolVec dimUsesPbc       = { false, false, false };
 832     bool    pbcIsRectangular = true;
 833     for (int d = 0; d < pbc.ndim_ePBC; d++)
 834     {
 835         if (dimUsed[d])
 836         {
 837             dimUsesPbc[d] = true;
 838             /* All non-zero dimensions of vector v are involved in PBC */
 839             for (int d2 = d + 1; d2 < pbc.ndim_ePBC; d2++)
 840             {
 841                 assert(d2 < DIM);
 842                 if (pbc.box[d2][d] != 0)
 843                 {
 844                     dimUsesPbc[d2]   = true;
 845                     pbcIsRectangular = false;
 846                 }
 847             }
 848         }
 849     }
 850
 851     rvec marginPerDim    = {};
 852     real marginDistance2 = 0;
 853     if (pbcIsRectangular)
 854     {
 855         /* Use margins for dimensions independently */
 856         for (int d = 0; d < pbc.ndim_ePBC; d++)
 857         {
 858             marginPerDim[d] = pbcMargin * pbc.hbox_diag[d];
 859         }
 860     }
 861     else
 862     {
 863         /* Check the total distance along the relevant dimensions */
 864         for (int d = 0; d < pbc.ndim_ePBC; d++)
 865         {
 866             if (dimUsesPbc[d])
 867             {
 868                 marginDistance2 += pbcMargin * gmx::square(0.5) * norm2(pbc.box[d]);
 869             }
 870         }
 871     }
 872
 873     auto localAtomIndices = group.atomSet.localIndex();
 874     for (gmx::index indexInSet = 0; indexInSet < localAtomIndices.ssize(); indexInSet++)
 875     {
 876         rvec dx;
 877         pbc_dx(&pbc, x[localAtomIndices[indexInSet]], x_pbc, dx);
 878
 879         bool atomIsTooFar = false;
 880         if (pbcIsRectangular)
 881         {
 882             for (int d = 0; d < pbc.ndim_ePBC; d++)
 883             {
 884                 if (dimUsesPbc[d] && (dx[d] < -marginPerDim[d] || dx[d] > marginPerDim[d]))
 885                 {
 886                     atomIsTooFar = true;
 887                 }
 888             }
 889         }
 890         else
 891         {
 892             real pbcDistance2 = 0;
 893             for (int d = 0; d < pbc.ndim_ePBC; d++)
 894             {
 895                 if (dimUsesPbc[d])
 896                 {
 897                     pbcDistance2 += gmx::square(dx[d]);
 898                 }
 899             }
 900             atomIsTooFar = (pbcDistance2 > marginDistance2);
 901         }
 902         if (atomIsTooFar)
 903         {
 904             return false;
 905         }
 906     }
 907
 908     return true;
 909 }
 910
 911 int pullCheckPbcWithinGroups(const pull_t& pull, const rvec* x, const t_pbc& pbc, real pbcMargin)
 912 {
 913     if (pbc.ePBC == epbcNONE)
 914     {
 915         return -1;
 916     }
 917
 918     /* Determine what dimensions are used for each group by pull coordinates */
 919     std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
 920     for (size_t c = 0; c < pull.coord.size(); c++)
 921     {
 922         const t_pull_coord& coordParams = pull.coord[c].params;
 923         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
 924         {
 925             for (int d = 0; d < DIM; d++)
 926             {
 927                 if (coordParams.dim[d] && !(coordParams.eGeom == epullgCYL && groupIndex == 0))
 928                 {
 929                     dimUsed[coordParams.group[groupIndex]][d] = true;
 930                 }
 931             }
 932         }
 933     }
 934
 935     /* Check PBC for every group that uses a PBC reference atom treatment */
 936     for (size_t g = 0; g < pull.group.size(); g++)
 937     {
 938         const pull_group_work_t& group = pull.group[g];
 939         if ((group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM)
 940             && !pullGroupObeysPbcRestrictions(group, dimUsed[g], x, pbc, pull.comm.pbcAtomBuffer[g], pbcMargin))
 941         {
 942             return g;
 943         }
 944     }
 945
 946     return -1;
 947 }
 948
 949 bool pullCheckPbcWithinGroup(const pull_t&                  pull,
 950                              gmx::ArrayRef<const gmx::RVec> x,
 951                              const t_pbc&                   pbc,
 952                              int                            groupNr,
 953                              real                           pbcMargin)
 954 {
 955     if (pbc.ePBC == epbcNONE)
 956     {
 957         return true;
 958     }
 959     GMX_ASSERT(groupNr < gmx::ssize(pull.group), "groupNr is out of range");
 960
 961     /* Check PBC if the group uses a PBC reference atom treatment. */
 962     const pull_group_work_t& group = pull.group[groupNr];
 963     if (group.epgrppbc != epgrppbcREFAT && group.epgrppbc != epgrppbcPREVSTEPCOM)
 964     {
 965         return true;
 966     }
 967
 968     /* Determine what dimensions are used for each group by pull coordinates */
 969     BoolVec dimUsed = { false, false, false };
 970     for (size_t c = 0; c < pull.coord.size(); c++)
 971     {
 972         const t_pull_coord& coordParams = pull.coord[c].params;
 973         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
 974         {
 975             if (coordParams.group[groupIndex] == groupNr)
 976             {
 977                 for (int d = 0; d < DIM; d++)
 978                 {
 979                     if (coordParams.dim[d] && !(coordParams.eGeom == epullgCYL && groupIndex == 0))
 980                     {
 981                         dimUsed[d] = true;
 982                     }
 983                 }
 984             }
 985         }
 986     }
 987
 988     return (pullGroupObeysPbcRestrictions(group, dimUsed, as_rvec_array(x.data()), pbc,
 989                                           pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
 990 }
 991
 992 void setPrevStepPullComFromState(struct pull_t* pull, const t_state* state)
 993 {
 994     for (size_t g = 0; g < pull->group.size(); g++)
 995     {
 996         for (int j = 0; j < DIM; j++)
 997         {
 998             pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g * DIM + j];
 999         }
1000     }
1001 }
1002
1003 void updatePrevStepPullCom(struct pull_t* pull, t_state* state)
1004 {
1005     for (size_t g = 0; g < pull->group.size(); g++)
1006     {
1007         if (pull->group[g].needToCalcCom)
1008         {
1009             for (int j = 0; j < DIM; j++)
1010             {
1011                 pull->group[g].x_prev_step[j]          = pull->group[g].x[j];
1012                 state->pull_com_prev_step[g * DIM + j] = pull->group[g].x[j];
1013             }
1014         }
1015     }
1016 }
1017
1018 void allocStatePrevStepPullCom(t_state* state, const pull_t* pull)
1019 {
1020     if (!pull)
1021     {
1022         state->pull_com_prev_step.clear();
1023         return;
1024     }
1025     size_t ngroup = pull->group.size();
1026     if (state->pull_com_prev_step.size() / DIM != ngroup)
1027     {
1028         state->pull_com_prev_step.resize(ngroup * DIM, NAN);
1029     }
1030 }
1031
1032 void initPullComFromPrevStep(const t_commrec* cr, pull_t* pull, const t_mdatoms* md, t_pbc* pbc, const rvec x[])
1033 {
1034     pull_comm_t* comm   = &pull->comm;
1035     size_t       ngroup = pull->group.size();
1036
1037     if (!comm->bParticipate)
1038     {
1039         return;
1040     }
1041
1042     GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(),
1043                "pbcAtomBuffer should have size number of groups");
1044     GMX_ASSERT(comm->comBuffer.size() == pull->group.size() * c_comBufferStride,
1045                "comBuffer should have size #group*c_comBufferStride");
1046
1047     pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
1048
1049     for (size_t g = 0; g < ngroup; g++)
1050     {
1051         pull_group_work_t* pgrp;
1052
1053         pgrp = &pull->group[g];
1054
1055         if (pgrp->needToCalcCom && pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1056         {
1057             GMX_ASSERT(pgrp->params.nat > 1,
1058                        "Groups with no atoms, or only one atom, should not "
1059                        "use the COM from the previous step as reference.");
1060
1061             rvec x_pbc = { 0, 0, 0 };
1062             copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
1063
1064             if (debug)
1065             {
1066                 fprintf(debug, "Initialising prev step COM of pull group %zu. x_pbc =", g);
1067                 for (int m = 0; m < DIM; m++)
1068                 {
1069                     fprintf(debug, " %f", x_pbc[m]);
1070                 }
1071                 fprintf(debug, "\n");
1072             }
1073
1074             /* The following is to a large extent similar to pull_calc_coms() */
1075
1076             /* The final sums should end up in sum_com[0] */
1077             ComSums& comSumsTotal = pull->comSums[0];
1078
1079             if (pgrp->atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded)
1080             {
1081                 sum_com_part(pgrp, 0, pgrp->atomSet.numAtomsLocal(), x, nullptr, md->massT, pbc,
1082                              x_pbc, &comSumsTotal);
1083             }
1084             else
1085             {
1086 #pragma omp parallel for num_threads(pull->nthreads) schedule(static)
1087                 for (int t = 0; t < pull->nthreads; t++)
1088                 {
1089                     int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
1090                     int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
1091                     sum_com_part(pgrp, ind_start, ind_end, x, nullptr, md->massT, pbc, x_pbc,
1092                                  &pull->comSums[t]);
1093                 }
1094
1095                 /* Reduce the thread contributions to sum_com[0] */
1096                 for (int t = 1; t < pull->nthreads; t++)
1097                 {
1098                     comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
1099                     comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
1100                     dvec_inc(comSumsTotal.sum_wmx, pull->comSums[t].sum_wmx);
1101                     dvec_inc(comSumsTotal.sum_wmxp, pull->comSums[t].sum_wmxp);
1102                 }
1103             }
1104
1105             if (pgrp->localWeights.empty())
1106             {
1107                 comSumsTotal.sum_wwm = comSumsTotal.sum_wm;
1108             }
1109
1110             /* Copy local sums to a buffer for global summing */
1111             auto localSums = gmx::arrayRefFromArray(comm->comBuffer.data() + g * c_comBufferStride,
1112                                                     c_comBufferStride);
1113
1114             localSums[0]    = comSumsTotal.sum_wmx;
1115             localSums[1]    = comSumsTotal.sum_wmxp;
1116             localSums[2][0] = comSumsTotal.sum_wm;
1117             localSums[2][1] = comSumsTotal.sum_wwm;
1118             localSums[2][2] = 0;
1119         }
1120     }
1121
1122     pullAllReduce(cr, comm, ngroup * c_comBufferStride * DIM, static_cast<double*>(comm->comBuffer[0]));
1123
1124     for (size_t g = 0; g < ngroup; g++)
1125     {
1126         pull_group_work_t* pgrp;
1127
1128         pgrp = &pull->group[g];
1129         if (pgrp->needToCalcCom)
1130         {
1131             if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
1132             {
1133                 auto localSums = gmx::arrayRefFromArray(
1134                         comm->comBuffer.data() + g * c_comBufferStride, c_comBufferStride);
1135                 double wmass, wwmass;
1136
1137                 /* Determine the inverse mass */
1138                 wmass         = localSums[2][0];
1139                 wwmass        = localSums[2][1];
1140                 pgrp->mwscale = 1.0 / wmass;
1141                 /* invtm==0 signals a frozen group, so then we should keep it zero */
1142                 if (pgrp->invtm != 0)
1143                 {
1144                     pgrp->wscale = wmass / wwmass;
1145                     pgrp->invtm  = wwmass / (wmass * wmass);
1146                 }
1147                 /* Divide by the total mass */
1148                 for (int m = 0; m < DIM; m++)
1149                 {
1150                     pgrp->x[m] = localSums[0][m] * pgrp->mwscale;
1151                     pgrp->x[m] += comm->pbcAtomBuffer[g][m];
1152                 }
1153                 if (debug)
1154                 {
1155                     fprintf(debug, "Pull group %zu wmass %f invtm %f\n", g, 1.0 / pgrp->mwscale,
1156                             pgrp->invtm);
1157                     fprintf(debug, "Initialising prev step COM of pull group %zu to", g);
1158                     for (int m = 0; m < DIM; m++)
1159                     {
1160                         fprintf(debug, " %f", pgrp->x[m]);
1161                     }
1162                     fprintf(debug, "\n");
1163                 }
1164                 copy_dvec(pgrp->x, pgrp->x_prev_step);
1165             }
1166         }
1167     }
1168 }