src/gromacs/mdlib/settle.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /*! \internal \file
  39  * \brief Defines SETTLE code.
  40  *
  41  * \author Berk Hess <hess@kth.se>
  42  * \author Mark Abraham <mark.j.abraham@gmail.com>
  43  * \ingroup module_mdlib
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "settle.h"
  48
  49 #include <cassert>
  50 #include <cmath>
  51 #include <cstdio>
  52
  53 #include <algorithm>
  54
  55 #include "gromacs/math/arrayrefwithpadding.h"
  56 #include "gromacs/math/functions.h"
  57 #include "gromacs/math/invertmatrix.h"
  58 #include "gromacs/math/vec.h"
  59 #include "gromacs/mdlib/constr.h"
  60 #include "gromacs/pbcutil/ishift.h"
  61 #include "gromacs/pbcutil/pbc.h"
  62 #include "gromacs/pbcutil/pbc_simd.h"
  63 #include "gromacs/simd/simd.h"
  64 #include "gromacs/simd/simd_math.h"
  65 #include "gromacs/topology/idef.h"
  66 #include "gromacs/topology/ifunc.h"
  67 #include "gromacs/topology/mtop_util.h"
  68 #include "gromacs/utility/arrayref.h"
  69 #include "gromacs/utility/fatalerror.h"
  70 #include "gromacs/utility/gmxassert.h"
  71
  72 namespace gmx
  73 {
  74
  75 /*! \brief Initializes a projection matrix.
  76  *
  77  * \param[in]  invmO                  Reciprocal oxygen mass
  78  * \param[in]  invmH                  Reciprocal hydrogen mass
  79  * \param[in]  dOH                    Target O-H bond length
  80  * \param[in]  dHH                    Target H-H bond length
  81  * \param[out] inverseCouplingMatrix  Inverse bond coupling matrix for the projection version of SETTLE
  82  */
  83 static void initializeProjectionMatrix(const real invmO,
  84                                        const real invmH,
  85                                        const real dOH,
  86                                        const real dHH,
  87                                        matrix     inverseCouplingMatrix)
  88 {
  89     // We normalize the inverse masses with invmO for the matrix inversion.
  90     // so we can keep using masses of almost zero for frozen particles,
  91     // without running out of the float range in invertMatrix.
  92     double invmORelative = 1.0;
  93     double invmHRelative = invmH / static_cast<double>(invmO);
  94     double distanceRatio = dHH / static_cast<double>(dOH);
  95
  96     /* Construct the constraint coupling matrix */
  97     matrix mat;
  98     mat[0][0] = invmORelative + invmHRelative;
  99     mat[0][1] = invmORelative * (1.0 - 0.5 * gmx::square(distanceRatio));
 100     mat[0][2] = invmHRelative * 0.5 * distanceRatio;
 101     mat[1][1] = mat[0][0];
 102     mat[1][2] = mat[0][2];
 103     mat[2][2] = invmHRelative + invmHRelative;
 104     mat[1][0] = mat[0][1];
 105     mat[2][0] = mat[0][2];
 106     mat[2][1] = mat[1][2];
 107
 108     invertMatrix(mat, inverseCouplingMatrix);
 109
 110     msmul(inverseCouplingMatrix, 1 / invmO, inverseCouplingMatrix);
 111 }
 112
 113 SettleParameters
 114 settleParameters(const real mO, const real mH, const real invmO, const real invmH, const real dOH, const real dHH)
 115 {
 116     SettleParameters params;
 117
 118     // We calculate parameters in double precision to minimize errors.
 119     // The velocity correction applied during SETTLE coordinate constraining
 120     // introduces a systematic error of approximately 1 bit per atom,
 121     // depending on what the compiler does with the code.
 122     double wohh;
 123
 124     params.mO   = mO;
 125     params.mH   = mH;
 126     wohh        = mO + 2.0 * mH;
 127     params.wh   = mH / wohh;
 128     params.dOH  = dOH;
 129     params.dHH  = dHH;
 130     double rc   = dHH / 2.0;
 131     double ra   = 2.0 * mH * std::sqrt(dOH * dOH - rc * rc) / wohh;
 132     params.rb   = std::sqrt(dOH * dOH - rc * rc) - ra;
 133     params.rc   = rc;
 134     params.ra   = ra;
 135     params.irc2 = 1.0 / dHH;
 136
 137     // For projection: inverse masses and coupling matrix inversion
 138     params.imO = invmO;
 139     params.imH = invmH;
 140
 141     params.invdOH = 1.0 / dOH;
 142     params.invdHH = 1.0 / dHH;
 143
 144     initializeProjectionMatrix(invmO, invmH, dOH, dHH, params.invmat);
 145
 146     if (debug)
 147     {
 148         fprintf(debug, "wh =%g, rc = %g, ra = %g\n", params.wh, params.rc, params.ra);
 149         fprintf(debug, "rb = %g, irc2 = %g, dHH = %g, dOH = %g\n", params.rb, params.irc2, params.dHH, params.dOH);
 150     }
 151
 152     return params;
 153 }
 154
 155 SettleData::SettleData(const gmx_mtop_t& mtop) :
 156     useSimd_(getenv("GMX_DISABLE_SIMD_KERNELS") == nullptr)
 157 {
 158     /* Check that we have only one settle type */
 159     int       settle_type = -1;
 160     const int nral1       = 1 + NRAL(F_SETTLE);
 161     for (const auto ilists : IListRange(mtop))
 162     {
 163         const InteractionList& ilist = ilists.list()[F_SETTLE];
 164         for (int i = 0; i < ilist.size(); i += nral1)
 165         {
 166             if (settle_type == -1)
 167             {
 168                 settle_type = ilist.iatoms[i];
 169             }
 170             else if (ilist.iatoms[i] != settle_type)
 171             {
 172                 gmx_fatal(FARGS,
 173                           "The [molecules] section of your topology specifies more than one block "
 174                           "of\n"
 175                           "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
 176                           "If you are trying to partition your solvent into different *groups*\n"
 177                           "(e.g. for freezing, T-coupling, etc.), you are using the wrong "
 178                           "approach. Index\n"
 179                           "files specify groups. Otherwise, you may wish to change the least-used\n"
 180                           "block of molecules with SETTLE constraints into 3 normal constraints.");
 181             }
 182         }
 183     }
 184     GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles");
 185
 186     /* We will not initialize the normal SETTLE parameters here yet,
 187      * since the atom (inv)masses can depend on the integrator and
 188      * free-energy perturbation. We set mO=-1 to trigger later initialization.
 189      */
 190     parametersMassWeighted_.mO = -1;
 191
 192     real dOH              = mtop.ffparams.iparams[settle_type].settle.doh;
 193     real dHH              = mtop.ffparams.iparams[settle_type].settle.dhh;
 194     parametersAllMasses1_ = settleParameters(1.0, 1.0, 1.0, 1.0, dOH, dHH);
 195 }
 196
 197 void SettleData::setConstraints(const InteractionList&    il_settle,
 198                                 const int                 numHomeAtoms,
 199                                 gmx::ArrayRef<const real> masses,
 200                                 gmx::ArrayRef<const real> inverseMasses)
 201 {
 202 #if GMX_SIMD_HAVE_REAL
 203     const int pack_size = GMX_SIMD_REAL_WIDTH;
 204 #else
 205     const int pack_size = 1;
 206 #endif
 207
 208     const int nral1   = 1 + NRAL(F_SETTLE);
 209     int       nsettle = il_settle.size() / nral1;
 210     numSettles_       = nsettle;
 211
 212     if (nsettle > 0)
 213     {
 214         ArrayRef<const int> iatoms = il_settle.iatoms;
 215
 216         /* Here we initialize the normal SETTLE parameters */
 217         if (parametersMassWeighted_.mO < 0)
 218         {
 219             int firstO              = iatoms[1];
 220             int firstH              = iatoms[2];
 221             parametersMassWeighted_ = settleParameters(masses[firstO],
 222                                                        masses[firstH],
 223                                                        inverseMasses[firstO],
 224                                                        inverseMasses[firstH],
 225                                                        parametersAllMasses1_.dOH,
 226                                                        parametersAllMasses1_.dHH);
 227         }
 228
 229         const int paddedSize = ((nsettle + pack_size - 1) / pack_size) * pack_size;
 230         ow1_.resize(paddedSize);
 231         hw2_.resize(paddedSize);
 232         hw3_.resize(paddedSize);
 233         virfac_.resize(paddedSize);
 234
 235         for (int i = 0; i < nsettle; i++)
 236         {
 237             ow1_[i] = iatoms[i * nral1 + 1];
 238             hw2_[i] = iatoms[i * nral1 + 2];
 239             hw3_[i] = iatoms[i * nral1 + 3];
 240             /* We should avoid double counting of virial contributions for
 241              * SETTLEs that appear in multiple DD domains, so we only count
 242              * the contribution on the home range of the oxygen atom.
 243              */
 244             virfac_[i] = (iatoms[i * nral1 + 1] < numHomeAtoms ? 1 : 0);
 245         }
 246
 247         /* Pad the index array to the full SIMD width with copies from
 248          * the last normal entry, but with no virial contribution.
 249          */
 250         for (int i = nsettle; i < paddedSize; i++)
 251         {
 252             ow1_[i]    = ow1_[nsettle - 1];
 253             hw2_[i]    = hw2_[nsettle - 1];
 254             hw3_[i]    = hw3_[nsettle - 1];
 255             virfac_[i] = 0;
 256         }
 257     }
 258 }
 259
 260 void settle_proj(const SettleData&    settled,
 261                  ConstraintVariable   econq,
 262                  int                  nsettle,
 263                  const t_iatom        iatoms[],
 264                  const t_pbc*         pbc,
 265                  ArrayRef<const RVec> x,
 266                  ArrayRef<RVec>       der,
 267                  ArrayRef<RVec>       derp,
 268                  int                  calcvir_atom_end,
 269                  tensor               vir_r_m_dder)
 270 {
 271     /* Settle for projection out constraint components
 272      * of derivatives of the coordinates.
 273      * Berk Hess 2008-1-10
 274      */
 275
 276     const SettleParameters* p;
 277     real                    imO, imH, dOH, dHH, invdOH, invdHH;
 278     matrix                  invmat;
 279     int                     i, m, m2, ow1, hw2, hw3;
 280     rvec                    roh2, roh3, rhh, dc, fc;
 281
 282     calcvir_atom_end *= DIM;
 283
 284     if (econq == ConstraintVariable::Force)
 285     {
 286         p = &settled.parametersAllMasses1();
 287     }
 288     else
 289     {
 290         p = &settled.parametersMassWeighted();
 291     }
 292     imO = p->imO;
 293     imH = p->imH;
 294     copy_mat(p->invmat, invmat);
 295     dOH    = p->dOH;
 296     dHH    = p->dHH;
 297     invdOH = p->invdOH;
 298     invdHH = p->invdHH;
 299
 300     const int nral1 = 1 + NRAL(F_SETTLE);
 301
 302     for (i = 0; i < nsettle; i++)
 303     {
 304         ow1 = iatoms[i * nral1 + 1];
 305         hw2 = iatoms[i * nral1 + 2];
 306         hw3 = iatoms[i * nral1 + 3];
 307
 308         if (pbc == nullptr)
 309         {
 310             rvec_sub(x[ow1], x[hw2], roh2);
 311             rvec_sub(x[ow1], x[hw3], roh3);
 312             rvec_sub(x[hw2], x[hw3], rhh);
 313         }
 314         else
 315         {
 316             pbc_dx_aiuc(pbc, x[ow1], x[hw2], roh2);
 317             pbc_dx_aiuc(pbc, x[ow1], x[hw3], roh3);
 318             pbc_dx_aiuc(pbc, x[hw2], x[hw3], rhh);
 319         }
 320         svmul(invdOH, roh2, roh2);
 321         svmul(invdOH, roh3, roh3);
 322         svmul(invdHH, rhh, rhh);
 323         /* 18 flops */
 324
 325         /* Determine the projections of der on the bonds */
 326         clear_rvec(dc);
 327         for (m = 0; m < DIM; m++)
 328         {
 329             dc[0] += (der[ow1][m] - der[hw2][m]) * roh2[m];
 330             dc[1] += (der[ow1][m] - der[hw3][m]) * roh3[m];
 331             dc[2] += (der[hw2][m] - der[hw3][m]) * rhh[m];
 332         }
 333         /* 27 flops */
 334
 335         /* Determine the correction for the three bonds */
 336         mvmul(invmat, dc, fc);
 337         /* 15 flops */
 338
 339         /* Subtract the corrections from derp */
 340         for (m = 0; m < DIM; m++)
 341         {
 342             derp[ow1][m] -= imO * (fc[0] * roh2[m] + fc[1] * roh3[m]);
 343             derp[hw2][m] -= imH * (-fc[0] * roh2[m] + fc[2] * rhh[m]);
 344             derp[hw3][m] -= imH * (-fc[1] * roh3[m] - fc[2] * rhh[m]);
 345         }
 346
 347         /* 45 flops */
 348
 349         if (ow1 < calcvir_atom_end)
 350         {
 351             /* Determining r \dot m der is easy,
 352              * since fc contains the mass weighted corrections for der.
 353              */
 354
 355             for (m = 0; m < DIM; m++)
 356             {
 357                 for (m2 = 0; m2 < DIM; m2++)
 358                 {
 359                     vir_r_m_dder[m][m2] += dOH * roh2[m] * roh2[m2] * fc[0]
 360                                            + dOH * roh3[m] * roh3[m2] * fc[1]
 361                                            + dHH * rhh[m] * rhh[m2] * fc[2];
 362                 }
 363             }
 364         }
 365     }
 366 }
 367
 368
 369 /*! \brief The actual settle code, templated for real/SimdReal and for optimization */
 370 template<typename T, typename TypeBool, int packSize, typename TypePbc, bool bCorrectVelocity, bool bCalcVirial>
 371 static void settleTemplate(const SettleData&  settled,
 372                            int                settleStart,
 373                            int                settleEnd,
 374                            const TypePbc      pbc,
 375                            const real*        x,
 376                            real*              xprime,
 377                            real               invdt,
 378                            real* gmx_restrict v,
 379                            tensor             vir_r_m_dr,
 380                            bool*              bErrorHasOccurred)
 381 {
 382     /* ******************************************************************* */
 383     /*                                                                  ** */
 384     /*    Original code by Shuichi Miyamoto, last update Oct. 1, 1992   ** */
 385     /*                                                                  ** */
 386     /*    Algorithm changes by Berk Hess:                               ** */
 387     /*    2004-07-15 Convert COM to double precision to avoid drift     ** */
 388     /*    2006-10-16 Changed velocity update to use differences         ** */
 389     /*    2012-09-24 Use oxygen as reference instead of COM             ** */
 390     /*    2016-02    Complete rewrite of the code for SIMD              ** */
 391     /*    2020-06    Completely remove use of COM to minimize drift     ** */
 392     /*                                                                  ** */
 393     /*    Reference for the SETTLE algorithm                            ** */
 394     /*           S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992).    ** */
 395     /*                                                                  ** */
 396     /* ******************************************************************* */
 397
 398     assert(settleStart % packSize == 0);
 399     assert(settleEnd % packSize == 0);
 400
 401     TypeBool bError = TypeBool(false);
 402
 403     const SettleParameters* p    = &settled.parametersMassWeighted();
 404     T                       wh   = T(p->wh);
 405     T                       rc   = T(p->rc);
 406     T                       ra   = T(p->ra);
 407     T                       rb   = T(p->rb);
 408     T                       irc2 = T(p->irc2);
 409     T                       mO   = T(p->mO);
 410     T                       mH   = T(p->mH);
 411
 412     T almost_zero = T(1e-12);
 413
 414     T sum_r_m_dr[DIM][DIM];
 415
 416     if (bCalcVirial)
 417     {
 418         for (int d2 = 0; d2 < DIM; d2++)
 419         {
 420             for (int d = 0; d < DIM; d++)
 421             {
 422                 sum_r_m_dr[d2][d] = T(0);
 423             }
 424         }
 425     }
 426
 427     for (int i = settleStart; i < settleEnd; i += packSize)
 428     {
 429         /* Here we pad up to packSize with copies from the last valid entry.
 430          * This gives correct results, since we store (not increment) all
 431          * output, so we store the same output multiple times.
 432          */
 433         const int* ow1 = settled.ow1() + i;
 434         const int* hw2 = settled.hw2() + i;
 435         const int* hw3 = settled.hw3() + i;
 436
 437         T x_ow1[DIM], x_hw2[DIM], x_hw3[DIM];
 438
 439         gatherLoadUTranspose<3>(x, ow1, &x_ow1[XX], &x_ow1[YY], &x_ow1[ZZ]);
 440         gatherLoadUTranspose<3>(x, hw2, &x_hw2[XX], &x_hw2[YY], &x_hw2[ZZ]);
 441         gatherLoadUTranspose<3>(x, hw3, &x_hw3[XX], &x_hw3[YY], &x_hw3[ZZ]);
 442
 443         T xprime_ow1[DIM], xprime_hw2[DIM], xprime_hw3[DIM];
 444
 445         gatherLoadUTranspose<3>(xprime, ow1, &xprime_ow1[XX], &xprime_ow1[YY], &xprime_ow1[ZZ]);
 446         gatherLoadUTranspose<3>(xprime, hw2, &xprime_hw2[XX], &xprime_hw2[YY], &xprime_hw2[ZZ]);
 447         gatherLoadUTranspose<3>(xprime, hw3, &xprime_hw3[XX], &xprime_hw3[YY], &xprime_hw3[ZZ]);
 448
 449         T dist21[DIM], dist31[DIM];
 450         T doh2[DIM], doh3[DIM];
 451
 452         pbc_dx_aiuc(pbc, x_hw2, x_ow1, dist21);
 453
 454         pbc_dx_aiuc(pbc, x_hw3, x_ow1, dist31);
 455
 456         pbc_dx_aiuc(pbc, xprime_hw2, xprime_ow1, doh2);
 457
 458         pbc_dx_aiuc(pbc, xprime_hw3, xprime_ow1, doh3);
 459         /* 4 * 18 flops (would be 4 * 3 without PBC) */
 460
 461         /* Note that we completely avoid computing the center of mass and
 462          * only use distances. This minimizes energy drift and also makes
 463          * the computation slightly cheaper.
 464          * Straightforward computation of the COM, as in the original algorithm,
 465          * makes SETTLE the largest source of energy drift for simulations of water,
 466          * as then the oxygen coordinate is multiplied by 0.89 at every step,
 467          * which can then transfer a systematic rounding to the oxygen velocity.
 468          * For some time we computed the COM using offsets from the oxygen, this
 469          * significantly reduces the energy drift, but not using the COM at all,
 470          * as we do now, is optimal.
 471          */
 472         T a1[DIM];
 473         for (int d = 0; d < DIM; d++)
 474         {
 475             a1[d] = -(doh2[d] + doh3[d]) * wh;
 476         }
 477         T b1[DIM];
 478         for (int d = 0; d < DIM; d++)
 479         {
 480             b1[d] = doh2[d] + a1[d];
 481         }
 482         T c1[DIM];
 483         for (int d = 0; d < DIM; d++)
 484         {
 485             c1[d] = doh3[d] + a1[d];
 486         }
 487         /* 12 flops */
 488
 489         T xakszd = dist21[YY] * dist31[ZZ] - dist21[ZZ] * dist31[YY];
 490         T yakszd = dist21[ZZ] * dist31[XX] - dist21[XX] * dist31[ZZ];
 491         T zakszd = dist21[XX] * dist31[YY] - dist21[YY] * dist31[XX];
 492         T xaksxd = a1[YY] * zakszd - a1[ZZ] * yakszd;
 493         T yaksxd = a1[ZZ] * xakszd - a1[XX] * zakszd;
 494         T zaksxd = a1[XX] * yakszd - a1[YY] * xakszd;
 495         T xaksyd = yakszd * zaksxd - zakszd * yaksxd;
 496         T yaksyd = zakszd * xaksxd - xakszd * zaksxd;
 497         T zaksyd = xakszd * yaksxd - yakszd * xaksxd;
 498         /* 27 flops */
 499
 500         T axlng = gmx::invsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd);
 501         T aylng = gmx::invsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd);
 502         T azlng = gmx::invsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd);
 503
 504         T trns1[DIM], trns2[DIM], trns3[DIM];
 505
 506         trns1[XX] = xaksxd * axlng;
 507         trns2[XX] = yaksxd * axlng;
 508         trns3[XX] = zaksxd * axlng;
 509         trns1[YY] = xaksyd * aylng;
 510         trns2[YY] = yaksyd * aylng;
 511         trns3[YY] = zaksyd * aylng;
 512         trns1[ZZ] = xakszd * azlng;
 513         trns2[ZZ] = yakszd * azlng;
 514         trns3[ZZ] = zakszd * azlng;
 515         /* 24 flops */
 516
 517         T b0d[2], c0d[2];
 518
 519         for (int d = 0; d < 2; d++)
 520         {
 521             b0d[d] = trns1[d] * dist21[XX] + trns2[d] * dist21[YY] + trns3[d] * dist21[ZZ];
 522             c0d[d] = trns1[d] * dist31[XX] + trns2[d] * dist31[YY] + trns3[d] * dist31[ZZ];
 523         }
 524
 525         T a1d_z, b1d[DIM], c1d[DIM];
 526
 527         a1d_z = trns1[ZZ] * a1[XX] + trns2[ZZ] * a1[YY] + trns3[ZZ] * a1[ZZ];
 528         for (int d = 0; d < DIM; d++)
 529         {
 530             b1d[d] = trns1[d] * b1[XX] + trns2[d] * b1[YY] + trns3[d] * b1[ZZ];
 531             c1d[d] = trns1[d] * c1[XX] + trns2[d] * c1[YY] + trns3[d] * c1[ZZ];
 532         }
 533         /* 65 flops */
 534
 535         T tmp, tmp2;
 536
 537         T sinphi = a1d_z * gmx::invsqrt(ra * ra);
 538         tmp2     = 1.0 - sinphi * sinphi;
 539
 540         /* If tmp2 gets close to or beyond zero we have severly distorted
 541          * water molecules and we should terminate the simulation.
 542          * Below we take the max with almost_zero to continue the loop.
 543          */
 544         bError = bError || (tmp2 <= almost_zero);
 545
 546         tmp2     = max(tmp2, almost_zero);
 547         tmp      = gmx::invsqrt(tmp2);
 548         T cosphi = tmp2 * tmp;
 549         T sinpsi = (b1d[ZZ] - c1d[ZZ]) * irc2 * tmp;
 550         tmp2     = 1.0 - sinpsi * sinpsi;
 551
 552         T cospsi = tmp2 * gmx::invsqrt(tmp2);
 553         /* 46 flops */
 554
 555         T a2d_y = ra * cosphi;
 556         T b2d_x = -rc * cospsi;
 557         T t1    = -rb * cosphi;
 558         T t2    = rc * sinpsi * sinphi;
 559         T b2d_y = t1 - t2;
 560         T c2d_y = t1 + t2;
 561         /* 7 flops */
 562
 563         /*     --- Step3  al,be,ga            --- */
 564         T alpha  = b2d_x * (b0d[XX] - c0d[XX]) + b0d[YY] * b2d_y + c0d[YY] * c2d_y;
 565         T beta   = b2d_x * (c0d[YY] - b0d[YY]) + b0d[XX] * b2d_y + c0d[XX] * c2d_y;
 566         T gamma  = b0d[XX] * b1d[YY] - b1d[XX] * b0d[YY] + c0d[XX] * c1d[YY] - c1d[XX] * c0d[YY];
 567         T al2be2 = alpha * alpha + beta * beta;
 568         tmp2     = (al2be2 - gamma * gamma);
 569         T sinthe = (alpha * gamma - beta * tmp2 * gmx::invsqrt(tmp2)) * gmx::invsqrt(al2be2 * al2be2);
 570         /* 47 flops */
 571
 572         /*  --- Step4  A3' --- */
 573         tmp2     = 1.0 - sinthe * sinthe;
 574         T costhe = tmp2 * gmx::invsqrt(tmp2);
 575
 576         T a3d[DIM], b3d[DIM], c3d[DIM];
 577
 578         a3d[XX] = -a2d_y * sinthe;
 579         a3d[YY] = a2d_y * costhe;
 580         a3d[ZZ] = a1d_z;
 581         b3d[XX] = b2d_x * costhe - b2d_y * sinthe;
 582         b3d[YY] = b2d_x * sinthe + b2d_y * costhe;
 583         b3d[ZZ] = b1d[ZZ];
 584         c3d[XX] = -b2d_x * costhe - c2d_y * sinthe;
 585         c3d[YY] = -b2d_x * sinthe + c2d_y * costhe;
 586         c3d[ZZ] = c1d[ZZ];
 587         /* 26 flops */
 588
 589         /*    --- Step5  A3 --- */
 590         T a3[DIM], b3[DIM], c3[DIM];
 591
 592         a3[XX] = trns1[XX] * a3d[XX] + trns1[YY] * a3d[YY] + trns1[ZZ] * a3d[ZZ];
 593         a3[YY] = trns2[XX] * a3d[XX] + trns2[YY] * a3d[YY] + trns2[ZZ] * a3d[ZZ];
 594         a3[ZZ] = trns3[XX] * a3d[XX] + trns3[YY] * a3d[YY] + trns3[ZZ] * a3d[ZZ];
 595         b3[XX] = trns1[XX] * b3d[XX] + trns1[YY] * b3d[YY] + trns1[ZZ] * b3d[ZZ];
 596         b3[YY] = trns2[XX] * b3d[XX] + trns2[YY] * b3d[YY] + trns2[ZZ] * b3d[ZZ];
 597         b3[ZZ] = trns3[XX] * b3d[XX] + trns3[YY] * b3d[YY] + trns3[ZZ] * b3d[ZZ];
 598         c3[XX] = trns1[XX] * c3d[XX] + trns1[YY] * c3d[YY] + trns1[ZZ] * c3d[ZZ];
 599         c3[YY] = trns2[XX] * c3d[XX] + trns2[YY] * c3d[YY] + trns2[ZZ] * c3d[ZZ];
 600         c3[ZZ] = trns3[XX] * c3d[XX] + trns3[YY] * c3d[YY] + trns3[ZZ] * c3d[ZZ];
 601         /* 45 flops */
 602
 603         /* Compute and store the corrected new coordinate */
 604         T dxOw1[DIM];
 605         for (int d = 0; d < DIM; d++)
 606         {
 607             dxOw1[d]      = a3[d] - a1[d];
 608             xprime_ow1[d] = xprime_ow1[d] + dxOw1[d];
 609         }
 610         T dxHw2[DIM];
 611         for (int d = 0; d < DIM; d++)
 612         {
 613             dxHw2[d]      = b3[d] - b1[d];
 614             xprime_hw2[d] = xprime_hw2[d] + dxHw2[d];
 615         }
 616         T dxHw3[DIM];
 617         for (int d = 0; d < DIM; d++)
 618         {
 619             dxHw3[d]      = c3[d] - c1[d];
 620             xprime_hw3[d] = xprime_hw3[d] + dxHw3[d];
 621         }
 622         /* 9 + 9 flops */
 623
 624         transposeScatterStoreU<3>(xprime, ow1, xprime_ow1[XX], xprime_ow1[YY], xprime_ow1[ZZ]);
 625         transposeScatterStoreU<3>(xprime, hw2, xprime_hw2[XX], xprime_hw2[YY], xprime_hw2[ZZ]);
 626         transposeScatterStoreU<3>(xprime, hw3, xprime_hw3[XX], xprime_hw3[YY], xprime_hw3[ZZ]);
 627
 628         if (bCorrectVelocity)
 629         {
 630             T v_ow1[DIM], v_hw2[DIM], v_hw3[DIM];
 631
 632             gatherLoadUTranspose<3>(v, ow1, &v_ow1[XX], &v_ow1[YY], &v_ow1[ZZ]);
 633             gatherLoadUTranspose<3>(v, hw2, &v_hw2[XX], &v_hw2[YY], &v_hw2[ZZ]);
 634             gatherLoadUTranspose<3>(v, hw3, &v_hw3[XX], &v_hw3[YY], &v_hw3[ZZ]);
 635
 636             /* Add the position correction divided by dt to the velocity */
 637             for (int d = 0; d < DIM; d++)
 638             {
 639                 v_ow1[d] = gmx::fma(dxOw1[d], invdt, v_ow1[d]);
 640             }
 641             for (int d = 0; d < DIM; d++)
 642             {
 643                 v_hw2[d] = gmx::fma(dxHw2[d], invdt, v_hw2[d]);
 644             }
 645             for (int d = 0; d < DIM; d++)
 646             {
 647                 v_hw3[d] = gmx::fma(dxHw3[d], invdt, v_hw3[d]);
 648             }
 649             /* 3*6 flops */
 650
 651             transposeScatterStoreU<3>(v, ow1, v_ow1[XX], v_ow1[YY], v_ow1[ZZ]);
 652             transposeScatterStoreU<3>(v, hw2, v_hw2[XX], v_hw2[YY], v_hw2[ZZ]);
 653             transposeScatterStoreU<3>(v, hw3, v_hw3[XX], v_hw3[YY], v_hw3[ZZ]);
 654         }
 655
 656         if (bCalcVirial)
 657         {
 658             /* Filter out the non-local settles */
 659             T filter = load<T>(settled.virfac() + i);
 660             T mOf    = filter * mO;
 661             T mHf    = filter * mH;
 662
 663             T mdo[DIM], mdb[DIM], mdc[DIM];
 664
 665             for (int d = 0; d < DIM; d++)
 666             {
 667                 mdb[d] = mHf * dxHw2[d];
 668                 mdc[d] = mHf * dxHw3[d];
 669                 mdo[d] = mOf * dxOw1[d] + mdb[d] + mdc[d];
 670             }
 671
 672             for (int d2 = 0; d2 < DIM; d2++)
 673             {
 674                 for (int d = 0; d < DIM; d++)
 675                 {
 676                     sum_r_m_dr[d2][d] =
 677                             sum_r_m_dr[d2][d]
 678                             - (x_ow1[d2] * mdo[d] + dist21[d2] * mdb[d] + dist31[d2] * mdc[d]);
 679                 }
 680             }
 681             /* 71 flops */
 682         }
 683     }
 684
 685     if (bCalcVirial)
 686     {
 687         for (int d2 = 0; d2 < DIM; d2++)
 688         {
 689             for (int d = 0; d < DIM; d++)
 690             {
 691                 vir_r_m_dr[d2][d] += reduce(sum_r_m_dr[d2][d]);
 692             }
 693         }
 694     }
 695
 696     *bErrorHasOccurred = anyTrue(bError);
 697 }
 698
 699 /*! \brief Wrapper template function that divides the settles over threads
 700  * and instantiates the core template with instantiated booleans.
 701  */
 702 template<typename T, typename TypeBool, int packSize, typename TypePbc>
 703 static void settleTemplateWrapper(const SettleData& settled,
 704                                   int               nthread,
 705                                   int               thread,
 706                                   TypePbc           pbc,
 707                                   const real        x[],
 708                                   real              xprime[],
 709                                   real              invdt,
 710                                   real*             v,
 711                                   bool              bCalcVirial,
 712                                   tensor            vir_r_m_dr,
 713                                   bool*             bErrorHasOccurred)
 714 {
 715     /* We need to assign settles to threads in groups of pack_size */
 716     int numSettlePacks = (settled.numSettles() + packSize - 1) / packSize;
 717     /* Round the end value up to give thread 0 more work */
 718     int settleStart = ((numSettlePacks * thread + nthread - 1) / nthread) * packSize;
 719     int settleEnd   = ((numSettlePacks * (thread + 1) + nthread - 1) / nthread) * packSize;
 720
 721     if (v != nullptr)
 722     {
 723         if (!bCalcVirial)
 724         {
 725             settleTemplate<T, TypeBool, packSize, TypePbc, true, false>(
 726                     settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, nullptr, bErrorHasOccurred);
 727         }
 728         else
 729         {
 730             settleTemplate<T, TypeBool, packSize, TypePbc, true, true>(
 731                     settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, vir_r_m_dr, bErrorHasOccurred);
 732         }
 733     }
 734     else
 735     {
 736         if (!bCalcVirial)
 737         {
 738             settleTemplate<T, TypeBool, packSize, TypePbc, false, false>(
 739                     settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, nullptr, bErrorHasOccurred);
 740         }
 741         else
 742         {
 743             settleTemplate<T, TypeBool, packSize, TypePbc, false, true>(
 744                     settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, vir_r_m_dr, bErrorHasOccurred);
 745         }
 746     }
 747 }
 748
 749 void csettle(const SettleData&               settled,
 750              int                             nthread,
 751              int                             thread,
 752              const t_pbc*                    pbc,
 753              ArrayRefWithPadding<const RVec> x,
 754              ArrayRefWithPadding<RVec>       xprime,
 755              real                            invdt,
 756              ArrayRefWithPadding<RVec>       v,
 757              bool                            bCalcVirial,
 758              tensor                          vir_r_m_dr,
 759              bool*                           bErrorHasOccurred)
 760 {
 761     const real* xPtr      = as_rvec_array(x.paddedArrayRef().data())[0];
 762     real*       xprimePtr = as_rvec_array(xprime.paddedArrayRef().data())[0];
 763     real*       vPtr      = as_rvec_array(v.paddedArrayRef().data())[0];
 764
 765 #if GMX_SIMD_HAVE_REAL
 766     if (settled.useSimd())
 767     {
 768         /* Convert the pbc struct for SIMD */
 769         alignas(GMX_SIMD_ALIGNMENT) real pbcSimd[9 * GMX_SIMD_REAL_WIDTH];
 770         set_pbc_simd(pbc, pbcSimd);
 771
 772         settleTemplateWrapper<SimdReal, SimdBool, GMX_SIMD_REAL_WIDTH, const real*>(
 773                 settled, nthread, thread, pbcSimd, xPtr, xprimePtr, invdt, vPtr, bCalcVirial, vir_r_m_dr, bErrorHasOccurred);
 774     }
 775     else
 776 #endif
 777     {
 778         /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
 779         t_pbc        pbcNo;
 780         const t_pbc* pbcNonNull;
 781
 782         if (pbc != nullptr)
 783         {
 784             pbcNonNull = pbc;
 785         }
 786         else
 787         {
 788             set_pbc(&pbcNo, PbcType::No, nullptr);
 789             pbcNonNull = &pbcNo;
 790         }
 791
 792         settleTemplateWrapper<real, bool, 1, const t_pbc*>(
 793                 settled, nthread, thread, pbcNonNull, &xPtr[0], &xprimePtr[0], invdt, &vPtr[0], bCalcVirial, vir_r_m_dr, bErrorHasOccurred);
 794     }
 795 }
 796
 797 } // namespace gmx