src/gromacs/mdlib/settle.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /*! \internal \file
  39  * \brief Defines SETTLE code.
  40  *
  41  * \author Berk Hess <hess@kth.se>
  42  * \author Mark Abraham <mark.j.abraham@gmail.com>
  43  * \ingroup module_mdlib
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "settle.h"
  48
  49 #include <cassert>
  50 #include <cmath>
  51 #include <cstdio>
  52
  53 #include <algorithm>
  54
  55 #include "gromacs/math/arrayrefwithpadding.h"
  56 #include "gromacs/math/functions.h"
  57 #include "gromacs/math/invertmatrix.h"
  58 #include "gromacs/math/vec.h"
  59 #include "gromacs/mdlib/constr.h"
  60 #include "gromacs/mdtypes/mdatom.h"
  61 #include "gromacs/pbcutil/ishift.h"
  62 #include "gromacs/pbcutil/pbc.h"
  63 #include "gromacs/pbcutil/pbc_simd.h"
  64 #include "gromacs/simd/simd.h"
  65 #include "gromacs/simd/simd_math.h"
  66 #include "gromacs/topology/idef.h"
  67 #include "gromacs/topology/ifunc.h"
  68 #include "gromacs/topology/mtop_util.h"
  69 #include "gromacs/utility/fatalerror.h"
  70 #include "gromacs/utility/gmxassert.h"
  71 #include "gromacs/utility/smalloc.h"
  72
  73 namespace gmx
  74 {
  75
  76 struct settleparam_t
  77 {
  78     real mO;
  79     real mH;
  80     real wh;
  81     real dOH;
  82     real dHH;
  83     real ra;
  84     real rb;
  85     real rc;
  86     real irc2;
  87     /* For projection */
  88     real   imO;
  89     real   imH;
  90     real   invdOH;
  91     real   invdHH;
  92     matrix invmat;
  93 };
  94
  95 struct settledata
  96 {
  97     settleparam_t massw; /* Parameters for SETTLE for coordinates */
  98     settleparam_t mass1; /* Parameters with all masses 1, for forces */
  99
 100     int   nsettle; /* The number of settles on our rank */
 101     int*  ow1;     /* Index to OW1 atoms, size nsettle + SIMD padding */
 102     int*  hw2;     /* Index to HW2 atoms, size nsettle + SIMD padding */
 103     int*  hw3;     /* Index to HW3 atoms, size nsettle + SIMD padding */
 104     real* virfac;  /* Virial factor 0 or 1, size nsettle + SIMD pad. */
 105     int   nalloc;  /* Allocation size of ow1, hw2, hw3, virfac */
 106
 107     bool bUseSimd; /* Use SIMD intrinsics code, if possible */
 108 };
 109
 110
 111 //! Initializes a projection matrix.
 112 static void init_proj_matrix(real invmO, real invmH, real dOH, real dHH, matrix inverseCouplingMatrix)
 113 {
 114     /* We normalize the inverse masses with invmO for the matrix inversion.
 115      * so we can keep using masses of almost zero for frozen particles,
 116      * without running out of the float range in invertMatrix.
 117      */
 118     double invmORelative = 1.0;
 119     double invmHRelative = invmH / static_cast<double>(invmO);
 120     double distanceRatio = dHH / static_cast<double>(dOH);
 121
 122     /* Construct the constraint coupling matrix */
 123     matrix mat;
 124     mat[0][0] = invmORelative + invmHRelative;
 125     mat[0][1] = invmORelative * (1.0 - 0.5 * gmx::square(distanceRatio));
 126     mat[0][2] = invmHRelative * 0.5 * distanceRatio;
 127     mat[1][1] = mat[0][0];
 128     mat[1][2] = mat[0][2];
 129     mat[2][2] = invmHRelative + invmHRelative;
 130     mat[1][0] = mat[0][1];
 131     mat[2][0] = mat[0][2];
 132     mat[2][1] = mat[1][2];
 133
 134     invertMatrix(mat, inverseCouplingMatrix);
 135
 136     msmul(inverseCouplingMatrix, 1 / invmO, inverseCouplingMatrix);
 137 }
 138
 139 //! Initializes settle parameters.
 140 static void settleparam_init(settleparam_t* p, real mO, real mH, real invmO, real invmH, real dOH, real dHH)
 141 {
 142     /* We calculate parameters in double precision to minimize errors.
 143      * The velocity correction applied during SETTLE coordinate constraining
 144      * introduces a systematic error of approximately 1 bit per atom,
 145      * depending on what the compiler does with the code.
 146      */
 147     double wohh;
 148
 149     p->mO     = mO;
 150     p->mH     = mH;
 151     wohh      = mO + 2.0 * mH;
 152     p->wh     = mH / wohh;
 153     p->dOH    = dOH;
 154     p->dHH    = dHH;
 155     double rc = dHH / 2.0;
 156     double ra = 2.0 * mH * std::sqrt(dOH * dOH - rc * rc) / wohh;
 157     p->rb     = std::sqrt(dOH * dOH - rc * rc) - ra;
 158     p->rc     = rc;
 159     p->ra     = ra;
 160     p->irc2   = 1.0 / dHH;
 161
 162     /* For projection: inverse masses and coupling matrix inversion */
 163     p->imO = invmO;
 164     p->imH = invmH;
 165
 166     p->invdOH = 1.0 / dOH;
 167     p->invdHH = 1.0 / dHH;
 168
 169     init_proj_matrix(invmO, invmH, dOH, dHH, p->invmat);
 170
 171     if (debug)
 172     {
 173         fprintf(debug, "wh =%g, rc = %g, ra = %g\n", p->wh, p->rc, p->ra);
 174         fprintf(debug, "rb = %g, irc2 = %g, dHH = %g, dOH = %g\n", p->rb, p->irc2, p->dHH, p->dOH);
 175     }
 176 }
 177
 178 settledata* settle_init(const gmx_mtop_t& mtop)
 179 {
 180     /* Check that we have only one settle type */
 181     int                  settle_type = -1;
 182     gmx_mtop_ilistloop_t iloop       = gmx_mtop_ilistloop_init(mtop);
 183     int                  nmol;
 184     const int            nral1 = 1 + NRAL(F_SETTLE);
 185     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
 186     {
 187         const InteractionList& ilist = (*ilists)[F_SETTLE];
 188         for (int i = 0; i < ilist.size(); i += nral1)
 189         {
 190             if (settle_type == -1)
 191             {
 192                 settle_type = ilist.iatoms[i];
 193             }
 194             else if (ilist.iatoms[i] != settle_type)
 195             {
 196                 gmx_fatal(FARGS,
 197                           "The [molecules] section of your topology specifies more than one block "
 198                           "of\n"
 199                           "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
 200                           "If you are trying to partition your solvent into different *groups*\n"
 201                           "(e.g. for freezing, T-coupling, etc.), you are using the wrong "
 202                           "approach. Index\n"
 203                           "files specify groups. Otherwise, you may wish to change the least-used\n"
 204                           "block of molecules with SETTLE constraints into 3 normal constraints.");
 205             }
 206         }
 207     }
 208     GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles");
 209
 210     settledata* settled;
 211
 212     snew(settled, 1);
 213
 214     /* We will not initialize the normal SETTLE parameters here yet,
 215      * since the atom (inv)masses can depend on the integrator and
 216      * free-energy perturbation. We set mO=-1 to trigger later initialization.
 217      */
 218     settled->massw.mO = -1;
 219
 220     real dOH = mtop.ffparams.iparams[settle_type].settle.doh;
 221     real dHH = mtop.ffparams.iparams[settle_type].settle.dhh;
 222     settleparam_init(&settled->mass1, 1.0, 1.0, 1.0, 1.0, dOH, dHH);
 223
 224     settled->ow1    = nullptr;
 225     settled->hw2    = nullptr;
 226     settled->hw3    = nullptr;
 227     settled->virfac = nullptr;
 228     settled->nalloc = 0;
 229
 230     /* Without SIMD configured, this bool is not used */
 231     settled->bUseSimd = (getenv("GMX_DISABLE_SIMD_KERNELS") == nullptr);
 232
 233     return settled;
 234 }
 235
 236 void settle_free(settledata* settled)
 237 {
 238     sfree_aligned(settled->ow1);
 239     sfree_aligned(settled->hw2);
 240     sfree_aligned(settled->hw3);
 241     sfree_aligned(settled->virfac);
 242     sfree(settled);
 243 }
 244
 245 void settle_set_constraints(settledata* settled, const t_ilist* il_settle, const t_mdatoms& mdatoms)
 246 {
 247 #if GMX_SIMD_HAVE_REAL
 248     const int pack_size = GMX_SIMD_REAL_WIDTH;
 249 #else
 250     const int pack_size = 1;
 251 #endif
 252
 253     const int nral1   = 1 + NRAL(F_SETTLE);
 254     int       nsettle = il_settle->nr / nral1;
 255     settled->nsettle  = nsettle;
 256
 257     if (nsettle > 0)
 258     {
 259         const t_iatom* iatoms = il_settle->iatoms;
 260
 261         /* Here we initialize the normal SETTLE parameters */
 262         if (settled->massw.mO < 0)
 263         {
 264             int firstO = iatoms[1];
 265             int firstH = iatoms[2];
 266             settleparam_init(&settled->massw, mdatoms.massT[firstO], mdatoms.massT[firstH],
 267                              mdatoms.invmass[firstO], mdatoms.invmass[firstH], settled->mass1.dOH,
 268                              settled->mass1.dHH);
 269         }
 270
 271         if (nsettle + pack_size > settled->nalloc)
 272         {
 273             settled->nalloc = over_alloc_dd(nsettle + pack_size);
 274             sfree_aligned(settled->ow1);
 275             sfree_aligned(settled->hw2);
 276             sfree_aligned(settled->hw3);
 277             sfree_aligned(settled->virfac);
 278             snew_aligned(settled->ow1, settled->nalloc, 64);
 279             snew_aligned(settled->hw2, settled->nalloc, 64);
 280             snew_aligned(settled->hw3, settled->nalloc, 64);
 281             snew_aligned(settled->virfac, settled->nalloc, 64);
 282         }
 283
 284         for (int i = 0; i < nsettle; i++)
 285         {
 286             settled->ow1[i] = iatoms[i * nral1 + 1];
 287             settled->hw2[i] = iatoms[i * nral1 + 2];
 288             settled->hw3[i] = iatoms[i * nral1 + 3];
 289             /* We should avoid double counting of virial contributions for
 290              * SETTLEs that appear in multiple DD domains, so we only count
 291              * the contribution on the home range of the oxygen atom.
 292              */
 293             settled->virfac[i] = (iatoms[i * nral1 + 1] < mdatoms.homenr ? 1 : 0);
 294         }
 295
 296         /* Pack the index array to the full SIMD width with copies from
 297          * the last normal entry, but with no virial contribution.
 298          */
 299         int end_packed = ((nsettle + pack_size - 1) / pack_size) * pack_size;
 300         for (int i = nsettle; i < end_packed; i++)
 301         {
 302             settled->ow1[i]    = settled->ow1[nsettle - 1];
 303             settled->hw2[i]    = settled->hw2[nsettle - 1];
 304             settled->hw3[i]    = settled->hw3[nsettle - 1];
 305             settled->virfac[i] = 0;
 306         }
 307     }
 308 }
 309
 310 void settle_proj(settledata*          settled,
 311                  ConstraintVariable   econq,
 312                  int                  nsettle,
 313                  const t_iatom        iatoms[],
 314                  const t_pbc*         pbc,
 315                  ArrayRef<const RVec> x,
 316                  ArrayRef<RVec>       der,
 317                  ArrayRef<RVec>       derp,
 318                  int                  calcvir_atom_end,
 319                  tensor               vir_r_m_dder)
 320 {
 321     /* Settle for projection out constraint components
 322      * of derivatives of the coordinates.
 323      * Berk Hess 2008-1-10
 324      */
 325
 326     settleparam_t* p;
 327     real           imO, imH, dOH, dHH, invdOH, invdHH;
 328     matrix         invmat;
 329     int            i, m, m2, ow1, hw2, hw3;
 330     rvec           roh2, roh3, rhh, dc, fc;
 331
 332     calcvir_atom_end *= DIM;
 333
 334     if (econq == ConstraintVariable::Force)
 335     {
 336         p = &settled->mass1;
 337     }
 338     else
 339     {
 340         p = &settled->massw;
 341     }
 342     imO = p->imO;
 343     imH = p->imH;
 344     copy_mat(p->invmat, invmat);
 345     dOH    = p->dOH;
 346     dHH    = p->dHH;
 347     invdOH = p->invdOH;
 348     invdHH = p->invdHH;
 349
 350     const int nral1 = 1 + NRAL(F_SETTLE);
 351
 352     for (i = 0; i < nsettle; i++)
 353     {
 354         ow1 = iatoms[i * nral1 + 1];
 355         hw2 = iatoms[i * nral1 + 2];
 356         hw3 = iatoms[i * nral1 + 3];
 357
 358         if (pbc == nullptr)
 359         {
 360             rvec_sub(x[ow1], x[hw2], roh2);
 361             rvec_sub(x[ow1], x[hw3], roh3);
 362             rvec_sub(x[hw2], x[hw3], rhh);
 363         }
 364         else
 365         {
 366             pbc_dx_aiuc(pbc, x[ow1], x[hw2], roh2);
 367             pbc_dx_aiuc(pbc, x[ow1], x[hw3], roh3);
 368             pbc_dx_aiuc(pbc, x[hw2], x[hw3], rhh);
 369         }
 370         svmul(invdOH, roh2, roh2);
 371         svmul(invdOH, roh3, roh3);
 372         svmul(invdHH, rhh, rhh);
 373         /* 18 flops */
 374
 375         /* Determine the projections of der on the bonds */
 376         clear_rvec(dc);
 377         for (m = 0; m < DIM; m++)
 378         {
 379             dc[0] += (der[ow1][m] - der[hw2][m]) * roh2[m];
 380             dc[1] += (der[ow1][m] - der[hw3][m]) * roh3[m];
 381             dc[2] += (der[hw2][m] - der[hw3][m]) * rhh[m];
 382         }
 383         /* 27 flops */
 384
 385         /* Determine the correction for the three bonds */
 386         mvmul(invmat, dc, fc);
 387         /* 15 flops */
 388
 389         /* Subtract the corrections from derp */
 390         for (m = 0; m < DIM; m++)
 391         {
 392             derp[ow1][m] -= imO * (fc[0] * roh2[m] + fc[1] * roh3[m]);
 393             derp[hw2][m] -= imH * (-fc[0] * roh2[m] + fc[2] * rhh[m]);
 394             derp[hw3][m] -= imH * (-fc[1] * roh3[m] - fc[2] * rhh[m]);
 395         }
 396
 397         /* 45 flops */
 398
 399         if (ow1 < calcvir_atom_end)
 400         {
 401             /* Determining r \dot m der is easy,
 402              * since fc contains the mass weighted corrections for der.
 403              */
 404
 405             for (m = 0; m < DIM; m++)
 406             {
 407                 for (m2 = 0; m2 < DIM; m2++)
 408                 {
 409                     vir_r_m_dder[m][m2] += dOH * roh2[m] * roh2[m2] * fc[0]
 410                                            + dOH * roh3[m] * roh3[m2] * fc[1]
 411                                            + dHH * rhh[m] * rhh[m2] * fc[2];
 412                 }
 413             }
 414         }
 415     }
 416 }
 417
 418
 419 /*! \brief The actual settle code, templated for real/SimdReal and for optimization */
 420 template<typename T, typename TypeBool, int packSize, typename TypePbc, bool bCorrectVelocity, bool bCalcVirial>
 421 static void settleTemplate(const settledata* settled,
 422                            int               settleStart,
 423                            int               settleEnd,
 424                            const TypePbc     pbc,
 425                            const real*       x,
 426                            real*             xprime,
 427                            real              invdt,
 428                            real* gmx_restrict v,
 429                            tensor             vir_r_m_dr,
 430                            bool*              bErrorHasOccurred)
 431 {
 432     /* ******************************************************************* */
 433     /*                                                                  ** */
 434     /*    Original code by Shuichi Miyamoto, last update Oct. 1, 1992   ** */
 435     /*                                                                  ** */
 436     /*    Algorithm changes by Berk Hess:                               ** */
 437     /*    2004-07-15 Convert COM to double precision to avoid drift     ** */
 438     /*    2006-10-16 Changed velocity update to use differences         ** */
 439     /*    2012-09-24 Use oxygen as reference instead of COM             ** */
 440     /*    2016-02    Complete rewrite of the code for SIMD              ** */
 441     /*                                                                  ** */
 442     /*    Reference for the SETTLE algorithm                            ** */
 443     /*           S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992).    ** */
 444     /*                                                                  ** */
 445     /* ******************************************************************* */
 446
 447     assert(settleStart % packSize == 0);
 448     assert(settleEnd % packSize == 0);
 449
 450     TypeBool bError = TypeBool(false);
 451
 452     const settleparam_t* p    = &settled->massw;
 453     T                    wh   = T(p->wh);
 454     T                    rc   = T(p->rc);
 455     T                    ra   = T(p->ra);
 456     T                    rb   = T(p->rb);
 457     T                    irc2 = T(p->irc2);
 458     T                    mO   = T(p->mO);
 459     T                    mH   = T(p->mH);
 460
 461     T almost_zero = T(1e-12);
 462
 463     T sum_r_m_dr[DIM][DIM];
 464
 465     if (bCalcVirial)
 466     {
 467         for (int d2 = 0; d2 < DIM; d2++)
 468         {
 469             for (int d = 0; d < DIM; d++)
 470             {
 471                 sum_r_m_dr[d2][d] = T(0);
 472             }
 473         }
 474     }
 475
 476     for (int i = settleStart; i < settleEnd; i += packSize)
 477     {
 478         /* Here we pad up to packSize with copies from the last valid entry.
 479          * This gives correct results, since we store (not increment) all
 480          * output, so we store the same output multiple times.
 481          */
 482         const int* ow1 = settled->ow1 + i;
 483         const int* hw2 = settled->hw2 + i;
 484         const int* hw3 = settled->hw3 + i;
 485
 486         T x_ow1[DIM], x_hw2[DIM], x_hw3[DIM];
 487
 488         gatherLoadUTranspose<3>(x, ow1, &x_ow1[XX], &x_ow1[YY], &x_ow1[ZZ]);
 489         gatherLoadUTranspose<3>(x, hw2, &x_hw2[XX], &x_hw2[YY], &x_hw2[ZZ]);
 490         gatherLoadUTranspose<3>(x, hw3, &x_hw3[XX], &x_hw3[YY], &x_hw3[ZZ]);
 491
 492         T xprime_ow1[DIM], xprime_hw2[DIM], xprime_hw3[DIM];
 493
 494         gatherLoadUTranspose<3>(xprime, ow1, &xprime_ow1[XX], &xprime_ow1[YY], &xprime_ow1[ZZ]);
 495         gatherLoadUTranspose<3>(xprime, hw2, &xprime_hw2[XX], &xprime_hw2[YY], &xprime_hw2[ZZ]);
 496         gatherLoadUTranspose<3>(xprime, hw3, &xprime_hw3[XX], &xprime_hw3[YY], &xprime_hw3[ZZ]);
 497
 498         T dist21[DIM], dist31[DIM];
 499         T doh2[DIM], doh3[DIM];
 500         T sh_hw2[DIM], sh_hw3[DIM];
 501
 502         pbc_dx_aiuc(pbc, x_hw2, x_ow1, dist21);
 503
 504         pbc_dx_aiuc(pbc, x_hw3, x_ow1, dist31);
 505
 506         /* Tedious way of doing pbc */
 507         pbc_dx_aiuc(pbc, xprime_hw2, xprime_ow1, doh2);
 508         for (int d = 0; d < DIM; d++)
 509         {
 510             sh_hw2[d]     = xprime_hw2[d] - (xprime_ow1[d] + doh2[d]);
 511             xprime_hw2[d] = xprime_hw2[d] - sh_hw2[d];
 512         }
 513         pbc_dx_aiuc(pbc, xprime_hw3, xprime_ow1, doh3);
 514         for (int d = 0; d < DIM; d++)
 515         {
 516             sh_hw3[d]     = xprime_hw3[d] - (xprime_ow1[d] + doh3[d]);
 517             xprime_hw3[d] = xprime_hw3[d] - sh_hw3[d];
 518         }
 519
 520         /* Not calculating the center of mass using the oxygen position
 521          * and the O-H distances, as done below, will make SETTLE
 522          * the largest source of energy drift for simulations of water,
 523          * as then the oxygen coordinate is multiplied by 0.89 at every step,
 524          * which can then transfer a systematic rounding to the oxygen velocity.
 525          */
 526         T a1[DIM], com[DIM];
 527         for (int d = 0; d < DIM; d++)
 528         {
 529             a1[d]  = -(doh2[d] + doh3[d]) * wh;
 530             com[d] = xprime_ow1[d] - a1[d];
 531         }
 532         T b1[DIM];
 533         for (int d = 0; d < DIM; d++)
 534         {
 535             b1[d] = xprime_hw2[d] - com[d];
 536         }
 537         T c1[DIM];
 538         for (int d = 0; d < DIM; d++)
 539         {
 540             c1[d] = xprime_hw3[d] - com[d];
 541         }
 542         /* 15 flops */
 543
 544         T xakszd = dist21[YY] * dist31[ZZ] - dist21[ZZ] * dist31[YY];
 545         T yakszd = dist21[ZZ] * dist31[XX] - dist21[XX] * dist31[ZZ];
 546         T zakszd = dist21[XX] * dist31[YY] - dist21[YY] * dist31[XX];
 547         T xaksxd = a1[YY] * zakszd - a1[ZZ] * yakszd;
 548         T yaksxd = a1[ZZ] * xakszd - a1[XX] * zakszd;
 549         T zaksxd = a1[XX] * yakszd - a1[YY] * xakszd;
 550         T xaksyd = yakszd * zaksxd - zakszd * yaksxd;
 551         T yaksyd = zakszd * xaksxd - xakszd * zaksxd;
 552         T zaksyd = xakszd * yaksxd - yakszd * xaksxd;
 553         /* 27 flops */
 554
 555         T axlng = gmx::invsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd);
 556         T aylng = gmx::invsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd);
 557         T azlng = gmx::invsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd);
 558
 559         T trns1[DIM], trns2[DIM], trns3[DIM];
 560
 561         trns1[XX] = xaksxd * axlng;
 562         trns2[XX] = yaksxd * axlng;
 563         trns3[XX] = zaksxd * axlng;
 564         trns1[YY] = xaksyd * aylng;
 565         trns2[YY] = yaksyd * aylng;
 566         trns3[YY] = zaksyd * aylng;
 567         trns1[ZZ] = xakszd * azlng;
 568         trns2[ZZ] = yakszd * azlng;
 569         trns3[ZZ] = zakszd * azlng;
 570         /* 24 flops */
 571
 572         T b0d[2], c0d[2];
 573
 574         for (int d = 0; d < 2; d++)
 575         {
 576             b0d[d] = trns1[d] * dist21[XX] + trns2[d] * dist21[YY] + trns3[d] * dist21[ZZ];
 577             c0d[d] = trns1[d] * dist31[XX] + trns2[d] * dist31[YY] + trns3[d] * dist31[ZZ];
 578         }
 579
 580         T a1d_z, b1d[DIM], c1d[DIM];
 581
 582         a1d_z = trns1[ZZ] * a1[XX] + trns2[ZZ] * a1[YY] + trns3[ZZ] * a1[ZZ];
 583         for (int d = 0; d < DIM; d++)
 584         {
 585             b1d[d] = trns1[d] * b1[XX] + trns2[d] * b1[YY] + trns3[d] * b1[ZZ];
 586             c1d[d] = trns1[d] * c1[XX] + trns2[d] * c1[YY] + trns3[d] * c1[ZZ];
 587         }
 588         /* 65 flops */
 589
 590         T tmp, tmp2;
 591
 592         T sinphi = a1d_z * gmx::invsqrt(ra * ra);
 593         tmp2     = 1.0 - sinphi * sinphi;
 594
 595         /* If tmp2 gets close to or beyond zero we have severly distorted
 596          * water molecules and we should terminate the simulation.
 597          * Below we take the max with almost_zero to continue the loop.
 598          */
 599         bError = bError || (tmp2 <= almost_zero);
 600
 601         tmp2     = max(tmp2, almost_zero);
 602         tmp      = gmx::invsqrt(tmp2);
 603         T cosphi = tmp2 * tmp;
 604         T sinpsi = (b1d[ZZ] - c1d[ZZ]) * irc2 * tmp;
 605         tmp2     = 1.0 - sinpsi * sinpsi;
 606
 607         T cospsi = tmp2 * gmx::invsqrt(tmp2);
 608         /* 46 flops */
 609
 610         T a2d_y = ra * cosphi;
 611         T b2d_x = -rc * cospsi;
 612         T t1    = -rb * cosphi;
 613         T t2    = rc * sinpsi * sinphi;
 614         T b2d_y = t1 - t2;
 615         T c2d_y = t1 + t2;
 616         /* 7 flops */
 617
 618         /*     --- Step3  al,be,ga            --- */
 619         T alpha  = b2d_x * (b0d[XX] - c0d[XX]) + b0d[YY] * b2d_y + c0d[YY] * c2d_y;
 620         T beta   = b2d_x * (c0d[YY] - b0d[YY]) + b0d[XX] * b2d_y + c0d[XX] * c2d_y;
 621         T gamma  = b0d[XX] * b1d[YY] - b1d[XX] * b0d[YY] + c0d[XX] * c1d[YY] - c1d[XX] * c0d[YY];
 622         T al2be2 = alpha * alpha + beta * beta;
 623         tmp2     = (al2be2 - gamma * gamma);
 624         T sinthe = (alpha * gamma - beta * tmp2 * gmx::invsqrt(tmp2)) * gmx::invsqrt(al2be2 * al2be2);
 625         /* 47 flops */
 626
 627         /*  --- Step4  A3' --- */
 628         tmp2     = 1.0 - sinthe * sinthe;
 629         T costhe = tmp2 * gmx::invsqrt(tmp2);
 630
 631         T a3d[DIM], b3d[DIM], c3d[DIM];
 632
 633         a3d[XX] = -a2d_y * sinthe;
 634         a3d[YY] = a2d_y * costhe;
 635         a3d[ZZ] = a1d_z;
 636         b3d[XX] = b2d_x * costhe - b2d_y * sinthe;
 637         b3d[YY] = b2d_x * sinthe + b2d_y * costhe;
 638         b3d[ZZ] = b1d[ZZ];
 639         c3d[XX] = -b2d_x * costhe - c2d_y * sinthe;
 640         c3d[YY] = -b2d_x * sinthe + c2d_y * costhe;
 641         c3d[ZZ] = c1d[ZZ];
 642         /* 26 flops */
 643
 644         /*    --- Step5  A3 --- */
 645         T a3[DIM], b3[DIM], c3[DIM];
 646
 647         a3[XX] = trns1[XX] * a3d[XX] + trns1[YY] * a3d[YY] + trns1[ZZ] * a3d[ZZ];
 648         a3[YY] = trns2[XX] * a3d[XX] + trns2[YY] * a3d[YY] + trns2[ZZ] * a3d[ZZ];
 649         a3[ZZ] = trns3[XX] * a3d[XX] + trns3[YY] * a3d[YY] + trns3[ZZ] * a3d[ZZ];
 650         b3[XX] = trns1[XX] * b3d[XX] + trns1[YY] * b3d[YY] + trns1[ZZ] * b3d[ZZ];
 651         b3[YY] = trns2[XX] * b3d[XX] + trns2[YY] * b3d[YY] + trns2[ZZ] * b3d[ZZ];
 652         b3[ZZ] = trns3[XX] * b3d[XX] + trns3[YY] * b3d[YY] + trns3[ZZ] * b3d[ZZ];
 653         c3[XX] = trns1[XX] * c3d[XX] + trns1[YY] * c3d[YY] + trns1[ZZ] * c3d[ZZ];
 654         c3[YY] = trns2[XX] * c3d[XX] + trns2[YY] * c3d[YY] + trns2[ZZ] * c3d[ZZ];
 655         c3[ZZ] = trns3[XX] * c3d[XX] + trns3[YY] * c3d[YY] + trns3[ZZ] * c3d[ZZ];
 656         /* 45 flops */
 657
 658         /* Compute and store the corrected new coordinate */
 659         for (int d = 0; d < DIM; d++)
 660         {
 661             xprime_ow1[d] = com[d] + a3[d];
 662         }
 663         for (int d = 0; d < DIM; d++)
 664         {
 665             xprime_hw2[d] = com[d] + b3[d] + sh_hw2[d];
 666         }
 667         for (int d = 0; d < DIM; d++)
 668         {
 669             xprime_hw3[d] = com[d] + c3[d] + sh_hw3[d];
 670         }
 671         /* 9 flops + 6 pbc flops */
 672
 673         transposeScatterStoreU<3>(xprime, ow1, xprime_ow1[XX], xprime_ow1[YY], xprime_ow1[ZZ]);
 674         transposeScatterStoreU<3>(xprime, hw2, xprime_hw2[XX], xprime_hw2[YY], xprime_hw2[ZZ]);
 675         transposeScatterStoreU<3>(xprime, hw3, xprime_hw3[XX], xprime_hw3[YY], xprime_hw3[ZZ]);
 676
 677         if (bCorrectVelocity || bCalcVirial)
 678         {
 679             T da[DIM], db[DIM], dc[DIM];
 680             for (int d = 0; d < DIM; d++)
 681             {
 682                 da[d] = a3[d] - a1[d];
 683             }
 684             for (int d = 0; d < DIM; d++)
 685             {
 686                 db[d] = b3[d] - b1[d];
 687             }
 688             for (int d = 0; d < DIM; d++)
 689             {
 690                 dc[d] = c3[d] - c1[d];
 691             }
 692             /* 9 flops */
 693
 694             if (bCorrectVelocity)
 695             {
 696                 T v_ow1[DIM], v_hw2[DIM], v_hw3[DIM];
 697
 698                 gatherLoadUTranspose<3>(v, ow1, &v_ow1[XX], &v_ow1[YY], &v_ow1[ZZ]);
 699                 gatherLoadUTranspose<3>(v, hw2, &v_hw2[XX], &v_hw2[YY], &v_hw2[ZZ]);
 700                 gatherLoadUTranspose<3>(v, hw3, &v_hw3[XX], &v_hw3[YY], &v_hw3[ZZ]);
 701
 702                 /* Add the position correction divided by dt to the velocity */
 703                 for (int d = 0; d < DIM; d++)
 704                 {
 705                     v_ow1[d] = gmx::fma(da[d], invdt, v_ow1[d]);
 706                 }
 707                 for (int d = 0; d < DIM; d++)
 708                 {
 709                     v_hw2[d] = gmx::fma(db[d], invdt, v_hw2[d]);
 710                 }
 711                 for (int d = 0; d < DIM; d++)
 712                 {
 713                     v_hw3[d] = gmx::fma(dc[d], invdt, v_hw3[d]);
 714                 }
 715                 /* 3*6 flops */
 716
 717                 transposeScatterStoreU<3>(v, ow1, v_ow1[XX], v_ow1[YY], v_ow1[ZZ]);
 718                 transposeScatterStoreU<3>(v, hw2, v_hw2[XX], v_hw2[YY], v_hw2[ZZ]);
 719                 transposeScatterStoreU<3>(v, hw3, v_hw3[XX], v_hw3[YY], v_hw3[ZZ]);
 720             }
 721
 722             if (bCalcVirial)
 723             {
 724                 /* Filter out the non-local settles */
 725                 T filter = load<T>(settled->virfac + i);
 726                 T mOf    = filter * mO;
 727                 T mHf    = filter * mH;
 728
 729                 T mdo[DIM], mdb[DIM], mdc[DIM];
 730
 731                 for (int d = 0; d < DIM; d++)
 732                 {
 733                     mdb[d] = mHf * db[d];
 734                     mdc[d] = mHf * dc[d];
 735                     mdo[d] = mOf * da[d] + mdb[d] + mdc[d];
 736                 }
 737
 738                 for (int d2 = 0; d2 < DIM; d2++)
 739                 {
 740                     for (int d = 0; d < DIM; d++)
 741                     {
 742                         sum_r_m_dr[d2][d] =
 743                                 sum_r_m_dr[d2][d]
 744                                 - (x_ow1[d2] * mdo[d] + dist21[d2] * mdb[d] + dist31[d2] * mdc[d]);
 745                     }
 746                 }
 747                 /* 71 flops */
 748             }
 749         }
 750     }
 751
 752     if (bCalcVirial)
 753     {
 754         for (int d2 = 0; d2 < DIM; d2++)
 755         {
 756             for (int d = 0; d < DIM; d++)
 757             {
 758                 vir_r_m_dr[d2][d] += reduce(sum_r_m_dr[d2][d]);
 759             }
 760         }
 761     }
 762
 763     *bErrorHasOccurred = anyTrue(bError);
 764 }
 765
 766 /*! \brief Wrapper template function that divides the settles over threads
 767  * and instantiates the core template with instantiated booleans.
 768  */
 769 template<typename T, typename TypeBool, int packSize, typename TypePbc>
 770 static void settleTemplateWrapper(settledata* settled,
 771                                   int         nthread,
 772                                   int         thread,
 773                                   TypePbc     pbc,
 774                                   const real  x[],
 775                                   real        xprime[],
 776                                   real        invdt,
 777                                   real*       v,
 778                                   bool        bCalcVirial,
 779                                   tensor      vir_r_m_dr,
 780                                   bool*       bErrorHasOccurred)
 781 {
 782     /* We need to assign settles to threads in groups of pack_size */
 783     int numSettlePacks = (settled->nsettle + packSize - 1) / packSize;
 784     /* Round the end value up to give thread 0 more work */
 785     int settleStart = ((numSettlePacks * thread + nthread - 1) / nthread) * packSize;
 786     int settleEnd   = ((numSettlePacks * (thread + 1) + nthread - 1) / nthread) * packSize;
 787
 788     if (v != nullptr)
 789     {
 790         if (!bCalcVirial)
 791         {
 792             settleTemplate<T, TypeBool, packSize, TypePbc, true, false>(
 793                     settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, nullptr, bErrorHasOccurred);
 794         }
 795         else
 796         {
 797             settleTemplate<T, TypeBool, packSize, TypePbc, true, true>(
 798                     settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, vir_r_m_dr, bErrorHasOccurred);
 799         }
 800     }
 801     else
 802     {
 803         if (!bCalcVirial)
 804         {
 805             settleTemplate<T, TypeBool, packSize, TypePbc, false, false>(
 806                     settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, nullptr, bErrorHasOccurred);
 807         }
 808         else
 809         {
 810             settleTemplate<T, TypeBool, packSize, TypePbc, false, true>(
 811                     settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, vir_r_m_dr, bErrorHasOccurred);
 812         }
 813     }
 814 }
 815
 816 void csettle(settledata*                     settled,
 817              int                             nthread,
 818              int                             thread,
 819              const t_pbc*                    pbc,
 820              ArrayRefWithPadding<const RVec> x,
 821              ArrayRefWithPadding<RVec>       xprime,
 822              real                            invdt,
 823              ArrayRefWithPadding<RVec>       v,
 824              bool                            bCalcVirial,
 825              tensor                          vir_r_m_dr,
 826              bool*                           bErrorHasOccurred)
 827 {
 828     const real* xPtr      = as_rvec_array(x.paddedArrayRef().data())[0];
 829     real*       xprimePtr = as_rvec_array(xprime.paddedArrayRef().data())[0];
 830     real*       vPtr      = as_rvec_array(v.paddedArrayRef().data())[0];
 831
 832 #if GMX_SIMD_HAVE_REAL
 833     if (settled->bUseSimd)
 834     {
 835         /* Convert the pbc struct for SIMD */
 836         alignas(GMX_SIMD_ALIGNMENT) real pbcSimd[9 * GMX_SIMD_REAL_WIDTH];
 837         set_pbc_simd(pbc, pbcSimd);
 838
 839         settleTemplateWrapper<SimdReal, SimdBool, GMX_SIMD_REAL_WIDTH, const real*>(
 840                 settled, nthread, thread, pbcSimd, xPtr, xprimePtr, invdt, vPtr, bCalcVirial,
 841                 vir_r_m_dr, bErrorHasOccurred);
 842     }
 843     else
 844 #endif
 845     {
 846         /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
 847         t_pbc        pbcNo;
 848         const t_pbc* pbcNonNull;
 849
 850         if (pbc != nullptr)
 851         {
 852             pbcNonNull = pbc;
 853         }
 854         else
 855         {
 856             set_pbc(&pbcNo, PbcType::No, nullptr);
 857             pbcNonNull = &pbcNo;
 858         }
 859
 860         settleTemplateWrapper<real, bool, 1, const t_pbc*>(settled, nthread, thread, pbcNonNull,
 861                                                            &xPtr[0], &xprimePtr[0], invdt, &vPtr[0],
 862                                                            bCalcVirial, vir_r_m_dr, bErrorHasOccurred);
 863     }
 864 }
 865
 866 } // namespace gmx