2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Defines SETTLE code.
41 * \author Berk Hess <hess@kth.se>
42 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * \ingroup module_mdlib
55 #include "gromacs/math/arrayrefwithpadding.h"
56 #include "gromacs/math/functions.h"
57 #include "gromacs/math/invertmatrix.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/mdlib/constr.h"
60 #include "gromacs/mdtypes/mdatom.h"
61 #include "gromacs/pbcutil/ishift.h"
62 #include "gromacs/pbcutil/pbc.h"
63 #include "gromacs/pbcutil/pbc_simd.h"
64 #include "gromacs/simd/simd.h"
65 #include "gromacs/simd/simd_math.h"
66 #include "gromacs/topology/idef.h"
67 #include "gromacs/topology/ifunc.h"
68 #include "gromacs/topology/mtop_util.h"
69 #include "gromacs/utility/fatalerror.h"
70 #include "gromacs/utility/gmxassert.h"
75 /*! \brief Initializes a projection matrix.
77 * \param[in] invmO Reciprocal oxygen mass
78 * \param[in] invmH Reciprocal hydrogen mass
79 * \param[in] dOH Target O-H bond length
80 * \param[in] dHH Target H-H bond length
81 * \param[out] inverseCouplingMatrix Inverse bond coupling matrix for the projection version of SETTLE
83 static void initializeProjectionMatrix(const real invmO,
87 matrix inverseCouplingMatrix)
89 // We normalize the inverse masses with invmO for the matrix inversion.
90 // so we can keep using masses of almost zero for frozen particles,
91 // without running out of the float range in invertMatrix.
92 double invmORelative = 1.0;
93 double invmHRelative = invmH / static_cast<double>(invmO);
94 double distanceRatio = dHH / static_cast<double>(dOH);
96 /* Construct the constraint coupling matrix */
98 mat[0][0] = invmORelative + invmHRelative;
99 mat[0][1] = invmORelative * (1.0 - 0.5 * gmx::square(distanceRatio));
100 mat[0][2] = invmHRelative * 0.5 * distanceRatio;
101 mat[1][1] = mat[0][0];
102 mat[1][2] = mat[0][2];
103 mat[2][2] = invmHRelative + invmHRelative;
104 mat[1][0] = mat[0][1];
105 mat[2][0] = mat[0][2];
106 mat[2][1] = mat[1][2];
108 invertMatrix(mat, inverseCouplingMatrix);
110 msmul(inverseCouplingMatrix, 1 / invmO, inverseCouplingMatrix);
114 settleParameters(const real mO, const real mH, const real invmO, const real invmH, const real dOH, const real dHH)
116 SettleParameters params;
118 // We calculate parameters in double precision to minimize errors.
119 // The velocity correction applied during SETTLE coordinate constraining
120 // introduces a systematic error of approximately 1 bit per atom,
121 // depending on what the compiler does with the code.
126 wohh = mO + 2.0 * mH;
127 params.wh = mH / wohh;
130 double rc = dHH / 2.0;
131 double ra = 2.0 * mH * std::sqrt(dOH * dOH - rc * rc) / wohh;
132 params.rb = std::sqrt(dOH * dOH - rc * rc) - ra;
135 params.irc2 = 1.0 / dHH;
137 // For projection: inverse masses and coupling matrix inversion
141 params.invdOH = 1.0 / dOH;
142 params.invdHH = 1.0 / dHH;
144 initializeProjectionMatrix(invmO, invmH, dOH, dHH, params.invmat);
148 fprintf(debug, "wh =%g, rc = %g, ra = %g\n", params.wh, params.rc, params.ra);
149 fprintf(debug, "rb = %g, irc2 = %g, dHH = %g, dOH = %g\n", params.rb, params.irc2, params.dHH, params.dOH);
155 SettleData::SettleData(const gmx_mtop_t& mtop) :
156 useSimd_(getenv("GMX_DISABLE_SIMD_KERNELS") == nullptr)
158 /* Check that we have only one settle type */
159 int settle_type = -1;
160 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
162 const int nral1 = 1 + NRAL(F_SETTLE);
163 while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
165 const InteractionList& ilist = (*ilists)[F_SETTLE];
166 for (int i = 0; i < ilist.size(); i += nral1)
168 if (settle_type == -1)
170 settle_type = ilist.iatoms[i];
172 else if (ilist.iatoms[i] != settle_type)
175 "The [molecules] section of your topology specifies more than one block "
177 "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
178 "If you are trying to partition your solvent into different *groups*\n"
179 "(e.g. for freezing, T-coupling, etc.), you are using the wrong "
181 "files specify groups. Otherwise, you may wish to change the least-used\n"
182 "block of molecules with SETTLE constraints into 3 normal constraints.");
186 GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles");
188 /* We will not initialize the normal SETTLE parameters here yet,
189 * since the atom (inv)masses can depend on the integrator and
190 * free-energy perturbation. We set mO=-1 to trigger later initialization.
192 parametersMassWeighted_.mO = -1;
194 real dOH = mtop.ffparams.iparams[settle_type].settle.doh;
195 real dHH = mtop.ffparams.iparams[settle_type].settle.dhh;
196 parametersAllMasses1_ = settleParameters(1.0, 1.0, 1.0, 1.0, dOH, dHH);
199 void SettleData::setConstraints(const InteractionList& il_settle,
200 const int numHomeAtoms,
202 const real* inverseMasses)
204 #if GMX_SIMD_HAVE_REAL
205 const int pack_size = GMX_SIMD_REAL_WIDTH;
207 const int pack_size = 1;
210 const int nral1 = 1 + NRAL(F_SETTLE);
211 int nsettle = il_settle.size() / nral1;
212 numSettles_ = nsettle;
216 ArrayRef<const int> iatoms = il_settle.iatoms;
218 /* Here we initialize the normal SETTLE parameters */
219 if (parametersMassWeighted_.mO < 0)
221 int firstO = iatoms[1];
222 int firstH = iatoms[2];
223 parametersMassWeighted_ = settleParameters(masses[firstO],
225 inverseMasses[firstO],
226 inverseMasses[firstH],
227 parametersAllMasses1_.dOH,
228 parametersAllMasses1_.dHH);
231 const int paddedSize = ((nsettle + pack_size - 1) / pack_size) * pack_size;
232 ow1_.resize(paddedSize);
233 hw2_.resize(paddedSize);
234 hw3_.resize(paddedSize);
235 virfac_.resize(paddedSize);
237 for (int i = 0; i < nsettle; i++)
239 ow1_[i] = iatoms[i * nral1 + 1];
240 hw2_[i] = iatoms[i * nral1 + 2];
241 hw3_[i] = iatoms[i * nral1 + 3];
242 /* We should avoid double counting of virial contributions for
243 * SETTLEs that appear in multiple DD domains, so we only count
244 * the contribution on the home range of the oxygen atom.
246 virfac_[i] = (iatoms[i * nral1 + 1] < numHomeAtoms ? 1 : 0);
249 /* Pad the index array to the full SIMD width with copies from
250 * the last normal entry, but with no virial contribution.
252 for (int i = nsettle; i < paddedSize; i++)
254 ow1_[i] = ow1_[nsettle - 1];
255 hw2_[i] = hw2_[nsettle - 1];
256 hw3_[i] = hw3_[nsettle - 1];
262 void settle_proj(const SettleData& settled,
263 ConstraintVariable econq,
265 const t_iatom iatoms[],
267 ArrayRef<const RVec> x,
270 int calcvir_atom_end,
273 /* Settle for projection out constraint components
274 * of derivatives of the coordinates.
275 * Berk Hess 2008-1-10
278 const SettleParameters* p;
279 real imO, imH, dOH, dHH, invdOH, invdHH;
281 int i, m, m2, ow1, hw2, hw3;
282 rvec roh2, roh3, rhh, dc, fc;
284 calcvir_atom_end *= DIM;
286 if (econq == ConstraintVariable::Force)
288 p = &settled.parametersAllMasses1();
292 p = &settled.parametersMassWeighted();
296 copy_mat(p->invmat, invmat);
302 const int nral1 = 1 + NRAL(F_SETTLE);
304 for (i = 0; i < nsettle; i++)
306 ow1 = iatoms[i * nral1 + 1];
307 hw2 = iatoms[i * nral1 + 2];
308 hw3 = iatoms[i * nral1 + 3];
312 rvec_sub(x[ow1], x[hw2], roh2);
313 rvec_sub(x[ow1], x[hw3], roh3);
314 rvec_sub(x[hw2], x[hw3], rhh);
318 pbc_dx_aiuc(pbc, x[ow1], x[hw2], roh2);
319 pbc_dx_aiuc(pbc, x[ow1], x[hw3], roh3);
320 pbc_dx_aiuc(pbc, x[hw2], x[hw3], rhh);
322 svmul(invdOH, roh2, roh2);
323 svmul(invdOH, roh3, roh3);
324 svmul(invdHH, rhh, rhh);
327 /* Determine the projections of der on the bonds */
329 for (m = 0; m < DIM; m++)
331 dc[0] += (der[ow1][m] - der[hw2][m]) * roh2[m];
332 dc[1] += (der[ow1][m] - der[hw3][m]) * roh3[m];
333 dc[2] += (der[hw2][m] - der[hw3][m]) * rhh[m];
337 /* Determine the correction for the three bonds */
338 mvmul(invmat, dc, fc);
341 /* Subtract the corrections from derp */
342 for (m = 0; m < DIM; m++)
344 derp[ow1][m] -= imO * (fc[0] * roh2[m] + fc[1] * roh3[m]);
345 derp[hw2][m] -= imH * (-fc[0] * roh2[m] + fc[2] * rhh[m]);
346 derp[hw3][m] -= imH * (-fc[1] * roh3[m] - fc[2] * rhh[m]);
351 if (ow1 < calcvir_atom_end)
353 /* Determining r \dot m der is easy,
354 * since fc contains the mass weighted corrections for der.
357 for (m = 0; m < DIM; m++)
359 for (m2 = 0; m2 < DIM; m2++)
361 vir_r_m_dder[m][m2] += dOH * roh2[m] * roh2[m2] * fc[0]
362 + dOH * roh3[m] * roh3[m2] * fc[1]
363 + dHH * rhh[m] * rhh[m2] * fc[2];
371 /*! \brief The actual settle code, templated for real/SimdReal and for optimization */
372 template<typename T, typename TypeBool, int packSize, typename TypePbc, bool bCorrectVelocity, bool bCalcVirial>
373 static void settleTemplate(const SettleData& settled,
380 real* gmx_restrict v,
382 bool* bErrorHasOccurred)
384 /* ******************************************************************* */
386 /* Original code by Shuichi Miyamoto, last update Oct. 1, 1992 ** */
388 /* Algorithm changes by Berk Hess: ** */
389 /* 2004-07-15 Convert COM to double precision to avoid drift ** */
390 /* 2006-10-16 Changed velocity update to use differences ** */
391 /* 2012-09-24 Use oxygen as reference instead of COM ** */
392 /* 2016-02 Complete rewrite of the code for SIMD ** */
393 /* 2020-06 Completely remove use of COM to minimize drift ** */
395 /* Reference for the SETTLE algorithm ** */
396 /* S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992). ** */
398 /* ******************************************************************* */
400 assert(settleStart % packSize == 0);
401 assert(settleEnd % packSize == 0);
403 TypeBool bError = TypeBool(false);
405 const SettleParameters* p = &settled.parametersMassWeighted();
414 T almost_zero = T(1e-12);
416 T sum_r_m_dr[DIM][DIM];
420 for (int d2 = 0; d2 < DIM; d2++)
422 for (int d = 0; d < DIM; d++)
424 sum_r_m_dr[d2][d] = T(0);
429 for (int i = settleStart; i < settleEnd; i += packSize)
431 /* Here we pad up to packSize with copies from the last valid entry.
432 * This gives correct results, since we store (not increment) all
433 * output, so we store the same output multiple times.
435 const int* ow1 = settled.ow1() + i;
436 const int* hw2 = settled.hw2() + i;
437 const int* hw3 = settled.hw3() + i;
439 T x_ow1[DIM], x_hw2[DIM], x_hw3[DIM];
441 gatherLoadUTranspose<3>(x, ow1, &x_ow1[XX], &x_ow1[YY], &x_ow1[ZZ]);
442 gatherLoadUTranspose<3>(x, hw2, &x_hw2[XX], &x_hw2[YY], &x_hw2[ZZ]);
443 gatherLoadUTranspose<3>(x, hw3, &x_hw3[XX], &x_hw3[YY], &x_hw3[ZZ]);
445 T xprime_ow1[DIM], xprime_hw2[DIM], xprime_hw3[DIM];
447 gatherLoadUTranspose<3>(xprime, ow1, &xprime_ow1[XX], &xprime_ow1[YY], &xprime_ow1[ZZ]);
448 gatherLoadUTranspose<3>(xprime, hw2, &xprime_hw2[XX], &xprime_hw2[YY], &xprime_hw2[ZZ]);
449 gatherLoadUTranspose<3>(xprime, hw3, &xprime_hw3[XX], &xprime_hw3[YY], &xprime_hw3[ZZ]);
451 T dist21[DIM], dist31[DIM];
452 T doh2[DIM], doh3[DIM];
454 pbc_dx_aiuc(pbc, x_hw2, x_ow1, dist21);
456 pbc_dx_aiuc(pbc, x_hw3, x_ow1, dist31);
458 pbc_dx_aiuc(pbc, xprime_hw2, xprime_ow1, doh2);
460 pbc_dx_aiuc(pbc, xprime_hw3, xprime_ow1, doh3);
461 /* 4 * 18 flops (would be 4 * 3 without PBC) */
463 /* Note that we completely avoid computing the center of mass and
464 * only use distances. This minimizes energy drift and also makes
465 * the computation slightly cheaper.
466 * Straightforward computation of the COM, as in the original algorithm,
467 * makes SETTLE the largest source of energy drift for simulations of water,
468 * as then the oxygen coordinate is multiplied by 0.89 at every step,
469 * which can then transfer a systematic rounding to the oxygen velocity.
470 * For some time we computed the COM using offsets from the oxygen, this
471 * significantly reduces the energy drift, but not using the COM at all,
472 * as we do now, is optimal.
475 for (int d = 0; d < DIM; d++)
477 a1[d] = -(doh2[d] + doh3[d]) * wh;
480 for (int d = 0; d < DIM; d++)
482 b1[d] = doh2[d] + a1[d];
485 for (int d = 0; d < DIM; d++)
487 c1[d] = doh3[d] + a1[d];
491 T xakszd = dist21[YY] * dist31[ZZ] - dist21[ZZ] * dist31[YY];
492 T yakszd = dist21[ZZ] * dist31[XX] - dist21[XX] * dist31[ZZ];
493 T zakszd = dist21[XX] * dist31[YY] - dist21[YY] * dist31[XX];
494 T xaksxd = a1[YY] * zakszd - a1[ZZ] * yakszd;
495 T yaksxd = a1[ZZ] * xakszd - a1[XX] * zakszd;
496 T zaksxd = a1[XX] * yakszd - a1[YY] * xakszd;
497 T xaksyd = yakszd * zaksxd - zakszd * yaksxd;
498 T yaksyd = zakszd * xaksxd - xakszd * zaksxd;
499 T zaksyd = xakszd * yaksxd - yakszd * xaksxd;
502 T axlng = gmx::invsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd);
503 T aylng = gmx::invsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd);
504 T azlng = gmx::invsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd);
506 T trns1[DIM], trns2[DIM], trns3[DIM];
508 trns1[XX] = xaksxd * axlng;
509 trns2[XX] = yaksxd * axlng;
510 trns3[XX] = zaksxd * axlng;
511 trns1[YY] = xaksyd * aylng;
512 trns2[YY] = yaksyd * aylng;
513 trns3[YY] = zaksyd * aylng;
514 trns1[ZZ] = xakszd * azlng;
515 trns2[ZZ] = yakszd * azlng;
516 trns3[ZZ] = zakszd * azlng;
521 for (int d = 0; d < 2; d++)
523 b0d[d] = trns1[d] * dist21[XX] + trns2[d] * dist21[YY] + trns3[d] * dist21[ZZ];
524 c0d[d] = trns1[d] * dist31[XX] + trns2[d] * dist31[YY] + trns3[d] * dist31[ZZ];
527 T a1d_z, b1d[DIM], c1d[DIM];
529 a1d_z = trns1[ZZ] * a1[XX] + trns2[ZZ] * a1[YY] + trns3[ZZ] * a1[ZZ];
530 for (int d = 0; d < DIM; d++)
532 b1d[d] = trns1[d] * b1[XX] + trns2[d] * b1[YY] + trns3[d] * b1[ZZ];
533 c1d[d] = trns1[d] * c1[XX] + trns2[d] * c1[YY] + trns3[d] * c1[ZZ];
539 T sinphi = a1d_z * gmx::invsqrt(ra * ra);
540 tmp2 = 1.0 - sinphi * sinphi;
542 /* If tmp2 gets close to or beyond zero we have severly distorted
543 * water molecules and we should terminate the simulation.
544 * Below we take the max with almost_zero to continue the loop.
546 bError = bError || (tmp2 <= almost_zero);
548 tmp2 = max(tmp2, almost_zero);
549 tmp = gmx::invsqrt(tmp2);
550 T cosphi = tmp2 * tmp;
551 T sinpsi = (b1d[ZZ] - c1d[ZZ]) * irc2 * tmp;
552 tmp2 = 1.0 - sinpsi * sinpsi;
554 T cospsi = tmp2 * gmx::invsqrt(tmp2);
557 T a2d_y = ra * cosphi;
558 T b2d_x = -rc * cospsi;
560 T t2 = rc * sinpsi * sinphi;
565 /* --- Step3 al,be,ga --- */
566 T alpha = b2d_x * (b0d[XX] - c0d[XX]) + b0d[YY] * b2d_y + c0d[YY] * c2d_y;
567 T beta = b2d_x * (c0d[YY] - b0d[YY]) + b0d[XX] * b2d_y + c0d[XX] * c2d_y;
568 T gamma = b0d[XX] * b1d[YY] - b1d[XX] * b0d[YY] + c0d[XX] * c1d[YY] - c1d[XX] * c0d[YY];
569 T al2be2 = alpha * alpha + beta * beta;
570 tmp2 = (al2be2 - gamma * gamma);
571 T sinthe = (alpha * gamma - beta * tmp2 * gmx::invsqrt(tmp2)) * gmx::invsqrt(al2be2 * al2be2);
574 /* --- Step4 A3' --- */
575 tmp2 = 1.0 - sinthe * sinthe;
576 T costhe = tmp2 * gmx::invsqrt(tmp2);
578 T a3d[DIM], b3d[DIM], c3d[DIM];
580 a3d[XX] = -a2d_y * sinthe;
581 a3d[YY] = a2d_y * costhe;
583 b3d[XX] = b2d_x * costhe - b2d_y * sinthe;
584 b3d[YY] = b2d_x * sinthe + b2d_y * costhe;
586 c3d[XX] = -b2d_x * costhe - c2d_y * sinthe;
587 c3d[YY] = -b2d_x * sinthe + c2d_y * costhe;
591 /* --- Step5 A3 --- */
592 T a3[DIM], b3[DIM], c3[DIM];
594 a3[XX] = trns1[XX] * a3d[XX] + trns1[YY] * a3d[YY] + trns1[ZZ] * a3d[ZZ];
595 a3[YY] = trns2[XX] * a3d[XX] + trns2[YY] * a3d[YY] + trns2[ZZ] * a3d[ZZ];
596 a3[ZZ] = trns3[XX] * a3d[XX] + trns3[YY] * a3d[YY] + trns3[ZZ] * a3d[ZZ];
597 b3[XX] = trns1[XX] * b3d[XX] + trns1[YY] * b3d[YY] + trns1[ZZ] * b3d[ZZ];
598 b3[YY] = trns2[XX] * b3d[XX] + trns2[YY] * b3d[YY] + trns2[ZZ] * b3d[ZZ];
599 b3[ZZ] = trns3[XX] * b3d[XX] + trns3[YY] * b3d[YY] + trns3[ZZ] * b3d[ZZ];
600 c3[XX] = trns1[XX] * c3d[XX] + trns1[YY] * c3d[YY] + trns1[ZZ] * c3d[ZZ];
601 c3[YY] = trns2[XX] * c3d[XX] + trns2[YY] * c3d[YY] + trns2[ZZ] * c3d[ZZ];
602 c3[ZZ] = trns3[XX] * c3d[XX] + trns3[YY] * c3d[YY] + trns3[ZZ] * c3d[ZZ];
605 /* Compute and store the corrected new coordinate */
607 for (int d = 0; d < DIM; d++)
609 dxOw1[d] = a3[d] - a1[d];
610 xprime_ow1[d] = xprime_ow1[d] + dxOw1[d];
613 for (int d = 0; d < DIM; d++)
615 dxHw2[d] = b3[d] - b1[d];
616 xprime_hw2[d] = xprime_hw2[d] + dxHw2[d];
619 for (int d = 0; d < DIM; d++)
621 dxHw3[d] = c3[d] - c1[d];
622 xprime_hw3[d] = xprime_hw3[d] + dxHw3[d];
626 transposeScatterStoreU<3>(xprime, ow1, xprime_ow1[XX], xprime_ow1[YY], xprime_ow1[ZZ]);
627 transposeScatterStoreU<3>(xprime, hw2, xprime_hw2[XX], xprime_hw2[YY], xprime_hw2[ZZ]);
628 transposeScatterStoreU<3>(xprime, hw3, xprime_hw3[XX], xprime_hw3[YY], xprime_hw3[ZZ]);
630 if (bCorrectVelocity)
632 T v_ow1[DIM], v_hw2[DIM], v_hw3[DIM];
634 gatherLoadUTranspose<3>(v, ow1, &v_ow1[XX], &v_ow1[YY], &v_ow1[ZZ]);
635 gatherLoadUTranspose<3>(v, hw2, &v_hw2[XX], &v_hw2[YY], &v_hw2[ZZ]);
636 gatherLoadUTranspose<3>(v, hw3, &v_hw3[XX], &v_hw3[YY], &v_hw3[ZZ]);
638 /* Add the position correction divided by dt to the velocity */
639 for (int d = 0; d < DIM; d++)
641 v_ow1[d] = gmx::fma(dxOw1[d], invdt, v_ow1[d]);
643 for (int d = 0; d < DIM; d++)
645 v_hw2[d] = gmx::fma(dxHw2[d], invdt, v_hw2[d]);
647 for (int d = 0; d < DIM; d++)
649 v_hw3[d] = gmx::fma(dxHw3[d], invdt, v_hw3[d]);
653 transposeScatterStoreU<3>(v, ow1, v_ow1[XX], v_ow1[YY], v_ow1[ZZ]);
654 transposeScatterStoreU<3>(v, hw2, v_hw2[XX], v_hw2[YY], v_hw2[ZZ]);
655 transposeScatterStoreU<3>(v, hw3, v_hw3[XX], v_hw3[YY], v_hw3[ZZ]);
660 /* Filter out the non-local settles */
661 T filter = load<T>(settled.virfac() + i);
665 T mdo[DIM], mdb[DIM], mdc[DIM];
667 for (int d = 0; d < DIM; d++)
669 mdb[d] = mHf * dxHw2[d];
670 mdc[d] = mHf * dxHw3[d];
671 mdo[d] = mOf * dxOw1[d] + mdb[d] + mdc[d];
674 for (int d2 = 0; d2 < DIM; d2++)
676 for (int d = 0; d < DIM; d++)
680 - (x_ow1[d2] * mdo[d] + dist21[d2] * mdb[d] + dist31[d2] * mdc[d]);
689 for (int d2 = 0; d2 < DIM; d2++)
691 for (int d = 0; d < DIM; d++)
693 vir_r_m_dr[d2][d] += reduce(sum_r_m_dr[d2][d]);
698 *bErrorHasOccurred = anyTrue(bError);
701 /*! \brief Wrapper template function that divides the settles over threads
702 * and instantiates the core template with instantiated booleans.
704 template<typename T, typename TypeBool, int packSize, typename TypePbc>
705 static void settleTemplateWrapper(const SettleData& settled,
715 bool* bErrorHasOccurred)
717 /* We need to assign settles to threads in groups of pack_size */
718 int numSettlePacks = (settled.numSettles() + packSize - 1) / packSize;
719 /* Round the end value up to give thread 0 more work */
720 int settleStart = ((numSettlePacks * thread + nthread - 1) / nthread) * packSize;
721 int settleEnd = ((numSettlePacks * (thread + 1) + nthread - 1) / nthread) * packSize;
727 settleTemplate<T, TypeBool, packSize, TypePbc, true, false>(
728 settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, nullptr, bErrorHasOccurred);
732 settleTemplate<T, TypeBool, packSize, TypePbc, true, true>(
733 settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, vir_r_m_dr, bErrorHasOccurred);
740 settleTemplate<T, TypeBool, packSize, TypePbc, false, false>(
741 settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, nullptr, bErrorHasOccurred);
745 settleTemplate<T, TypeBool, packSize, TypePbc, false, true>(
746 settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, vir_r_m_dr, bErrorHasOccurred);
751 void csettle(const SettleData& settled,
755 ArrayRefWithPadding<const RVec> x,
756 ArrayRefWithPadding<RVec> xprime,
758 ArrayRefWithPadding<RVec> v,
761 bool* bErrorHasOccurred)
763 const real* xPtr = as_rvec_array(x.paddedArrayRef().data())[0];
764 real* xprimePtr = as_rvec_array(xprime.paddedArrayRef().data())[0];
765 real* vPtr = as_rvec_array(v.paddedArrayRef().data())[0];
767 #if GMX_SIMD_HAVE_REAL
768 if (settled.useSimd())
770 /* Convert the pbc struct for SIMD */
771 alignas(GMX_SIMD_ALIGNMENT) real pbcSimd[9 * GMX_SIMD_REAL_WIDTH];
772 set_pbc_simd(pbc, pbcSimd);
774 settleTemplateWrapper<SimdReal, SimdBool, GMX_SIMD_REAL_WIDTH, const real*>(
775 settled, nthread, thread, pbcSimd, xPtr, xprimePtr, invdt, vPtr, bCalcVirial, vir_r_m_dr, bErrorHasOccurred);
780 /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
782 const t_pbc* pbcNonNull;
790 set_pbc(&pbcNo, PbcType::No, nullptr);
794 settleTemplateWrapper<real, bool, 1, const t_pbc*>(
795 settled, nthread, thread, pbcNonNull, &xPtr[0], &xprimePtr[0], invdt, &vPtr[0], bCalcVirial, vir_r_m_dr, bErrorHasOccurred);