2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief Defines SETTLE code.
40 * \author Berk Hess <hess@kth.se>
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_mdlib
54 #include "gromacs/math/functions.h"
55 #include "gromacs/math/invertmatrix.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/mdlib/constr.h"
58 #include "gromacs/mdtypes/mdatom.h"
59 #include "gromacs/pbcutil/ishift.h"
60 #include "gromacs/pbcutil/pbc.h"
61 #include "gromacs/pbcutil/pbc_simd.h"
62 #include "gromacs/simd/simd.h"
63 #include "gromacs/simd/simd_math.h"
64 #include "gromacs/topology/idef.h"
65 #include "gromacs/topology/ifunc.h"
66 #include "gromacs/topology/mtop_util.h"
67 #include "gromacs/utility/fatalerror.h"
68 #include "gromacs/utility/gmxassert.h"
69 #include "gromacs/utility/smalloc.h"
95 settleparam_t massw; /* Parameters for SETTLE for coordinates */
96 settleparam_t mass1; /* Parameters with all masses 1, for forces */
98 int nsettle; /* The number of settles on our rank */
99 int* ow1; /* Index to OW1 atoms, size nsettle + SIMD padding */
100 int* hw2; /* Index to HW2 atoms, size nsettle + SIMD padding */
101 int* hw3; /* Index to HW3 atoms, size nsettle + SIMD padding */
102 real* virfac; /* Virial factor 0 or 1, size nsettle + SIMD pad. */
103 int nalloc; /* Allocation size of ow1, hw2, hw3, virfac */
105 bool bUseSimd; /* Use SIMD intrinsics code, if possible */
109 //! Initializes a projection matrix.
110 static void init_proj_matrix(real invmO, real invmH, real dOH, real dHH, matrix inverseCouplingMatrix)
112 /* We normalize the inverse masses with invmO for the matrix inversion.
113 * so we can keep using masses of almost zero for frozen particles,
114 * without running out of the float range in invertMatrix.
116 double invmORelative = 1.0;
117 double invmHRelative = invmH / static_cast<double>(invmO);
118 double distanceRatio = dHH / static_cast<double>(dOH);
120 /* Construct the constraint coupling matrix */
122 mat[0][0] = invmORelative + invmHRelative;
123 mat[0][1] = invmORelative * (1.0 - 0.5 * gmx::square(distanceRatio));
124 mat[0][2] = invmHRelative * 0.5 * distanceRatio;
125 mat[1][1] = mat[0][0];
126 mat[1][2] = mat[0][2];
127 mat[2][2] = invmHRelative + invmHRelative;
128 mat[1][0] = mat[0][1];
129 mat[2][0] = mat[0][2];
130 mat[2][1] = mat[1][2];
132 invertMatrix(mat, inverseCouplingMatrix);
134 msmul(inverseCouplingMatrix, 1 / invmO, inverseCouplingMatrix);
137 //! Initializes settle parameters.
138 static void settleparam_init(settleparam_t* p, real mO, real mH, real invmO, real invmH, real dOH, real dHH)
140 /* We calculate parameters in double precision to minimize errors.
141 * The velocity correction applied during SETTLE coordinate constraining
142 * introduces a systematic error of approximately 1 bit per atom,
143 * depending on what the compiler does with the code.
149 wohh = mO + 2.0 * mH;
153 double rc = dHH / 2.0;
154 double ra = 2.0 * mH * std::sqrt(dOH * dOH - rc * rc) / wohh;
155 p->rb = std::sqrt(dOH * dOH - rc * rc) - ra;
160 /* For projection: inverse masses and coupling matrix inversion */
164 p->invdOH = 1.0 / dOH;
165 p->invdHH = 1.0 / dHH;
167 init_proj_matrix(invmO, invmH, dOH, dHH, p->invmat);
171 fprintf(debug, "wh =%g, rc = %g, ra = %g\n", p->wh, p->rc, p->ra);
172 fprintf(debug, "rb = %g, irc2 = %g, dHH = %g, dOH = %g\n", p->rb, p->irc2, p->dHH, p->dOH);
176 settledata* settle_init(const gmx_mtop_t& mtop)
178 /* Check that we have only one settle type */
179 int settle_type = -1;
180 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
182 const int nral1 = 1 + NRAL(F_SETTLE);
183 while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
185 const InteractionList& ilist = (*ilists)[F_SETTLE];
186 for (int i = 0; i < ilist.size(); i += nral1)
188 if (settle_type == -1)
190 settle_type = ilist.iatoms[i];
192 else if (ilist.iatoms[i] != settle_type)
195 "The [molecules] section of your topology specifies more than one block "
197 "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
198 "If you are trying to partition your solvent into different *groups*\n"
199 "(e.g. for freezing, T-coupling, etc.), you are using the wrong "
201 "files specify groups. Otherwise, you may wish to change the least-used\n"
202 "block of molecules with SETTLE constraints into 3 normal constraints.");
206 GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles");
212 /* We will not initialize the normal SETTLE parameters here yet,
213 * since the atom (inv)masses can depend on the integrator and
214 * free-energy perturbation. We set mO=-1 to trigger later initialization.
216 settled->massw.mO = -1;
218 real dOH = mtop.ffparams.iparams[settle_type].settle.doh;
219 real dHH = mtop.ffparams.iparams[settle_type].settle.dhh;
220 settleparam_init(&settled->mass1, 1.0, 1.0, 1.0, 1.0, dOH, dHH);
222 settled->ow1 = nullptr;
223 settled->hw2 = nullptr;
224 settled->hw3 = nullptr;
225 settled->virfac = nullptr;
228 /* Without SIMD configured, this bool is not used */
229 settled->bUseSimd = (getenv("GMX_DISABLE_SIMD_KERNELS") == nullptr);
234 void settle_free(settledata* settled)
236 sfree_aligned(settled->ow1);
237 sfree_aligned(settled->hw2);
238 sfree_aligned(settled->hw3);
239 sfree_aligned(settled->virfac);
243 void settle_set_constraints(settledata* settled, const t_ilist* il_settle, const t_mdatoms& mdatoms)
245 #if GMX_SIMD_HAVE_REAL
246 const int pack_size = GMX_SIMD_REAL_WIDTH;
248 const int pack_size = 1;
251 const int nral1 = 1 + NRAL(F_SETTLE);
252 int nsettle = il_settle->nr / nral1;
253 settled->nsettle = nsettle;
257 const t_iatom* iatoms = il_settle->iatoms;
259 /* Here we initialize the normal SETTLE parameters */
260 if (settled->massw.mO < 0)
262 int firstO = iatoms[1];
263 int firstH = iatoms[2];
264 settleparam_init(&settled->massw, mdatoms.massT[firstO], mdatoms.massT[firstH],
265 mdatoms.invmass[firstO], mdatoms.invmass[firstH], settled->mass1.dOH,
269 if (nsettle + pack_size > settled->nalloc)
271 settled->nalloc = over_alloc_dd(nsettle + pack_size);
272 sfree_aligned(settled->ow1);
273 sfree_aligned(settled->hw2);
274 sfree_aligned(settled->hw3);
275 sfree_aligned(settled->virfac);
276 snew_aligned(settled->ow1, settled->nalloc, 64);
277 snew_aligned(settled->hw2, settled->nalloc, 64);
278 snew_aligned(settled->hw3, settled->nalloc, 64);
279 snew_aligned(settled->virfac, settled->nalloc, 64);
282 for (int i = 0; i < nsettle; i++)
284 settled->ow1[i] = iatoms[i * nral1 + 1];
285 settled->hw2[i] = iatoms[i * nral1 + 2];
286 settled->hw3[i] = iatoms[i * nral1 + 3];
287 /* We should avoid double counting of virial contributions for
288 * SETTLEs that appear in multiple DD domains, so we only count
289 * the contribution on the home range of the oxygen atom.
291 settled->virfac[i] = (iatoms[i * nral1 + 1] < mdatoms.homenr ? 1 : 0);
294 /* Pack the index array to the full SIMD width with copies from
295 * the last normal entry, but with no virial contribution.
297 int end_packed = ((nsettle + pack_size - 1) / pack_size) * pack_size;
298 for (int i = nsettle; i < end_packed; i++)
300 settled->ow1[i] = settled->ow1[nsettle - 1];
301 settled->hw2[i] = settled->hw2[nsettle - 1];
302 settled->hw3[i] = settled->hw3[nsettle - 1];
303 settled->virfac[i] = 0;
308 void settle_proj(settledata* settled,
309 ConstraintVariable econq,
311 const t_iatom iatoms[],
316 int calcvir_atom_end,
319 /* Settle for projection out constraint components
320 * of derivatives of the coordinates.
321 * Berk Hess 2008-1-10
325 real imO, imH, dOH, dHH, invdOH, invdHH;
327 int i, m, m2, ow1, hw2, hw3;
328 rvec roh2, roh3, rhh, dc, fc;
330 calcvir_atom_end *= DIM;
332 if (econq == ConstraintVariable::Force)
342 copy_mat(p->invmat, invmat);
348 const int nral1 = 1 + NRAL(F_SETTLE);
350 for (i = 0; i < nsettle; i++)
352 ow1 = iatoms[i * nral1 + 1];
353 hw2 = iatoms[i * nral1 + 2];
354 hw3 = iatoms[i * nral1 + 3];
358 rvec_sub(x[ow1], x[hw2], roh2);
359 rvec_sub(x[ow1], x[hw3], roh3);
360 rvec_sub(x[hw2], x[hw3], rhh);
364 pbc_dx_aiuc(pbc, x[ow1], x[hw2], roh2);
365 pbc_dx_aiuc(pbc, x[ow1], x[hw3], roh3);
366 pbc_dx_aiuc(pbc, x[hw2], x[hw3], rhh);
368 svmul(invdOH, roh2, roh2);
369 svmul(invdOH, roh3, roh3);
370 svmul(invdHH, rhh, rhh);
373 /* Determine the projections of der on the bonds */
375 for (m = 0; m < DIM; m++)
377 dc[0] += (der[ow1][m] - der[hw2][m]) * roh2[m];
378 dc[1] += (der[ow1][m] - der[hw3][m]) * roh3[m];
379 dc[2] += (der[hw2][m] - der[hw3][m]) * rhh[m];
383 /* Determine the correction for the three bonds */
384 mvmul(invmat, dc, fc);
387 /* Subtract the corrections from derp */
388 for (m = 0; m < DIM; m++)
390 derp[ow1][m] -= imO * (fc[0] * roh2[m] + fc[1] * roh3[m]);
391 derp[hw2][m] -= imH * (-fc[0] * roh2[m] + fc[2] * rhh[m]);
392 derp[hw3][m] -= imH * (-fc[1] * roh3[m] - fc[2] * rhh[m]);
397 if (ow1 < calcvir_atom_end)
399 /* Determining r \dot m der is easy,
400 * since fc contains the mass weighted corrections for der.
403 for (m = 0; m < DIM; m++)
405 for (m2 = 0; m2 < DIM; m2++)
407 vir_r_m_dder[m][m2] += dOH * roh2[m] * roh2[m2] * fc[0]
408 + dOH * roh3[m] * roh3[m2] * fc[1]
409 + dHH * rhh[m] * rhh[m2] * fc[2];
417 /*! \brief The actual settle code, templated for real/SimdReal and for optimization */
418 template<typename T, typename TypeBool, int packSize, typename TypePbc, bool bCorrectVelocity, bool bCalcVirial>
419 static void settleTemplate(const settledata* settled,
426 real* gmx_restrict v,
428 bool* bErrorHasOccurred)
430 /* ******************************************************************* */
432 /* Original code by Shuichi Miyamoto, last update Oct. 1, 1992 ** */
434 /* Algorithm changes by Berk Hess: ** */
435 /* 2004-07-15 Convert COM to double precision to avoid drift ** */
436 /* 2006-10-16 Changed velocity update to use differences ** */
437 /* 2012-09-24 Use oxygen as reference instead of COM ** */
438 /* 2016-02 Complete rewrite of the code for SIMD ** */
440 /* Reference for the SETTLE algorithm ** */
441 /* S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992). ** */
443 /* ******************************************************************* */
445 assert(settleStart % packSize == 0);
446 assert(settleEnd % packSize == 0);
448 TypeBool bError = TypeBool(false);
450 const settleparam_t* p = &settled->massw;
459 T almost_zero = T(1e-12);
461 T sum_r_m_dr[DIM][DIM];
465 for (int d2 = 0; d2 < DIM; d2++)
467 for (int d = 0; d < DIM; d++)
469 sum_r_m_dr[d2][d] = T(0);
474 for (int i = settleStart; i < settleEnd; i += packSize)
476 /* Here we pad up to packSize with copies from the last valid entry.
477 * This gives correct results, since we store (not increment) all
478 * output, so we store the same output multiple times.
480 const int* ow1 = settled->ow1 + i;
481 const int* hw2 = settled->hw2 + i;
482 const int* hw3 = settled->hw3 + i;
484 T x_ow1[DIM], x_hw2[DIM], x_hw3[DIM];
486 gatherLoadUTranspose<3>(x, ow1, &x_ow1[XX], &x_ow1[YY], &x_ow1[ZZ]);
487 gatherLoadUTranspose<3>(x, hw2, &x_hw2[XX], &x_hw2[YY], &x_hw2[ZZ]);
488 gatherLoadUTranspose<3>(x, hw3, &x_hw3[XX], &x_hw3[YY], &x_hw3[ZZ]);
490 T xprime_ow1[DIM], xprime_hw2[DIM], xprime_hw3[DIM];
492 gatherLoadUTranspose<3>(xprime, ow1, &xprime_ow1[XX], &xprime_ow1[YY], &xprime_ow1[ZZ]);
493 gatherLoadUTranspose<3>(xprime, hw2, &xprime_hw2[XX], &xprime_hw2[YY], &xprime_hw2[ZZ]);
494 gatherLoadUTranspose<3>(xprime, hw3, &xprime_hw3[XX], &xprime_hw3[YY], &xprime_hw3[ZZ]);
496 T dist21[DIM], dist31[DIM];
497 T doh2[DIM], doh3[DIM];
498 T sh_hw2[DIM], sh_hw3[DIM];
500 pbc_dx_aiuc(pbc, x_hw2, x_ow1, dist21);
502 pbc_dx_aiuc(pbc, x_hw3, x_ow1, dist31);
504 /* Tedious way of doing pbc */
505 pbc_dx_aiuc(pbc, xprime_hw2, xprime_ow1, doh2);
506 for (int d = 0; d < DIM; d++)
508 sh_hw2[d] = xprime_hw2[d] - (xprime_ow1[d] + doh2[d]);
509 xprime_hw2[d] = xprime_hw2[d] - sh_hw2[d];
511 pbc_dx_aiuc(pbc, xprime_hw3, xprime_ow1, doh3);
512 for (int d = 0; d < DIM; d++)
514 sh_hw3[d] = xprime_hw3[d] - (xprime_ow1[d] + doh3[d]);
515 xprime_hw3[d] = xprime_hw3[d] - sh_hw3[d];
518 /* Not calculating the center of mass using the oxygen position
519 * and the O-H distances, as done below, will make SETTLE
520 * the largest source of energy drift for simulations of water,
521 * as then the oxygen coordinate is multiplied by 0.89 at every step,
522 * which can then transfer a systematic rounding to the oxygen velocity.
525 for (int d = 0; d < DIM; d++)
527 a1[d] = -(doh2[d] + doh3[d]) * wh;
528 com[d] = xprime_ow1[d] - a1[d];
531 for (int d = 0; d < DIM; d++)
533 b1[d] = xprime_hw2[d] - com[d];
536 for (int d = 0; d < DIM; d++)
538 c1[d] = xprime_hw3[d] - com[d];
542 T xakszd = dist21[YY] * dist31[ZZ] - dist21[ZZ] * dist31[YY];
543 T yakszd = dist21[ZZ] * dist31[XX] - dist21[XX] * dist31[ZZ];
544 T zakszd = dist21[XX] * dist31[YY] - dist21[YY] * dist31[XX];
545 T xaksxd = a1[YY] * zakszd - a1[ZZ] * yakszd;
546 T yaksxd = a1[ZZ] * xakszd - a1[XX] * zakszd;
547 T zaksxd = a1[XX] * yakszd - a1[YY] * xakszd;
548 T xaksyd = yakszd * zaksxd - zakszd * yaksxd;
549 T yaksyd = zakszd * xaksxd - xakszd * zaksxd;
550 T zaksyd = xakszd * yaksxd - yakszd * xaksxd;
553 T axlng = gmx::invsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd);
554 T aylng = gmx::invsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd);
555 T azlng = gmx::invsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd);
557 T trns1[DIM], trns2[DIM], trns3[DIM];
559 trns1[XX] = xaksxd * axlng;
560 trns2[XX] = yaksxd * axlng;
561 trns3[XX] = zaksxd * axlng;
562 trns1[YY] = xaksyd * aylng;
563 trns2[YY] = yaksyd * aylng;
564 trns3[YY] = zaksyd * aylng;
565 trns1[ZZ] = xakszd * azlng;
566 trns2[ZZ] = yakszd * azlng;
567 trns3[ZZ] = zakszd * azlng;
572 for (int d = 0; d < 2; d++)
574 b0d[d] = trns1[d] * dist21[XX] + trns2[d] * dist21[YY] + trns3[d] * dist21[ZZ];
575 c0d[d] = trns1[d] * dist31[XX] + trns2[d] * dist31[YY] + trns3[d] * dist31[ZZ];
578 T a1d_z, b1d[DIM], c1d[DIM];
580 a1d_z = trns1[ZZ] * a1[XX] + trns2[ZZ] * a1[YY] + trns3[ZZ] * a1[ZZ];
581 for (int d = 0; d < DIM; d++)
583 b1d[d] = trns1[d] * b1[XX] + trns2[d] * b1[YY] + trns3[d] * b1[ZZ];
584 c1d[d] = trns1[d] * c1[XX] + trns2[d] * c1[YY] + trns3[d] * c1[ZZ];
590 T sinphi = a1d_z * gmx::invsqrt(ra * ra);
591 tmp2 = 1.0 - sinphi * sinphi;
593 /* If tmp2 gets close to or beyond zero we have severly distorted
594 * water molecules and we should terminate the simulation.
595 * Below we take the max with almost_zero to continue the loop.
597 bError = bError || (tmp2 <= almost_zero);
599 tmp2 = max(tmp2, almost_zero);
600 tmp = gmx::invsqrt(tmp2);
601 T cosphi = tmp2 * tmp;
602 T sinpsi = (b1d[ZZ] - c1d[ZZ]) * irc2 * tmp;
603 tmp2 = 1.0 - sinpsi * sinpsi;
605 T cospsi = tmp2 * gmx::invsqrt(tmp2);
608 T a2d_y = ra * cosphi;
609 T b2d_x = -rc * cospsi;
611 T t2 = rc * sinpsi * sinphi;
616 /* --- Step3 al,be,ga --- */
617 T alpha = b2d_x * (b0d[XX] - c0d[XX]) + b0d[YY] * b2d_y + c0d[YY] * c2d_y;
618 T beta = b2d_x * (c0d[YY] - b0d[YY]) + b0d[XX] * b2d_y + c0d[XX] * c2d_y;
619 T gamma = b0d[XX] * b1d[YY] - b1d[XX] * b0d[YY] + c0d[XX] * c1d[YY] - c1d[XX] * c0d[YY];
620 T al2be2 = alpha * alpha + beta * beta;
621 tmp2 = (al2be2 - gamma * gamma);
622 T sinthe = (alpha * gamma - beta * tmp2 * gmx::invsqrt(tmp2)) * gmx::invsqrt(al2be2 * al2be2);
625 /* --- Step4 A3' --- */
626 tmp2 = 1.0 - sinthe * sinthe;
627 T costhe = tmp2 * gmx::invsqrt(tmp2);
629 T a3d[DIM], b3d[DIM], c3d[DIM];
631 a3d[XX] = -a2d_y * sinthe;
632 a3d[YY] = a2d_y * costhe;
634 b3d[XX] = b2d_x * costhe - b2d_y * sinthe;
635 b3d[YY] = b2d_x * sinthe + b2d_y * costhe;
637 c3d[XX] = -b2d_x * costhe - c2d_y * sinthe;
638 c3d[YY] = -b2d_x * sinthe + c2d_y * costhe;
642 /* --- Step5 A3 --- */
643 T a3[DIM], b3[DIM], c3[DIM];
645 a3[XX] = trns1[XX] * a3d[XX] + trns1[YY] * a3d[YY] + trns1[ZZ] * a3d[ZZ];
646 a3[YY] = trns2[XX] * a3d[XX] + trns2[YY] * a3d[YY] + trns2[ZZ] * a3d[ZZ];
647 a3[ZZ] = trns3[XX] * a3d[XX] + trns3[YY] * a3d[YY] + trns3[ZZ] * a3d[ZZ];
648 b3[XX] = trns1[XX] * b3d[XX] + trns1[YY] * b3d[YY] + trns1[ZZ] * b3d[ZZ];
649 b3[YY] = trns2[XX] * b3d[XX] + trns2[YY] * b3d[YY] + trns2[ZZ] * b3d[ZZ];
650 b3[ZZ] = trns3[XX] * b3d[XX] + trns3[YY] * b3d[YY] + trns3[ZZ] * b3d[ZZ];
651 c3[XX] = trns1[XX] * c3d[XX] + trns1[YY] * c3d[YY] + trns1[ZZ] * c3d[ZZ];
652 c3[YY] = trns2[XX] * c3d[XX] + trns2[YY] * c3d[YY] + trns2[ZZ] * c3d[ZZ];
653 c3[ZZ] = trns3[XX] * c3d[XX] + trns3[YY] * c3d[YY] + trns3[ZZ] * c3d[ZZ];
656 /* Compute and store the corrected new coordinate */
657 for (int d = 0; d < DIM; d++)
659 xprime_ow1[d] = com[d] + a3[d];
661 for (int d = 0; d < DIM; d++)
663 xprime_hw2[d] = com[d] + b3[d] + sh_hw2[d];
665 for (int d = 0; d < DIM; d++)
667 xprime_hw3[d] = com[d] + c3[d] + sh_hw3[d];
669 /* 9 flops + 6 pbc flops */
671 transposeScatterStoreU<3>(xprime, ow1, xprime_ow1[XX], xprime_ow1[YY], xprime_ow1[ZZ]);
672 transposeScatterStoreU<3>(xprime, hw2, xprime_hw2[XX], xprime_hw2[YY], xprime_hw2[ZZ]);
673 transposeScatterStoreU<3>(xprime, hw3, xprime_hw3[XX], xprime_hw3[YY], xprime_hw3[ZZ]);
675 if (bCorrectVelocity || bCalcVirial)
677 T da[DIM], db[DIM], dc[DIM];
678 for (int d = 0; d < DIM; d++)
680 da[d] = a3[d] - a1[d];
682 for (int d = 0; d < DIM; d++)
684 db[d] = b3[d] - b1[d];
686 for (int d = 0; d < DIM; d++)
688 dc[d] = c3[d] - c1[d];
692 if (bCorrectVelocity)
694 T v_ow1[DIM], v_hw2[DIM], v_hw3[DIM];
696 gatherLoadUTranspose<3>(v, ow1, &v_ow1[XX], &v_ow1[YY], &v_ow1[ZZ]);
697 gatherLoadUTranspose<3>(v, hw2, &v_hw2[XX], &v_hw2[YY], &v_hw2[ZZ]);
698 gatherLoadUTranspose<3>(v, hw3, &v_hw3[XX], &v_hw3[YY], &v_hw3[ZZ]);
700 /* Add the position correction divided by dt to the velocity */
701 for (int d = 0; d < DIM; d++)
703 v_ow1[d] = gmx::fma(da[d], invdt, v_ow1[d]);
705 for (int d = 0; d < DIM; d++)
707 v_hw2[d] = gmx::fma(db[d], invdt, v_hw2[d]);
709 for (int d = 0; d < DIM; d++)
711 v_hw3[d] = gmx::fma(dc[d], invdt, v_hw3[d]);
715 transposeScatterStoreU<3>(v, ow1, v_ow1[XX], v_ow1[YY], v_ow1[ZZ]);
716 transposeScatterStoreU<3>(v, hw2, v_hw2[XX], v_hw2[YY], v_hw2[ZZ]);
717 transposeScatterStoreU<3>(v, hw3, v_hw3[XX], v_hw3[YY], v_hw3[ZZ]);
722 /* Filter out the non-local settles */
723 T filter = load<T>(settled->virfac + i);
727 T mdo[DIM], mdb[DIM], mdc[DIM];
729 for (int d = 0; d < DIM; d++)
731 mdb[d] = mHf * db[d];
732 mdc[d] = mHf * dc[d];
733 mdo[d] = mOf * da[d] + mdb[d] + mdc[d];
736 for (int d2 = 0; d2 < DIM; d2++)
738 for (int d = 0; d < DIM; d++)
742 - (x_ow1[d2] * mdo[d] + dist21[d2] * mdb[d] + dist31[d2] * mdc[d]);
752 for (int d2 = 0; d2 < DIM; d2++)
754 for (int d = 0; d < DIM; d++)
756 vir_r_m_dr[d2][d] += reduce(sum_r_m_dr[d2][d]);
761 *bErrorHasOccurred = anyTrue(bError);
764 /*! \brief Wrapper template function that divides the settles over threads
765 * and instantiates the core template with instantiated booleans.
767 template<typename T, typename TypeBool, int packSize, typename TypePbc>
768 static void settleTemplateWrapper(settledata* settled,
778 bool* bErrorHasOccurred)
780 /* We need to assign settles to threads in groups of pack_size */
781 int numSettlePacks = (settled->nsettle + packSize - 1) / packSize;
782 /* Round the end value up to give thread 0 more work */
783 int settleStart = ((numSettlePacks * thread + nthread - 1) / nthread) * packSize;
784 int settleEnd = ((numSettlePacks * (thread + 1) + nthread - 1) / nthread) * packSize;
790 settleTemplate<T, TypeBool, packSize, TypePbc, true, false>(
791 settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, nullptr, bErrorHasOccurred);
795 settleTemplate<T, TypeBool, packSize, TypePbc, true, true>(
796 settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, vir_r_m_dr, bErrorHasOccurred);
803 settleTemplate<T, TypeBool, packSize, TypePbc, false, false>(
804 settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, nullptr, bErrorHasOccurred);
808 settleTemplate<T, TypeBool, packSize, TypePbc, false, true>(
809 settled, settleStart, settleEnd, pbc, x, xprime, invdt, v, vir_r_m_dr, bErrorHasOccurred);
814 void csettle(settledata* settled,
824 bool* bErrorHasOccurred)
826 #if GMX_SIMD_HAVE_REAL
827 if (settled->bUseSimd)
829 /* Convert the pbc struct for SIMD */
830 alignas(GMX_SIMD_ALIGNMENT) real pbcSimd[9 * GMX_SIMD_REAL_WIDTH];
831 set_pbc_simd(pbc, pbcSimd);
833 settleTemplateWrapper<SimdReal, SimdBool, GMX_SIMD_REAL_WIDTH, const real*>(
834 settled, nthread, thread, pbcSimd, x, xprime, invdt, v, bCalcVirial, vir_r_m_dr,
840 /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
842 const t_pbc* pbcNonNull;
850 set_pbc(&pbcNo, epbcNONE, nullptr);
854 settleTemplateWrapper<real, bool, 1, const t_pbc*>(settled, nthread, thread, pbcNonNull, x,
855 xprime, invdt, v, bCalcVirial,
856 vir_r_m_dr, bErrorHasOccurred);