Reimplement constant acceleration groups
[alexxy/gromacs.git] / src / gromacs / mdlib / update.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "update.h"
41
42 #include <cmath>
43 #include <cstdio>
44
45 #include <algorithm>
46 #include <memory>
47
48 #include "gromacs/domdec/domdec_struct.h"
49 #include "gromacs/gmxlib/network.h"
50 #include "gromacs/gmxlib/nrnb.h"
51 #include "gromacs/listed_forces/disre.h"
52 #include "gromacs/listed_forces/orires.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/paddedvector.h"
55 #include "gromacs/math/units.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/mdlib/boxdeformation.h"
58 #include "gromacs/mdlib/constr.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdlib/stat.h"
61 #include "gromacs/mdlib/tgroup.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/fcdata.h"
64 #include "gromacs/mdtypes/group.h"
65 #include "gromacs/mdtypes/inputrec.h"
66 #include "gromacs/mdtypes/md_enums.h"
67 #include "gromacs/mdtypes/state.h"
68 #include "gromacs/pbcutil/pbc.h"
69 #include "gromacs/pulling/pull.h"
70 #include "gromacs/random/tabulatednormaldistribution.h"
71 #include "gromacs/random/threefry.h"
72 #include "gromacs/simd/simd.h"
73 #include "gromacs/timing/wallcycle.h"
74 #include "gromacs/topology/atoms.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/futil.h"
78 #include "gromacs/utility/gmxassert.h"
79 #include "gromacs/utility/smalloc.h"
80
81 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
82
83 struct gmx_sd_const_t
84 {
85     double em = 0;
86 };
87
88 struct gmx_sd_sigma_t
89 {
90     real V = 0;
91 };
92
93 struct gmx_stochd_t
94 {
95     /* BD stuff */
96     std::vector<real> bd_rf;
97     /* SD stuff */
98     std::vector<gmx_sd_const_t> sdc;
99     std::vector<gmx_sd_sigma_t> sdsig;
100     /* andersen temperature control stuff */
101     std::vector<bool> randomize_group;
102     std::vector<real> boltzfac;
103
104     explicit gmx_stochd_t(const t_inputrec& inputRecord);
105 };
106
107 //! pImpled implementation for Update
108 class Update::Impl
109 {
110 public:
111     //! Constructor
112     Impl(const t_inputrec& inputRecord, BoxDeformation* boxDeformation);
113     //! Destructor
114     ~Impl() = default;
115
116     void update_coords(const t_inputrec&                                inputRecord,
117                        int64_t                                          step,
118                        int                                              homenr,
119                        bool                                             havePartiallyFrozenAtoms,
120                        gmx::ArrayRef<const ParticleType>                ptype,
121                        gmx::ArrayRef<const real>                        invMass,
122                        gmx::ArrayRef<const rvec>                        invMassPerDim,
123                        t_state*                                         state,
124                        const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
125                        t_fcdata*                                        fcdata,
126                        const gmx_ekindata_t*                            ekind,
127                        const matrix                                     M,
128                        int                                              UpdatePart,
129                        const t_commrec*                                 cr,
130                        bool                                             haveConstraints);
131
132     void finish_update(const t_inputrec&                   inputRecord,
133                        bool                                havePartiallyFrozenAtoms,
134                        int                                 homenr,
135                        gmx::ArrayRef<const unsigned short> cFREEZE,
136                        t_state*                            state,
137                        gmx_wallcycle*                      wcycle,
138                        bool                                haveConstraints);
139
140     void update_sd_second_half(const t_inputrec&                 inputRecord,
141                                int64_t                           step,
142                                real*                             dvdlambda,
143                                int                               homenr,
144                                gmx::ArrayRef<const ParticleType> ptype,
145                                gmx::ArrayRef<const real>         invMass,
146                                t_state*                          state,
147                                const t_commrec*                  cr,
148                                t_nrnb*                           nrnb,
149                                gmx_wallcycle*                    wcycle,
150                                gmx::Constraints*                 constr,
151                                bool                              do_log,
152                                bool                              do_ene);
153
154     void update_for_constraint_virial(const t_inputrec&         inputRecord,
155                                       int                       homenr,
156                                       bool                      havePartiallyFrozenAtoms,
157                                       gmx::ArrayRef<const real> invmass,
158                                       gmx::ArrayRef<const rvec> invMassPerDim,
159                                       const t_state&            state,
160                                       const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
161                                       const gmx_ekindata_t&                            ekind);
162
163     void update_temperature_constants(const t_inputrec& inputRecord);
164
165     const std::vector<bool>& getAndersenRandomizeGroup() const { return sd_.randomize_group; }
166
167     const std::vector<real>& getBoltzmanFactor() const { return sd_.boltzfac; }
168
169     PaddedVector<RVec>* xp() { return &xp_; }
170
171     BoxDeformation* deform() const { return deform_; }
172
173     //! Group index for freezing
174     gmx::ArrayRef<const unsigned short> cFREEZE_;
175     //! Group index for temperature coupling
176     gmx::ArrayRef<const unsigned short> cTC_;
177     //! Group index for accleration groups
178     gmx::ArrayRef<const unsigned short> cAcceleration_;
179
180 private:
181     //! stochastic dynamics struct
182     gmx_stochd_t sd_;
183     //! xprime for constraint algorithms
184     PaddedVector<RVec> xp_;
185     //! Box deformation handler (or nullptr if inactive).
186     BoxDeformation* deform_ = nullptr;
187 };
188
189 Update::Update(const t_inputrec& inputRecord, BoxDeformation* boxDeformation) :
190     impl_(new Impl(inputRecord, boxDeformation)){};
191
192 Update::~Update(){};
193
194 const std::vector<bool>& Update::getAndersenRandomizeGroup() const
195 {
196     return impl_->getAndersenRandomizeGroup();
197 }
198
199 const std::vector<real>& Update::getBoltzmanFactor() const
200 {
201     return impl_->getBoltzmanFactor();
202 }
203
204 PaddedVector<RVec>* Update::xp()
205 {
206     return impl_->xp();
207 }
208
209 BoxDeformation* Update::deform() const
210 {
211     return impl_->deform();
212 }
213
214 void Update::update_coords(const t_inputrec&                 inputRecord,
215                            int64_t                           step,
216                            const int                         homenr,
217                            const bool                        havePartiallyFrozenAtoms,
218                            gmx::ArrayRef<const ParticleType> ptype,
219                            gmx::ArrayRef<const real>         invMass,
220                            gmx::ArrayRef<const rvec>         invMassPerDim,
221                            t_state*                          state,
222                            const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
223                            t_fcdata*                                        fcdata,
224                            const gmx_ekindata_t*                            ekind,
225                            const matrix                                     M,
226                            int                                              updatePart,
227                            const t_commrec*                                 cr,
228                            const bool                                       haveConstraints)
229 {
230     return impl_->update_coords(inputRecord,
231                                 step,
232                                 homenr,
233                                 havePartiallyFrozenAtoms,
234                                 ptype,
235                                 invMass,
236                                 invMassPerDim,
237                                 state,
238                                 f,
239                                 fcdata,
240                                 ekind,
241                                 M,
242                                 updatePart,
243                                 cr,
244                                 haveConstraints);
245 }
246
247 void Update::finish_update(const t_inputrec& inputRecord,
248                            const bool        havePartiallyFrozenAtoms,
249                            const int         homenr,
250                            t_state*          state,
251                            gmx_wallcycle*    wcycle,
252                            const bool        haveConstraints)
253 {
254     return impl_->finish_update(
255             inputRecord, havePartiallyFrozenAtoms, homenr, impl_->cFREEZE_, state, wcycle, haveConstraints);
256 }
257
258 void Update::update_sd_second_half(const t_inputrec&                 inputRecord,
259                                    int64_t                           step,
260                                    real*                             dvdlambda,
261                                    const int                         homenr,
262                                    gmx::ArrayRef<const ParticleType> ptype,
263                                    gmx::ArrayRef<const real>         invMass,
264                                    t_state*                          state,
265                                    const t_commrec*                  cr,
266                                    t_nrnb*                           nrnb,
267                                    gmx_wallcycle*                    wcycle,
268                                    gmx::Constraints*                 constr,
269                                    bool                              do_log,
270                                    bool                              do_ene)
271 {
272     return impl_->update_sd_second_half(
273             inputRecord, step, dvdlambda, homenr, ptype, invMass, state, cr, nrnb, wcycle, constr, do_log, do_ene);
274 }
275
276 void Update::update_for_constraint_virial(const t_inputrec&         inputRecord,
277                                           const int                 homenr,
278                                           const bool                havePartiallyFrozenAtoms,
279                                           gmx::ArrayRef<const real> invmass,
280                                           gmx::ArrayRef<const rvec> invMassPerDim,
281                                           const t_state&            state,
282                                           const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
283                                           const gmx_ekindata_t&                            ekind)
284 {
285     return impl_->update_for_constraint_virial(
286             inputRecord, homenr, havePartiallyFrozenAtoms, invmass, invMassPerDim, state, f, ekind);
287 }
288
289 void Update::update_temperature_constants(const t_inputrec& inputRecord)
290 {
291     return impl_->update_temperature_constants(inputRecord);
292 }
293
294 /*! \brief Sets whether we store the updated velocities */
295 enum class StoreUpdatedVelocities
296 {
297     yes, //!< Store the updated velocities
298     no   //!< Do not store the updated velocities
299 };
300
301 /*! \brief Sets the number of different temperature coupling values */
302 enum class NumTempScaleValues
303 {
304     single,  //!< Single T-scaling value (either one group or all values =1)
305     multiple //!< Multiple T-scaling values, need to use T-group indices
306 };
307
308 /*! \brief Sets if to apply no or diagonal Parrinello-Rahman pressure scaling
309  *
310  * Note that this enum is only used in updateMDLeapfrogSimple(), which does
311  * not handle fully anistropic Parrinello-Rahman scaling, so we only have
312  * options \p no and \p diagonal here and no anistropic option.
313  */
314 enum class ApplyParrinelloRahmanVScaling
315 {
316     no,      //!< Do not apply velocity scaling (not a PR-coupling run or step)
317     diagonal //!< Apply velocity scaling using a diagonal matrix
318 };
319
320 /*! \brief Integrate using leap-frog with T-scaling and optionally diagonal Parrinello-Rahman p-coupling
321  *
322  * \tparam       storeUpdatedVelocities Tells whether we should store the updated velocities
323  * \tparam       numTempScaleValues     The number of different T-couple values
324  * \tparam       applyPRVScaling        Apply Parrinello-Rahman velocity scaling
325  * \param[in]    start                  Index of first atom to update
326  * \param[in]    nrend                  Last atom to update: \p nrend - 1
327  * \param[in]    dt                     The time step
328  * \param[in]    dtPressureCouple       Time step for pressure coupling
329  * \param[in]    invMassPerDim          1/mass per atom and dimension
330  * \param[in]    tcstat                 Temperature coupling information
331  * \param[in]    cTC                    T-coupling group index per atom
332  * \param[in]    pRVScaleMatrixDiagonal Parrinello-Rahman v-scale matrix diagonal
333  * \param[in]    x                      Input coordinates
334  * \param[out]   xprime                 Updated coordinates
335  * \param[inout] v                      Velocities, type either rvec* or const rvec*
336  * \param[in]    f                      Forces
337  *
338  * We expect this template to get good SIMD acceleration by most compilers,
339  * unlike the more complex general template.
340  * Note that we might get even better SIMD acceleration when we introduce
341  * aligned (and padded) memory, possibly with some hints for the compilers.
342  */
343 template<StoreUpdatedVelocities storeUpdatedVelocities, NumTempScaleValues numTempScaleValues, ApplyParrinelloRahmanVScaling applyPRVScaling, typename VelocityType>
344 static std::enable_if_t<std::is_same<VelocityType, rvec>::value || std::is_same<VelocityType, const rvec>::value, void>
345 updateMDLeapfrogSimple(int                                 start,
346                        int                                 nrend,
347                        real                                dt,
348                        real                                dtPressureCouple,
349                        gmx::ArrayRef<const rvec>           invMassPerDim,
350                        gmx::ArrayRef<const t_grp_tcstat>   tcstat,
351                        gmx::ArrayRef<const unsigned short> cTC,
352                        const rvec                          pRVScaleMatrixDiagonal,
353                        const rvec* gmx_restrict            x,
354                        rvec* gmx_restrict                  xprime,
355                        VelocityType* gmx_restrict          v,
356                        const rvec* gmx_restrict            f)
357 {
358     real lambdaGroup;
359
360     if (numTempScaleValues == NumTempScaleValues::single)
361     {
362         lambdaGroup = tcstat[0].lambda;
363     }
364
365     for (int a = start; a < nrend; a++)
366     {
367         if (numTempScaleValues == NumTempScaleValues::multiple)
368         {
369             lambdaGroup = tcstat[cTC[a]].lambda;
370         }
371
372         for (int d = 0; d < DIM; d++)
373         {
374             /* Note that using rvec invMassPerDim results in more efficient
375              * SIMD code, but this increases the cache pressure.
376              * For large systems with PME on the CPU this slows down the
377              * (then already slow) update by 20%. If all data remains in cache,
378              * using rvec is much faster.
379              */
380             real vNew = lambdaGroup * v[a][d] + f[a][d] * invMassPerDim[a][d] * dt;
381
382             if (applyPRVScaling == ApplyParrinelloRahmanVScaling::diagonal)
383             {
384                 vNew -= dtPressureCouple * pRVScaleMatrixDiagonal[d] * v[a][d];
385             }
386             if constexpr (storeUpdatedVelocities == StoreUpdatedVelocities::yes)
387             {
388                 v[a][d] = vNew;
389             }
390             // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
391             xprime[a][d] = x[a][d] + vNew * dt;
392         }
393     }
394 }
395
396 #if GMX_SIMD && GMX_SIMD_HAVE_REAL
397 #    define GMX_HAVE_SIMD_UPDATE 1
398 #else
399 #    define GMX_HAVE_SIMD_UPDATE 0
400 #endif
401
402 #if GMX_HAVE_SIMD_UPDATE
403
404 /*! \brief Load (aligned) the contents of GMX_SIMD_REAL_WIDTH rvec elements sequentially into 3 SIMD registers
405  *
406  * The loaded output is:
407  * \p r0: { r[index][XX], r[index][YY], ... }
408  * \p r1: { ... }
409  * \p r2: { ..., r[index+GMX_SIMD_REAL_WIDTH-1][YY], r[index+GMX_SIMD_REAL_WIDTH-1][ZZ] }
410  *
411  * \param[in]  r      Real to an rvec array, has to be aligned to SIMD register width
412  * \param[in]  index  Index of the first rvec triplet of reals to load
413  * \param[out] r0     Pointer to first SIMD register
414  * \param[out] r1     Pointer to second SIMD register
415  * \param[out] r2     Pointer to third SIMD register
416  */
417 static inline void simdLoadRvecs(const rvec* r, int index, SimdReal* r0, SimdReal* r1, SimdReal* r2)
418 {
419     const real* realPtr = r[index];
420
421     GMX_ASSERT(isSimdAligned(realPtr), "Pointer should be SIMD aligned");
422
423     *r0 = simdLoad(realPtr + 0 * GMX_SIMD_REAL_WIDTH);
424     *r1 = simdLoad(realPtr + 1 * GMX_SIMD_REAL_WIDTH);
425     *r2 = simdLoad(realPtr + 2 * GMX_SIMD_REAL_WIDTH);
426 }
427
428 /*! \brief Store (aligned) 3 SIMD registers sequentially to GMX_SIMD_REAL_WIDTH rvec elements
429  *
430  * The stored output is:
431  * \p r[index] = { { r0[0], r0[1], ... }
432  * ...
433  * \p r[index+GMX_SIMD_REAL_WIDTH-1] =  { ... , r2[GMX_SIMD_REAL_WIDTH-2], r2[GMX_SIMD_REAL_WIDTH-1] }
434  *
435  * \param[out] r      Pointer to an rvec array, has to be aligned to SIMD register width
436  * \param[in]  index  Index of the first rvec triplet of reals to store to
437  * \param[in]  r0     First SIMD register
438  * \param[in]  r1     Second SIMD register
439  * \param[in]  r2     Third SIMD register
440  */
441 static inline void simdStoreRvecs(rvec* r, int index, SimdReal r0, SimdReal r1, SimdReal r2)
442 {
443     real* realPtr = r[index];
444
445     GMX_ASSERT(isSimdAligned(realPtr), "Pointer should be SIMD aligned");
446
447     store(realPtr + 0 * GMX_SIMD_REAL_WIDTH, r0);
448     store(realPtr + 1 * GMX_SIMD_REAL_WIDTH, r1);
449     store(realPtr + 2 * GMX_SIMD_REAL_WIDTH, r2);
450 }
451
452 /*! \brief Integrate using leap-frog with single group T-scaling and SIMD
453  *
454  * \tparam       storeUpdatedVelocities Tells whether we should store the updated velocities
455  * \param[in]    start                  Index of first atom to update
456  * \param[in]    nrend                  Last atom to update: \p nrend - 1
457  * \param[in]    dt                     The time step
458  * \param[in]    invMass                1/mass per atom
459  * \param[in]    tcstat                 Temperature coupling information
460  * \param[in]    x                      Input coordinates
461  * \param[out]   xprime                 Updated coordinates
462  * \param[inout] v                      Velocities, type either rvec* or const rvec*
463  * \param[in]    f                      Forces
464  */
465 template<StoreUpdatedVelocities storeUpdatedVelocities, typename VelocityType>
466 static std::enable_if_t<std::is_same<VelocityType, rvec>::value || std::is_same<VelocityType, const rvec>::value, void>
467 updateMDLeapfrogSimpleSimd(int                               start,
468                            int                               nrend,
469                            real                              dt,
470                            gmx::ArrayRef<const real>         invMass,
471                            gmx::ArrayRef<const t_grp_tcstat> tcstat,
472                            const rvec* gmx_restrict          x,
473                            rvec* gmx_restrict                xprime,
474                            VelocityType* gmx_restrict        v,
475                            const rvec* gmx_restrict          f)
476 {
477     SimdReal timestep(dt);
478     SimdReal lambdaSystem(tcstat[0].lambda);
479
480     /* We declare variables here, since code is often slower when declaring them inside the loop */
481
482     /* Note: We should implement a proper PaddedVector, so we don't need this check */
483     GMX_ASSERT(isSimdAligned(invMass.data()), "invMass should be aligned");
484
485     for (int a = start; a < nrend; a += GMX_SIMD_REAL_WIDTH)
486     {
487         SimdReal invMass0, invMass1, invMass2;
488         expandScalarsToTriplets(simdLoad(invMass.data() + a), &invMass0, &invMass1, &invMass2);
489
490         SimdReal v0, v1, v2;
491         SimdReal f0, f1, f2;
492         simdLoadRvecs(v, a, &v0, &v1, &v2);
493         simdLoadRvecs(f, a, &f0, &f1, &f2);
494
495         v0 = fma(f0 * invMass0, timestep, lambdaSystem * v0);
496         v1 = fma(f1 * invMass1, timestep, lambdaSystem * v1);
497         v2 = fma(f2 * invMass2, timestep, lambdaSystem * v2);
498
499         if constexpr (storeUpdatedVelocities == StoreUpdatedVelocities::yes)
500         {
501             simdStoreRvecs(v, a, v0, v1, v2);
502         }
503
504         // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
505         SimdReal x0, x1, x2;
506         simdLoadRvecs(x, a, &x0, &x1, &x2);
507
508         SimdReal xprime0 = fma(v0, timestep, x0);
509         SimdReal xprime1 = fma(v1, timestep, x1);
510         SimdReal xprime2 = fma(v2, timestep, x2);
511
512         simdStoreRvecs(xprime, a, xprime0, xprime1, xprime2);
513     }
514 }
515
516 #endif // GMX_HAVE_SIMD_UPDATE
517
518 /*! \brief Sets the NEMD acceleration type */
519 enum class AccelerationType
520 {
521     none,
522     group,
523     cosine
524 };
525
526 /*! \brief Integrate using leap-frog with support for everything.
527  *
528  * \tparam        accelerationType  Type of NEMD acceleration.
529  * \param[in]     start             Index of first atom to update.
530  * \param[in]     nrend             Last atom to update: \p nrend - 1.
531  * \param[in]     doNoseHoover      If to apply Nose-Hoover T-coupling.
532  * \param[in]     dt                The time step.
533  * \param[in]     dtPressureCouple  Time step for pressure coupling, is 0 when no pressure
534  *                                  coupling should be applied at this step.
535  * \param[in]     cTC               Temperature coupling group indices
536  * \param[in]     cAcceleration     Acceleration group indices
537  * \param[in]     acceleration      Acceleration per group.
538  * \param[in]     invMassPerDim     Inverse mass per dimension
539  * \param[in]     ekind             Kinetic energy data.
540  * \param[in]     box               The box dimensions.
541  * \param[in]     x                 Input coordinates.
542  * \param[out]    xprime            Updated coordinates.
543  * \param[inout]  v                 Velocities.
544  * \param[in]     f                 Forces.
545  * \param[in]     nh_vxi            Nose-Hoover velocity scaling factors.
546  * \param[in]     nsttcouple        Frequency of the temperature coupling steps.
547  * \param[in]     M                 Parrinello-Rahman scaling matrix.
548  */
549 template<AccelerationType accelerationType>
550 static void updateMDLeapfrogGeneral(int                                 start,
551                                     int                                 nrend,
552                                     bool                                doNoseHoover,
553                                     real                                dt,
554                                     real                                dtPressureCouple,
555                                     gmx::ArrayRef<const unsigned short> cTC,
556                                     gmx::ArrayRef<const unsigned short> cAcceleration,
557                                     const rvec* gmx_restrict            acceleration,
558                                     gmx::ArrayRef<const rvec>           invMassPerDim,
559                                     const gmx_ekindata_t*               ekind,
560                                     const matrix                        box,
561                                     const rvec* gmx_restrict            x,
562                                     rvec* gmx_restrict                  xprime,
563                                     rvec* gmx_restrict                  v,
564                                     const rvec* gmx_restrict            f,
565                                     const double* gmx_restrict          nh_vxi,
566                                     const int                           nsttcouple,
567                                     const matrix                        M)
568 {
569     /* This is a version of the leap-frog integrator that supports
570      * all combinations of T-coupling, P-coupling and NEMD.
571      * Nose-Hoover uses the reversible leap-frog integrator from
572      * Holian et al. Phys Rev E 52(3) : 2338, 1995
573      */
574
575     gmx::ArrayRef<const t_grp_tcstat> tcstat = ekind->tcstat;
576
577     /* Initialize group values, changed later when multiple groups are used */
578     int ga = 0;
579     int gt = 0;
580
581     real omega_Z = 2 * static_cast<real>(M_PI) / box[ZZ][ZZ];
582
583     for (int n = start; n < nrend; n++)
584     {
585         if (!cTC.empty())
586         {
587             gt = cTC[n];
588         }
589         real lg = tcstat[gt].lambda;
590
591         rvec vRel;
592         real cosineZ, vCosine;
593         switch (accelerationType)
594         {
595             case AccelerationType::none: copy_rvec(v[n], vRel); break;
596             case AccelerationType::group:
597                 if (!cAcceleration.empty())
598                 {
599                     ga = cAcceleration[n];
600                 }
601                 /* With constant acceleration we do scale the velocity of the accelerated groups */
602                 copy_rvec(v[n], vRel);
603                 break;
604             case AccelerationType::cosine:
605                 cosineZ = std::cos(x[n][ZZ] * omega_Z);
606                 vCosine = cosineZ * ekind->cosacc.vcos;
607                 /* Avoid scaling the cosine profile velocity */
608                 copy_rvec(v[n], vRel);
609                 vRel[XX] -= vCosine;
610                 break;
611         }
612
613         real factorNH = 0.0;
614         if (doNoseHoover)
615         {
616             /* Here we account for multiple time stepping, by increasing
617              * the Nose-Hoover correction by nsttcouple
618              * TODO: This can be pre-computed.
619              */
620             factorNH = 0.5 * nsttcouple * dt * nh_vxi[gt];
621         }
622
623         for (int d = 0; d < DIM; d++)
624         {
625             real vNew = (lg * vRel[d]
626                          + (f[n][d] * invMassPerDim[n][d] * dt - factorNH * vRel[d]
627                             - dtPressureCouple * iprod(M[d], vRel)))
628                         / (1 + factorNH);
629             switch (accelerationType)
630             {
631                 case AccelerationType::none: break;
632                 case AccelerationType::group:
633                     /* Apply the constant acceleration */
634                     vNew += acceleration[ga][d] * dt;
635                     break;
636                 case AccelerationType::cosine:
637                     if (d == XX)
638                     {
639                         /* Add back the mean velocity and apply acceleration */
640                         vNew += vCosine + cosineZ * ekind->cosacc.cos_accel * dt;
641                     }
642                     break;
643             }
644             v[n][d]      = vNew;
645             xprime[n][d] = x[n][d] + vNew * dt;
646         }
647     }
648 }
649
650 /*! \brief Handles the Leap-frog MD x and v integration */
651 static void do_update_md(int                                  start,
652                          int                                  nrend,
653                          real                                 dt,
654                          int64_t                              step,
655                          const rvec* gmx_restrict             x,
656                          rvec* gmx_restrict                   xprime,
657                          rvec* gmx_restrict                   v,
658                          const rvec* gmx_restrict             f,
659                          const TemperatureCoupling            etc,
660                          const PressureCoupling               epc,
661                          const int                            nsttcouple,
662                          const int                            nstpcouple,
663                          gmx::ArrayRef<const unsigned short>  cTC,
664                          const bool                           useConstantAcceleration,
665                          gmx::ArrayRef<const unsigned short>  cAcceleration,
666                          const rvec*                          acceleration,
667                          gmx::ArrayRef<const real> gmx_unused invmass,
668                          gmx::ArrayRef<const rvec>            invMassPerDim,
669                          const gmx_ekindata_t*                ekind,
670                          const matrix                         box,
671                          const double* gmx_restrict           nh_vxi,
672                          const matrix                         M,
673                          bool gmx_unused                      havePartiallyFrozenAtoms)
674 {
675     GMX_ASSERT(nrend == start || xprime != x,
676                "For SIMD optimization certain compilers need to have xprime != x");
677
678     /* Note: Berendsen pressure scaling is handled after do_update_md() */
679     bool doTempCouple =
680             (etc != TemperatureCoupling::No && do_per_step(step + nsttcouple - 1, nsttcouple));
681     bool doNoseHoover       = (etc == TemperatureCoupling::NoseHoover && doTempCouple);
682     bool doParrinelloRahman = (epc == PressureCoupling::ParrinelloRahman
683                                && do_per_step(step + nstpcouple - 1, nstpcouple));
684     bool doPROffDiagonal = (doParrinelloRahman && (M[YY][XX] != 0 || M[ZZ][XX] != 0 || M[ZZ][YY] != 0));
685
686     real dtPressureCouple = (doParrinelloRahman ? nstpcouple * dt : 0);
687
688     /* NEMD (also cosine) acceleration is applied in updateMDLeapFrogGeneral */
689     const bool doAcceleration = (useConstantAcceleration || ekind->cosacc.cos_accel != 0);
690
691     if (doNoseHoover || doPROffDiagonal || doAcceleration)
692     {
693         matrix stepM;
694         if (!doParrinelloRahman)
695         {
696             /* We should not apply PR scaling at this step */
697             clear_mat(stepM);
698         }
699         else
700         {
701             copy_mat(M, stepM);
702         }
703
704         if (!doAcceleration)
705         {
706             updateMDLeapfrogGeneral<AccelerationType::none>(start,
707                                                             nrend,
708                                                             doNoseHoover,
709                                                             dt,
710                                                             dtPressureCouple,
711                                                             cTC,
712                                                             cAcceleration,
713                                                             acceleration,
714                                                             invMassPerDim,
715                                                             ekind,
716                                                             box,
717                                                             x,
718                                                             xprime,
719                                                             v,
720                                                             f,
721                                                             nh_vxi,
722                                                             nsttcouple,
723                                                             stepM);
724         }
725         else if (useConstantAcceleration)
726         {
727             updateMDLeapfrogGeneral<AccelerationType::group>(start,
728                                                              nrend,
729                                                              doNoseHoover,
730                                                              dt,
731                                                              dtPressureCouple,
732                                                              cTC,
733                                                              cAcceleration,
734                                                              acceleration,
735                                                              invMassPerDim,
736                                                              ekind,
737                                                              box,
738                                                              x,
739                                                              xprime,
740                                                              v,
741                                                              f,
742                                                              nh_vxi,
743                                                              nsttcouple,
744                                                              stepM);
745         }
746         else
747         {
748             updateMDLeapfrogGeneral<AccelerationType::cosine>(start,
749                                                               nrend,
750                                                               doNoseHoover,
751                                                               dt,
752                                                               dtPressureCouple,
753                                                               cTC,
754                                                               cAcceleration,
755                                                               acceleration,
756                                                               invMassPerDim,
757                                                               ekind,
758                                                               box,
759                                                               x,
760                                                               xprime,
761                                                               v,
762                                                               f,
763                                                               nh_vxi,
764                                                               nsttcouple,
765                                                               stepM);
766         }
767     }
768     else
769     {
770         /* We determine if we have a single T-coupling lambda value for all
771          * atoms. That allows for better SIMD acceleration in the template.
772          * If we do not do temperature coupling (in the run or this step),
773          * all scaling values are 1, so we effectively have a single value.
774          */
775         bool haveSingleTempScaleValue = (!doTempCouple || ekind->ngtc == 1);
776
777         /* Extract some pointers needed by all cases */
778         gmx::ArrayRef<const t_grp_tcstat> tcstat = ekind->tcstat;
779
780         if (doParrinelloRahman)
781         {
782             GMX_ASSERT(!doPROffDiagonal,
783                        "updateMDLeapfrogSimple only support diagonal Parrinello-Rahman scaling "
784                        "matrices");
785
786             rvec diagM;
787             for (int d = 0; d < DIM; d++)
788             {
789                 diagM[d] = M[d][d];
790             }
791
792             if (haveSingleTempScaleValue)
793             {
794                 updateMDLeapfrogSimple<StoreUpdatedVelocities::yes, NumTempScaleValues::single, ApplyParrinelloRahmanVScaling::diagonal>(
795                         start, nrend, dt, dtPressureCouple, invMassPerDim, tcstat, cTC, diagM, x, xprime, v, f);
796             }
797             else
798             {
799                 updateMDLeapfrogSimple<StoreUpdatedVelocities::yes, NumTempScaleValues::multiple, ApplyParrinelloRahmanVScaling::diagonal>(
800                         start, nrend, dt, dtPressureCouple, invMassPerDim, tcstat, cTC, diagM, x, xprime, v, f);
801             }
802         }
803         else
804         {
805             if (haveSingleTempScaleValue)
806             {
807                 /* Note that modern compilers are pretty good at vectorizing
808                  * updateMDLeapfrogSimple(). But the SIMD version will still
809                  * be faster because invMass lowers the cache pressure
810                  * compared to invMassPerDim.
811                  */
812 #if GMX_HAVE_SIMD_UPDATE
813                 /* Check if we can use invmass instead of invMassPerDim */
814                 if (!havePartiallyFrozenAtoms)
815                 {
816                     updateMDLeapfrogSimpleSimd<StoreUpdatedVelocities::yes>(
817                             start, nrend, dt, invmass, tcstat, x, xprime, v, f);
818                 }
819                 else
820 #endif
821                 {
822                     updateMDLeapfrogSimple<StoreUpdatedVelocities::yes, NumTempScaleValues::single, ApplyParrinelloRahmanVScaling::no>(
823                             start, nrend, dt, dtPressureCouple, invMassPerDim, tcstat, cTC, nullptr, x, xprime, v, f);
824                 }
825             }
826             else
827             {
828                 updateMDLeapfrogSimple<StoreUpdatedVelocities::yes, NumTempScaleValues::multiple, ApplyParrinelloRahmanVScaling::no>(
829                         start, nrend, dt, dtPressureCouple, invMassPerDim, tcstat, cTC, nullptr, x, xprime, v, f);
830             }
831         }
832     }
833 }
834 /*! \brief Handles the Leap-frog MD x and v integration */
835 static void doUpdateMDDoNotUpdateVelocities(int                      start,
836                                             int                      nrend,
837                                             real                     dt,
838                                             const rvec* gmx_restrict x,
839                                             rvec* gmx_restrict       xprime,
840                                             const rvec* gmx_restrict v,
841                                             const rvec* gmx_restrict f,
842                                             bool gmx_unused          havePartiallyFrozenAtoms,
843                                             gmx::ArrayRef<const real> gmx_unused invmass,
844                                             gmx::ArrayRef<const rvec>            invMassPerDim,
845                                             const gmx_ekindata_t&                ekind)
846 {
847     GMX_ASSERT(nrend == start || xprime != x,
848                "For SIMD optimization certain compilers need to have xprime != x");
849
850     gmx::ArrayRef<const t_grp_tcstat> tcstat = ekind.tcstat;
851
852     /* Check if we can use invmass instead of invMassPerDim */
853 #if GMX_HAVE_SIMD_UPDATE
854     if (!havePartiallyFrozenAtoms)
855     {
856         updateMDLeapfrogSimpleSimd<StoreUpdatedVelocities::no>(
857                 start, nrend, dt, invmass, tcstat, x, xprime, v, f);
858     }
859     else
860 #endif
861     {
862         updateMDLeapfrogSimple<StoreUpdatedVelocities::no, NumTempScaleValues::single, ApplyParrinelloRahmanVScaling::no>(
863                 start, nrend, dt, dt, invMassPerDim, tcstat, gmx::ArrayRef<const unsigned short>(), nullptr, x, xprime, v, f);
864     }
865 }
866
867 static void do_update_vv_vel(int                                 start,
868                              int                                 nrend,
869                              real                                dt,
870                              gmx::ArrayRef<const ivec>           nFreeze,
871                              gmx::ArrayRef<const unsigned short> cAcceleration,
872                              const rvec*                         acceleration,
873                              gmx::ArrayRef<const real>           invmass,
874                              gmx::ArrayRef<const ParticleType>   ptype,
875                              gmx::ArrayRef<const unsigned short> cFREEZE,
876                              rvec                                v[],
877                              const rvec                          f[],
878                              gmx_bool                            bExtended,
879                              real                                veta,
880                              real                                alpha)
881 {
882     int  gf = 0, ga = 0;
883     int  n, d;
884     real g, mv1, mv2;
885
886     if (bExtended)
887     {
888         g   = 0.25 * dt * veta * alpha;
889         mv1 = std::exp(-g);
890         mv2 = gmx::series_sinhx(g);
891     }
892     else
893     {
894         mv1 = 1.0;
895         mv2 = 1.0;
896     }
897     for (n = start; n < nrend; n++)
898     {
899         real w_dt = invmass[n] * dt;
900         if (!cFREEZE.empty())
901         {
902             gf = cFREEZE[n];
903         }
904         if (!cAcceleration.empty())
905         {
906             ga = cAcceleration[n];
907         }
908
909         for (d = 0; d < DIM; d++)
910         {
911             if ((ptype[n] != ParticleType::Shell) && !nFreeze[gf][d])
912             {
913                 v[n][d] = mv1 * (mv1 * v[n][d] + 0.5 * (w_dt * mv2 * f[n][d]))
914                           + 0.5 * acceleration[ga][d] * dt;
915             }
916             else
917             {
918                 v[n][d] = 0.0;
919             }
920         }
921     }
922 } /* do_update_vv_vel */
923
924 static void do_update_vv_pos(int                                 start,
925                              int                                 nrend,
926                              real                                dt,
927                              gmx::ArrayRef<const ivec>           nFreeze,
928                              gmx::ArrayRef<const ParticleType>   ptype,
929                              gmx::ArrayRef<const unsigned short> cFREEZE,
930                              const rvec                          x[],
931                              rvec                                xprime[],
932                              const rvec                          v[],
933                              gmx_bool                            bExtended,
934                              real                                veta)
935 {
936     int  gf = 0;
937     int  n, d;
938     real g, mr1, mr2;
939
940     /* Would it make more sense if Parrinello-Rahman was put here? */
941     if (bExtended)
942     {
943         g   = 0.5 * dt * veta;
944         mr1 = std::exp(g);
945         mr2 = gmx::series_sinhx(g);
946     }
947     else
948     {
949         mr1 = 1.0;
950         mr2 = 1.0;
951     }
952
953     for (n = start; n < nrend; n++)
954     {
955
956         if (!cFREEZE.empty())
957         {
958             gf = cFREEZE[n];
959         }
960
961         for (d = 0; d < DIM; d++)
962         {
963             if ((ptype[n] != ParticleType::Shell) && !nFreeze[gf][d])
964             {
965                 xprime[n][d] = mr1 * (mr1 * x[n][d] + mr2 * dt * v[n][d]);
966             }
967             else
968             {
969                 xprime[n][d] = x[n][d];
970             }
971         }
972     }
973 } /* do_update_vv_pos */
974
975 gmx_stochd_t::gmx_stochd_t(const t_inputrec& inputRecord)
976 {
977     const t_grpopts* opts = &inputRecord.opts;
978     const int        ngtc = opts->ngtc;
979
980     if (inputRecord.eI == IntegrationAlgorithm::BD)
981     {
982         bd_rf.resize(ngtc);
983     }
984     else if (EI_SD(inputRecord.eI))
985     {
986         sdc.resize(ngtc);
987         sdsig.resize(ngtc);
988
989         for (int gt = 0; gt < ngtc; gt++)
990         {
991             if (opts->tau_t[gt] > 0)
992             {
993                 sdc[gt].em = std::exp(-inputRecord.delta_t / opts->tau_t[gt]);
994             }
995             else
996             {
997                 /* No friction and noise on this group */
998                 sdc[gt].em = 1;
999             }
1000         }
1001     }
1002     else if (ETC_ANDERSEN(inputRecord.etc))
1003     {
1004         randomize_group.resize(ngtc);
1005         boltzfac.resize(ngtc);
1006
1007         /* for now, assume that all groups, if randomized, are randomized at the same rate, i.e. tau_t is the same. */
1008         /* since constraint groups don't necessarily match up with temperature groups! This is checked in readir.c */
1009
1010         for (int gt = 0; gt < ngtc; gt++)
1011         {
1012             real reft = std::max<real>(0, opts->ref_t[gt]);
1013             if ((opts->tau_t[gt] > 0)
1014                 && (reft > 0)) /* tau_t or ref_t = 0 means that no randomization is done */
1015             {
1016                 randomize_group[gt] = true;
1017                 boltzfac[gt]        = gmx::c_boltz * opts->ref_t[gt];
1018             }
1019             else
1020             {
1021                 randomize_group[gt] = false;
1022             }
1023         }
1024     }
1025 }
1026
1027 void Update::Impl::update_temperature_constants(const t_inputrec& inputRecord)
1028 {
1029     if (inputRecord.eI == IntegrationAlgorithm::BD)
1030     {
1031         if (inputRecord.bd_fric != 0)
1032         {
1033             for (int gt = 0; gt < inputRecord.opts.ngtc; gt++)
1034             {
1035                 sd_.bd_rf[gt] = std::sqrt(2.0 * gmx::c_boltz * inputRecord.opts.ref_t[gt]
1036                                           / (inputRecord.bd_fric * inputRecord.delta_t));
1037             }
1038         }
1039         else
1040         {
1041             for (int gt = 0; gt < inputRecord.opts.ngtc; gt++)
1042             {
1043                 sd_.bd_rf[gt] = std::sqrt(2.0 * gmx::c_boltz * inputRecord.opts.ref_t[gt]);
1044             }
1045         }
1046     }
1047     if (inputRecord.eI == IntegrationAlgorithm::SD1)
1048     {
1049         for (int gt = 0; gt < inputRecord.opts.ngtc; gt++)
1050         {
1051             real kT = gmx::c_boltz * inputRecord.opts.ref_t[gt];
1052             /* The mass is accounted for later, since this differs per atom */
1053             sd_.sdsig[gt].V = std::sqrt(kT * (1 - sd_.sdc[gt].em * sd_.sdc[gt].em));
1054         }
1055     }
1056 }
1057
1058 Update::Impl::Impl(const t_inputrec& inputRecord, BoxDeformation* boxDeformation) :
1059     sd_(inputRecord), deform_(boxDeformation)
1060 {
1061     update_temperature_constants(inputRecord);
1062     xp_.resizeWithPadding(0);
1063 }
1064
1065 void Update::updateAfterPartition(int                                 numAtoms,
1066                                   gmx::ArrayRef<const unsigned short> cFREEZE,
1067                                   gmx::ArrayRef<const unsigned short> cTC,
1068                                   gmx::ArrayRef<const unsigned short> cAcceleration)
1069 {
1070
1071     impl_->xp()->resizeWithPadding(numAtoms);
1072     impl_->cFREEZE_       = cFREEZE;
1073     impl_->cTC_           = cTC;
1074     impl_->cAcceleration_ = cAcceleration;
1075 }
1076
1077 /*! \brief Sets the SD update type */
1078 enum class SDUpdate : int
1079 {
1080     ForcesOnly,
1081     FrictionAndNoiseOnly,
1082     Combined
1083 };
1084
1085 /*! \brief SD integrator update
1086  *
1087  * Two phases are required in the general case of a constrained
1088  * update, the first phase from the contribution of forces, before
1089  * applying constraints, and then a second phase applying the friction
1090  * and noise, and then further constraining. For details, see
1091  * Goga2012.
1092  *
1093  * Without constraints, the two phases can be combined, for
1094  * efficiency.
1095  *
1096  * Thus three instantiations of this templated function will be made,
1097  * two with only one contribution, and one with both contributions. */
1098 template<SDUpdate updateType>
1099 static void doSDUpdateGeneral(const gmx_stochd_t&                 sd,
1100                               int                                 start,
1101                               int                                 nrend,
1102                               real                                dt,
1103                               gmx::ArrayRef<const ivec>           nFreeze,
1104                               gmx::ArrayRef<const real>           invmass,
1105                               gmx::ArrayRef<const ParticleType>   ptype,
1106                               gmx::ArrayRef<const unsigned short> cFREEZE,
1107                               gmx::ArrayRef<const unsigned short> cTC,
1108                               gmx::ArrayRef<const unsigned short> cAcceleration,
1109                               const rvec*                         acceleration,
1110                               const rvec                          x[],
1111                               rvec                                xprime[],
1112                               rvec                                v[],
1113                               const rvec                          f[],
1114                               int64_t                             step,
1115                               int                                 seed,
1116                               const int*                          gatindex)
1117 {
1118     // cTC, cACC and cFREEZE can be nullptr any time, but various
1119     // instantiations do not make sense with particular pointer
1120     // values.
1121     if (updateType == SDUpdate::ForcesOnly)
1122     {
1123         GMX_ASSERT(f != nullptr, "SD update with only forces requires forces");
1124         GMX_ASSERT(cTC.empty(), "SD update with only forces cannot handle temperature groups");
1125     }
1126     if (updateType == SDUpdate::FrictionAndNoiseOnly)
1127     {
1128         GMX_ASSERT(f == nullptr, "SD update with only noise cannot handle forces");
1129         GMX_ASSERT(cAcceleration.empty(),
1130                    "SD update with only noise cannot handle acceleration groups");
1131     }
1132     if (updateType == SDUpdate::Combined)
1133     {
1134         GMX_ASSERT(f != nullptr, "SD update with forces and noise requires forces");
1135     }
1136
1137     // Even 0 bits internal counter gives 2x64 ints (more than enough for three table lookups)
1138     gmx::ThreeFry2x64<0>                       rng(seed, gmx::RandomDomain::UpdateCoordinates);
1139     gmx::TabulatedNormalDistribution<real, 14> dist;
1140
1141     for (int n = start; n < nrend; n++)
1142     {
1143         int globalAtomIndex = gatindex ? gatindex[n] : n;
1144         rng.restart(step, globalAtomIndex);
1145         dist.reset();
1146
1147         real inverseMass = invmass[n];
1148         real invsqrtMass = std::sqrt(inverseMass);
1149
1150         int freezeGroup       = !cFREEZE.empty() ? cFREEZE[n] : 0;
1151         int accelerationGroup = !cAcceleration.empty() ? cAcceleration[n] : 0;
1152         int temperatureGroup  = !cTC.empty() ? cTC[n] : 0;
1153
1154         for (int d = 0; d < DIM; d++)
1155         {
1156             if ((ptype[n] != ParticleType::Shell) && !nFreeze[freezeGroup][d])
1157             {
1158                 if (updateType == SDUpdate::ForcesOnly)
1159                 {
1160                     real vn = v[n][d] + (inverseMass * f[n][d] + acceleration[accelerationGroup][d]) * dt;
1161                     v[n][d] = vn;
1162                     // Simple position update.
1163                     xprime[n][d] = x[n][d] + v[n][d] * dt;
1164                 }
1165                 else if (updateType == SDUpdate::FrictionAndNoiseOnly)
1166                 {
1167                     real vn = v[n][d];
1168                     v[n][d] = (vn * sd.sdc[temperatureGroup].em
1169                                + invsqrtMass * sd.sdsig[temperatureGroup].V * dist(rng));
1170                     // The previous phase already updated the
1171                     // positions with a full v*dt term that must
1172                     // now be half removed.
1173                     xprime[n][d] = xprime[n][d] + 0.5 * (v[n][d] - vn) * dt;
1174                 }
1175                 else
1176                 {
1177                     real vn = v[n][d] + (inverseMass * f[n][d] + acceleration[accelerationGroup][d]) * dt;
1178                     v[n][d] = (vn * sd.sdc[temperatureGroup].em
1179                                + invsqrtMass * sd.sdsig[temperatureGroup].V * dist(rng));
1180                     // Here we include half of the friction+noise
1181                     // update of v into the position update.
1182                     xprime[n][d] = x[n][d] + 0.5 * (vn + v[n][d]) * dt;
1183                 }
1184             }
1185             else
1186             {
1187                 // When using constraints, the update is split into
1188                 // two phases, but we only need to zero the update of
1189                 // virtual, shell or frozen particles in at most one
1190                 // of the phases.
1191                 if (updateType != SDUpdate::FrictionAndNoiseOnly)
1192                 {
1193                     v[n][d]      = 0.0;
1194                     xprime[n][d] = x[n][d];
1195                 }
1196             }
1197         }
1198     }
1199 }
1200
1201 static void do_update_sd(int                                 start,
1202                          int                                 nrend,
1203                          real                                dt,
1204                          int64_t                             step,
1205                          const rvec* gmx_restrict            x,
1206                          rvec* gmx_restrict                  xprime,
1207                          rvec* gmx_restrict                  v,
1208                          const rvec* gmx_restrict            f,
1209                          gmx::ArrayRef<const ivec>           nFreeze,
1210                          gmx::ArrayRef<const real>           invmass,
1211                          gmx::ArrayRef<const ParticleType>   ptype,
1212                          gmx::ArrayRef<const unsigned short> cFREEZE,
1213                          gmx::ArrayRef<const unsigned short> cTC,
1214                          gmx::ArrayRef<const unsigned short> cAcceleration,
1215                          const rvec*                         acceleration,
1216                          int                                 seed,
1217                          const t_commrec*                    cr,
1218                          const gmx_stochd_t&                 sd,
1219                          bool                                haveConstraints)
1220 {
1221     if (haveConstraints)
1222     {
1223         // With constraints, the SD update is done in 2 parts
1224         doSDUpdateGeneral<SDUpdate::ForcesOnly>(sd,
1225                                                 start,
1226                                                 nrend,
1227                                                 dt,
1228                                                 nFreeze,
1229                                                 invmass,
1230                                                 ptype,
1231                                                 cFREEZE,
1232                                                 gmx::ArrayRef<const unsigned short>(),
1233                                                 cAcceleration,
1234                                                 acceleration,
1235                                                 x,
1236                                                 xprime,
1237                                                 v,
1238                                                 f,
1239                                                 step,
1240                                                 seed,
1241                                                 nullptr);
1242     }
1243     else
1244     {
1245         doSDUpdateGeneral<SDUpdate::Combined>(
1246                 sd,
1247                 start,
1248                 nrend,
1249                 dt,
1250                 nFreeze,
1251                 invmass,
1252                 ptype,
1253                 cFREEZE,
1254                 cTC,
1255                 cAcceleration,
1256                 acceleration,
1257                 x,
1258                 xprime,
1259                 v,
1260                 f,
1261                 step,
1262                 seed,
1263                 haveDDAtomOrdering(*cr) ? cr->dd->globalAtomIndices.data() : nullptr);
1264     }
1265 }
1266
1267 static void do_update_bd(int                                 start,
1268                          int                                 nrend,
1269                          real                                dt,
1270                          int64_t                             step,
1271                          const rvec* gmx_restrict            x,
1272                          rvec* gmx_restrict                  xprime,
1273                          rvec* gmx_restrict                  v,
1274                          const rvec* gmx_restrict            f,
1275                          gmx::ArrayRef<const ivec>           nFreeze,
1276                          gmx::ArrayRef<const real>           invmass,
1277                          gmx::ArrayRef<const ParticleType>   ptype,
1278                          gmx::ArrayRef<const unsigned short> cFREEZE,
1279                          gmx::ArrayRef<const unsigned short> cTC,
1280                          real                                friction_coefficient,
1281                          const real*                         rf,
1282                          int                                 seed,
1283                          const int*                          gatindex)
1284 {
1285     /* note -- these appear to be full step velocities . . .  */
1286     int  gf = 0, gt = 0;
1287     real vn;
1288     real invfr = 0;
1289     int  n, d;
1290     // Use 1 bit of internal counters to give us 2*2 64-bits values per stream
1291     // Each 64-bit value is enough for 4 normal distribution table numbers.
1292     gmx::ThreeFry2x64<0>                       rng(seed, gmx::RandomDomain::UpdateCoordinates);
1293     gmx::TabulatedNormalDistribution<real, 14> dist;
1294
1295     if (friction_coefficient != 0)
1296     {
1297         invfr = 1.0 / friction_coefficient;
1298     }
1299
1300     for (n = start; (n < nrend); n++)
1301     {
1302         int ng = gatindex ? gatindex[n] : n;
1303
1304         rng.restart(step, ng);
1305         dist.reset();
1306
1307         if (!cFREEZE.empty())
1308         {
1309             gf = cFREEZE[n];
1310         }
1311         if (!cTC.empty())
1312         {
1313             gt = cTC[n];
1314         }
1315         for (d = 0; (d < DIM); d++)
1316         {
1317             if ((ptype[n] != ParticleType::Shell) && !nFreeze[gf][d])
1318             {
1319                 if (friction_coefficient != 0)
1320                 {
1321                     vn = invfr * f[n][d] + rf[gt] * dist(rng);
1322                 }
1323                 else
1324                 {
1325                     /* NOTE: invmass = 2/(mass*friction_constant*dt) */
1326                     vn = 0.5 * invmass[n] * f[n][d] * dt
1327                          + std::sqrt(0.5 * invmass[n]) * rf[gt] * dist(rng);
1328                 }
1329
1330                 v[n][d]      = vn;
1331                 xprime[n][d] = x[n][d] + vn * dt;
1332             }
1333             else
1334             {
1335                 v[n][d]      = 0.0;
1336                 xprime[n][d] = x[n][d];
1337             }
1338         }
1339     }
1340 }
1341
1342 extern void init_ekinstate(ekinstate_t* ekinstate, const t_inputrec* ir)
1343 {
1344     ekinstate->ekin_n = ir->opts.ngtc;
1345     snew(ekinstate->ekinh, ekinstate->ekin_n);
1346     snew(ekinstate->ekinf, ekinstate->ekin_n);
1347     snew(ekinstate->ekinh_old, ekinstate->ekin_n);
1348     ekinstate->ekinscalef_nhc.resize(ekinstate->ekin_n);
1349     ekinstate->ekinscaleh_nhc.resize(ekinstate->ekin_n);
1350     ekinstate->vscale_nhc.resize(ekinstate->ekin_n);
1351     ekinstate->dekindl          = 0;
1352     ekinstate->mvcos            = 0;
1353     ekinstate->hasReadEkinState = false;
1354 }
1355
1356 void update_ekinstate(ekinstate_t* ekinstate, const gmx_ekindata_t* ekind)
1357 {
1358     int i;
1359
1360     for (i = 0; i < ekinstate->ekin_n; i++)
1361     {
1362         copy_mat(ekind->tcstat[i].ekinh, ekinstate->ekinh[i]);
1363         copy_mat(ekind->tcstat[i].ekinf, ekinstate->ekinf[i]);
1364         copy_mat(ekind->tcstat[i].ekinh_old, ekinstate->ekinh_old[i]);
1365         ekinstate->ekinscalef_nhc[i] = ekind->tcstat[i].ekinscalef_nhc;
1366         ekinstate->ekinscaleh_nhc[i] = ekind->tcstat[i].ekinscaleh_nhc;
1367         ekinstate->vscale_nhc[i]     = ekind->tcstat[i].vscale_nhc;
1368     }
1369
1370     copy_mat(ekind->ekin, ekinstate->ekin_total);
1371     ekinstate->dekindl = ekind->dekindl;
1372     ekinstate->mvcos   = ekind->cosacc.mvcos;
1373 }
1374
1375 void restore_ekinstate_from_state(const t_commrec* cr, gmx_ekindata_t* ekind, const ekinstate_t* ekinstate)
1376 {
1377     int i, n;
1378
1379     if (MASTER(cr))
1380     {
1381         for (i = 0; i < ekinstate->ekin_n; i++)
1382         {
1383             copy_mat(ekinstate->ekinh[i], ekind->tcstat[i].ekinh);
1384             copy_mat(ekinstate->ekinf[i], ekind->tcstat[i].ekinf);
1385             copy_mat(ekinstate->ekinh_old[i], ekind->tcstat[i].ekinh_old);
1386             ekind->tcstat[i].ekinscalef_nhc = ekinstate->ekinscalef_nhc[i];
1387             ekind->tcstat[i].ekinscaleh_nhc = ekinstate->ekinscaleh_nhc[i];
1388             ekind->tcstat[i].vscale_nhc     = ekinstate->vscale_nhc[i];
1389         }
1390
1391         copy_mat(ekinstate->ekin_total, ekind->ekin);
1392
1393         ekind->dekindl      = ekinstate->dekindl;
1394         ekind->cosacc.mvcos = ekinstate->mvcos;
1395         n                   = ekinstate->ekin_n;
1396     }
1397
1398     if (PAR(cr))
1399     {
1400         gmx_bcast(sizeof(n), &n, cr->mpi_comm_mygroup);
1401         for (i = 0; i < n; i++)
1402         {
1403             gmx_bcast(DIM * DIM * sizeof(ekind->tcstat[i].ekinh[0][0]),
1404                       ekind->tcstat[i].ekinh[0],
1405                       cr->mpi_comm_mygroup);
1406             gmx_bcast(DIM * DIM * sizeof(ekind->tcstat[i].ekinf[0][0]),
1407                       ekind->tcstat[i].ekinf[0],
1408                       cr->mpi_comm_mygroup);
1409             gmx_bcast(DIM * DIM * sizeof(ekind->tcstat[i].ekinh_old[0][0]),
1410                       ekind->tcstat[i].ekinh_old[0],
1411                       cr->mpi_comm_mygroup);
1412
1413             gmx_bcast(sizeof(ekind->tcstat[i].ekinscalef_nhc),
1414                       &(ekind->tcstat[i].ekinscalef_nhc),
1415                       cr->mpi_comm_mygroup);
1416             gmx_bcast(sizeof(ekind->tcstat[i].ekinscaleh_nhc),
1417                       &(ekind->tcstat[i].ekinscaleh_nhc),
1418                       cr->mpi_comm_mygroup);
1419             gmx_bcast(sizeof(ekind->tcstat[i].vscale_nhc), &(ekind->tcstat[i].vscale_nhc), cr->mpi_comm_mygroup);
1420         }
1421         gmx_bcast(DIM * DIM * sizeof(ekind->ekin[0][0]), ekind->ekin[0], cr->mpi_comm_mygroup);
1422
1423         gmx_bcast(sizeof(ekind->dekindl), &ekind->dekindl, cr->mpi_comm_mygroup);
1424         gmx_bcast(sizeof(ekind->cosacc.mvcos), &ekind->cosacc.mvcos, cr->mpi_comm_mygroup);
1425     }
1426 }
1427
1428 void getThreadAtomRange(int numThreads, int threadIndex, int numAtoms, int* startAtom, int* endAtom)
1429 {
1430 #if GMX_HAVE_SIMD_UPDATE
1431     constexpr int blockSize = GMX_SIMD_REAL_WIDTH;
1432 #else
1433     constexpr int blockSize = 1;
1434 #endif
1435     int numBlocks = (numAtoms + blockSize - 1) / blockSize;
1436
1437     *startAtom = ((numBlocks * threadIndex) / numThreads) * blockSize;
1438     *endAtom   = ((numBlocks * (threadIndex + 1)) / numThreads) * blockSize;
1439     if (threadIndex == numThreads - 1)
1440     {
1441         *endAtom = numAtoms;
1442     }
1443 }
1444
1445 void Update::Impl::update_sd_second_half(const t_inputrec&                 inputRecord,
1446                                          int64_t                           step,
1447                                          real*                             dvdlambda,
1448                                          int                               homenr,
1449                                          gmx::ArrayRef<const ParticleType> ptype,
1450                                          gmx::ArrayRef<const real>         invMass,
1451                                          t_state*                          state,
1452                                          const t_commrec*                  cr,
1453                                          t_nrnb*                           nrnb,
1454                                          gmx_wallcycle*                    wcycle,
1455                                          gmx::Constraints*                 constr,
1456                                          bool                              do_log,
1457                                          bool                              do_ene)
1458 {
1459     if (!constr)
1460     {
1461         return;
1462     }
1463     if (inputRecord.eI == IntegrationAlgorithm::SD1)
1464     {
1465
1466         /* Cast delta_t from double to real to make the integrators faster.
1467          * The only reason for having delta_t double is to get accurate values
1468          * for t=delta_t*step when step is larger than float precision.
1469          * For integration dt the accuracy of real suffices, since with
1470          * integral += dt*integrand the increment is nearly always (much) smaller
1471          * than the integral (and the integrand has real precision).
1472          */
1473         real dt = inputRecord.delta_t;
1474
1475         wallcycle_start(wcycle, WallCycleCounter::Update);
1476
1477         int nth = gmx_omp_nthreads_get(ModuleMultiThread::Update);
1478
1479 #pragma omp parallel for num_threads(nth) schedule(static)
1480         for (int th = 0; th < nth; th++)
1481         {
1482             try
1483             {
1484                 int start_th, end_th;
1485                 getThreadAtomRange(nth, th, homenr, &start_th, &end_th);
1486
1487                 doSDUpdateGeneral<SDUpdate::FrictionAndNoiseOnly>(
1488                         sd_,
1489                         start_th,
1490                         end_th,
1491                         dt,
1492                         gmx::arrayRefFromArray(inputRecord.opts.nFreeze, inputRecord.opts.ngfrz),
1493                         invMass,
1494                         ptype,
1495                         cFREEZE_,
1496                         cTC_,
1497                         cAcceleration_,
1498                         inputRecord.opts.acceleration,
1499                         state->x.rvec_array(),
1500                         xp_.rvec_array(),
1501                         state->v.rvec_array(),
1502                         nullptr,
1503                         step,
1504                         inputRecord.ld_seed,
1505                         haveDDAtomOrdering(*cr) ? cr->dd->globalAtomIndices.data() : nullptr);
1506             }
1507             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1508         }
1509         inc_nrnb(nrnb, eNR_UPDATE, homenr);
1510         wallcycle_stop(wcycle, WallCycleCounter::Update);
1511
1512         /* Constrain the coordinates upd->xp for half a time step */
1513         bool computeVirial = false;
1514         constr->apply(do_log,
1515                       do_ene,
1516                       step,
1517                       1,
1518                       0.5,
1519                       state->x.arrayRefWithPadding(),
1520                       xp_.arrayRefWithPadding(),
1521                       ArrayRef<RVec>(),
1522                       state->box,
1523                       state->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Bonded)],
1524                       dvdlambda,
1525                       state->v.arrayRefWithPadding(),
1526                       computeVirial,
1527                       nullptr,
1528                       ConstraintVariable::Positions);
1529     }
1530 }
1531
1532 void Update::Impl::finish_update(const t_inputrec&                   inputRecord,
1533                                  const bool                          havePartiallyFrozenAtoms,
1534                                  const int                           homenr,
1535                                  gmx::ArrayRef<const unsigned short> cFREEZE,
1536                                  t_state*                            state,
1537                                  gmx_wallcycle*                      wcycle,
1538                                  const bool                          haveConstraints)
1539 {
1540     /* NOTE: Currently we always integrate to a temporary buffer and
1541      * then copy the results back here.
1542      */
1543
1544     wallcycle_start_nocount(wcycle, WallCycleCounter::Update);
1545
1546     auto xp = makeConstArrayRef(xp_).subArray(0, homenr);
1547     auto x  = makeArrayRef(state->x).subArray(0, homenr);
1548
1549     if (havePartiallyFrozenAtoms && haveConstraints)
1550     {
1551         /* We have atoms that are frozen along some, but not all dimensions,
1552          * then constraints will have moved them also along the frozen dimensions.
1553          * To freeze such degrees of freedom we do not copy them back here.
1554          */
1555         const ivec* nFreeze = inputRecord.opts.nFreeze;
1556
1557         for (int i = 0; i < homenr; i++)
1558         {
1559             const int g = cFREEZE[i];
1560
1561             for (int d = 0; d < DIM; d++)
1562             {
1563                 if (nFreeze[g][d] == 0)
1564                 {
1565                     x[i][d] = xp[i][d];
1566                 }
1567             }
1568         }
1569     }
1570     else
1571     {
1572         /* We have no frozen atoms or fully frozen atoms which have not
1573          * been moved by the update, so we can simply copy all coordinates.
1574          */
1575         int gmx_unused nth = gmx_omp_nthreads_get(ModuleMultiThread::Update);
1576 #pragma omp parallel for num_threads(nth) schedule(static)
1577         for (int i = 0; i < homenr; i++)
1578         {
1579             // Trivial statement, does not throw
1580             x[i] = xp[i];
1581         }
1582     }
1583
1584     wallcycle_stop(wcycle, WallCycleCounter::Update);
1585 }
1586
1587 void Update::Impl::update_coords(const t_inputrec&                 inputRecord,
1588                                  int64_t                           step,
1589                                  int                               homenr,
1590                                  bool                              havePartiallyFrozenAtoms,
1591                                  gmx::ArrayRef<const ParticleType> ptype,
1592                                  gmx::ArrayRef<const real>         invMass,
1593                                  gmx::ArrayRef<const rvec>         invMassPerDim,
1594                                  t_state*                          state,
1595                                  const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
1596                                  t_fcdata*                                        fcdata,
1597                                  const gmx_ekindata_t*                            ekind,
1598                                  const matrix                                     M,
1599                                  int                                              updatePart,
1600                                  const t_commrec*                                 cr,
1601                                  const bool                                       haveConstraints)
1602 {
1603     /* Running the velocity half does nothing except for velocity verlet */
1604     if ((updatePart == etrtVELOCITY1 || updatePart == etrtVELOCITY2) && !EI_VV(inputRecord.eI))
1605     {
1606         gmx_incons("update_coords called for velocity without VV integrator");
1607     }
1608
1609     /* Cast to real for faster code, no loss in precision (see comment above) */
1610     real dt = inputRecord.delta_t;
1611
1612     /* We need to update the NMR restraint history when time averaging is used */
1613     if (state->flags & enumValueToBitMask(StateEntry::DisreRm3Tav))
1614     {
1615         update_disres_history(*fcdata->disres, &state->hist);
1616     }
1617     if (state->flags & enumValueToBitMask(StateEntry::OrireDtav))
1618     {
1619         GMX_ASSERT(fcdata, "Need valid fcdata");
1620         fcdata->orires->updateHistory();
1621     }
1622
1623     /* ############# START The update of velocities and positions ######### */
1624     int nth = gmx_omp_nthreads_get(ModuleMultiThread::Update);
1625
1626 #pragma omp parallel for num_threads(nth) schedule(static)
1627     for (int th = 0; th < nth; th++)
1628     {
1629         try
1630         {
1631             int start_th, end_th;
1632             getThreadAtomRange(nth, th, homenr, &start_th, &end_th);
1633
1634             const rvec* x_rvec  = state->x.rvec_array();
1635             rvec*       xp_rvec = xp_.rvec_array();
1636             rvec*       v_rvec  = state->v.rvec_array();
1637             const rvec* f_rvec  = as_rvec_array(f.unpaddedConstArrayRef().data());
1638
1639             switch (inputRecord.eI)
1640             {
1641                 case (IntegrationAlgorithm::MD):
1642                     do_update_md(start_th,
1643                                  end_th,
1644                                  dt,
1645                                  step,
1646                                  x_rvec,
1647                                  xp_rvec,
1648                                  v_rvec,
1649                                  f_rvec,
1650                                  inputRecord.etc,
1651                                  inputRecord.epc,
1652                                  inputRecord.nsttcouple,
1653                                  inputRecord.nstpcouple,
1654                                  cTC_,
1655                                  inputRecord.useConstantAcceleration,
1656                                  cAcceleration_,
1657                                  inputRecord.opts.acceleration,
1658                                  invMass,
1659                                  invMassPerDim,
1660                                  ekind,
1661                                  state->box,
1662                                  state->nosehoover_vxi.data(),
1663                                  M,
1664                                  havePartiallyFrozenAtoms);
1665                     break;
1666                 case (IntegrationAlgorithm::SD1):
1667                     do_update_sd(start_th,
1668                                  end_th,
1669                                  dt,
1670                                  step,
1671                                  x_rvec,
1672                                  xp_rvec,
1673                                  v_rvec,
1674                                  f_rvec,
1675                                  gmx::arrayRefFromArray(inputRecord.opts.nFreeze, inputRecord.opts.ngfrz),
1676                                  invMass,
1677                                  ptype,
1678                                  cFREEZE_,
1679                                  cTC_,
1680                                  cAcceleration_,
1681                                  inputRecord.opts.acceleration,
1682                                  inputRecord.ld_seed,
1683                                  cr,
1684                                  sd_,
1685                                  haveConstraints);
1686                     break;
1687                 case (IntegrationAlgorithm::BD):
1688                     do_update_bd(start_th,
1689                                  end_th,
1690                                  dt,
1691                                  step,
1692                                  x_rvec,
1693                                  xp_rvec,
1694                                  v_rvec,
1695                                  f_rvec,
1696                                  gmx::arrayRefFromArray(inputRecord.opts.nFreeze, inputRecord.opts.ngfrz),
1697                                  invMass,
1698                                  ptype,
1699                                  cFREEZE_,
1700                                  cTC_,
1701                                  inputRecord.bd_fric,
1702                                  sd_.bd_rf.data(),
1703                                  inputRecord.ld_seed,
1704                                  haveDDAtomOrdering(*cr) ? cr->dd->globalAtomIndices.data() : nullptr);
1705                     break;
1706                 case (IntegrationAlgorithm::VV):
1707                 case (IntegrationAlgorithm::VVAK):
1708                 {
1709                     gmx_bool bExtended = (inputRecord.etc == TemperatureCoupling::NoseHoover
1710                                           || inputRecord.epc == PressureCoupling::ParrinelloRahman
1711                                           || inputRecord.epc == PressureCoupling::Mttk);
1712
1713                     /* assuming barostat coupled to group 0 */
1714                     real alpha = 1.0 + DIM / static_cast<real>(inputRecord.opts.nrdf[0]);
1715                     switch (updatePart)
1716                     {
1717                         case etrtVELOCITY1:
1718                         case etrtVELOCITY2:
1719                             do_update_vv_vel(start_th,
1720                                              end_th,
1721                                              dt,
1722                                              gmx::arrayRefFromArray(inputRecord.opts.nFreeze,
1723                                                                     inputRecord.opts.ngfrz),
1724                                              cAcceleration_,
1725                                              inputRecord.opts.acceleration,
1726                                              invMass,
1727                                              ptype,
1728                                              cFREEZE_,
1729                                              v_rvec,
1730                                              f_rvec,
1731                                              bExtended,
1732                                              state->veta,
1733                                              alpha);
1734                             break;
1735                         case etrtPOSITION:
1736                             do_update_vv_pos(start_th,
1737                                              end_th,
1738                                              dt,
1739                                              gmx::arrayRefFromArray(inputRecord.opts.nFreeze,
1740                                                                     inputRecord.opts.ngfrz),
1741                                              ptype,
1742                                              cFREEZE_,
1743                                              x_rvec,
1744                                              xp_rvec,
1745                                              v_rvec,
1746                                              bExtended,
1747                                              state->veta);
1748                             break;
1749                     }
1750                     break;
1751                 }
1752                 default: gmx_fatal(FARGS, "Don't know how to update coordinates");
1753             }
1754         }
1755         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1756     }
1757 }
1758
1759 void Update::Impl::update_for_constraint_virial(const t_inputrec&         inputRecord,
1760                                                 int                       homenr,
1761                                                 bool                      havePartiallyFrozenAtoms,
1762                                                 gmx::ArrayRef<const real> invmass,
1763                                                 gmx::ArrayRef<const rvec> invMassPerDim,
1764                                                 const t_state&            state,
1765                                                 const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
1766                                                 const gmx_ekindata_t& ekind)
1767 {
1768     GMX_ASSERT(inputRecord.eI == IntegrationAlgorithm::MD || inputRecord.eI == IntegrationAlgorithm::SD1,
1769                "Only leap-frog is supported here");
1770
1771     // Cast to real for faster code, no loss in precision
1772     const real dt = inputRecord.delta_t;
1773
1774     const int nth = gmx_omp_nthreads_get(ModuleMultiThread::Update);
1775
1776 #pragma omp parallel for num_threads(nth) schedule(static)
1777     for (int th = 0; th < nth; th++)
1778     {
1779         try
1780         {
1781             int start_th, end_th;
1782             getThreadAtomRange(nth, th, homenr, &start_th, &end_th);
1783
1784             const rvec* x_rvec  = state.x.rvec_array();
1785             rvec*       xp_rvec = xp_.rvec_array();
1786             const rvec* v_rvec  = state.v.rvec_array();
1787             const rvec* f_rvec  = as_rvec_array(f.unpaddedConstArrayRef().data());
1788
1789             doUpdateMDDoNotUpdateVelocities(
1790                     start_th, end_th, dt, x_rvec, xp_rvec, v_rvec, f_rvec, havePartiallyFrozenAtoms, invmass, invMassPerDim, ekind);
1791         }
1792         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1793     }
1794 }