800d68aa1d830587c6add17129fed533a4eb181c
[alexxy/gromacs.git] / src / gromacs / mdlib / update.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "update.h"
41
42 #include <cmath>
43 #include <cstdio>
44
45 #include <algorithm>
46 #include <memory>
47
48 #include "gromacs/domdec/domdec_struct.h"
49 #include "gromacs/gmxlib/network.h"
50 #include "gromacs/gmxlib/nrnb.h"
51 #include "gromacs/listed_forces/disre.h"
52 #include "gromacs/listed_forces/orires.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/paddedvector.h"
55 #include "gromacs/math/units.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/mdlib/boxdeformation.h"
58 #include "gromacs/mdlib/constr.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdlib/stat.h"
61 #include "gromacs/mdlib/tgroup.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/fcdata.h"
64 #include "gromacs/mdtypes/group.h"
65 #include "gromacs/mdtypes/inputrec.h"
66 #include "gromacs/mdtypes/md_enums.h"
67 #include "gromacs/mdtypes/state.h"
68 #include "gromacs/pbcutil/pbc.h"
69 #include "gromacs/pulling/pull.h"
70 #include "gromacs/random/tabulatednormaldistribution.h"
71 #include "gromacs/random/threefry.h"
72 #include "gromacs/simd/simd.h"
73 #include "gromacs/timing/wallcycle.h"
74 #include "gromacs/topology/atoms.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/futil.h"
78 #include "gromacs/utility/gmxassert.h"
79 #include "gromacs/utility/smalloc.h"
80
81 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
82
83 struct gmx_sd_const_t
84 {
85     double em = 0;
86 };
87
88 struct gmx_sd_sigma_t
89 {
90     real V = 0;
91 };
92
93 struct gmx_stochd_t
94 {
95     /* BD stuff */
96     std::vector<real> bd_rf;
97     /* SD stuff */
98     std::vector<gmx_sd_const_t> sdc;
99     std::vector<gmx_sd_sigma_t> sdsig;
100     /* andersen temperature control stuff */
101     std::vector<bool> randomize_group;
102     std::vector<real> boltzfac;
103
104     explicit gmx_stochd_t(const t_inputrec& inputRecord);
105 };
106
107 //! pImpled implementation for Update
108 class Update::Impl
109 {
110 public:
111     //! Constructor
112     Impl(const t_inputrec& inputRecord, BoxDeformation* boxDeformation);
113     //! Destructor
114     ~Impl() = default;
115
116     void update_coords(const t_inputrec&                                inputRecord,
117                        int64_t                                          step,
118                        int                                              homenr,
119                        bool                                             havePartiallyFrozenAtoms,
120                        gmx::ArrayRef<const ParticleType>                ptype,
121                        gmx::ArrayRef<const real>                        invMass,
122                        gmx::ArrayRef<const rvec>                        invMassPerDim,
123                        t_state*                                         state,
124                        const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
125                        t_fcdata*                                        fcdata,
126                        const gmx_ekindata_t*                            ekind,
127                        const matrix                                     M,
128                        int                                              UpdatePart,
129                        const t_commrec*                                 cr,
130                        bool                                             haveConstraints);
131
132     void finish_update(const t_inputrec&                   inputRecord,
133                        bool                                havePartiallyFrozenAtoms,
134                        int                                 homenr,
135                        gmx::ArrayRef<const unsigned short> cFREEZE,
136                        t_state*                            state,
137                        gmx_wallcycle*                      wcycle,
138                        bool                                haveConstraints);
139
140     void update_sd_second_half(const t_inputrec&                 inputRecord,
141                                int64_t                           step,
142                                real*                             dvdlambda,
143                                int                               homenr,
144                                gmx::ArrayRef<const ParticleType> ptype,
145                                gmx::ArrayRef<const real>         invMass,
146                                t_state*                          state,
147                                const t_commrec*                  cr,
148                                t_nrnb*                           nrnb,
149                                gmx_wallcycle*                    wcycle,
150                                gmx::Constraints*                 constr,
151                                bool                              do_log,
152                                bool                              do_ene);
153
154     void update_for_constraint_virial(const t_inputrec&         inputRecord,
155                                       int                       homenr,
156                                       bool                      havePartiallyFrozenAtoms,
157                                       gmx::ArrayRef<const real> invmass,
158                                       gmx::ArrayRef<const rvec> invMassPerDim,
159                                       const t_state&            state,
160                                       const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
161                                       const gmx_ekindata_t&                            ekind);
162
163     void update_temperature_constants(const t_inputrec& inputRecord);
164
165     const std::vector<bool>& getAndersenRandomizeGroup() const { return sd_.randomize_group; }
166
167     const std::vector<real>& getBoltzmanFactor() const { return sd_.boltzfac; }
168
169     PaddedVector<RVec>* xp() { return &xp_; }
170
171     BoxDeformation* deform() const { return deform_; }
172
173     //! Group index for freezing
174     gmx::ArrayRef<const unsigned short> cFREEZE_;
175     //! Group index for temperature coupling
176     gmx::ArrayRef<const unsigned short> cTC_;
177
178 private:
179     //! stochastic dynamics struct
180     gmx_stochd_t sd_;
181     //! xprime for constraint algorithms
182     PaddedVector<RVec> xp_;
183     //! Box deformation handler (or nullptr if inactive).
184     BoxDeformation* deform_ = nullptr;
185 };
186
187 Update::Update(const t_inputrec& inputRecord, BoxDeformation* boxDeformation) :
188     impl_(new Impl(inputRecord, boxDeformation)){};
189
190 Update::~Update(){};
191
192 const std::vector<bool>& Update::getAndersenRandomizeGroup() const
193 {
194     return impl_->getAndersenRandomizeGroup();
195 }
196
197 const std::vector<real>& Update::getBoltzmanFactor() const
198 {
199     return impl_->getBoltzmanFactor();
200 }
201
202 PaddedVector<RVec>* Update::xp()
203 {
204     return impl_->xp();
205 }
206
207 BoxDeformation* Update::deform() const
208 {
209     return impl_->deform();
210 }
211
212 void Update::update_coords(const t_inputrec&                 inputRecord,
213                            int64_t                           step,
214                            const int                         homenr,
215                            const bool                        havePartiallyFrozenAtoms,
216                            gmx::ArrayRef<const ParticleType> ptype,
217                            gmx::ArrayRef<const real>         invMass,
218                            gmx::ArrayRef<const rvec>         invMassPerDim,
219                            t_state*                          state,
220                            const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
221                            t_fcdata*                                        fcdata,
222                            const gmx_ekindata_t*                            ekind,
223                            const matrix                                     M,
224                            int                                              updatePart,
225                            const t_commrec*                                 cr,
226                            const bool                                       haveConstraints)
227 {
228     return impl_->update_coords(inputRecord,
229                                 step,
230                                 homenr,
231                                 havePartiallyFrozenAtoms,
232                                 ptype,
233                                 invMass,
234                                 invMassPerDim,
235                                 state,
236                                 f,
237                                 fcdata,
238                                 ekind,
239                                 M,
240                                 updatePart,
241                                 cr,
242                                 haveConstraints);
243 }
244
245 void Update::finish_update(const t_inputrec& inputRecord,
246                            const bool        havePartiallyFrozenAtoms,
247                            const int         homenr,
248                            t_state*          state,
249                            gmx_wallcycle*    wcycle,
250                            const bool        haveConstraints)
251 {
252     return impl_->finish_update(
253             inputRecord, havePartiallyFrozenAtoms, homenr, impl_->cFREEZE_, state, wcycle, haveConstraints);
254 }
255
256 void Update::update_sd_second_half(const t_inputrec&                 inputRecord,
257                                    int64_t                           step,
258                                    real*                             dvdlambda,
259                                    const int                         homenr,
260                                    gmx::ArrayRef<const ParticleType> ptype,
261                                    gmx::ArrayRef<const real>         invMass,
262                                    t_state*                          state,
263                                    const t_commrec*                  cr,
264                                    t_nrnb*                           nrnb,
265                                    gmx_wallcycle*                    wcycle,
266                                    gmx::Constraints*                 constr,
267                                    bool                              do_log,
268                                    bool                              do_ene)
269 {
270     return impl_->update_sd_second_half(
271             inputRecord, step, dvdlambda, homenr, ptype, invMass, state, cr, nrnb, wcycle, constr, do_log, do_ene);
272 }
273
274 void Update::update_for_constraint_virial(const t_inputrec&         inputRecord,
275                                           const int                 homenr,
276                                           const bool                havePartiallyFrozenAtoms,
277                                           gmx::ArrayRef<const real> invmass,
278                                           gmx::ArrayRef<const rvec> invMassPerDim,
279                                           const t_state&            state,
280                                           const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
281                                           const gmx_ekindata_t&                            ekind)
282 {
283     return impl_->update_for_constraint_virial(
284             inputRecord, homenr, havePartiallyFrozenAtoms, invmass, invMassPerDim, state, f, ekind);
285 }
286
287 void Update::update_temperature_constants(const t_inputrec& inputRecord)
288 {
289     return impl_->update_temperature_constants(inputRecord);
290 }
291
292 /*! \brief Sets whether we store the updated velocities */
293 enum class StoreUpdatedVelocities
294 {
295     yes, //!< Store the updated velocities
296     no   //!< Do not store the updated velocities
297 };
298
299 /*! \brief Sets the number of different temperature coupling values */
300 enum class NumTempScaleValues
301 {
302     single,  //!< Single T-scaling value (either one group or all values =1)
303     multiple //!< Multiple T-scaling values, need to use T-group indices
304 };
305
306 /*! \brief Sets if to apply no or diagonal Parrinello-Rahman pressure scaling
307  *
308  * Note that this enum is only used in updateMDLeapfrogSimple(), which does
309  * not handle fully anistropic Parrinello-Rahman scaling, so we only have
310  * options \p no and \p diagonal here and no anistropic option.
311  */
312 enum class ApplyParrinelloRahmanVScaling
313 {
314     no,      //!< Do not apply velocity scaling (not a PR-coupling run or step)
315     diagonal //!< Apply velocity scaling using a diagonal matrix
316 };
317
318 /*! \brief Integrate using leap-frog with T-scaling and optionally diagonal Parrinello-Rahman p-coupling
319  *
320  * \tparam       storeUpdatedVelocities Tells whether we should store the updated velocities
321  * \tparam       numTempScaleValues     The number of different T-couple values
322  * \tparam       applyPRVScaling        Apply Parrinello-Rahman velocity scaling
323  * \param[in]    start                  Index of first atom to update
324  * \param[in]    nrend                  Last atom to update: \p nrend - 1
325  * \param[in]    dt                     The time step
326  * \param[in]    dtPressureCouple       Time step for pressure coupling
327  * \param[in]    invMassPerDim          1/mass per atom and dimension
328  * \param[in]    tcstat                 Temperature coupling information
329  * \param[in]    cTC                    T-coupling group index per atom
330  * \param[in]    pRVScaleMatrixDiagonal Parrinello-Rahman v-scale matrix diagonal
331  * \param[in]    x                      Input coordinates
332  * \param[out]   xprime                 Updated coordinates
333  * \param[inout] v                      Velocities, type either rvec* or const rvec*
334  * \param[in]    f                      Forces
335  *
336  * We expect this template to get good SIMD acceleration by most compilers,
337  * unlike the more complex general template.
338  * Note that we might get even better SIMD acceleration when we introduce
339  * aligned (and padded) memory, possibly with some hints for the compilers.
340  */
341 template<StoreUpdatedVelocities storeUpdatedVelocities, NumTempScaleValues numTempScaleValues, ApplyParrinelloRahmanVScaling applyPRVScaling, typename VelocityType>
342 static std::enable_if_t<std::is_same<VelocityType, rvec>::value || std::is_same<VelocityType, const rvec>::value, void>
343 updateMDLeapfrogSimple(int                                 start,
344                        int                                 nrend,
345                        real                                dt,
346                        real                                dtPressureCouple,
347                        gmx::ArrayRef<const rvec>           invMassPerDim,
348                        gmx::ArrayRef<const t_grp_tcstat>   tcstat,
349                        gmx::ArrayRef<const unsigned short> cTC,
350                        const rvec                          pRVScaleMatrixDiagonal,
351                        const rvec* gmx_restrict            x,
352                        rvec* gmx_restrict                  xprime,
353                        VelocityType* gmx_restrict          v,
354                        const rvec* gmx_restrict            f)
355 {
356     real lambdaGroup;
357
358     if (numTempScaleValues == NumTempScaleValues::single)
359     {
360         lambdaGroup = tcstat[0].lambda;
361     }
362
363     for (int a = start; a < nrend; a++)
364     {
365         if (numTempScaleValues == NumTempScaleValues::multiple)
366         {
367             lambdaGroup = tcstat[cTC[a]].lambda;
368         }
369
370         for (int d = 0; d < DIM; d++)
371         {
372             /* Note that using rvec invMassPerDim results in more efficient
373              * SIMD code, but this increases the cache pressure.
374              * For large systems with PME on the CPU this slows down the
375              * (then already slow) update by 20%. If all data remains in cache,
376              * using rvec is much faster.
377              */
378             real vNew = lambdaGroup * v[a][d] + f[a][d] * invMassPerDim[a][d] * dt;
379
380             if (applyPRVScaling == ApplyParrinelloRahmanVScaling::diagonal)
381             {
382                 vNew -= dtPressureCouple * pRVScaleMatrixDiagonal[d] * v[a][d];
383             }
384             if constexpr (storeUpdatedVelocities == StoreUpdatedVelocities::yes)
385             {
386                 v[a][d] = vNew;
387             }
388             // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
389             xprime[a][d] = x[a][d] + vNew * dt;
390         }
391     }
392 }
393
394 #if GMX_SIMD && GMX_SIMD_HAVE_REAL
395 #    define GMX_HAVE_SIMD_UPDATE 1
396 #else
397 #    define GMX_HAVE_SIMD_UPDATE 0
398 #endif
399
400 #if GMX_HAVE_SIMD_UPDATE
401
402 /*! \brief Load (aligned) the contents of GMX_SIMD_REAL_WIDTH rvec elements sequentially into 3 SIMD registers
403  *
404  * The loaded output is:
405  * \p r0: { r[index][XX], r[index][YY], ... }
406  * \p r1: { ... }
407  * \p r2: { ..., r[index+GMX_SIMD_REAL_WIDTH-1][YY], r[index+GMX_SIMD_REAL_WIDTH-1][ZZ] }
408  *
409  * \param[in]  r      Real to an rvec array, has to be aligned to SIMD register width
410  * \param[in]  index  Index of the first rvec triplet of reals to load
411  * \param[out] r0     Pointer to first SIMD register
412  * \param[out] r1     Pointer to second SIMD register
413  * \param[out] r2     Pointer to third SIMD register
414  */
415 static inline void simdLoadRvecs(const rvec* r, int index, SimdReal* r0, SimdReal* r1, SimdReal* r2)
416 {
417     const real* realPtr = r[index];
418
419     GMX_ASSERT(isSimdAligned(realPtr), "Pointer should be SIMD aligned");
420
421     *r0 = simdLoad(realPtr + 0 * GMX_SIMD_REAL_WIDTH);
422     *r1 = simdLoad(realPtr + 1 * GMX_SIMD_REAL_WIDTH);
423     *r2 = simdLoad(realPtr + 2 * GMX_SIMD_REAL_WIDTH);
424 }
425
426 /*! \brief Store (aligned) 3 SIMD registers sequentially to GMX_SIMD_REAL_WIDTH rvec elements
427  *
428  * The stored output is:
429  * \p r[index] = { { r0[0], r0[1], ... }
430  * ...
431  * \p r[index+GMX_SIMD_REAL_WIDTH-1] =  { ... , r2[GMX_SIMD_REAL_WIDTH-2], r2[GMX_SIMD_REAL_WIDTH-1] }
432  *
433  * \param[out] r      Pointer to an rvec array, has to be aligned to SIMD register width
434  * \param[in]  index  Index of the first rvec triplet of reals to store to
435  * \param[in]  r0     First SIMD register
436  * \param[in]  r1     Second SIMD register
437  * \param[in]  r2     Third SIMD register
438  */
439 static inline void simdStoreRvecs(rvec* r, int index, SimdReal r0, SimdReal r1, SimdReal r2)
440 {
441     real* realPtr = r[index];
442
443     GMX_ASSERT(isSimdAligned(realPtr), "Pointer should be SIMD aligned");
444
445     store(realPtr + 0 * GMX_SIMD_REAL_WIDTH, r0);
446     store(realPtr + 1 * GMX_SIMD_REAL_WIDTH, r1);
447     store(realPtr + 2 * GMX_SIMD_REAL_WIDTH, r2);
448 }
449
450 /*! \brief Integrate using leap-frog with single group T-scaling and SIMD
451  *
452  * \tparam       storeUpdatedVelocities Tells whether we should store the updated velocities
453  * \param[in]    start                  Index of first atom to update
454  * \param[in]    nrend                  Last atom to update: \p nrend - 1
455  * \param[in]    dt                     The time step
456  * \param[in]    invMass                1/mass per atom
457  * \param[in]    tcstat                 Temperature coupling information
458  * \param[in]    x                      Input coordinates
459  * \param[out]   xprime                 Updated coordinates
460  * \param[inout] v                      Velocities, type either rvec* or const rvec*
461  * \param[in]    f                      Forces
462  */
463 template<StoreUpdatedVelocities storeUpdatedVelocities, typename VelocityType>
464 static std::enable_if_t<std::is_same<VelocityType, rvec>::value || std::is_same<VelocityType, const rvec>::value, void>
465 updateMDLeapfrogSimpleSimd(int                               start,
466                            int                               nrend,
467                            real                              dt,
468                            gmx::ArrayRef<const real>         invMass,
469                            gmx::ArrayRef<const t_grp_tcstat> tcstat,
470                            const rvec* gmx_restrict          x,
471                            rvec* gmx_restrict                xprime,
472                            VelocityType* gmx_restrict        v,
473                            const rvec* gmx_restrict          f)
474 {
475     SimdReal timestep(dt);
476     SimdReal lambdaSystem(tcstat[0].lambda);
477
478     /* We declare variables here, since code is often slower when declaring them inside the loop */
479
480     /* Note: We should implement a proper PaddedVector, so we don't need this check */
481     GMX_ASSERT(isSimdAligned(invMass.data()), "invMass should be aligned");
482
483     for (int a = start; a < nrend; a += GMX_SIMD_REAL_WIDTH)
484     {
485         SimdReal invMass0, invMass1, invMass2;
486         expandScalarsToTriplets(simdLoad(invMass.data() + a), &invMass0, &invMass1, &invMass2);
487
488         SimdReal v0, v1, v2;
489         SimdReal f0, f1, f2;
490         simdLoadRvecs(v, a, &v0, &v1, &v2);
491         simdLoadRvecs(f, a, &f0, &f1, &f2);
492
493         v0 = fma(f0 * invMass0, timestep, lambdaSystem * v0);
494         v1 = fma(f1 * invMass1, timestep, lambdaSystem * v1);
495         v2 = fma(f2 * invMass2, timestep, lambdaSystem * v2);
496
497         if constexpr (storeUpdatedVelocities == StoreUpdatedVelocities::yes)
498         {
499             simdStoreRvecs(v, a, v0, v1, v2);
500         }
501
502         // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
503         SimdReal x0, x1, x2;
504         simdLoadRvecs(x, a, &x0, &x1, &x2);
505
506         SimdReal xprime0 = fma(v0, timestep, x0);
507         SimdReal xprime1 = fma(v1, timestep, x1);
508         SimdReal xprime2 = fma(v2, timestep, x2);
509
510         simdStoreRvecs(xprime, a, xprime0, xprime1, xprime2);
511     }
512 }
513
514 #endif // GMX_HAVE_SIMD_UPDATE
515
516 /*! \brief Sets the NEMD acceleration type */
517 enum class AccelerationType
518 {
519     none,
520     cosine
521 };
522
523 /*! \brief Integrate using leap-frog with support for everything.
524  *
525  * \tparam        accelerationType  Type of NEMD acceleration.
526  * \param[in]     start             Index of first atom to update.
527  * \param[in]     nrend             Last atom to update: \p nrend - 1.
528  * \param[in]     doNoseHoover      If to apply Nose-Hoover T-coupling.
529  * \param[in]     dt                The time step.
530  * \param[in]     dtPressureCouple  Time step for pressure coupling, is 0 when no pressure
531  *                                  coupling should be applied at this step.
532  * \param[in]     cTC               Temperature coupling group indices
533  * \param[in]     invMassPerDim     Inverse mass per dimension
534  * \param[in]     ekind             Kinetic energy data.
535  * \param[in]     box               The box dimensions.
536  * \param[in]     x                 Input coordinates.
537  * \param[out]    xprime            Updated coordinates.
538  * \param[inout]  v                 Velocities.
539  * \param[in]     f                 Forces.
540  * \param[in]     nh_vxi            Nose-Hoover velocity scaling factors.
541  * \param[in]     nsttcouple        Frequency of the temperature coupling steps.
542  * \param[in]     M                 Parrinello-Rahman scaling matrix.
543  */
544 template<AccelerationType accelerationType>
545 static void updateMDLeapfrogGeneral(int                                 start,
546                                     int                                 nrend,
547                                     bool                                doNoseHoover,
548                                     real                                dt,
549                                     real                                dtPressureCouple,
550                                     gmx::ArrayRef<const unsigned short> cTC,
551                                     gmx::ArrayRef<const rvec>           invMassPerDim,
552                                     const gmx_ekindata_t*               ekind,
553                                     const matrix                        box,
554                                     const rvec* gmx_restrict            x,
555                                     rvec* gmx_restrict                  xprime,
556                                     rvec* gmx_restrict                  v,
557                                     const rvec* gmx_restrict            f,
558                                     const double* gmx_restrict          nh_vxi,
559                                     const int                           nsttcouple,
560                                     const matrix                        M)
561 {
562     /* This is a version of the leap-frog integrator that supports
563      * all combinations of T-coupling, P-coupling and NEMD.
564      * Nose-Hoover uses the reversible leap-frog integrator from
565      * Holian et al. Phys Rev E 52(3) : 2338, 1995
566      */
567
568     gmx::ArrayRef<const t_grp_tcstat> tcstat = ekind->tcstat;
569
570     /* Initialize group values, changed later when multiple groups are used */
571     int gt = 0;
572
573     real omega_Z = 2 * static_cast<real>(M_PI) / box[ZZ][ZZ];
574
575     for (int n = start; n < nrend; n++)
576     {
577         if (!cTC.empty())
578         {
579             gt = cTC[n];
580         }
581         real lg = tcstat[gt].lambda;
582
583         rvec vRel;
584         real cosineZ, vCosine;
585         switch (accelerationType)
586         {
587             case AccelerationType::none: copy_rvec(v[n], vRel); break;
588             case AccelerationType::cosine:
589                 cosineZ = std::cos(x[n][ZZ] * omega_Z);
590                 vCosine = cosineZ * ekind->cosacc.vcos;
591                 /* Avoid scaling the cosine profile velocity */
592                 copy_rvec(v[n], vRel);
593                 vRel[XX] -= vCosine;
594                 break;
595         }
596
597         real factorNH = 0.0;
598         if (doNoseHoover)
599         {
600             /* Here we account for multiple time stepping, by increasing
601              * the Nose-Hoover correction by nsttcouple
602              * TODO: This can be pre-computed.
603              */
604             factorNH = 0.5 * nsttcouple * dt * nh_vxi[gt];
605         }
606
607         for (int d = 0; d < DIM; d++)
608         {
609             real vNew = (lg * vRel[d]
610                          + (f[n][d] * invMassPerDim[n][d] * dt - factorNH * vRel[d]
611                             - dtPressureCouple * iprod(M[d], vRel)))
612                         / (1 + factorNH);
613             switch (accelerationType)
614             {
615                 case AccelerationType::none: break;
616                 case AccelerationType::cosine:
617                     if (d == XX)
618                     {
619                         /* Add back the mean velocity and apply acceleration */
620                         vNew += vCosine + cosineZ * ekind->cosacc.cos_accel * dt;
621                     }
622                     break;
623             }
624             v[n][d]      = vNew;
625             xprime[n][d] = x[n][d] + vNew * dt;
626         }
627     }
628 }
629
630 /*! \brief Handles the Leap-frog MD x and v integration */
631 static void do_update_md(int                                  start,
632                          int                                  nrend,
633                          real                                 dt,
634                          int64_t                              step,
635                          const rvec* gmx_restrict             x,
636                          rvec* gmx_restrict                   xprime,
637                          rvec* gmx_restrict                   v,
638                          const rvec* gmx_restrict             f,
639                          const TemperatureCoupling            etc,
640                          const PressureCoupling               epc,
641                          const int                            nsttcouple,
642                          const int                            nstpcouple,
643                          gmx::ArrayRef<const unsigned short>  cTC,
644                          gmx::ArrayRef<const real> gmx_unused invmass,
645                          gmx::ArrayRef<const rvec>            invMassPerDim,
646                          const gmx_ekindata_t*                ekind,
647                          const matrix                         box,
648                          const double* gmx_restrict           nh_vxi,
649                          const matrix                         M,
650                          bool gmx_unused                      havePartiallyFrozenAtoms)
651 {
652     GMX_ASSERT(nrend == start || xprime != x,
653                "For SIMD optimization certain compilers need to have xprime != x");
654
655     /* Note: Berendsen pressure scaling is handled after do_update_md() */
656     bool doTempCouple =
657             (etc != TemperatureCoupling::No && do_per_step(step + nsttcouple - 1, nsttcouple));
658     bool doNoseHoover       = (etc == TemperatureCoupling::NoseHoover && doTempCouple);
659     bool doParrinelloRahman = (epc == PressureCoupling::ParrinelloRahman
660                                && do_per_step(step + nstpcouple - 1, nstpcouple));
661     bool doPROffDiagonal = (doParrinelloRahman && (M[YY][XX] != 0 || M[ZZ][XX] != 0 || M[ZZ][YY] != 0));
662
663     real dtPressureCouple = (doParrinelloRahman ? nstpcouple * dt : 0);
664
665     /* NEMD cosine acceleration is applied in updateMDLeapFrogGeneral */
666     bool doAcceleration = (ekind->cosacc.cos_accel != 0);
667
668     if (doNoseHoover || doPROffDiagonal || doAcceleration)
669     {
670         matrix stepM;
671         if (!doParrinelloRahman)
672         {
673             /* We should not apply PR scaling at this step */
674             clear_mat(stepM);
675         }
676         else
677         {
678             copy_mat(M, stepM);
679         }
680
681         if (!doAcceleration)
682         {
683             updateMDLeapfrogGeneral<AccelerationType::none>(start,
684                                                             nrend,
685                                                             doNoseHoover,
686                                                             dt,
687                                                             dtPressureCouple,
688                                                             cTC,
689                                                             invMassPerDim,
690                                                             ekind,
691                                                             box,
692                                                             x,
693                                                             xprime,
694                                                             v,
695                                                             f,
696                                                             nh_vxi,
697                                                             nsttcouple,
698                                                             stepM);
699         }
700         else
701         {
702             updateMDLeapfrogGeneral<AccelerationType::cosine>(start,
703                                                               nrend,
704                                                               doNoseHoover,
705                                                               dt,
706                                                               dtPressureCouple,
707                                                               cTC,
708                                                               invMassPerDim,
709                                                               ekind,
710                                                               box,
711                                                               x,
712                                                               xprime,
713                                                               v,
714                                                               f,
715                                                               nh_vxi,
716                                                               nsttcouple,
717                                                               stepM);
718         }
719     }
720     else
721     {
722         /* We determine if we have a single T-coupling lambda value for all
723          * atoms. That allows for better SIMD acceleration in the template.
724          * If we do not do temperature coupling (in the run or this step),
725          * all scaling values are 1, so we effectively have a single value.
726          */
727         bool haveSingleTempScaleValue = (!doTempCouple || ekind->ngtc == 1);
728
729         /* Extract some pointers needed by all cases */
730         gmx::ArrayRef<const t_grp_tcstat> tcstat = ekind->tcstat;
731
732         if (doParrinelloRahman)
733         {
734             GMX_ASSERT(!doPROffDiagonal,
735                        "updateMDLeapfrogSimple only support diagonal Parrinello-Rahman scaling "
736                        "matrices");
737
738             rvec diagM;
739             for (int d = 0; d < DIM; d++)
740             {
741                 diagM[d] = M[d][d];
742             }
743
744             if (haveSingleTempScaleValue)
745             {
746                 updateMDLeapfrogSimple<StoreUpdatedVelocities::yes, NumTempScaleValues::single, ApplyParrinelloRahmanVScaling::diagonal>(
747                         start, nrend, dt, dtPressureCouple, invMassPerDim, tcstat, cTC, diagM, x, xprime, v, f);
748             }
749             else
750             {
751                 updateMDLeapfrogSimple<StoreUpdatedVelocities::yes, NumTempScaleValues::multiple, ApplyParrinelloRahmanVScaling::diagonal>(
752                         start, nrend, dt, dtPressureCouple, invMassPerDim, tcstat, cTC, diagM, x, xprime, v, f);
753             }
754         }
755         else
756         {
757             if (haveSingleTempScaleValue)
758             {
759                 /* Note that modern compilers are pretty good at vectorizing
760                  * updateMDLeapfrogSimple(). But the SIMD version will still
761                  * be faster because invMass lowers the cache pressure
762                  * compared to invMassPerDim.
763                  */
764 #if GMX_HAVE_SIMD_UPDATE
765                 /* Check if we can use invmass instead of invMassPerDim */
766                 if (!havePartiallyFrozenAtoms)
767                 {
768                     updateMDLeapfrogSimpleSimd<StoreUpdatedVelocities::yes>(
769                             start, nrend, dt, invmass, tcstat, x, xprime, v, f);
770                 }
771                 else
772 #endif
773                 {
774                     updateMDLeapfrogSimple<StoreUpdatedVelocities::yes, NumTempScaleValues::single, ApplyParrinelloRahmanVScaling::no>(
775                             start, nrend, dt, dtPressureCouple, invMassPerDim, tcstat, cTC, nullptr, x, xprime, v, f);
776                 }
777             }
778             else
779             {
780                 updateMDLeapfrogSimple<StoreUpdatedVelocities::yes, NumTempScaleValues::multiple, ApplyParrinelloRahmanVScaling::no>(
781                         start, nrend, dt, dtPressureCouple, invMassPerDim, tcstat, cTC, nullptr, x, xprime, v, f);
782             }
783         }
784     }
785 }
786 /*! \brief Handles the Leap-frog MD x and v integration */
787 static void doUpdateMDDoNotUpdateVelocities(int                      start,
788                                             int                      nrend,
789                                             real                     dt,
790                                             const rvec* gmx_restrict x,
791                                             rvec* gmx_restrict       xprime,
792                                             const rvec* gmx_restrict v,
793                                             const rvec* gmx_restrict f,
794                                             bool gmx_unused          havePartiallyFrozenAtoms,
795                                             gmx::ArrayRef<const real> gmx_unused invmass,
796                                             gmx::ArrayRef<const rvec>            invMassPerDim,
797                                             const gmx_ekindata_t&                ekind)
798 {
799     GMX_ASSERT(nrend == start || xprime != x,
800                "For SIMD optimization certain compilers need to have xprime != x");
801
802     gmx::ArrayRef<const t_grp_tcstat> tcstat = ekind.tcstat;
803
804     /* Check if we can use invmass instead of invMassPerDim */
805 #if GMX_HAVE_SIMD_UPDATE
806     if (!havePartiallyFrozenAtoms)
807     {
808         updateMDLeapfrogSimpleSimd<StoreUpdatedVelocities::no>(
809                 start, nrend, dt, invmass, tcstat, x, xprime, v, f);
810     }
811     else
812 #endif
813     {
814         updateMDLeapfrogSimple<StoreUpdatedVelocities::no, NumTempScaleValues::single, ApplyParrinelloRahmanVScaling::no>(
815                 start, nrend, dt, dt, invMassPerDim, tcstat, gmx::ArrayRef<const unsigned short>(), nullptr, x, xprime, v, f);
816     }
817 }
818
819 static void do_update_vv_vel(int                                 start,
820                              int                                 nrend,
821                              real                                dt,
822                              gmx::ArrayRef<const ivec>           nFreeze,
823                              gmx::ArrayRef<const real>           invmass,
824                              gmx::ArrayRef<const ParticleType>   ptype,
825                              gmx::ArrayRef<const unsigned short> cFREEZE,
826                              rvec                                v[],
827                              const rvec                          f[],
828                              gmx_bool                            bExtended,
829                              real                                veta,
830                              real                                alpha)
831 {
832     int  gf = 0;
833     int  n, d;
834     real g, mv1, mv2;
835
836     if (bExtended)
837     {
838         g   = 0.25 * dt * veta * alpha;
839         mv1 = std::exp(-g);
840         mv2 = gmx::series_sinhx(g);
841     }
842     else
843     {
844         mv1 = 1.0;
845         mv2 = 1.0;
846     }
847     for (n = start; n < nrend; n++)
848     {
849         real w_dt = invmass[n] * dt;
850         if (!cFREEZE.empty())
851         {
852             gf = cFREEZE[n];
853         }
854
855         for (d = 0; d < DIM; d++)
856         {
857             if ((ptype[n] != ParticleType::Shell) && !nFreeze[gf][d])
858             {
859                 v[n][d] = mv1 * (mv1 * v[n][d] + 0.5 * (w_dt * mv2 * f[n][d]));
860             }
861             else
862             {
863                 v[n][d] = 0.0;
864             }
865         }
866     }
867 } /* do_update_vv_vel */
868
869 static void do_update_vv_pos(int                                 start,
870                              int                                 nrend,
871                              real                                dt,
872                              gmx::ArrayRef<const ivec>           nFreeze,
873                              gmx::ArrayRef<const ParticleType>   ptype,
874                              gmx::ArrayRef<const unsigned short> cFREEZE,
875                              const rvec                          x[],
876                              rvec                                xprime[],
877                              const rvec                          v[],
878                              gmx_bool                            bExtended,
879                              real                                veta)
880 {
881     int  gf = 0;
882     int  n, d;
883     real g, mr1, mr2;
884
885     /* Would it make more sense if Parrinello-Rahman was put here? */
886     if (bExtended)
887     {
888         g   = 0.5 * dt * veta;
889         mr1 = std::exp(g);
890         mr2 = gmx::series_sinhx(g);
891     }
892     else
893     {
894         mr1 = 1.0;
895         mr2 = 1.0;
896     }
897
898     for (n = start; n < nrend; n++)
899     {
900
901         if (!cFREEZE.empty())
902         {
903             gf = cFREEZE[n];
904         }
905
906         for (d = 0; d < DIM; d++)
907         {
908             if ((ptype[n] != ParticleType::Shell) && !nFreeze[gf][d])
909             {
910                 xprime[n][d] = mr1 * (mr1 * x[n][d] + mr2 * dt * v[n][d]);
911             }
912             else
913             {
914                 xprime[n][d] = x[n][d];
915             }
916         }
917     }
918 } /* do_update_vv_pos */
919
920 gmx_stochd_t::gmx_stochd_t(const t_inputrec& inputRecord)
921 {
922     const t_grpopts* opts = &inputRecord.opts;
923     const int        ngtc = opts->ngtc;
924
925     if (inputRecord.eI == IntegrationAlgorithm::BD)
926     {
927         bd_rf.resize(ngtc);
928     }
929     else if (EI_SD(inputRecord.eI))
930     {
931         sdc.resize(ngtc);
932         sdsig.resize(ngtc);
933
934         for (int gt = 0; gt < ngtc; gt++)
935         {
936             if (opts->tau_t[gt] > 0)
937             {
938                 sdc[gt].em = std::exp(-inputRecord.delta_t / opts->tau_t[gt]);
939             }
940             else
941             {
942                 /* No friction and noise on this group */
943                 sdc[gt].em = 1;
944             }
945         }
946     }
947     else if (ETC_ANDERSEN(inputRecord.etc))
948     {
949         randomize_group.resize(ngtc);
950         boltzfac.resize(ngtc);
951
952         /* for now, assume that all groups, if randomized, are randomized at the same rate, i.e. tau_t is the same. */
953         /* since constraint groups don't necessarily match up with temperature groups! This is checked in readir.c */
954
955         for (int gt = 0; gt < ngtc; gt++)
956         {
957             real reft = std::max<real>(0, opts->ref_t[gt]);
958             if ((opts->tau_t[gt] > 0)
959                 && (reft > 0)) /* tau_t or ref_t = 0 means that no randomization is done */
960             {
961                 randomize_group[gt] = true;
962                 boltzfac[gt]        = gmx::c_boltz * opts->ref_t[gt];
963             }
964             else
965             {
966                 randomize_group[gt] = false;
967             }
968         }
969     }
970 }
971
972 void Update::Impl::update_temperature_constants(const t_inputrec& inputRecord)
973 {
974     if (inputRecord.eI == IntegrationAlgorithm::BD)
975     {
976         if (inputRecord.bd_fric != 0)
977         {
978             for (int gt = 0; gt < inputRecord.opts.ngtc; gt++)
979             {
980                 sd_.bd_rf[gt] = std::sqrt(2.0 * gmx::c_boltz * inputRecord.opts.ref_t[gt]
981                                           / (inputRecord.bd_fric * inputRecord.delta_t));
982             }
983         }
984         else
985         {
986             for (int gt = 0; gt < inputRecord.opts.ngtc; gt++)
987             {
988                 sd_.bd_rf[gt] = std::sqrt(2.0 * gmx::c_boltz * inputRecord.opts.ref_t[gt]);
989             }
990         }
991     }
992     if (inputRecord.eI == IntegrationAlgorithm::SD1)
993     {
994         for (int gt = 0; gt < inputRecord.opts.ngtc; gt++)
995         {
996             real kT = gmx::c_boltz * inputRecord.opts.ref_t[gt];
997             /* The mass is accounted for later, since this differs per atom */
998             sd_.sdsig[gt].V = std::sqrt(kT * (1 - sd_.sdc[gt].em * sd_.sdc[gt].em));
999         }
1000     }
1001 }
1002
1003 Update::Impl::Impl(const t_inputrec& inputRecord, BoxDeformation* boxDeformation) :
1004     sd_(inputRecord), deform_(boxDeformation)
1005 {
1006     update_temperature_constants(inputRecord);
1007     xp_.resizeWithPadding(0);
1008 }
1009
1010 void Update::updateAfterPartition(int                                 numAtoms,
1011                                   gmx::ArrayRef<const unsigned short> cFREEZE,
1012                                   gmx::ArrayRef<const unsigned short> cTC)
1013 {
1014
1015     impl_->xp()->resizeWithPadding(numAtoms);
1016     impl_->cFREEZE_ = cFREEZE;
1017     impl_->cTC_     = cTC;
1018 }
1019
1020 /*! \brief Sets the SD update type */
1021 enum class SDUpdate : int
1022 {
1023     ForcesOnly,
1024     FrictionAndNoiseOnly,
1025     Combined
1026 };
1027
1028 /*! \brief SD integrator update
1029  *
1030  * Two phases are required in the general case of a constrained
1031  * update, the first phase from the contribution of forces, before
1032  * applying constraints, and then a second phase applying the friction
1033  * and noise, and then further constraining. For details, see
1034  * Goga2012.
1035  *
1036  * Without constraints, the two phases can be combined, for
1037  * efficiency.
1038  *
1039  * Thus three instantiations of this templated function will be made,
1040  * two with only one contribution, and one with both contributions. */
1041 template<SDUpdate updateType>
1042 static void doSDUpdateGeneral(const gmx_stochd_t&                 sd,
1043                               int                                 start,
1044                               int                                 nrend,
1045                               real                                dt,
1046                               gmx::ArrayRef<const ivec>           nFreeze,
1047                               gmx::ArrayRef<const real>           invmass,
1048                               gmx::ArrayRef<const ParticleType>   ptype,
1049                               gmx::ArrayRef<const unsigned short> cFREEZE,
1050                               gmx::ArrayRef<const unsigned short> cTC,
1051                               const rvec                          x[],
1052                               rvec                                xprime[],
1053                               rvec                                v[],
1054                               const rvec                          f[],
1055                               int64_t                             step,
1056                               int                                 seed,
1057                               const int*                          gatindex)
1058 {
1059     // cTC and cFREEZE can be nullptr any time, but various
1060     // instantiations do not make sense with particular pointer
1061     // values.
1062     if (updateType == SDUpdate::ForcesOnly)
1063     {
1064         GMX_ASSERT(f != nullptr, "SD update with only forces requires forces");
1065         GMX_ASSERT(cTC.empty(), "SD update with only forces cannot handle temperature groups");
1066     }
1067     if (updateType == SDUpdate::FrictionAndNoiseOnly)
1068     {
1069         GMX_ASSERT(f == nullptr, "SD update with only noise cannot handle forces");
1070     }
1071     if (updateType == SDUpdate::Combined)
1072     {
1073         GMX_ASSERT(f != nullptr, "SD update with forces and noise requires forces");
1074     }
1075
1076     // Even 0 bits internal counter gives 2x64 ints (more than enough for three table lookups)
1077     gmx::ThreeFry2x64<0>                       rng(seed, gmx::RandomDomain::UpdateCoordinates);
1078     gmx::TabulatedNormalDistribution<real, 14> dist;
1079
1080     for (int n = start; n < nrend; n++)
1081     {
1082         int globalAtomIndex = gatindex ? gatindex[n] : n;
1083         rng.restart(step, globalAtomIndex);
1084         dist.reset();
1085
1086         real inverseMass = invmass[n];
1087         real invsqrtMass = std::sqrt(inverseMass);
1088
1089         int freezeGroup      = !cFREEZE.empty() ? cFREEZE[n] : 0;
1090         int temperatureGroup = !cTC.empty() ? cTC[n] : 0;
1091
1092         for (int d = 0; d < DIM; d++)
1093         {
1094             if ((ptype[n] != ParticleType::Shell) && !nFreeze[freezeGroup][d])
1095             {
1096                 if (updateType == SDUpdate::ForcesOnly)
1097                 {
1098                     real vn = v[n][d] + inverseMass * f[n][d] * dt;
1099                     v[n][d] = vn;
1100                     // Simple position update.
1101                     xprime[n][d] = x[n][d] + v[n][d] * dt;
1102                 }
1103                 else if (updateType == SDUpdate::FrictionAndNoiseOnly)
1104                 {
1105                     real vn = v[n][d];
1106                     v[n][d] = (vn * sd.sdc[temperatureGroup].em
1107                                + invsqrtMass * sd.sdsig[temperatureGroup].V * dist(rng));
1108                     // The previous phase already updated the
1109                     // positions with a full v*dt term that must
1110                     // now be half removed.
1111                     xprime[n][d] = xprime[n][d] + 0.5 * (v[n][d] - vn) * dt;
1112                 }
1113                 else
1114                 {
1115                     real vn = v[n][d] + inverseMass * f[n][d] * dt;
1116                     v[n][d] = (vn * sd.sdc[temperatureGroup].em
1117                                + invsqrtMass * sd.sdsig[temperatureGroup].V * dist(rng));
1118                     // Here we include half of the friction+noise
1119                     // update of v into the position update.
1120                     xprime[n][d] = x[n][d] + 0.5 * (vn + v[n][d]) * dt;
1121                 }
1122             }
1123             else
1124             {
1125                 // When using constraints, the update is split into
1126                 // two phases, but we only need to zero the update of
1127                 // virtual, shell or frozen particles in at most one
1128                 // of the phases.
1129                 if (updateType != SDUpdate::FrictionAndNoiseOnly)
1130                 {
1131                     v[n][d]      = 0.0;
1132                     xprime[n][d] = x[n][d];
1133                 }
1134             }
1135         }
1136     }
1137 }
1138
1139 static void do_update_sd(int                                 start,
1140                          int                                 nrend,
1141                          real                                dt,
1142                          int64_t                             step,
1143                          const rvec* gmx_restrict            x,
1144                          rvec* gmx_restrict                  xprime,
1145                          rvec* gmx_restrict                  v,
1146                          const rvec* gmx_restrict            f,
1147                          gmx::ArrayRef<const ivec>           nFreeze,
1148                          gmx::ArrayRef<const real>           invmass,
1149                          gmx::ArrayRef<const ParticleType>   ptype,
1150                          gmx::ArrayRef<const unsigned short> cFREEZE,
1151                          gmx::ArrayRef<const unsigned short> cTC,
1152                          int                                 seed,
1153                          const t_commrec*                    cr,
1154                          const gmx_stochd_t&                 sd,
1155                          bool                                haveConstraints)
1156 {
1157     if (haveConstraints)
1158     {
1159         // With constraints, the SD update is done in 2 parts
1160         doSDUpdateGeneral<SDUpdate::ForcesOnly>(sd,
1161                                                 start,
1162                                                 nrend,
1163                                                 dt,
1164                                                 nFreeze,
1165                                                 invmass,
1166                                                 ptype,
1167                                                 cFREEZE,
1168                                                 gmx::ArrayRef<const unsigned short>(),
1169                                                 x,
1170                                                 xprime,
1171                                                 v,
1172                                                 f,
1173                                                 step,
1174                                                 seed,
1175                                                 nullptr);
1176     }
1177     else
1178     {
1179         doSDUpdateGeneral<SDUpdate::Combined>(
1180                 sd,
1181                 start,
1182                 nrend,
1183                 dt,
1184                 nFreeze,
1185                 invmass,
1186                 ptype,
1187                 cFREEZE,
1188                 cTC,
1189                 x,
1190                 xprime,
1191                 v,
1192                 f,
1193                 step,
1194                 seed,
1195                 haveDDAtomOrdering(*cr) ? cr->dd->globalAtomIndices.data() : nullptr);
1196     }
1197 }
1198
1199 static void do_update_bd(int                                 start,
1200                          int                                 nrend,
1201                          real                                dt,
1202                          int64_t                             step,
1203                          const rvec* gmx_restrict            x,
1204                          rvec* gmx_restrict                  xprime,
1205                          rvec* gmx_restrict                  v,
1206                          const rvec* gmx_restrict            f,
1207                          gmx::ArrayRef<const ivec>           nFreeze,
1208                          gmx::ArrayRef<const real>           invmass,
1209                          gmx::ArrayRef<const ParticleType>   ptype,
1210                          gmx::ArrayRef<const unsigned short> cFREEZE,
1211                          gmx::ArrayRef<const unsigned short> cTC,
1212                          real                                friction_coefficient,
1213                          const real*                         rf,
1214                          int                                 seed,
1215                          const int*                          gatindex)
1216 {
1217     /* note -- these appear to be full step velocities . . .  */
1218     int  gf = 0, gt = 0;
1219     real vn;
1220     real invfr = 0;
1221     int  n, d;
1222     // Use 1 bit of internal counters to give us 2*2 64-bits values per stream
1223     // Each 64-bit value is enough for 4 normal distribution table numbers.
1224     gmx::ThreeFry2x64<0>                       rng(seed, gmx::RandomDomain::UpdateCoordinates);
1225     gmx::TabulatedNormalDistribution<real, 14> dist;
1226
1227     if (friction_coefficient != 0)
1228     {
1229         invfr = 1.0 / friction_coefficient;
1230     }
1231
1232     for (n = start; (n < nrend); n++)
1233     {
1234         int ng = gatindex ? gatindex[n] : n;
1235
1236         rng.restart(step, ng);
1237         dist.reset();
1238
1239         if (!cFREEZE.empty())
1240         {
1241             gf = cFREEZE[n];
1242         }
1243         if (!cTC.empty())
1244         {
1245             gt = cTC[n];
1246         }
1247         for (d = 0; (d < DIM); d++)
1248         {
1249             if ((ptype[n] != ParticleType::Shell) && !nFreeze[gf][d])
1250             {
1251                 if (friction_coefficient != 0)
1252                 {
1253                     vn = invfr * f[n][d] + rf[gt] * dist(rng);
1254                 }
1255                 else
1256                 {
1257                     /* NOTE: invmass = 2/(mass*friction_constant*dt) */
1258                     vn = 0.5 * invmass[n] * f[n][d] * dt
1259                          + std::sqrt(0.5 * invmass[n]) * rf[gt] * dist(rng);
1260                 }
1261
1262                 v[n][d]      = vn;
1263                 xprime[n][d] = x[n][d] + vn * dt;
1264             }
1265             else
1266             {
1267                 v[n][d]      = 0.0;
1268                 xprime[n][d] = x[n][d];
1269             }
1270         }
1271     }
1272 }
1273
1274 extern void init_ekinstate(ekinstate_t* ekinstate, const t_inputrec* ir)
1275 {
1276     ekinstate->ekin_n = ir->opts.ngtc;
1277     snew(ekinstate->ekinh, ekinstate->ekin_n);
1278     snew(ekinstate->ekinf, ekinstate->ekin_n);
1279     snew(ekinstate->ekinh_old, ekinstate->ekin_n);
1280     ekinstate->ekinscalef_nhc.resize(ekinstate->ekin_n);
1281     ekinstate->ekinscaleh_nhc.resize(ekinstate->ekin_n);
1282     ekinstate->vscale_nhc.resize(ekinstate->ekin_n);
1283     ekinstate->dekindl          = 0;
1284     ekinstate->mvcos            = 0;
1285     ekinstate->hasReadEkinState = false;
1286 }
1287
1288 void update_ekinstate(ekinstate_t* ekinstate, const gmx_ekindata_t* ekind)
1289 {
1290     int i;
1291
1292     for (i = 0; i < ekinstate->ekin_n; i++)
1293     {
1294         copy_mat(ekind->tcstat[i].ekinh, ekinstate->ekinh[i]);
1295         copy_mat(ekind->tcstat[i].ekinf, ekinstate->ekinf[i]);
1296         copy_mat(ekind->tcstat[i].ekinh_old, ekinstate->ekinh_old[i]);
1297         ekinstate->ekinscalef_nhc[i] = ekind->tcstat[i].ekinscalef_nhc;
1298         ekinstate->ekinscaleh_nhc[i] = ekind->tcstat[i].ekinscaleh_nhc;
1299         ekinstate->vscale_nhc[i]     = ekind->tcstat[i].vscale_nhc;
1300     }
1301
1302     copy_mat(ekind->ekin, ekinstate->ekin_total);
1303     ekinstate->dekindl = ekind->dekindl;
1304     ekinstate->mvcos   = ekind->cosacc.mvcos;
1305 }
1306
1307 void restore_ekinstate_from_state(const t_commrec* cr, gmx_ekindata_t* ekind, const ekinstate_t* ekinstate)
1308 {
1309     int i, n;
1310
1311     if (MASTER(cr))
1312     {
1313         for (i = 0; i < ekinstate->ekin_n; i++)
1314         {
1315             copy_mat(ekinstate->ekinh[i], ekind->tcstat[i].ekinh);
1316             copy_mat(ekinstate->ekinf[i], ekind->tcstat[i].ekinf);
1317             copy_mat(ekinstate->ekinh_old[i], ekind->tcstat[i].ekinh_old);
1318             ekind->tcstat[i].ekinscalef_nhc = ekinstate->ekinscalef_nhc[i];
1319             ekind->tcstat[i].ekinscaleh_nhc = ekinstate->ekinscaleh_nhc[i];
1320             ekind->tcstat[i].vscale_nhc     = ekinstate->vscale_nhc[i];
1321         }
1322
1323         copy_mat(ekinstate->ekin_total, ekind->ekin);
1324
1325         ekind->dekindl      = ekinstate->dekindl;
1326         ekind->cosacc.mvcos = ekinstate->mvcos;
1327         n                   = ekinstate->ekin_n;
1328     }
1329
1330     if (PAR(cr))
1331     {
1332         gmx_bcast(sizeof(n), &n, cr->mpi_comm_mygroup);
1333         for (i = 0; i < n; i++)
1334         {
1335             gmx_bcast(DIM * DIM * sizeof(ekind->tcstat[i].ekinh[0][0]),
1336                       ekind->tcstat[i].ekinh[0],
1337                       cr->mpi_comm_mygroup);
1338             gmx_bcast(DIM * DIM * sizeof(ekind->tcstat[i].ekinf[0][0]),
1339                       ekind->tcstat[i].ekinf[0],
1340                       cr->mpi_comm_mygroup);
1341             gmx_bcast(DIM * DIM * sizeof(ekind->tcstat[i].ekinh_old[0][0]),
1342                       ekind->tcstat[i].ekinh_old[0],
1343                       cr->mpi_comm_mygroup);
1344
1345             gmx_bcast(sizeof(ekind->tcstat[i].ekinscalef_nhc),
1346                       &(ekind->tcstat[i].ekinscalef_nhc),
1347                       cr->mpi_comm_mygroup);
1348             gmx_bcast(sizeof(ekind->tcstat[i].ekinscaleh_nhc),
1349                       &(ekind->tcstat[i].ekinscaleh_nhc),
1350                       cr->mpi_comm_mygroup);
1351             gmx_bcast(sizeof(ekind->tcstat[i].vscale_nhc), &(ekind->tcstat[i].vscale_nhc), cr->mpi_comm_mygroup);
1352         }
1353         gmx_bcast(DIM * DIM * sizeof(ekind->ekin[0][0]), ekind->ekin[0], cr->mpi_comm_mygroup);
1354
1355         gmx_bcast(sizeof(ekind->dekindl), &ekind->dekindl, cr->mpi_comm_mygroup);
1356         gmx_bcast(sizeof(ekind->cosacc.mvcos), &ekind->cosacc.mvcos, cr->mpi_comm_mygroup);
1357     }
1358 }
1359
1360 void getThreadAtomRange(int numThreads, int threadIndex, int numAtoms, int* startAtom, int* endAtom)
1361 {
1362 #if GMX_HAVE_SIMD_UPDATE
1363     constexpr int blockSize = GMX_SIMD_REAL_WIDTH;
1364 #else
1365     constexpr int blockSize = 1;
1366 #endif
1367     int numBlocks = (numAtoms + blockSize - 1) / blockSize;
1368
1369     *startAtom = ((numBlocks * threadIndex) / numThreads) * blockSize;
1370     *endAtom   = ((numBlocks * (threadIndex + 1)) / numThreads) * blockSize;
1371     if (threadIndex == numThreads - 1)
1372     {
1373         *endAtom = numAtoms;
1374     }
1375 }
1376
1377 void Update::Impl::update_sd_second_half(const t_inputrec&                 inputRecord,
1378                                          int64_t                           step,
1379                                          real*                             dvdlambda,
1380                                          int                               homenr,
1381                                          gmx::ArrayRef<const ParticleType> ptype,
1382                                          gmx::ArrayRef<const real>         invMass,
1383                                          t_state*                          state,
1384                                          const t_commrec*                  cr,
1385                                          t_nrnb*                           nrnb,
1386                                          gmx_wallcycle*                    wcycle,
1387                                          gmx::Constraints*                 constr,
1388                                          bool                              do_log,
1389                                          bool                              do_ene)
1390 {
1391     if (!constr)
1392     {
1393         return;
1394     }
1395     if (inputRecord.eI == IntegrationAlgorithm::SD1)
1396     {
1397
1398         /* Cast delta_t from double to real to make the integrators faster.
1399          * The only reason for having delta_t double is to get accurate values
1400          * for t=delta_t*step when step is larger than float precision.
1401          * For integration dt the accuracy of real suffices, since with
1402          * integral += dt*integrand the increment is nearly always (much) smaller
1403          * than the integral (and the integrand has real precision).
1404          */
1405         real dt = inputRecord.delta_t;
1406
1407         wallcycle_start(wcycle, WallCycleCounter::Update);
1408
1409         int nth = gmx_omp_nthreads_get(ModuleMultiThread::Update);
1410
1411 #pragma omp parallel for num_threads(nth) schedule(static)
1412         for (int th = 0; th < nth; th++)
1413         {
1414             try
1415             {
1416                 int start_th, end_th;
1417                 getThreadAtomRange(nth, th, homenr, &start_th, &end_th);
1418
1419                 doSDUpdateGeneral<SDUpdate::FrictionAndNoiseOnly>(
1420                         sd_,
1421                         start_th,
1422                         end_th,
1423                         dt,
1424                         gmx::arrayRefFromArray(inputRecord.opts.nFreeze, inputRecord.opts.ngfrz),
1425                         invMass,
1426                         ptype,
1427                         cFREEZE_,
1428                         cTC_,
1429                         state->x.rvec_array(),
1430                         xp_.rvec_array(),
1431                         state->v.rvec_array(),
1432                         nullptr,
1433                         step,
1434                         inputRecord.ld_seed,
1435                         haveDDAtomOrdering(*cr) ? cr->dd->globalAtomIndices.data() : nullptr);
1436             }
1437             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1438         }
1439         inc_nrnb(nrnb, eNR_UPDATE, homenr);
1440         wallcycle_stop(wcycle, WallCycleCounter::Update);
1441
1442         /* Constrain the coordinates upd->xp for half a time step */
1443         bool computeVirial = false;
1444         constr->apply(do_log,
1445                       do_ene,
1446                       step,
1447                       1,
1448                       0.5,
1449                       state->x.arrayRefWithPadding(),
1450                       xp_.arrayRefWithPadding(),
1451                       ArrayRef<RVec>(),
1452                       state->box,
1453                       state->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Bonded)],
1454                       dvdlambda,
1455                       state->v.arrayRefWithPadding(),
1456                       computeVirial,
1457                       nullptr,
1458                       ConstraintVariable::Positions);
1459     }
1460 }
1461
1462 void Update::Impl::finish_update(const t_inputrec&                   inputRecord,
1463                                  const bool                          havePartiallyFrozenAtoms,
1464                                  const int                           homenr,
1465                                  gmx::ArrayRef<const unsigned short> cFREEZE,
1466                                  t_state*                            state,
1467                                  gmx_wallcycle*                      wcycle,
1468                                  const bool                          haveConstraints)
1469 {
1470     /* NOTE: Currently we always integrate to a temporary buffer and
1471      * then copy the results back here.
1472      */
1473
1474     wallcycle_start_nocount(wcycle, WallCycleCounter::Update);
1475
1476     auto xp = makeConstArrayRef(xp_).subArray(0, homenr);
1477     auto x  = makeArrayRef(state->x).subArray(0, homenr);
1478
1479     if (havePartiallyFrozenAtoms && haveConstraints)
1480     {
1481         /* We have atoms that are frozen along some, but not all dimensions,
1482          * then constraints will have moved them also along the frozen dimensions.
1483          * To freeze such degrees of freedom we do not copy them back here.
1484          */
1485         const ivec* nFreeze = inputRecord.opts.nFreeze;
1486
1487         for (int i = 0; i < homenr; i++)
1488         {
1489             const int g = cFREEZE[i];
1490
1491             for (int d = 0; d < DIM; d++)
1492             {
1493                 if (nFreeze[g][d] == 0)
1494                 {
1495                     x[i][d] = xp[i][d];
1496                 }
1497             }
1498         }
1499     }
1500     else
1501     {
1502         /* We have no frozen atoms or fully frozen atoms which have not
1503          * been moved by the update, so we can simply copy all coordinates.
1504          */
1505         int gmx_unused nth = gmx_omp_nthreads_get(ModuleMultiThread::Update);
1506 #pragma omp parallel for num_threads(nth) schedule(static)
1507         for (int i = 0; i < homenr; i++)
1508         {
1509             // Trivial statement, does not throw
1510             x[i] = xp[i];
1511         }
1512     }
1513
1514     wallcycle_stop(wcycle, WallCycleCounter::Update);
1515 }
1516
1517 void Update::Impl::update_coords(const t_inputrec&                 inputRecord,
1518                                  int64_t                           step,
1519                                  int                               homenr,
1520                                  bool                              havePartiallyFrozenAtoms,
1521                                  gmx::ArrayRef<const ParticleType> ptype,
1522                                  gmx::ArrayRef<const real>         invMass,
1523                                  gmx::ArrayRef<const rvec>         invMassPerDim,
1524                                  t_state*                          state,
1525                                  const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
1526                                  t_fcdata*                                        fcdata,
1527                                  const gmx_ekindata_t*                            ekind,
1528                                  const matrix                                     M,
1529                                  int                                              updatePart,
1530                                  const t_commrec*                                 cr,
1531                                  const bool                                       haveConstraints)
1532 {
1533     /* Running the velocity half does nothing except for velocity verlet */
1534     if ((updatePart == etrtVELOCITY1 || updatePart == etrtVELOCITY2) && !EI_VV(inputRecord.eI))
1535     {
1536         gmx_incons("update_coords called for velocity without VV integrator");
1537     }
1538
1539     /* Cast to real for faster code, no loss in precision (see comment above) */
1540     real dt = inputRecord.delta_t;
1541
1542     /* We need to update the NMR restraint history when time averaging is used */
1543     if (state->flags & enumValueToBitMask(StateEntry::DisreRm3Tav))
1544     {
1545         update_disres_history(*fcdata->disres, &state->hist);
1546     }
1547     if (state->flags & enumValueToBitMask(StateEntry::OrireDtav))
1548     {
1549         GMX_ASSERT(fcdata, "Need valid fcdata");
1550         fcdata->orires->updateHistory();
1551     }
1552
1553     /* ############# START The update of velocities and positions ######### */
1554     int nth = gmx_omp_nthreads_get(ModuleMultiThread::Update);
1555
1556 #pragma omp parallel for num_threads(nth) schedule(static)
1557     for (int th = 0; th < nth; th++)
1558     {
1559         try
1560         {
1561             int start_th, end_th;
1562             getThreadAtomRange(nth, th, homenr, &start_th, &end_th);
1563
1564             const rvec* x_rvec  = state->x.rvec_array();
1565             rvec*       xp_rvec = xp_.rvec_array();
1566             rvec*       v_rvec  = state->v.rvec_array();
1567             const rvec* f_rvec  = as_rvec_array(f.unpaddedConstArrayRef().data());
1568
1569             switch (inputRecord.eI)
1570             {
1571                 case (IntegrationAlgorithm::MD):
1572                     do_update_md(start_th,
1573                                  end_th,
1574                                  dt,
1575                                  step,
1576                                  x_rvec,
1577                                  xp_rvec,
1578                                  v_rvec,
1579                                  f_rvec,
1580                                  inputRecord.etc,
1581                                  inputRecord.epc,
1582                                  inputRecord.nsttcouple,
1583                                  inputRecord.nstpcouple,
1584                                  cTC_,
1585                                  invMass,
1586                                  invMassPerDim,
1587                                  ekind,
1588                                  state->box,
1589                                  state->nosehoover_vxi.data(),
1590                                  M,
1591                                  havePartiallyFrozenAtoms);
1592                     break;
1593                 case (IntegrationAlgorithm::SD1):
1594                     do_update_sd(start_th,
1595                                  end_th,
1596                                  dt,
1597                                  step,
1598                                  x_rvec,
1599                                  xp_rvec,
1600                                  v_rvec,
1601                                  f_rvec,
1602                                  gmx::arrayRefFromArray(inputRecord.opts.nFreeze, inputRecord.opts.ngfrz),
1603                                  invMass,
1604                                  ptype,
1605                                  cFREEZE_,
1606                                  cTC_,
1607                                  inputRecord.ld_seed,
1608                                  cr,
1609                                  sd_,
1610                                  haveConstraints);
1611                     break;
1612                 case (IntegrationAlgorithm::BD):
1613                     do_update_bd(start_th,
1614                                  end_th,
1615                                  dt,
1616                                  step,
1617                                  x_rvec,
1618                                  xp_rvec,
1619                                  v_rvec,
1620                                  f_rvec,
1621                                  gmx::arrayRefFromArray(inputRecord.opts.nFreeze, inputRecord.opts.ngfrz),
1622                                  invMass,
1623                                  ptype,
1624                                  cFREEZE_,
1625                                  cTC_,
1626                                  inputRecord.bd_fric,
1627                                  sd_.bd_rf.data(),
1628                                  inputRecord.ld_seed,
1629                                  haveDDAtomOrdering(*cr) ? cr->dd->globalAtomIndices.data() : nullptr);
1630                     break;
1631                 case (IntegrationAlgorithm::VV):
1632                 case (IntegrationAlgorithm::VVAK):
1633                 {
1634                     gmx_bool bExtended = (inputRecord.etc == TemperatureCoupling::NoseHoover
1635                                           || inputRecord.epc == PressureCoupling::ParrinelloRahman
1636                                           || inputRecord.epc == PressureCoupling::Mttk);
1637
1638                     /* assuming barostat coupled to group 0 */
1639                     real alpha = 1.0 + DIM / static_cast<real>(inputRecord.opts.nrdf[0]);
1640                     switch (updatePart)
1641                     {
1642                         case etrtVELOCITY1:
1643                         case etrtVELOCITY2:
1644                             do_update_vv_vel(start_th,
1645                                              end_th,
1646                                              dt,
1647                                              gmx::arrayRefFromArray(inputRecord.opts.nFreeze,
1648                                                                     inputRecord.opts.ngfrz),
1649                                              invMass,
1650                                              ptype,
1651                                              cFREEZE_,
1652                                              v_rvec,
1653                                              f_rvec,
1654                                              bExtended,
1655                                              state->veta,
1656                                              alpha);
1657                             break;
1658                         case etrtPOSITION:
1659                             do_update_vv_pos(start_th,
1660                                              end_th,
1661                                              dt,
1662                                              gmx::arrayRefFromArray(inputRecord.opts.nFreeze,
1663                                                                     inputRecord.opts.ngfrz),
1664                                              ptype,
1665                                              cFREEZE_,
1666                                              x_rvec,
1667                                              xp_rvec,
1668                                              v_rvec,
1669                                              bExtended,
1670                                              state->veta);
1671                             break;
1672                     }
1673                     break;
1674                 }
1675                 default: gmx_fatal(FARGS, "Don't know how to update coordinates");
1676             }
1677         }
1678         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1679     }
1680 }
1681
1682 void Update::Impl::update_for_constraint_virial(const t_inputrec&         inputRecord,
1683                                                 int                       homenr,
1684                                                 bool                      havePartiallyFrozenAtoms,
1685                                                 gmx::ArrayRef<const real> invmass,
1686                                                 gmx::ArrayRef<const rvec> invMassPerDim,
1687                                                 const t_state&            state,
1688                                                 const gmx::ArrayRefWithPadding<const gmx::RVec>& f,
1689                                                 const gmx_ekindata_t& ekind)
1690 {
1691     GMX_ASSERT(inputRecord.eI == IntegrationAlgorithm::MD || inputRecord.eI == IntegrationAlgorithm::SD1,
1692                "Only leap-frog is supported here");
1693
1694     // Cast to real for faster code, no loss in precision
1695     const real dt = inputRecord.delta_t;
1696
1697     const int nth = gmx_omp_nthreads_get(ModuleMultiThread::Update);
1698
1699 #pragma omp parallel for num_threads(nth) schedule(static)
1700     for (int th = 0; th < nth; th++)
1701     {
1702         try
1703         {
1704             int start_th, end_th;
1705             getThreadAtomRange(nth, th, homenr, &start_th, &end_th);
1706
1707             const rvec* x_rvec  = state.x.rvec_array();
1708             rvec*       xp_rvec = xp_.rvec_array();
1709             const rvec* v_rvec  = state.v.rvec_array();
1710             const rvec* f_rvec  = as_rvec_array(f.unpaddedConstArrayRef().data());
1711
1712             doUpdateMDDoNotUpdateVelocities(
1713                     start_th, end_th, dt, x_rvec, xp_rvec, v_rvec, f_rvec, havePartiallyFrozenAtoms, invmass, invMassPerDim, ekind);
1714         }
1715         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1716     }
1717 }