Add second LINCS atom update task

[alexxy/gromacs.git] / src / gromacs / mdlib / lincs.cpp
diff --git a/src/gromacs/mdlib/lincs.cpp b/src/gromacs/mdlib/lincs.cpp

index 81fce323554e08b01c229393373f636c7a131af9..d7440620b142188fde444e35d7625d9fe0e4240c 100644 (file)
--- a/src/gromacs/mdlib/lincs.cpp
+++ b/src/gromacs/mdlib/lincs.cpp
@@ -3,7 +3,8 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -52,6 +53,7 @@
  #include <cstdlib>
  
  #include <algorithm>
+#include <optional>
  #include <vector>
  
  #include "gromacs/domdec/domdec.h"
@@ -67,13 +69,12 @@
  #include "gromacs/mdtypes/commrec.h"
  #include "gromacs/mdtypes/inputrec.h"
  #include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/mdtypes/observablesreducer.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/pbcutil/pbc_simd.h"
  #include "gromacs/simd/simd.h"
  #include "gromacs/simd/simd_math.h"
  #include "gromacs/simd/vector_operations.h"
-#include "gromacs/topology/block.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/utility/alignedallocator.h"
  #include "gromacs/utility/arrayref.h"
@@ -83,10 +84,9 @@
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxomp.h"
+#include "gromacs/utility/listoflists.h"
  #include "gromacs/utility/pleasecite.h"
  
-using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
-
  namespace gmx
  {
  
@@ -115,10 +115,12 @@ struct Task
      std::vector<int> triangle;
      //! The bits tell if the matrix element should be used.
      std::vector<int> tri_bits;
-    //! Constraint index for updating atom data.
-    std::vector<int> ind;
-    //! Constraint index for updating atom data.
-    std::vector<int> ind_r;
+    //! Constraint indices for updating atom data.
+    std::vector<int> updateConstraintIndices1;
+    //! Constraint indices for updating atom data, second group.
+    std::vector<int> updateConstraintIndices2;
+    //! Temporay constraint indices for setting up updating of atom data.
+    std::vector<int> updateConstraintIndicesRest;
      //! Temporary variable for virial calculation.
      tensor vir_r_m_dr = { { 0 } };
      //! Temporary variable for lambda derivative.
@@ -198,6 +200,8 @@ public:
      bool bTaskDep = false;
      //! Are there triangle constraints that cross task borders?
      bool bTaskDepTri = false;
+    //! Whether any task has constraints in the second update list.
+    bool haveSecondUpdateTask = false;
      //! Arrays for temporary storage in the LINCS algorithm.
      /*! @{ */
      PaddedVector<gmx::RVec>                   tmpv;
@@ -209,8 +213,24 @@ public:
      /*! @} */
      //! The Lagrange multipliers times -1.
      std::vector<real, AlignedAllocator<real>> mlambda;
-    //! Storage for the constraint RMS relative deviation output.
-    std::array<real, 2> rmsdData = { { 0 } };
+    /*! \brief Callback used after constraining to require reduction
+     * of values later used to compute the constraint RMS relative
+     * deviation, so the latter can be output. */
+    std::optional<ObservablesReducerBuilder::CallbackToRequireReduction> callbackToRequireReduction;
+    /*! \brief View used for reducing the components of the global
+     * relative RMS constraint deviation.
+     *
+     * Can be written any time, but that is only useful when followed
+     * by a call of the callbackToRequireReduction. Useful to read
+     * only from the callback that the ObservablesReducer will later
+     * make after reduction. */
+    ArrayRef<double> rmsdReductionBuffer;
+    /*! \brief The value of the constraint RMS deviation after it has
+     * been computed.
+     *
+     * When DD is active, filled by the ObservablesReducer, otherwise
+     * filled directly here. */
+    std::optional<double> constraintRmsDeviation;
  };
  
  /*! \brief Define simd_width for memory allocation used for SIMD code */
@@ -220,16 +240,11 @@ static const int simd_width = GMX_SIMD_REAL_WIDTH;
  static const int simd_width = 1;
  #endif
  
-ArrayRef<real> lincs_rmsdData(Lincs* lincsd)
-{
-    return lincsd->rmsdData;
-}
-
  real lincs_rmsd(const Lincs* lincsd)
  {
-    if (lincsd->rmsdData[0] > 0)
+    if (lincsd->constraintRmsDeviation.has_value())
      {
-        return std::sqrt(lincsd->rmsdData[1] / lincsd->rmsdData[0]);
+        return real(lincsd->constraintRmsDeviation.value());
      }
      else
      {
@@ -349,10 +364,10 @@ static void lincs_update_atoms_noind(int                            ncons,
                                       real                           preFactor,
                                       gmx::ArrayRef<const real>      fac,
                                       gmx::ArrayRef<const gmx::RVec> r,
-                                     const real*                    invmass,
+                                     gmx::ArrayRef<const real>      invmass,
                                       rvec*                          x)
  {
-    if (invmass != nullptr)
+    if (!invmass.empty())
      {
          for (int b = 0; b < ncons; b++)
          {
@@ -398,10 +413,10 @@ static void lincs_update_atoms_ind(gmx::ArrayRef<const int>       ind,
                                     real                           preFactor,
                                     gmx::ArrayRef<const real>      fac,
                                     gmx::ArrayRef<const gmx::RVec> r,
-                                   const real*                    invmass,
+                                   gmx::ArrayRef<const real>      invmass,
                                     rvec*                          x)
  {
-    if (invmass != nullptr)
+    if (!invmass.empty())
      {
          for (int b : ind)
          {
@@ -447,7 +462,7 @@ static void lincs_update_atoms(Lincs*                         li,
                                 real                           preFactor,
                                 gmx::ArrayRef<const real>      fac,
                                 gmx::ArrayRef<const gmx::RVec> r,
-                               const real*                    invmass,
+                               gmx::ArrayRef<const real>      invmass,
                                 rvec*                          x)
  {
      if (li->ntask == 1)
@@ -461,9 +476,18 @@ static void lincs_update_atoms(Lincs*                         li,
           * constraints that only access our local atom range.
           * This can be done without a barrier.
           */
-        lincs_update_atoms_ind(li->task[th].ind, li->atoms, preFactor, fac, r, invmass, x);
+        lincs_update_atoms_ind(
+                li->task[th].updateConstraintIndices1, li->atoms, preFactor, fac, r, invmass, x);
  
-        if (!li->task[li->ntask].ind.empty())
+        if (li->haveSecondUpdateTask)
+        {
+            /* Second round of update, we need a barrier for cross-task access of x */
+#pragma omp barrier
+            lincs_update_atoms_ind(
+                    li->task[th].updateConstraintIndices2, li->atoms, preFactor, fac, r, invmass, x);
+        }
+
+        if (!li->task[li->ntask].updateConstraintIndices1.empty())
          {
              /* Update the constraints that operate on atoms
               * in multiple thread atom blocks on the master thread.
@@ -471,7 +495,8 @@ static void lincs_update_atoms(Lincs*                         li,
  #pragma omp barrier
  #pragma omp master
              {
-                lincs_update_atoms_ind(li->task[li->ntask].ind, li->atoms, preFactor, fac, r, invmass, x);
+                lincs_update_atoms_ind(
+                        li->task[li->ntask].updateConstraintIndices1, li->atoms, preFactor, fac, r, invmass, x);
              }
          }
      }
@@ -502,13 +527,13 @@ static inline void gmx_simdcall gatherLoadUTransposeTSANSafe(const real*
  static void gmx_simdcall calc_dr_x_f_simd(int                           b0,
                                            int                           b1,
                                            gmx::ArrayRef<const AtomPair> atoms,
-                                          const rvec* gmx_restrict x,
-                                          const rvec* gmx_restrict f,
-                                          const real* gmx_restrict blc,
-                                          const real*              pbc_simd,
-                                          rvec* gmx_restrict r,
-                                          real* gmx_restrict rhs,
-                                          real* gmx_restrict sol)
+                                          const rvec* gmx_restrict      x,
+                                          const rvec* gmx_restrict      f,
+                                          const real* gmx_restrict      blc,
+                                          const real*                   pbc_simd,
+                                          rvec* gmx_restrict            r,
+                                          real* gmx_restrict            rhs,
+                                          real* gmx_restrict            sol)
  {
      assert(b0 % GMX_SIMD_REAL_WIDTH == 0);
  
@@ -568,17 +593,17 @@ static void gmx_simdcall calc_dr_x_f_simd(int                           b0,
  #endif // GMX_SIMD_HAVE_REAL
  
  /*! \brief LINCS projection, works on derivatives of the coordinates. */
-static void do_lincsp(const rvec*        x,
-                      rvec*              f,
-                      rvec*              fp,
-                      t_pbc*             pbc,
-                      Lincs*             lincsd,
-                      int                th,
-                      real*              invmass,
-                      ConstraintVariable econq,
-                      bool               bCalcDHDL,
-                      bool               bCalcVir,
-                      tensor             rmdf)
+static void do_lincsp(ArrayRefWithPadding<const RVec> xPadded,
+                      ArrayRefWithPadding<RVec>       fPadded,
+                      ArrayRef<RVec>                  fp,
+                      t_pbc*                          pbc,
+                      Lincs*                          lincsd,
+                      int                             th,
+                      ArrayRef<const real>            invmass,
+                      ConstraintVariable              econq,
+                      bool                            bCalcDHDL,
+                      bool                            bCalcVir,
+                      tensor                          rmdf)
  {
      const int b0 = lincsd->task[th].b0;
      const int b1 = lincsd->task[th].b1;
@@ -607,6 +632,9 @@ static void do_lincsp(const rvec*        x,
      gmx::ArrayRef<real> rhs2 = lincsd->tmp2;
      gmx::ArrayRef<real> sol  = lincsd->tmp3;
  
+    const rvec* x = as_rvec_array(xPadded.paddedArrayRef().data());
+    rvec*       f = as_rvec_array(fPadded.paddedArrayRef().data());
+
  #if GMX_SIMD_HAVE_REAL
      /* This SIMD code does the same as the plain-C code after the #else.
       * The only difference is that we always call pbc code, as with SIMD
@@ -620,8 +648,8 @@ static void do_lincsp(const rvec*        x,
      /* Compute normalized x i-j vectors, store in r.
       * Compute the inner product of r and xp i-j and store in rhs1.
       */
-    calc_dr_x_f_simd(b0, b1, atoms, x, f, blc.data(), pbc_simd, as_rvec_array(r.data()),
-                     rhs1.data(), sol.data());
+    calc_dr_x_f_simd(
+            b0, b1, atoms, x, f, blc.data(), pbc_simd, as_rvec_array(r.data()), rhs1.data(), sol.data());
  
  #else // GMX_SIMD_HAVE_REAL
  
@@ -705,8 +733,13 @@ static void do_lincsp(const rvec*        x,
      /* When constraining forces, we should not use mass weighting,
       * so we pass invmass=NULL, which results in the use of 1 for all atoms.
       */
-    lincs_update_atoms(lincsd, th, 1.0, sol, r,
-                       (econq != ConstraintVariable::Force) ? invmass : nullptr, fp);
+    lincs_update_atoms(lincsd,
+                       th,
+                       1.0,
+                       sol,
+                       r,
+                       (econq != ConstraintVariable::Force) ? invmass : gmx::ArrayRef<real>(),
+                       as_rvec_array(fp.data()));
  
      if (bCalcDHDL)
      {
@@ -749,14 +782,14 @@ static void do_lincsp(const rvec*        x,
  static void gmx_simdcall calc_dr_x_xp_simd(int                           b0,
                                             int                           b1,
                                             gmx::ArrayRef<const AtomPair> atoms,
-                                           const rvec* gmx_restrict x,
-                                           const rvec* gmx_restrict xp,
-                                           const real* gmx_restrict bllen,
-                                           const real* gmx_restrict blc,
-                                           const real*              pbc_simd,
-                                           rvec* gmx_restrict r,
-                                           real* gmx_restrict rhs,
-                                           real* gmx_restrict sol)
+                                           const rvec* gmx_restrict      x,
+                                           const rvec* gmx_restrict      xp,
+                                           const real* gmx_restrict      bllen,
+                                           const real* gmx_restrict      blc,
+                                           const real*                   pbc_simd,
+                                           rvec* gmx_restrict            r,
+                                           real* gmx_restrict            rhs,
+                                           real* gmx_restrict            sol)
  {
      assert(b0 % GMX_SIMD_REAL_WIDTH == 0);
      alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset2[GMX_SIMD_REAL_WIDTH];
@@ -821,14 +854,14 @@ static void gmx_simdcall calc_dr_x_xp_simd(int                           b0,
  gmx_unused static void calc_dist_iter(int                           b0,
                                        int                           b1,
                                        gmx::ArrayRef<const AtomPair> atoms,
-                                      const rvec* gmx_restrict xp,
-                                      const real* gmx_restrict bllen,
-                                      const real* gmx_restrict blc,
-                                      const t_pbc*             pbc,
-                                      real                     wfac,
-                                      real* gmx_restrict rhs,
-                                      real* gmx_restrict sol,
-                                      bool*              bWarn)
+                                      const rvec* gmx_restrict      xp,
+                                      const real* gmx_restrict      bllen,
+                                      const real* gmx_restrict      blc,
+                                      const t_pbc*                  pbc,
+                                      real                          wfac,
+                                      real* gmx_restrict            rhs,
+                                      real* gmx_restrict            sol,
+                                      bool*                         bWarn)
  {
      for (int b = b0; b < b1; b++)
      {
@@ -868,14 +901,14 @@ gmx_unused static void calc_dist_iter(int                           b0,
  static void gmx_simdcall calc_dist_iter_simd(int                           b0,
                                               int                           b1,
                                               gmx::ArrayRef<const AtomPair> atoms,
-                                             const rvec* gmx_restrict x,
-                                             const real* gmx_restrict bllen,
-                                             const real* gmx_restrict blc,
-                                             const real*              pbc_simd,
-                                             real                     wfac,
-                                             real* gmx_restrict rhs,
-                                             real* gmx_restrict sol,
-                                             bool*              bWarn)
+                                             const rvec* gmx_restrict      x,
+                                             const real* gmx_restrict      bllen,
+                                             const real* gmx_restrict      blc,
+                                             const real*                   pbc_simd,
+                                             real                          wfac,
+                                             real* gmx_restrict            rhs,
+                                             real* gmx_restrict            sol,
+                                             bool*                         bWarn)
  {
      SimdReal min_S(GMX_REAL_MIN);
      SimdReal two_S(2.0);
@@ -944,22 +977,26 @@ static void gmx_simdcall calc_dist_iter_simd(int                           b0,
  #endif // GMX_SIMD_HAVE_REAL
  
  //! Implements LINCS constraining.
-static void do_lincs(const rvec*      x,
-                     rvec*            xp,
-                     const matrix     box,
-                     t_pbc*           pbc,
-                     Lincs*           lincsd,
-                     int              th,
-                     const real*      invmass,
-                     const t_commrec* cr,
-                     bool             bCalcDHDL,
-                     real             wangle,
-                     bool*            bWarn,
-                     real             invdt,
-                     rvec* gmx_restrict v,
-                     bool               bCalcVir,
-                     tensor             vir_r_m_dr)
+static void do_lincs(ArrayRefWithPadding<const RVec> xPadded,
+                     ArrayRefWithPadding<RVec>       xpPadded,
+                     const matrix                    box,
+                     t_pbc*                          pbc,
+                     Lincs*                          lincsd,
+                     int                             th,
+                     ArrayRef<const real>            invmass,
+                     const t_commrec*                cr,
+                     bool                            bCalcDHDL,
+                     real                            wangle,
+                     bool*                           bWarn,
+                     real                            invdt,
+                     ArrayRef<RVec>                  vRef,
+                     bool                            bCalcVir,
+                     tensor                          vir_r_m_dr)
  {
+    const rvec*        x  = as_rvec_array(xPadded.paddedArrayRef().data());
+    rvec*              xp = as_rvec_array(xpPadded.paddedArrayRef().data());
+    rvec* gmx_restrict v  = as_rvec_array(vRef.data());
+
      const int b0 = lincsd->task[th].b0;
      const int b1 = lincsd->task[th].b1;
  
@@ -992,8 +1029,8 @@ static void do_lincs(const rvec*      x,
      /* Compute normalized x i-j vectors, store in r.
       * Compute the inner product of r and xp i-j and store in rhs1.
       */
-    calc_dr_x_xp_simd(b0, b1, atoms, x, xp, bllen.data(), blc.data(), pbc_simd,
-                      as_rvec_array(r.data()), rhs1.data(), sol.data());
+    calc_dr_x_xp_simd(
+            b0, b1, atoms, x, xp, bllen.data(), blc.data(), pbc_simd, as_rvec_array(r.data()), rhs1.data(), sol.data());
  
  #else // GMX_SIMD_HAVE_REAL
  
@@ -1084,20 +1121,20 @@ static void do_lincs(const rvec*      x,
  
      real wfac;
  
-    wfac = std::cos(DEG2RAD * wangle);
+    wfac = std::cos(gmx::c_deg2Rad * wangle);
      wfac = wfac * wfac;
  
      for (int iter = 0; iter < lincsd->nIter; iter++)
      {
-        if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints))
+        if ((lincsd->bCommIter && haveDDAtomOrdering(*cr) && cr->dd->constraints))
          {
  #pragma omp barrier
  #pragma omp master
              {
                  /* Communicate the corrected non-local coordinates */
-                if (DOMAINDECOMP(cr))
+                if (haveDDAtomOrdering(*cr))
                  {
-                    dd_move_x_constraints(cr->dd, box, xp, nullptr, FALSE);
+                    dd_move_x_constraints(cr->dd, box, xpPadded.unpaddedArrayRef(), ArrayRef<RVec>(), FALSE);
                  }
              }
  #pragma omp barrier
@@ -1108,11 +1145,10 @@ static void do_lincs(const rvec*      x,
          }
  
  #if GMX_SIMD_HAVE_REAL
-        calc_dist_iter_simd(b0, b1, atoms, xp, bllen.data(), blc.data(), pbc_simd, wfac,
-                            rhs1.data(), sol.data(), bWarn);
+        calc_dist_iter_simd(
+                b0, b1, atoms, xp, bllen.data(), blc.data(), pbc_simd, wfac, rhs1.data(), sol.data(), bWarn);
  #else
-        calc_dist_iter(b0, b1, atoms, xp, bllen.data(), blc.data(), pbc, wfac, rhs1.data(),
-                       sol.data(), bWarn);
+        calc_dist_iter(b0, b1, atoms, xp, bllen.data(), blc.data(), pbc, wfac, rhs1.data(), sol.data(), bWarn);
          /* 20*ncons flops */
  #endif // GMX_SIMD_HAVE_REAL
  
@@ -1206,7 +1242,11 @@ static void do_lincs(const rvec*      x,
  }
  
  /*! \brief Sets the elements in the LINCS matrix for task task. */
-static void set_lincs_matrix_task(Lincs* li, Task* li_task, const real* invmass, int* ncc_triangle, int* nCrossTaskTriangles)
+static void set_lincs_matrix_task(Lincs*               li,
+                                  Task*                li_task,
+                                  ArrayRef<const real> invmass,
+                                  int*                 ncc_triangle,
+                                  int*                 nCrossTaskTriangles)
  {
      /* Construct the coupling coefficient matrix blmf */
      li_task->ntriangle   = 0;
@@ -1294,7 +1334,7 @@ static void set_lincs_matrix_task(Lincs* li, Task* li_task, const real* invmass,
  }
  
  /*! \brief Sets the elements in the LINCS matrix. */
-static void set_lincs_matrix(Lincs* li, real* invmass, real lambda)
+static void set_lincs_matrix(Lincs* li, ArrayRef<const real> invmass, real lambda)
  {
      const real invsqrt2 = 0.7071067811865475244;
  
@@ -1325,8 +1365,7 @@ static void set_lincs_matrix(Lincs* li, real* invmass, real lambda)
      if (debug)
      {
          fprintf(debug, "The %d constraints participate in %d triangles\n", li->nc, li->ntriangle);
-        fprintf(debug, "There are %d constraint couplings, of which %d in triangles\n", li->ncc,
-                li->ncc_triangle);
+        fprintf(debug, "There are %d constraint couplings, of which %d in triangles\n", li->ncc, li->ncc_triangle);
          if (li->ntriangle > 0 && li->ntask > 1)
          {
              fprintf(debug,
@@ -1342,33 +1381,29 @@ static void set_lincs_matrix(Lincs* li, real* invmass, real lambda)
  }
  
  //! Finds all triangles of atoms that share constraints to a central atom.
-static int count_triangle_constraints(const InteractionLists& ilist, const t_blocka& at2con)
+static int count_triangle_constraints(const InteractionLists& ilist, const ListOfLists<int>& at2con)
  {
-    int ncon1, ncon_tot;
-    int c0, n1, c1, ac1, n2, c2;
-    int ncon_triangle;
-
-    ncon1    = ilist[F_CONSTR].size() / 3;
-    ncon_tot = ncon1 + ilist[F_CONSTRNC].size() / 3;
+    const int ncon1    = ilist[F_CONSTR].size() / 3;
+    const int ncon_tot = ncon1 + ilist[F_CONSTRNC].size() / 3;
  
      gmx::ArrayRef<const int> ia1 = ilist[F_CONSTR].iatoms;
      gmx::ArrayRef<const int> ia2 = ilist[F_CONSTRNC].iatoms;
  
-    ncon_triangle = 0;
-    for (c0 = 0; c0 < ncon_tot; c0++)
+    int ncon_triangle = 0;
+    for (int c0 = 0; c0 < ncon_tot; c0++)
      {
          bool       bTriangle = FALSE;
          const int* iap       = constr_iatomptr(ia1, ia2, c0);
          const int  a00       = iap[1];
          const int  a01       = iap[2];
-        for (n1 = at2con.index[a01]; n1 < at2con.index[a01 + 1]; n1++)
+        for (const int c1 : at2con[a01])
          {
-            c1 = at2con.a[n1];
              if (c1 != c0)
              {
                  const int* iap = constr_iatomptr(ia1, ia2, c1);
                  const int  a10 = iap[1];
                  const int  a11 = iap[2];
+                int        ac1;
                  if (a10 == a01)
                  {
                      ac1 = a11;
@@ -1377,9 +1412,8 @@ static int count_triangle_constraints(const InteractionLists& ilist, const t_blo
                  {
                      ac1 = a10;
                  }
-                for (n2 = at2con.index[ac1]; n2 < at2con.index[ac1 + 1]; n2++)
+                for (const int c2 : at2con[ac1])
                  {
-                    c2 = at2con.a[n2];
                      if (c2 != c0 && c2 != c1)
                      {
                          const int* iap = constr_iatomptr(ia1, ia2, c2);
@@ -1403,40 +1437,37 @@ static int count_triangle_constraints(const InteractionLists& ilist, const t_blo
  }
  
  //! Finds sequences of sequential constraints.
-static bool more_than_two_sequential_constraints(const InteractionLists& ilist, const t_blocka& at2con)
+static bool more_than_two_sequential_constraints(const InteractionLists& ilist, const ListOfLists<int>& at2con)
  {
-    int  ncon1, ncon_tot, c;
-    bool bMoreThanTwoSequentialConstraints;
-
-    ncon1    = ilist[F_CONSTR].size() / 3;
-    ncon_tot = ncon1 + ilist[F_CONSTRNC].size() / 3;
+    const int ncon1    = ilist[F_CONSTR].size() / 3;
+    const int ncon_tot = ncon1 + ilist[F_CONSTRNC].size() / 3;
  
      gmx::ArrayRef<const int> ia1 = ilist[F_CONSTR].iatoms;
      gmx::ArrayRef<const int> ia2 = ilist[F_CONSTRNC].iatoms;
  
-    bMoreThanTwoSequentialConstraints = FALSE;
-    for (c = 0; c < ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
+    for (int c = 0; c < ncon_tot; c++)
      {
          const int* iap = constr_iatomptr(ia1, ia2, c);
          const int  a1  = iap[1];
          const int  a2  = iap[2];
          /* Check if this constraint has constraints connected at both atoms */
-        if (at2con.index[a1 + 1] - at2con.index[a1] > 1 && at2con.index[a2 + 1] - at2con.index[a2] > 1)
+        if (at2con[a1].ssize() > 1 && at2con[a2].ssize() > 1)
          {
-            bMoreThanTwoSequentialConstraints = TRUE;
+            return true;
          }
      }
  
-    return bMoreThanTwoSequentialConstraints;
+    return false;
  }
  
-Lincs* init_lincs(FILE*                    fplog,
-                  const gmx_mtop_t&        mtop,
-                  int                      nflexcon_global,
-                  ArrayRef<const t_blocka> at2con,
-                  bool                     bPLINCS,
-                  int                      nIter,
-                  int                      nProjOrder)
+Lincs* init_lincs(FILE*                            fplog,
+                  const gmx_mtop_t&                mtop,
+                  int                              nflexcon_global,
+                  ArrayRef<const ListOfLists<int>> atomToConstraintsPerMolType,
+                  bool                             bPLINCS,
+                  int                              nIter,
+                  int                              nProjOrder,
+                  ObservablesReducerBuilder*       observablesReducerBuilder)
  {
      // TODO this should become a unique_ptr
      Lincs* li;
@@ -1458,9 +1489,10 @@ Lincs* init_lincs(FILE*                    fplog,
      li->max_connect = 0;
      for (size_t mt = 0; mt < mtop.moltype.size(); mt++)
      {
+        const auto& at2con = atomToConstraintsPerMolType[mt];
          for (int a = 0; a < mtop.moltype[mt].atoms.nr; a++)
          {
-            li->max_connect = std::max(li->max_connect, at2con[mt].index[a + 1] - at2con[mt].index[a]);
+            li->max_connect = std::max(li->max_connect, int(at2con[a].ssize()));
          }
      }
  
@@ -1468,11 +1500,12 @@ Lincs* init_lincs(FILE*                    fplog,
      bMoreThanTwoSeq  = FALSE;
      for (const gmx_molblock_t& molb : mtop.molblock)
      {
-        const gmx_moltype_t& molt = mtop.moltype[molb.type];
+        const gmx_moltype_t& molt   = mtop.moltype[molb.type];
+        const auto&          at2con = atomToConstraintsPerMolType[molb.type];
  
-        li->ncg_triangle += molb.nmol * count_triangle_constraints(molt.ilist, at2con[molb.type]);
+        li->ncg_triangle += molb.nmol * count_triangle_constraints(molt.ilist, at2con);
  
-        if (!bMoreThanTwoSeq && more_than_two_sequential_constraints(molt.ilist, at2con[molb.type]))
+        if (!bMoreThanTwoSeq && more_than_two_sequential_constraints(molt.ilist, at2con))
          {
              bMoreThanTwoSeq = TRUE;
          }
@@ -1498,12 +1531,11 @@ Lincs* init_lincs(FILE*                    fplog,
       * The current constraint to task assignment code can create independent
       * tasks only when not more than two constraints are connected sequentially.
       */
-    li->ntask    = gmx_omp_nthreads_get(emntLINCS);
+    li->ntask    = gmx_omp_nthreads_get(ModuleMultiThread::Lincs);
      li->bTaskDep = (li->ntask > 1 && bMoreThanTwoSeq);
      if (debug)
      {
-        fprintf(debug, "LINCS: using %d threads, tasks are %sdependent\n", li->ntask,
-                li->bTaskDep ? "" : "in");
+        fprintf(debug, "LINCS: using %d threads, tasks are %sdependent\n", li->ntask, li->bTaskDep ? "" : "in");
      }
      if (li->ntask == 1)
      {
@@ -1539,10 +1571,34 @@ Lincs* init_lincs(FILE*                    fplog,
                      "%d constraints are involved in constraint triangles,\n"
                      "will apply an additional matrix expansion of order %d for couplings\n"
                      "between constraints inside triangles\n",
-                    li->ncg_triangle, li->nOrder);
+                    li->ncg_triangle,
+                    li->nOrder);
          }
      }
  
+    if (observablesReducerBuilder)
+    {
+        ObservablesReducerBuilder::CallbackFromBuilder callbackFromBuilder =
+                [li](ObservablesReducerBuilder::CallbackToRequireReduction c, gmx::ArrayRef<double> v) {
+                    li->callbackToRequireReduction = std::move(c);
+                    li->rmsdReductionBuffer        = v;
+                };
+
+        // Make the callback that runs afer reduction.
+        ObservablesReducerBuilder::CallbackAfterReduction callbackAfterReduction = [li](gmx::Step /*step*/) {
+            if (li->rmsdReductionBuffer[0] > 0)
+            {
+                li->constraintRmsDeviation =
+                        std::sqrt(li->rmsdReductionBuffer[1] / li->rmsdReductionBuffer[0]);
+            }
+        };
+
+        const int reductionValuesRequired = 2;
+        observablesReducerBuilder->addSubscriber(reductionValuesRequired,
+                                                 std::move(callbackFromBuilder),
+                                                 std::move(callbackAfterReduction));
+    }
+
      return li;
  }
  
@@ -1594,8 +1650,9 @@ static void lincs_thread_setup(Lincs* li, int natoms)
  
              bitmask_init_low_bits(&mask, th);
  
-            li_task->ind.clear();
-            li_task->ind_r.clear();
+            li_task->updateConstraintIndices1.clear();
+            li_task->updateConstraintIndices2.clear();
+            li_task->updateConstraintIndicesRest.clear();
              for (b = li_task->b0; b < li_task->b1; b++)
              {
                  /* We let the constraint with the lowest thread index
@@ -1605,47 +1662,115 @@ static void lincs_thread_setup(Lincs* li, int natoms)
                      && bitmask_is_disjoint(atf[li->atoms[b].index2], mask))
                  {
                      /* Add the constraint to the local atom update index */
-                    li_task->ind.push_back(b);
+                    li_task->updateConstraintIndices1.push_back(b);
                  }
                  else
                  {
-                    /* Add the constraint to the rest block */
-                    li_task->ind_r.push_back(b);
+                    /* Store the constraint to the rest block */
+                    li_task->updateConstraintIndicesRest.push_back(b);
                  }
              }
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
      }
  
-    /* We need to copy all constraints which have not be assigned
-     * to a thread to a separate list which will be handled by one thread.
-     */
-    Task* li_m = &li->task[li->ntask];
-
-    li_m->ind.clear();
-    for (int th = 0; th < li->ntask; th++)
+    if (li->bTaskDep)
      {
-        const Task& li_task = li->task[th];
+        /* Assign the rest constraint to a second thread task or a master test task */
  
-        for (int ind_r : li_task.ind_r)
+        /* Clear the atom flags */
+        for (gmx_bitmask_t& mask : atf)
          {
-            li_m->ind.push_back(ind_r);
+            bitmask_clear(&mask);
          }
  
-        if (debug)
+        for (int th = 0; th < li->ntask; th++)
          {
-            fprintf(debug, "LINCS thread %d: %zu constraints\n", th, li_task.ind.size());
+            const Task* li_task = &li->task[th];
+
+            /* For each atom set a flag for constraints from each */
+            for (int b : li_task->updateConstraintIndicesRest)
+            {
+                bitmask_set_bit(&atf[li->atoms[b].index1], th);
+                bitmask_set_bit(&atf[li->atoms[b].index2], th);
+            }
          }
-    }
  
-    if (debug)
-    {
-        fprintf(debug, "LINCS thread r: %zu constraints\n", li_m->ind.size());
+#pragma omp parallel for num_threads(li->ntask) schedule(static)
+        for (int th = 0; th < li->ntask; th++)
+        {
+            try
+            {
+                Task& li_task = li->task[th];
+
+                gmx_bitmask_t mask;
+                bitmask_init_low_bits(&mask, th);
+
+                for (int& b : li_task.updateConstraintIndicesRest)
+                {
+                    /* We let the constraint with the highest thread index
+                     * operate on atoms with constraints from multiple threads.
+                     */
+                    if (bitmask_is_disjoint(atf[li->atoms[b].index1], mask)
+                        && bitmask_is_disjoint(atf[li->atoms[b].index2], mask))
+                    {
+                        li_task.updateConstraintIndices2.push_back(b);
+                        // mark the entry in updateConstraintIndicesRest as invalid, so we do not assign it again
+                        b = -1;
+                    }
+                }
+            }
+            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
+        }
+
+        /* We need to copy all constraints which have not been assigned
+         * to a thread to a separate list which will be handled by one thread.
+         */
+        Task* li_m = &li->task[li->ntask];
+
+        li->haveSecondUpdateTask = false;
+        li_m->updateConstraintIndices1.clear();
+        for (int th = 0; th < li->ntask; th++)
+        {
+            const Task& li_task = li->task[th];
+
+            for (int constraint : li_task.updateConstraintIndicesRest)
+            {
+                if (constraint >= 0)
+                {
+                    li_m->updateConstraintIndices1.push_back(constraint);
+                }
+                else
+                {
+                    li->haveSecondUpdateTask = true;
+                }
+            }
+
+            if (debug)
+            {
+                fprintf(debug,
+                        "LINCS thread %d: %zu constraints, %zu constraints\n",
+                        th,
+                        li_task.updateConstraintIndices1.size(),
+                        li_task.updateConstraintIndices2.size());
+            }
+        }
+
+        if (debug)
+        {
+            fprintf(debug, "LINCS thread r: %zu constraints\n", li_m->updateConstraintIndices1.size());
+        }
      }
  }
  
  //! Assign a constraint.
-static void assign_constraint(Lincs* li, int constraint_index, int a1, int a2, real lenA, real lenB, const t_blocka* at2con)
+static void assign_constraint(Lincs*                  li,
+                              int                     constraint_index,
+                              int                     a1,
+                              int                     a2,
+                              real                    lenA,
+                              real                    lenB,
+                              const ListOfLists<int>& at2con)
  {
      int con;
  
@@ -1664,8 +1789,7 @@ static void assign_constraint(Lincs* li, int constraint_index, int a1, int a2, r
      /* Make space in the constraint connection matrix for constraints
       * connected to both end of the current constraint.
       */
-    li->ncc += at2con->index[a1 + 1] - at2con->index[a1] - 1 + at2con->index[a2 + 1]
-               - at2con->index[a2] - 1;
+    li->ncc += at2con[a1].ssize() - 1 + at2con[a2].ssize() - 1;
  
      li->blnr[con + 1] = li->ncc;
  
@@ -1675,13 +1799,13 @@ static void assign_constraint(Lincs* li, int constraint_index, int a1, int a2, r
  
  /*! \brief Check if constraint with topology index constraint_index is connected
   * to other constraints, and if so add those connected constraints to our task. */
-static void check_assign_connected(Lincs*          li,
-                                   const t_iatom*  iatom,
-                                   const t_idef&   idef,
-                                   bool            bDynamics,
-                                   int             a1,
-                                   int             a2,
-                                   const t_blocka* at2con)
+static void check_assign_connected(Lincs*                        li,
+                                   gmx::ArrayRef<const int>      iatom,
+                                   const InteractionDefinitions& idef,
+                                   bool                          bDynamics,
+                                   int                           a1,
+                                   int                           a2,
+                                   const ListOfLists<int>&       at2con)
  {
      /* Currently this function only supports constraint groups
       * in which all constraints share at least one atom
@@ -1690,28 +1814,18 @@ static void check_assign_connected(Lincs*          li,
       * connected constraints. We need to assign those
       * to the same task.
       */
-    int end;
-
-    for (end = 0; end < 2; end++)
+    for (int end = 0; end < 2; end++)
      {
-        int a, k;
-
-        a = (end == 0 ? a1 : a2);
+        const int a = (end == 0 ? a1 : a2);
  
-        for (k = at2con->index[a]; k < at2con->index[a + 1]; k++)
+        for (const int cc : at2con[a])
          {
-            int cc;
-
-            cc = at2con->a[k];
              /* Check if constraint cc has not yet been assigned */
              if (li->con_index[cc] == -1)
              {
-                int  type;
-                real lenA, lenB;
-
-                type = iatom[cc * 3];
-                lenA = idef.iparams[type].constr.dA;
-                lenB = idef.iparams[type].constr.dB;
+                const int  type = iatom[cc * 3];
+                const real lenA = idef.iparams[type].constr.dA;
+                const real lenB = idef.iparams[type].constr.dB;
  
                  if (bDynamics || lenA != 0 || lenB != 0)
                  {
@@ -1725,24 +1839,21 @@ static void check_assign_connected(Lincs*          li,
  /*! \brief Check if constraint with topology index constraint_index is involved
   * in a constraint triangle, and if so add the other two constraints
   * in the triangle to our task. */
-static void check_assign_triangle(Lincs*          li,
-                                  const t_iatom*  iatom,
-                                  const t_idef&   idef,
-                                  bool            bDynamics,
-                                  int             constraint_index,
-                                  int             a1,
-                                  int             a2,
-                                  const t_blocka* at2con)
+static void check_assign_triangle(Lincs*                        li,
+                                  gmx::ArrayRef<const int>      iatom,
+                                  const InteractionDefinitions& idef,
+                                  bool                          bDynamics,
+                                  int                           constraint_index,
+                                  int                           a1,
+                                  int                           a2,
+                                  const ListOfLists<int>&       at2con)
  {
-    int nca, cc[32], ca[32], k;
+    int nca, cc[32], ca[32];
      int c_triangle[2] = { -1, -1 };
  
      nca = 0;
-    for (k = at2con->index[a1]; k < at2con->index[a1 + 1]; k++)
+    for (const int c : at2con[a1])
      {
-        int c;
-
-        c = at2con->a[k];
          if (c != constraint_index)
          {
              int aa1, aa2;
@@ -1764,11 +1875,8 @@ static void check_assign_triangle(Lincs*          li,
          }
      }
  
-    for (k = at2con->index[a2]; k < at2con->index[a2 + 1]; k++)
+    for (const int c : at2con[a2])
      {
-        int c;
-
-        c = at2con->a[k];
          if (c != constraint_index)
          {
              int aa1, aa2, i;
@@ -1827,7 +1935,7 @@ static void check_assign_triangle(Lincs*          li,
  }
  
  //! Sets matrix indices.
-static void set_matrix_indices(Lincs* li, const Task& li_task, const t_blocka* at2con, bool bSortMatrix)
+static void set_matrix_indices(Lincs* li, const Task& li_task, const ListOfLists<int>& at2con, bool bSortMatrix)
  {
      for (int b = li_task.b0; b < li_task.b1; b++)
      {
@@ -1835,17 +1943,17 @@ static void set_matrix_indices(Lincs* li, const Task& li_task, const t_blocka* a
          const int a2 = li->atoms[b].index2;
  
          int i = li->blnr[b];
-        for (int k = at2con->index[a1]; k < at2con->index[a1 + 1]; k++)
+        for (const int constraint : at2con[a1])
          {
-            int concon = li->con_index[at2con->a[k]];
+            const int concon = li->con_index[constraint];
              if (concon != b)
              {
                  li->blbnb[i++] = concon;
              }
          }
-        for (int k = at2con->index[a2]; k < at2con->index[a2 + 1]; k++)
+        for (const int constraint : at2con[a2])
          {
-            int concon = li->con_index[at2con->a[k]];
+            const int concon = li->con_index[constraint];
              if (concon != b)
              {
                  li->blbnb[i++] = concon;
@@ -1860,12 +1968,14 @@ static void set_matrix_indices(Lincs* li, const Task& li_task, const t_blocka* a
      }
  }
  
-void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_commrec* cr, Lincs* li)
+void set_lincs(const InteractionDefinitions& idef,
+               const int                     numAtoms,
+               ArrayRef<const real>          invmass,
+               const real                    lambda,
+               bool                          bDynamics,
+               const t_commrec*              cr,
+               Lincs*                        li)
  {
-    int      natoms;
-    t_blocka at2con;
-    t_iatom* iatom;
-
      li->nc_real = 0;
      li->nc      = 0;
      li->ncc     = 0;
@@ -1876,15 +1986,15 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
      {
          li->task[i].b0 = 0;
          li->task[i].b1 = 0;
-        li->task[i].ind.clear();
+        li->task[i].updateConstraintIndices1.clear();
      }
      if (li->ntask > 1)
      {
-        li->task[li->ntask].ind.clear();
+        li->task[li->ntask].updateConstraintIndices1.clear();
      }
  
      /* This is the local topology, so there are only F_CONSTR constraints */
-    if (idef.il[F_CONSTR].nr == 0)
+    if (idef.il[F_CONSTR].empty())
      {
          /* There are no constraints,
           * we do not need to fill any data structures.
@@ -1897,13 +2007,14 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
          fprintf(debug, "Building the LINCS connectivity\n");
      }
  
-    if (DOMAINDECOMP(cr))
+    int natoms;
+    if (haveDDAtomOrdering(*cr))
      {
          if (cr->dd->constraints)
          {
              int start;
  
-            dd_get_constraint_range(cr->dd, &start, &natoms);
+            dd_get_constraint_range(*cr->dd, &start, &natoms);
          }
          else
          {
@@ -1912,12 +2023,13 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
      }
      else
      {
-        natoms = md.homenr;
+        natoms = numAtoms;
      }
  
-    at2con = make_at2con(natoms, idef.il, idef.iparams, flexibleConstraintTreatment(bDynamics));
+    const ListOfLists<int> at2con =
+            make_at2con(natoms, idef.il, idef.iparams, flexibleConstraintTreatment(bDynamics));
  
-    const int ncon_tot = idef.il[F_CONSTR].nr / 3;
+    const int ncon_tot = idef.il[F_CONSTR].size() / 3;
  
      /* Ensure we have enough padding for aligned loads for each thread */
      const int numEntries = ncon_tot + li->ntask * simd_width;
@@ -1930,7 +2042,7 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
      li->blnr.resize(numEntries + 1);
      li->bllen.resize(numEntries);
      li->tmpv.resizeWithPadding(numEntries);
-    if (DOMAINDECOMP(cr))
+    if (haveDDAtomOrdering(*cr))
      {
          li->nlocat.resize(numEntries);
      }
@@ -1940,7 +2052,7 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
      li->tmp4.resize(numEntries);
      li->mlambda.resize(numEntries);
  
-    iatom = idef.il[F_CONSTR].iatoms;
+    gmx::ArrayRef<const int> iatom = idef.il[F_CONSTR].iatoms;
  
      li->blnr[0] = li->ncc;
  
@@ -2003,6 +2115,8 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
           */
          li_task->b0 = li->nc;
  
+        gmx::ArrayRef<const t_iparams> iparams = idef.iparams;
+
          while (con < ncon_tot && li->nc - li_task->b0 < ncon_target)
          {
              if (li->con_index[con] == -1)
@@ -2013,26 +2127,26 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
                  type = iatom[3 * con];
                  a1   = iatom[3 * con + 1];
                  a2   = iatom[3 * con + 2];
-                lenA = idef.iparams[type].constr.dA;
-                lenB = idef.iparams[type].constr.dB;
+                lenA = iparams[type].constr.dA;
+                lenB = iparams[type].constr.dB;
                  /* Skip the flexible constraints when not doing dynamics */
                  if (bDynamics || lenA != 0 || lenB != 0)
                  {
-                    assign_constraint(li, con, a1, a2, lenA, lenB, &at2con);
+                    assign_constraint(li, con, a1, a2, lenA, lenB, at2con);
  
                      if (li->ntask > 1 && !li->bTaskDep)
                      {
                          /* We can generate independent tasks. Check if we
                           * need to assign connected constraints to our task.
                           */
-                        check_assign_connected(li, iatom, idef, bDynamics, a1, a2, &at2con);
+                        check_assign_connected(li, iatom, idef, bDynamics, a1, a2, at2con);
                      }
                      if (li->ntask > 1 && li->ncg_triangle > 0)
                      {
                          /* Ensure constraints in one triangle are assigned
                           * to the same task.
                           */
-                        check_assign_triangle(li, iatom, idef, bDynamics, con, a1, a2, &at2con);
+                        check_assign_triangle(li, iatom, idef, bDynamics, con, a1, a2, at2con);
                      }
                  }
              }
@@ -2078,7 +2192,7 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
      /* Without DD we order the blbnb matrix to optimize memory access.
       * With DD the overhead of sorting is more than the gain during access.
       */
-    bSortMatrix = !DOMAINDECOMP(cr);
+    bSortMatrix = !haveDDAtomOrdering(*cr);
  
      li->blbnb.resize(li->ncc);
  
@@ -2096,13 +2210,11 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
                  li_task.tri_bits.resize(li_task.b1 - li_task.b0);
              }
  
-            set_matrix_indices(li, li_task, &at2con, bSortMatrix);
+            set_matrix_indices(li, li_task, at2con, bSortMatrix);
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
      }
  
-    done_blocka(&at2con);
-
      if (cr->dd == nullptr)
      {
          /* Since the matrix is static, we should free some memory */
@@ -2129,22 +2241,21 @@ void set_lincs(const t_idef& idef, const t_mdatoms& md, bool bDynamics, const t_
  
      if (debug)
      {
-        fprintf(debug, "Number of constraints is %d, padded %d, couplings %d\n", li->nc_real,
-                li->nc, li->ncc);
+        fprintf(debug, "Number of constraints is %d, padded %d, couplings %d\n", li->nc_real, li->nc, li->ncc);
      }
  
      if (li->ntask > 1)
      {
-        lincs_thread_setup(li, md.nr);
+        lincs_thread_setup(li, numAtoms);
      }
  
-    set_lincs_matrix(li, md.invmass, md.lambda);
+    set_lincs_matrix(li, invmass, lambda);
  }
  
  //! Issues a warning when LINCS constraints cannot be satisfied.
  static void lincs_warning(gmx_domdec_t*                 dd,
-                          const rvec*                   x,
-                          rvec*                         xprime,
+                          ArrayRef<const RVec>          x,
+                          ArrayRef<const RVec>          xprime,
                            t_pbc*                        pbc,
                            int                           ncons,
                            gmx::ArrayRef<const AtomPair> atoms,
@@ -2153,7 +2264,7 @@ static void lincs_warning(gmx_domdec_t*                 dd,
                            int                           maxwarn,
                            int*                          warncount)
  {
-    real wfac = std::cos(DEG2RAD * wangle);
+    real wfac = std::cos(gmx::c_deg2Rad * wangle);
  
      fprintf(stderr,
              "bonds that rotated more than %g degrees:\n"
@@ -2181,8 +2292,14 @@ static void lincs_warning(gmx_domdec_t*                 dd,
          real cosine = ::iprod(v0, v1) / (d0 * d1);
          if (cosine < wfac)
          {
-            fprintf(stderr, " %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n", ddglatnr(dd, i),
-                    ddglatnr(dd, j), RAD2DEG * std::acos(cosine), d0, d1, bllen[b]);
+            fprintf(stderr,
+                    " %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
+                    ddglatnr(dd, i),
+                    ddglatnr(dd, j),
+                    gmx::c_rad2Deg * std::acos(cosine),
+                    d0,
+                    d1,
+                    bllen[b]);
              if (!std::isfinite(d1))
              {
                  gmx_fatal(FARGS, "Bond length not finite.");
@@ -2193,7 +2310,7 @@ static void lincs_warning(gmx_domdec_t*                 dd,
      }
      if (*warncount > maxwarn)
      {
-        too_many_constraint_warnings(econtLINCS, *warncount);
+        too_many_constraint_warnings(ConstraintAlgorithm::Lincs, *warncount);
      }
  }
  
@@ -2211,7 +2328,7 @@ struct LincsDeviations
  };
  
  //! Determine how well the constraints have been satisfied.
-static LincsDeviations makeLincsDeviations(const Lincs& lincsd, const rvec* x, const t_pbc* pbc)
+static LincsDeviations makeLincsDeviations(const Lincs& lincsd, ArrayRef<const RVec> x, const t_pbc* pbc)
  {
      LincsDeviations                result;
      const ArrayRef<const AtomPair> atoms  = lincsd.atoms;
@@ -2260,28 +2377,29 @@ static LincsDeviations makeLincsDeviations(const Lincs& lincsd, const rvec* x, c
      return result;
  }
  
-bool constrain_lincs(bool                  computeRmsd,
-                     const t_inputrec&     ir,
-                     int64_t               step,
-                     Lincs*                lincsd,
-                     const t_mdatoms&      md,
-                     const t_commrec*      cr,
-                     const gmx_multisim_t* ms,
-                     const rvec*           x,
-                     rvec*                 xprime,
-                     rvec*                 min_proj,
-                     const matrix          box,
-                     t_pbc*                pbc,
-                     real                  lambda,
-                     real*                 dvdlambda,
-                     real                  invdt,
-                     rvec*                 v,
-                     bool                  bCalcVir,
-                     tensor                vir_r_m_dr,
-                     ConstraintVariable    econq,
-                     t_nrnb*               nrnb,
-                     int                   maxwarn,
-                     int*                  warncount)
+bool constrain_lincs(bool                            computeRmsd,
+                     const t_inputrec&               ir,
+                     int64_t                         step,
+                     Lincs*                          lincsd,
+                     ArrayRef<const real>            invmass,
+                     const t_commrec*                cr,
+                     const gmx_multisim_t*           ms,
+                     ArrayRefWithPadding<const RVec> xPadded,
+                     ArrayRefWithPadding<RVec>       xprimePadded,
+                     ArrayRef<RVec>                  min_proj,
+                     const matrix                    box,
+                     t_pbc*                          pbc,
+                     const bool                      hasMassPerturbed,
+                     real                            lambda,
+                     real*                           dvdlambda,
+                     real                            invdt,
+                     ArrayRef<RVec>                  v,
+                     bool                            bCalcVir,
+                     tensor                          vir_r_m_dr,
+                     ConstraintVariable              econq,
+                     t_nrnb*                         nrnb,
+                     int                             maxwarn,
+                     int*                            warncount)
  {
      bool bOK = TRUE;
  
@@ -2289,28 +2407,26 @@ bool constrain_lincs(bool                  computeRmsd,
       * We can also easily check if any constraint length is changed,
       * if not dH/dlambda=0 and we can also set the boolean to FALSE.
       */
-    bool bCalcDHDL = (ir.efep != efepNO && dvdlambda != nullptr);
+    bool bCalcDHDL = (ir.efep != FreeEnergyPerturbationType::No && dvdlambda != nullptr);
  
      if (lincsd->nc == 0 && cr->dd == nullptr)
      {
-        if (computeRmsd)
-        {
-            lincsd->rmsdData = { { 0 } };
-        }
-
          return bOK;
      }
  
+    ArrayRef<const RVec> x      = xPadded.unpaddedArrayRef();
+    ArrayRef<RVec>       xprime = xprimePadded.unpaddedArrayRef();
+
      if (econq == ConstraintVariable::Positions)
      {
          /* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
           * also with efep!=fepNO.
           */
-        if (ir.efep != efepNO)
+        if (ir.efep != FreeEnergyPerturbationType::No)
          {
-            if (md.nMassPerturbed && lincsd->matlam != md.lambda)
+            if (hasMassPerturbed && lincsd->matlam != lambda)
              {
-                set_lincs_matrix(lincsd, md.invmass, md.lambda);
+                set_lincs_matrix(lincsd, invmass, lambda);
              }
  
              for (int i = 0; i < lincsd->nc; i++)
@@ -2352,7 +2468,8 @@ bool constrain_lincs(bool                  computeRmsd,
          {
              LincsDeviations deviations = makeLincsDeviations(*lincsd, xprime, pbc);
              fprintf(debug, "   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
-            fprintf(debug, "       Before LINCS          %.6f    %.6f %6d %6d\n",
+            fprintf(debug,
+                    "       Before LINCS          %.6f    %.6f %6d %6d\n",
                      std::sqrt(deviations.sumSquaredDeviation / deviations.numConstraints),
                      deviations.maxDeviation,
                      ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index1),
@@ -2374,8 +2491,21 @@ bool constrain_lincs(bool                  computeRmsd,
  
                  clear_mat(lincsd->task[th].vir_r_m_dr);
  
-                do_lincs(x, xprime, box, pbc, lincsd, th, md.invmass, cr, bCalcDHDL, ir.LincsWarnAngle,
-                         &bWarn, invdt, v, bCalcVir, th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
+                do_lincs(xPadded,
+                         xprimePadded,
+                         box,
+                         pbc,
+                         lincsd,
+                         th,
+                         invmass,
+                         cr,
+                         bCalcDHDL,
+                         ir.LincsWarnAngle,
+                         &bWarn,
+                         invdt,
+                         v,
+                         bCalcVir,
+                         th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
              }
              GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
          }
@@ -2386,19 +2516,32 @@ bool constrain_lincs(bool                  computeRmsd,
  
              if (computeRmsd)
              {
-                // This is reduced across domains in compute_globals and
-                // reported to the log file.
-                lincsd->rmsdData[0] = deviations.numConstraints;
-                lincsd->rmsdData[1] = deviations.sumSquaredDeviation;
-            }
-            else
-            {
-                // This is never read
-                lincsd->rmsdData = { { 0 } };
+                if (lincsd->callbackToRequireReduction.has_value())
+                {
+                    // This is reduced across domains in compute_globals and
+                    // reported to the log file.
+                    lincsd->rmsdReductionBuffer[0] = deviations.numConstraints;
+                    lincsd->rmsdReductionBuffer[1] = deviations.sumSquaredDeviation;
+
+                    // Call the ObservablesReducer via the callback it
+                    // gave us for the purpose.
+                    ObservablesReducerStatus status =
+                            lincsd->callbackToRequireReduction.value()(ReductionRequirement::Soon);
+                    GMX_RELEASE_ASSERT(status == ObservablesReducerStatus::ReadyToReduce,
+                                       "The LINCS RMSD is computed after observables have been "
+                                       "reduced, please reorder them.");
+                }
+                else
+                {
+                    // Compute the deviation directly
+                    lincsd->constraintRmsDeviation =
+                            std::sqrt(deviations.sumSquaredDeviation / deviations.numConstraints);
+                }
              }
              if (printDebugOutput)
              {
-                fprintf(debug, "        After LINCS          %.6f    %.6f %6d %6d\n\n",
+                fprintf(debug,
+                        "        After LINCS          %.6f    %.6f %6d %6d\n\n",
                          std::sqrt(deviations.sumSquaredDeviation / deviations.numConstraints),
                          deviations.maxDeviation,
                          ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index1),
@@ -2412,21 +2555,23 @@ bool constrain_lincs(bool                  computeRmsd,
                      std::string simMesg;
                      if (isMultiSim(ms))
                      {
-                        simMesg += gmx::formatString(" in simulation %d", ms->sim);
+                        simMesg += gmx::formatString(" in simulation %d", ms->simulationIndex_);
                      }
                      fprintf(stderr,
                              "\nStep %" PRId64
                              ", time %g (ps)  LINCS WARNING%s\n"
                              "relative constraint deviation after LINCS:\n"
                              "rms %.6f, max %.6f (between atoms %d and %d)\n",
-                            step, ir.init_t + step * ir.delta_t, simMesg.c_str(),
+                            step,
+                            ir.init_t + step * ir.delta_t,
+                            simMesg.c_str(),
                              std::sqrt(deviations.sumSquaredDeviation / deviations.numConstraints),
                              deviations.maxDeviation,
                              ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index1),
                              ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index2));
  
-                    lincs_warning(cr->dd, x, xprime, pbc, lincsd->nc, lincsd->atoms, lincsd->bllen,
-                                  ir.LincsWarnAngle, maxwarn, warncount);
+                    lincs_warning(
+                            cr->dd, x, xprime, pbc, lincsd->nc, lincsd->atoms, lincsd->bllen, ir.LincsWarnAngle, maxwarn, warncount);
                  }
                  bOK = (deviations.maxDeviation < 0.5);
              }
@@ -2452,8 +2597,17 @@ bool constrain_lincs(bool                  computeRmsd,
              {
                  int th = gmx_omp_get_thread_num();
  
-                do_lincsp(x, xprime, min_proj, pbc, lincsd, th, md.invmass, econq, bCalcDHDL,
-                          bCalcVir, th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
+                do_lincsp(xPadded,
+                          xprimePadded,
+                          min_proj,
+                          pbc,
+                          lincsd,
+                          th,
+                          invmass,
+                          econq,
+                          bCalcDHDL,
+                          bCalcVir,
+                          th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
              }
              GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
          }
@@ -2494,7 +2648,7 @@ bool constrain_lincs(bool                  computeRmsd,
      {
          inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder * lincsd->ncc_triangle);
      }
-    if (v)
+    if (!v.empty())
      {
          inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc_real * 2);
      }