*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <cstdlib>
#include <algorithm>
+#include <optional>
#include <vector>
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/functions.h"
+#include "gromacs/math/paddedvector.h"
#include "gromacs/math/units.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/constr.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/mdrun.h"
+#include "gromacs/mdrunutility/multisim.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/mdtypes/observablesreducer.h"
#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pbcutil/pbc-simd.h"
+#include "gromacs/pbcutil/pbc_simd.h"
#include "gromacs/simd/simd.h"
#include "gromacs/simd/simd_math.h"
#include "gromacs/simd/vector_operations.h"
-#include "gromacs/topology/block.h"
#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/alignedallocator.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/bitmask.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxomp.h"
+#include "gromacs/utility/listoflists.h"
#include "gromacs/utility/pleasecite.h"
-#include "gromacs/utility/smalloc.h"
-
-using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
namespace gmx
{
+//! Indices of the two atoms involved in a single constraint
+struct AtomPair
+{
+ //! \brief Constructor, does not initialize to catch bugs and faster construction
+ AtomPair() {}
+
+ //! Index of atom 1
+ int index1;
+ //! Index of atom 2
+ int index2;
+};
+
//! Unit of work within LINCS.
struct Task
{
//! First constraint for this task.
- int b0 = 0;
+ int b0 = 0;
//! b1-1 is the last constraint for this task.
- int b1 = 0;
+ int b1 = 0;
//! The number of constraints in triangles.
- int ntriangle = 0;
+ int ntriangle = 0;
//! The list of triangle constraints.
- int *triangle = nullptr;
+ std::vector<int> triangle;
//! The bits tell if the matrix element should be used.
- int *tri_bits = nullptr;
- //! Allocation size of triangle and tri_bits.
- int tri_alloc = 0;
- //! Number of indices.
- int nind = 0;
- //! Constraint index for updating atom data.
- int *ind = nullptr;
- //! Number of indices.
- int nind_r = 0;
- //! Constraint index for updating atom data.
- int *ind_r = nullptr;
- //! Allocation size of ind and ind_r.
- int ind_nalloc = 0;
+ std::vector<int> tri_bits;
+ //! Constraint indices for updating atom data.
+ std::vector<int> updateConstraintIndices1;
+ //! Constraint indices for updating atom data, second group.
+ std::vector<int> updateConstraintIndices2;
+    //! Temporary constraint indices for setting up updating of atom data.
+ std::vector<int> updateConstraintIndicesRest;
//! Temporary variable for virial calculation.
- tensor vir_r_m_dr = {{0}};
+ tensor vir_r_m_dr = { { 0 } };
//! Temporary variable for lambda derivative.
- real dhdlambda;
+ real dhdlambda;
};
/*! \brief Data for LINCS algorithm.
*/
class Lincs
{
- public:
- //! The global number of constraints.
- int ncg = 0;
- //! The global number of flexible constraints.
- int ncg_flex = 0;
- //! The global number of constraints in triangles.
- int ncg_triangle = 0;
- //! The number of iterations.
- int nIter = 0;
- //! The order of the matrix expansion.
- int nOrder = 0;
- //! The maximum number of constraints connected to a single atom.
- int max_connect = 0;
-
- //! The number of real constraints.
- int nc_real = 0;
- //! The number of constraints including padding for SIMD.
- int nc = 0;
- //! The number we allocated memory for.
- int nc_alloc = 0;
- //! The number of constraint connections.
- int ncc = 0;
- //! The number we allocated memory for.
- int ncc_alloc = 0;
- //! The FE lambda value used for filling blc and blmf.
- real matlam = 0;
- //! mapping from topology to LINCS constraints.
- int *con_index = nullptr;
- //! The reference distance in topology A.
- real *bllen0 = nullptr;
- //! The reference distance in top B - the r.d. in top A.
- real *ddist = nullptr;
- //! The atom pairs involved in the constraints.
- int *bla = nullptr;
- //! 1/sqrt(invmass1 invmass2).
- real *blc = nullptr;
- //! As blc, but with all masses 1.
- real *blc1 = nullptr;
- //! Index into blbnb and blmf.
- int *blnr = nullptr;
- //! List of constraint connections.
- int *blbnb = nullptr;
- //! The local number of constraints in triangles.
- int ntriangle = 0;
- //! The number of constraint connections in triangles.
- int ncc_triangle = 0;
- //! Communicate before each LINCS interation.
- bool bCommIter = false;
- //! Matrix of mass factors for constraint connections.
- real *blmf = nullptr;
- //! As blmf, but with all masses 1.
- real *blmf1 = nullptr;
- //! The reference bond length.
- real *bllen = nullptr;
- //! The local atom count per constraint, can be NULL.
- int *nlocat = nullptr;
-
- /*! \brief The number of tasks used for LINCS work.
- *
- * \todo This is mostly used to loop over \c task, which would
- * be nicer to do with range-based for loops, but the thread
- * index is used for constructing bit masks and organizing the
- * virial output buffer, so other things need to change,
- * first. */
- int ntask = 0;
- /*! \brief LINCS thread division */
- std::vector<Task> task;
- //! Atom flags for thread parallelization.
- gmx_bitmask_t *atf = nullptr;
- //! Allocation size of atf
- int atf_nalloc = 0;
- //! Are the LINCS tasks interdependent?
- bool bTaskDep = false;
- //! Are there triangle constraints that cross task borders?
- bool bTaskDepTri = false;
- //! Arrays for temporary storage in the LINCS algorithm.
- /*! @{ */
- rvec *tmpv = nullptr;
- real *tmpncc = nullptr;
- real *tmp1 = nullptr;
- real *tmp2 = nullptr;
- real *tmp3 = nullptr;
- real *tmp4 = nullptr;
- /*! @} */
- //! The Lagrange multipliers times -1.
- real *mlambda = nullptr;
- //! Storage for the constraint RMS relative deviation output.
- std::array<real, 2> rmsdData = {{0}};
+public:
+ //! The global number of constraints.
+ int ncg = 0;
+ //! The global number of flexible constraints.
+ int ncg_flex = 0;
+ //! The global number of constraints in triangles.
+ int ncg_triangle = 0;
+ //! The number of iterations.
+ int nIter = 0;
+ //! The order of the matrix expansion.
+ int nOrder = 0;
+ //! The maximum number of constraints connected to a single atom.
+ int max_connect = 0;
+
+ //! The number of real constraints.
+ int nc_real = 0;
+ //! The number of constraints including padding for SIMD.
+ int nc = 0;
+ //! The number of constraint connections.
+ int ncc = 0;
+ //! The FE lambda value used for filling blc and blmf.
+ real matlam = 0;
+ //! mapping from topology to LINCS constraints.
+ std::vector<int> con_index;
+ //! The reference distance in topology A.
+ std::vector<real, AlignedAllocator<real>> bllen0;
+ //! The reference distance in top B - the r.d. in top A.
+ std::vector<real, AlignedAllocator<real>> ddist;
+ //! The atom pairs involved in the constraints.
+ std::vector<AtomPair> atoms;
+ //! 1/sqrt(invmass1 invmass2).
+ std::vector<real, AlignedAllocator<real>> blc;
+ //! As blc, but with all masses 1.
+ std::vector<real, AlignedAllocator<real>> blc1;
+ //! Index into blbnb and blmf.
+ std::vector<int> blnr;
+ //! List of constraint connections.
+ std::vector<int> blbnb;
+ //! The local number of constraints in triangles.
+ int ntriangle = 0;
+ //! The number of constraint connections in triangles.
+ int ncc_triangle = 0;
+    //! Communicate before each LINCS iteration.
+ bool bCommIter = false;
+ //! Matrix of mass factors for constraint connections.
+ std::vector<real> blmf;
+ //! As blmf, but with all masses 1.
+ std::vector<real> blmf1;
+ //! The reference bond length.
+ std::vector<real, AlignedAllocator<real>> bllen;
+ //! The local atom count per constraint, can be NULL.
+ std::vector<int> nlocat;
+
+ /*! \brief The number of tasks used for LINCS work.
+ *
+ * \todo This is mostly used to loop over \c task, which would
+ * be nicer to do with range-based for loops, but the thread
+ * index is used for constructing bit masks and organizing the
+ * virial output buffer, so other things need to change,
+ * first. */
+ int ntask = 0;
+ /*! \brief LINCS thread division */
+ std::vector<Task> task;
+ //! Atom flags for thread parallelization.
+ std::vector<gmx_bitmask_t> atf;
+ //! Are the LINCS tasks interdependent?
+ bool bTaskDep = false;
+ //! Are there triangle constraints that cross task borders?
+ bool bTaskDepTri = false;
+ //! Whether any task has constraints in the second update list.
+ bool haveSecondUpdateTask = false;
+ //! Arrays for temporary storage in the LINCS algorithm.
+ /*! @{ */
+ PaddedVector<gmx::RVec> tmpv;
+ std::vector<real> tmpncc;
+ std::vector<real, AlignedAllocator<real>> tmp1;
+ std::vector<real, AlignedAllocator<real>> tmp2;
+ std::vector<real, AlignedAllocator<real>> tmp3;
+ std::vector<real, AlignedAllocator<real>> tmp4;
+ /*! @} */
+ //! The Lagrange multipliers times -1.
+ std::vector<real, AlignedAllocator<real>> mlambda;
+ /*! \brief Callback used after constraining to require reduction
+ * of values later used to compute the constraint RMS relative
+ * deviation, so the latter can be output. */
+ std::optional<ObservablesReducerBuilder::CallbackToRequireReduction> callbackToRequireReduction;
+ /*! \brief View used for reducing the components of the global
+ * relative RMS constraint deviation.
+ *
+ * Can be written any time, but that is only useful when followed
+ * by a call of the callbackToRequireReduction. Useful to read
+ * only from the callback that the ObservablesReducer will later
+ * make after reduction. */
+ ArrayRef<double> rmsdReductionBuffer;
+ /*! \brief The value of the constraint RMS deviation after it has
+ * been computed.
+ *
+ * When DD is active, filled by the ObservablesReducer, otherwise
+ * filled directly here. */
+ std::optional<double> constraintRmsDeviation;
};
/*! \brief Define simd_width for memory allocation used for SIMD code */
static const int simd_width = 1;
#endif
-/*! \brief Align to 128 bytes, consistent with the current implementation of
- AlignedAllocator, which currently forces 128 byte alignment. */
-static const int align_bytes = 128;
-
-ArrayRef<real> lincs_rmsdData(Lincs *lincsd)
-{
- return lincsd->rmsdData;
-}
-
-real lincs_rmsd(const Lincs *lincsd)
+real lincs_rmsd(const Lincs* lincsd)
{
- if (lincsd->rmsdData[0] > 0)
+ if (lincsd->constraintRmsDeviation.has_value())
{
- return std::sqrt(lincsd->rmsdData[1]/lincsd->rmsdData[0]);
+ return real(lincsd->constraintRmsDeviation.value());
}
else
{
* This function will return with up to date thread-local
* constraint data, without an OpenMP barrier.
*/
-static void lincs_matrix_expand(const Lincs *lincsd,
- const Task *li_task,
- const real *blcc,
- real *rhs1, real *rhs2, real *sol)
+static void lincs_matrix_expand(const Lincs& lincsd,
+ const Task& li_task,
+ gmx::ArrayRef<const real> blcc,
+ gmx::ArrayRef<real> rhs1,
+ gmx::ArrayRef<real> rhs2,
+ gmx::ArrayRef<real> sol)
{
- int b0, b1, nrec, rec;
- const int *blnr = lincsd->blnr;
- const int *blbnb = lincsd->blbnb;
+ gmx::ArrayRef<const int> blnr = lincsd.blnr;
+ gmx::ArrayRef<const int> blbnb = lincsd.blbnb;
- b0 = li_task->b0;
- b1 = li_task->b1;
- nrec = lincsd->nOrder;
+ const int b0 = li_task.b0;
+ const int b1 = li_task.b1;
+ const int nrec = lincsd.nOrder;
- for (rec = 0; rec < nrec; rec++)
+ for (int rec = 0; rec < nrec; rec++)
{
- int b;
-
- if (lincsd->bTaskDep)
+ if (lincsd.bTaskDep)
{
#pragma omp barrier
}
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
real mvb;
int n;
mvb = 0;
- for (n = blnr[b]; n < blnr[b+1]; n++)
+ for (n = blnr[b]; n < blnr[b + 1]; n++)
{
- mvb = mvb + blcc[n]*rhs1[blbnb[n]];
+ mvb = mvb + blcc[n] * rhs1[blbnb[n]];
}
rhs2[b] = mvb;
sol[b] = sol[b] + mvb;
}
- real *swap;
-
- swap = rhs1;
- rhs1 = rhs2;
- rhs2 = swap;
- } /* nrec*(ncons+2*nrtot) flops */
+ std::swap(rhs1, rhs2);
+ } /* nrec*(ncons+2*nrtot) flops */
- if (lincsd->ntriangle > 0)
+ if (lincsd.ntriangle > 0)
{
/* Perform an extra nrec recursions for only the constraints
* involved in rigid triangles.
* is around 0.4 (and 0.7*0.7=0.5).
*/
- if (lincsd->bTaskDep)
+ if (lincsd.bTaskDep)
{
/* We need a barrier here, since other threads might still be
* reading the contents of rhs1 and/o rhs2.
* LINCS task. This means no barriers are required during the extra
* iterations for the triangle constraints.
*/
- const int *triangle = li_task->triangle;
- const int *tri_bits = li_task->tri_bits;
+ gmx::ArrayRef<const int> triangle = li_task.triangle;
+ gmx::ArrayRef<const int> tri_bits = li_task.tri_bits;
- for (rec = 0; rec < nrec; rec++)
+ for (int rec = 0; rec < nrec; rec++)
{
- int tb;
-
- for (tb = 0; tb < li_task->ntriangle; tb++)
+ for (int tb = 0; tb < li_task.ntriangle; tb++)
{
int b, bits, nr0, nr1, n;
real mvb;
bits = tri_bits[tb];
mvb = 0;
nr0 = blnr[b];
- nr1 = blnr[b+1];
+ nr1 = blnr[b + 1];
for (n = nr0; n < nr1; n++)
{
if (bits & (1 << (n - nr0)))
{
- mvb = mvb + blcc[n]*rhs1[blbnb[n]];
+ mvb = mvb + blcc[n] * rhs1[blbnb[n]];
}
}
rhs2[b] = mvb;
sol[b] = sol[b] + mvb;
}
- real *swap;
+ std::swap(rhs1, rhs2);
+ } /* nrec*(ntriangle + ncc_triangle*2) flops */
- swap = rhs1;
- rhs1 = rhs2;
- rhs2 = swap;
- } /* nrec*(ntriangle + ncc_triangle*2) flops */
-
- if (lincsd->bTaskDepTri)
+ if (lincsd.bTaskDepTri)
{
/* The constraints triangles are decoupled from each other,
* but constraints in one triangle cross thread task borders.
}
//! Update atomic coordinates when an index is not required.
-static void lincs_update_atoms_noind(int ncons, const int *bla,
- real prefac,
- const real *fac, rvec *r,
- const real *invmass,
- rvec *x)
+static void lincs_update_atoms_noind(int ncons,
+ gmx::ArrayRef<const AtomPair> atoms,
+ real preFactor,
+ gmx::ArrayRef<const real> fac,
+ gmx::ArrayRef<const gmx::RVec> r,
+ gmx::ArrayRef<const real> invmass,
+ rvec* x)
{
- int b, i, j;
- real mvb, im1, im2, tmp0, tmp1, tmp2;
-
- if (invmass != nullptr)
- {
- for (b = 0; b < ncons; b++)
- {
- i = bla[2*b];
- j = bla[2*b+1];
- mvb = prefac*fac[b];
- im1 = invmass[i];
- im2 = invmass[j];
- tmp0 = r[b][0]*mvb;
- tmp1 = r[b][1]*mvb;
- tmp2 = r[b][2]*mvb;
- x[i][0] -= tmp0*im1;
- x[i][1] -= tmp1*im1;
- x[i][2] -= tmp2*im1;
- x[j][0] += tmp0*im2;
- x[j][1] += tmp1*im2;
- x[j][2] += tmp2*im2;
- } /* 16 ncons flops */
+ if (!invmass.empty())
+ {
+ for (int b = 0; b < ncons; b++)
+ {
+ int i = atoms[b].index1;
+ int j = atoms[b].index2;
+ real mvb = preFactor * fac[b];
+ real im1 = invmass[i];
+ real im2 = invmass[j];
+ real tmp0 = r[b][0] * mvb;
+ real tmp1 = r[b][1] * mvb;
+ real tmp2 = r[b][2] * mvb;
+ x[i][0] -= tmp0 * im1;
+ x[i][1] -= tmp1 * im1;
+ x[i][2] -= tmp2 * im1;
+ x[j][0] += tmp0 * im2;
+ x[j][1] += tmp1 * im2;
+ x[j][2] += tmp2 * im2;
+ } /* 16 ncons flops */
}
else
{
- for (b = 0; b < ncons; b++)
+ for (int b = 0; b < ncons; b++)
{
- i = bla[2*b];
- j = bla[2*b+1];
- mvb = prefac*fac[b];
- tmp0 = r[b][0]*mvb;
- tmp1 = r[b][1]*mvb;
- tmp2 = r[b][2]*mvb;
+ int i = atoms[b].index1;
+ int j = atoms[b].index2;
+ real mvb = preFactor * fac[b];
+ real tmp0 = r[b][0] * mvb;
+ real tmp1 = r[b][1] * mvb;
+ real tmp2 = r[b][2] * mvb;
x[i][0] -= tmp0;
x[i][1] -= tmp1;
x[i][2] -= tmp2;
}
//! Update atomic coordinates when an index is required.
-static void lincs_update_atoms_ind(int ncons, const int *ind, const int *bla,
- real prefac,
- const real *fac, rvec *r,
- const real *invmass,
- rvec *x)
+static void lincs_update_atoms_ind(gmx::ArrayRef<const int> ind,
+ gmx::ArrayRef<const AtomPair> atoms,
+ real preFactor,
+ gmx::ArrayRef<const real> fac,
+ gmx::ArrayRef<const gmx::RVec> r,
+ gmx::ArrayRef<const real> invmass,
+ rvec* x)
{
- int bi, b, i, j;
- real mvb, im1, im2, tmp0, tmp1, tmp2;
-
- if (invmass != nullptr)
- {
- for (bi = 0; bi < ncons; bi++)
- {
- b = ind[bi];
- i = bla[2*b];
- j = bla[2*b+1];
- mvb = prefac*fac[b];
- im1 = invmass[i];
- im2 = invmass[j];
- tmp0 = r[b][0]*mvb;
- tmp1 = r[b][1]*mvb;
- tmp2 = r[b][2]*mvb;
- x[i][0] -= tmp0*im1;
- x[i][1] -= tmp1*im1;
- x[i][2] -= tmp2*im1;
- x[j][0] += tmp0*im2;
- x[j][1] += tmp1*im2;
- x[j][2] += tmp2*im2;
- } /* 16 ncons flops */
+ if (!invmass.empty())
+ {
+ for (int b : ind)
+ {
+ int i = atoms[b].index1;
+ int j = atoms[b].index2;
+ real mvb = preFactor * fac[b];
+ real im1 = invmass[i];
+ real im2 = invmass[j];
+ real tmp0 = r[b][0] * mvb;
+ real tmp1 = r[b][1] * mvb;
+ real tmp2 = r[b][2] * mvb;
+ x[i][0] -= tmp0 * im1;
+ x[i][1] -= tmp1 * im1;
+ x[i][2] -= tmp2 * im1;
+ x[j][0] += tmp0 * im2;
+ x[j][1] += tmp1 * im2;
+ x[j][2] += tmp2 * im2;
+ } /* 16 ncons flops */
}
else
{
- for (bi = 0; bi < ncons; bi++)
+ for (int b : ind)
{
- b = ind[bi];
- i = bla[2*b];
- j = bla[2*b+1];
- mvb = prefac*fac[b];
- tmp0 = r[b][0]*mvb;
- tmp1 = r[b][1]*mvb;
- tmp2 = r[b][2]*mvb;
+ int i = atoms[b].index1;
+ int j = atoms[b].index2;
+ real mvb = preFactor * fac[b];
+ real tmp0 = r[b][0] * mvb;
+ real tmp1 = r[b][1] * mvb;
+ real tmp2 = r[b][2] * mvb;
x[i][0] -= tmp0;
x[i][1] -= tmp1;
x[i][2] -= tmp2;
x[j][0] += tmp0;
x[j][1] += tmp1;
x[j][2] += tmp2;
- } /* 16 ncons flops */
+ } /* 16 ncons flops */
}
}
//! Update coordinates for atoms.
-static void lincs_update_atoms(Lincs *li, int th,
- real prefac,
- const real *fac, rvec *r,
- const real *invmass,
- rvec *x)
+static void lincs_update_atoms(Lincs* li,
+ int th,
+ real preFactor,
+ gmx::ArrayRef<const real> fac,
+ gmx::ArrayRef<const gmx::RVec> r,
+ gmx::ArrayRef<const real> invmass,
+ rvec* x)
{
if (li->ntask == 1)
{
/* Single thread, we simply update for all constraints */
- lincs_update_atoms_noind(li->nc_real,
- li->bla, prefac, fac, r, invmass, x);
+ lincs_update_atoms_noind(li->nc_real, li->atoms, preFactor, fac, r, invmass, x);
}
else
{
* constraints that only access our local atom range.
* This can be done without a barrier.
*/
- lincs_update_atoms_ind(li->task[th].nind, li->task[th].ind,
- li->bla, prefac, fac, r, invmass, x);
+ lincs_update_atoms_ind(
+ li->task[th].updateConstraintIndices1, li->atoms, preFactor, fac, r, invmass, x);
+
+ if (li->haveSecondUpdateTask)
+ {
+ /* Second round of update, we need a barrier for cross-task access of x */
+#pragma omp barrier
+ lincs_update_atoms_ind(
+ li->task[th].updateConstraintIndices2, li->atoms, preFactor, fac, r, invmass, x);
+ }
- if (li->task[li->ntask].nind > 0)
+ if (!li->task[li->ntask].updateConstraintIndices1.empty())
{
/* Update the constraints that operate on atoms
* in multiple thread atom blocks on the master thread.
#pragma omp barrier
#pragma omp master
{
- lincs_update_atoms_ind(li->task[li->ntask].nind,
- li->task[li->ntask].ind,
- li->bla, prefac, fac, r, invmass, x);
+ lincs_update_atoms_ind(
+ li->task[li->ntask].updateConstraintIndices1, li->atoms, preFactor, fac, r, invmass, x);
}
}
}
#if GMX_SIMD_HAVE_REAL
//! Helper function so that we can run TSAN with SIMD support (where implemented).
-template <int align>
-static inline void gmx_simdcall
-gatherLoadUTransposeTSANSafe(const real *base,
- const std::int32_t *offset,
- SimdReal *v0,
- SimdReal *v1,
- SimdReal *v2)
+template<int align>
+static inline void gmx_simdcall gatherLoadUTransposeTSANSafe(const real* base,
+ const std::int32_t* offset,
+ SimdReal* v0,
+ SimdReal* v1,
+ SimdReal* v2)
{
-#if (CMAKE_BUILD_TYPE == CMAKE_BUILD_TYPE_TSAN) && GMX_SIMD_X86_AVX2_256
+# if (CMAKE_BUILD_TYPE == CMAKE_BUILD_TYPE_TSAN) && GMX_SIMD_X86_AVX2_256
// This function is only implemented in this case
gatherLoadUTransposeSafe<align>(base, offset, v0, v1, v2);
-#else
+# else
gatherLoadUTranspose<align>(base, offset, v0, v1, v2);
-#endif
+# endif
}
/*! \brief Calculate the constraint distance vectors r to project on from x.
* Determine the right-hand side of the matrix equation using quantity f.
* This function only differs from calc_dr_x_xp_simd below in that
* no constraint length is subtracted and no PBC is used for f. */
-static void gmx_simdcall
-calc_dr_x_f_simd(int b0,
- int b1,
- const int * bla,
- const rvec * gmx_restrict x,
- const rvec * gmx_restrict f,
- const real * gmx_restrict blc,
- const real * pbc_simd,
- rvec * gmx_restrict r,
- real * gmx_restrict rhs,
- real * gmx_restrict sol)
+static void gmx_simdcall calc_dr_x_f_simd(int b0,
+ int b1,
+ gmx::ArrayRef<const AtomPair> atoms,
+ const rvec* gmx_restrict x,
+ const rvec* gmx_restrict f,
+ const real* gmx_restrict blc,
+ const real* pbc_simd,
+ rvec* gmx_restrict r,
+ real* gmx_restrict rhs,
+ real* gmx_restrict sol)
{
assert(b0 % GMX_SIMD_REAL_WIDTH == 0);
for (int bs = b0; bs < b1; bs += GMX_SIMD_REAL_WIDTH)
{
- SimdReal x0_S, y0_S, z0_S;
- SimdReal x1_S, y1_S, z1_S;
- SimdReal rx_S, ry_S, rz_S, n2_S, il_S;
- SimdReal fx_S, fy_S, fz_S, ip_S, rhs_S;
- alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset0[GMX_SIMD_REAL_WIDTH];
- alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset1[GMX_SIMD_REAL_WIDTH];
+ SimdReal x0_S, y0_S, z0_S;
+ SimdReal x1_S, y1_S, z1_S;
+ SimdReal rx_S, ry_S, rz_S, n2_S, il_S;
+ SimdReal fx_S, fy_S, fz_S, ip_S, rhs_S;
+ alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset0[GMX_SIMD_REAL_WIDTH];
+ alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset1[GMX_SIMD_REAL_WIDTH];
for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
{
- offset0[i] = bla[bs*2 + i*2];
- offset1[i] = bla[bs*2 + i*2 + 1];
+ offset0[i] = atoms[bs + i].index1;
+ offset1[i] = atoms[bs + i].index2;
}
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset0, &x0_S, &y0_S, &z0_S);
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset1, &x1_S, &y1_S, &z1_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(x), offset0, &x0_S, &y0_S, &z0_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(x), offset1, &x1_S, &y1_S, &z1_S);
rx_S = x0_S - x1_S;
ry_S = y0_S - y1_S;
rz_S = z0_S - z1_S;
pbc_correct_dx_simd(&rx_S, &ry_S, &rz_S, pbc_simd);
- n2_S = norm2(rx_S, ry_S, rz_S);
- il_S = invsqrt(n2_S);
+ n2_S = norm2(rx_S, ry_S, rz_S);
+ il_S = invsqrt(n2_S);
- rx_S = rx_S * il_S;
- ry_S = ry_S * il_S;
- rz_S = rz_S * il_S;
+ rx_S = rx_S * il_S;
+ ry_S = ry_S * il_S;
+ rz_S = rz_S * il_S;
- transposeScatterStoreU<3>(reinterpret_cast<real *>(r + bs), offset2, rx_S, ry_S, rz_S);
+ transposeScatterStoreU<3>(reinterpret_cast<real*>(r + bs), offset2, rx_S, ry_S, rz_S);
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(f), offset0, &x0_S, &y0_S, &z0_S);
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(f), offset1, &x1_S, &y1_S, &z1_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(f), offset0, &x0_S, &y0_S, &z0_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(f), offset1, &x1_S, &y1_S, &z1_S);
fx_S = x0_S - x1_S;
fy_S = y0_S - y1_S;
fz_S = z0_S - z1_S;
- ip_S = iprod(rx_S, ry_S, rz_S, fx_S, fy_S, fz_S);
+ ip_S = iprod(rx_S, ry_S, rz_S, fx_S, fy_S, fz_S);
rhs_S = load<SimdReal>(blc + bs) * ip_S;
#endif // GMX_SIMD_HAVE_REAL
/*! \brief LINCS projection, works on derivatives of the coordinates. */
-static void do_lincsp(const rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
- Lincs *lincsd, int th,
- real *invmass,
- ConstraintVariable econq, bool bCalcDHDL,
- bool bCalcVir, tensor rmdf)
+static void do_lincsp(ArrayRefWithPadding<const RVec> xPadded,
+ ArrayRefWithPadding<RVec> fPadded,
+ ArrayRef<RVec> fp,
+ t_pbc* pbc,
+ Lincs* lincsd,
+ int th,
+ ArrayRef<const real> invmass,
+ ConstraintVariable econq,
+ bool bCalcDHDL,
+ bool bCalcVir,
+ tensor rmdf)
{
- int b0, b1, b;
- int *bla, *blnr, *blbnb;
- rvec *r;
- real *blc, *blmf, *blcc, *rhs1, *rhs2, *sol;
-
- b0 = lincsd->task[th].b0;
- b1 = lincsd->task[th].b1;
-
- bla = lincsd->bla;
- r = lincsd->tmpv;
- blnr = lincsd->blnr;
- blbnb = lincsd->blbnb;
+ const int b0 = lincsd->task[th].b0;
+ const int b1 = lincsd->task[th].b1;
+
+ gmx::ArrayRef<const AtomPair> atoms = lincsd->atoms;
+ gmx::ArrayRef<gmx::RVec> r = lincsd->tmpv;
+ gmx::ArrayRef<const int> blnr = lincsd->blnr;
+ gmx::ArrayRef<const int> blbnb = lincsd->blbnb;
+
+ gmx::ArrayRef<const real> blc;
+ gmx::ArrayRef<const real> blmf;
if (econq != ConstraintVariable::Force)
{
/* Use mass-weighted parameters */
blc = lincsd->blc1;
blmf = lincsd->blmf1;
}
- blcc = lincsd->tmpncc;
- rhs1 = lincsd->tmp1;
- rhs2 = lincsd->tmp2;
- sol = lincsd->tmp3;
+ gmx::ArrayRef<real> blcc = lincsd->tmpncc;
+ gmx::ArrayRef<real> rhs1 = lincsd->tmp1;
+ gmx::ArrayRef<real> rhs2 = lincsd->tmp2;
+ gmx::ArrayRef<real> sol = lincsd->tmp3;
+
+ const rvec* x = as_rvec_array(xPadded.paddedArrayRef().data());
+ rvec* f = as_rvec_array(fPadded.paddedArrayRef().data());
#if GMX_SIMD_HAVE_REAL
/* This SIMD code does the same as the plain-C code after the #else.
* The only difference is that we always call pbc code, as with SIMD
* the overhead of pbc computation (when not needed) is small.
*/
- alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9*GMX_SIMD_REAL_WIDTH];
+ alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
/* Convert the pbc struct for SIMD */
set_pbc_simd(pbc, pbc_simd);
/* Compute normalized x i-j vectors, store in r.
* Compute the inner product of r and xp i-j and store in rhs1.
*/
- calc_dr_x_f_simd(b0, b1, bla, x, f, blc,
- pbc_simd,
- r, rhs1, sol);
+ calc_dr_x_f_simd(
+ b0, b1, atoms, x, f, blc.data(), pbc_simd, as_rvec_array(r.data()), rhs1.data(), sol.data());
-#else // GMX_SIMD_HAVE_REAL
+#else // GMX_SIMD_HAVE_REAL
/* Compute normalized i-j vectors */
if (pbc)
{
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
rvec dx;
- pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
+ pbc_dx_aiuc(pbc, x[atoms[b].index1], x[atoms[b].index2], dx);
unitv(dx, r[b]);
}
}
else
{
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
rvec dx;
- rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
+ rvec_sub(x[atoms[b].index1], x[atoms[b].index2], dx);
unitv(dx, r[b]);
- } /* 16 ncons flops */
+ } /* 16 ncons flops */
}
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- int i, j;
- real mvb;
-
- i = bla[2*b];
- j = bla[2*b+1];
- mvb = blc[b]*(r[b][0]*(f[i][0] - f[j][0]) +
- r[b][1]*(f[i][1] - f[j][1]) +
- r[b][2]*(f[i][2] - f[j][2]));
+ int i = atoms[b].index1;
+ int j = atoms[b].index2;
+ real mvb = blc[b]
+ * (r[b][0] * (f[i][0] - f[j][0]) + r[b][1] * (f[i][1] - f[j][1])
+ + r[b][2] * (f[i][2] - f[j][2]));
rhs1[b] = mvb;
sol[b] = mvb;
/* 7 flops */
}
-#endif // GMX_SIMD_HAVE_REAL
+#endif // GMX_SIMD_HAVE_REAL
if (lincsd->bTaskDep)
{
}
/* Construct the (sparse) LINCS matrix */
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- int n;
-
- for (n = blnr[b]; n < blnr[b+1]; n++)
+ for (int n = blnr[b]; n < blnr[b + 1]; n++)
{
- blcc[n] = blmf[n]*::iprod(r[b], r[blbnb[n]]);
- } /* 6 nr flops */
+ blcc[n] = blmf[n] * ::iprod(r[b], r[blbnb[n]]);
+ } /* 6 nr flops */
}
/* Together: 23*ncons + 6*nrtot flops */
- lincs_matrix_expand(lincsd, &lincsd->task[th], blcc, rhs1, rhs2, sol);
+ lincs_matrix_expand(*lincsd, lincsd->task[th], blcc, rhs1, rhs2, sol);
/* nrec*(ncons+2*nrtot) flops */
if (econq == ConstraintVariable::Deriv_FlexCon)
/* We only want to constraint the flexible constraints,
* so we mask out the normal ones by setting sol to 0.
*/
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
{
}
/* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
sol[b] *= blc[b];
}
/* When constraining forces, we should not use mass weighting,
* so we pass invmass=NULL, which results in the use of 1 for all atoms.
*/
- lincs_update_atoms(lincsd, th, 1.0, sol, r,
- (econq != ConstraintVariable::Force) ? invmass : nullptr, fp);
+ lincs_update_atoms(lincsd,
+ th,
+ 1.0,
+ sol,
+ r,
+ (econq != ConstraintVariable::Force) ? invmass : gmx::ArrayRef<real>(),
+ as_rvec_array(fp.data()));
if (bCalcDHDL)
{
- real dhdlambda;
-
- dhdlambda = 0;
- for (b = b0; b < b1; b++)
+ real dhdlambda = 0;
+ for (int b = b0; b < b1; b++)
{
- dhdlambda -= sol[b]*lincsd->ddist[b];
+ dhdlambda -= sol[b] * lincsd->ddist[b];
}
lincsd->task[th].dhdlambda = dhdlambda;
* where delta f is the constraint correction
* of the quantity that is being constrained.
*/
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- real mvb, tmp1;
- int i, j;
-
- mvb = lincsd->bllen[b]*sol[b];
- for (i = 0; i < DIM; i++)
+ const real mvb = lincsd->bllen[b] * sol[b];
+ for (int i = 0; i < DIM; i++)
{
- tmp1 = mvb*r[b][i];
- for (j = 0; j < DIM; j++)
+ const real tmp1 = mvb * r[b][i];
+ for (int j = 0; j < DIM; j++)
{
- rmdf[i][j] += tmp1*r[b][j];
+ rmdf[i][j] += tmp1 * r[b][j];
}
}
- } /* 23 ncons flops */
+ } /* 23 ncons flops */
}
}
/*! \brief Calculate the constraint distance vectors r to project on from x.
*
* Determine the right-hand side of the matrix equation using coordinates xp. */
-static void gmx_simdcall
-calc_dr_x_xp_simd(int b0,
- int b1,
- const int * bla,
- const rvec * gmx_restrict x,
- const rvec * gmx_restrict xp,
- const real * gmx_restrict bllen,
- const real * gmx_restrict blc,
- const real * pbc_simd,
- rvec * gmx_restrict r,
- real * gmx_restrict rhs,
- real * gmx_restrict sol)
+static void gmx_simdcall calc_dr_x_xp_simd(int b0,
+ int b1,
+ gmx::ArrayRef<const AtomPair> atoms,
+ const rvec* gmx_restrict x,
+ const rvec* gmx_restrict xp,
+ const real* gmx_restrict bllen,
+ const real* gmx_restrict blc,
+ const real* pbc_simd,
+ rvec* gmx_restrict r,
+ real* gmx_restrict rhs,
+ real* gmx_restrict sol)
{
assert(b0 % GMX_SIMD_REAL_WIDTH == 0);
alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset2[GMX_SIMD_REAL_WIDTH];
for (int bs = b0; bs < b1; bs += GMX_SIMD_REAL_WIDTH)
{
- SimdReal x0_S, y0_S, z0_S;
- SimdReal x1_S, y1_S, z1_S;
- SimdReal rx_S, ry_S, rz_S, n2_S, il_S;
- SimdReal rxp_S, ryp_S, rzp_S, ip_S, rhs_S;
- alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset0[GMX_SIMD_REAL_WIDTH];
- alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset1[GMX_SIMD_REAL_WIDTH];
+ SimdReal x0_S, y0_S, z0_S;
+ SimdReal x1_S, y1_S, z1_S;
+ SimdReal rx_S, ry_S, rz_S, n2_S, il_S;
+ SimdReal rxp_S, ryp_S, rzp_S, ip_S, rhs_S;
+ alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset0[GMX_SIMD_REAL_WIDTH];
+ alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset1[GMX_SIMD_REAL_WIDTH];
for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
{
- offset0[i] = bla[bs*2 + i*2];
- offset1[i] = bla[bs*2 + i*2 + 1];
+ offset0[i] = atoms[bs + i].index1;
+ offset1[i] = atoms[bs + i].index2;
}
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset0, &x0_S, &y0_S, &z0_S);
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset1, &x1_S, &y1_S, &z1_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(x), offset0, &x0_S, &y0_S, &z0_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(x), offset1, &x1_S, &y1_S, &z1_S);
rx_S = x0_S - x1_S;
ry_S = y0_S - y1_S;
rz_S = z0_S - z1_S;
pbc_correct_dx_simd(&rx_S, &ry_S, &rz_S, pbc_simd);
- n2_S = norm2(rx_S, ry_S, rz_S);
- il_S = invsqrt(n2_S);
+ n2_S = norm2(rx_S, ry_S, rz_S);
+ il_S = invsqrt(n2_S);
- rx_S = rx_S * il_S;
- ry_S = ry_S * il_S;
- rz_S = rz_S * il_S;
+ rx_S = rx_S * il_S;
+ ry_S = ry_S * il_S;
+ rz_S = rz_S * il_S;
- transposeScatterStoreU<3>(reinterpret_cast<real *>(r + bs), offset2, rx_S, ry_S, rz_S);
+ transposeScatterStoreU<3>(reinterpret_cast<real*>(r + bs), offset2, rx_S, ry_S, rz_S);
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(xp), offset0, &x0_S, &y0_S, &z0_S);
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(xp), offset1, &x1_S, &y1_S, &z1_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(xp), offset0, &x0_S, &y0_S, &z0_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(xp), offset1, &x1_S, &y1_S, &z1_S);
rxp_S = x0_S - x1_S;
ryp_S = y0_S - y1_S;
pbc_correct_dx_simd(&rxp_S, &ryp_S, &rzp_S, pbc_simd);
- ip_S = iprod(rx_S, ry_S, rz_S, rxp_S, ryp_S, rzp_S);
+ ip_S = iprod(rx_S, ry_S, rz_S, rxp_S, ryp_S, rzp_S);
rhs_S = load<SimdReal>(blc + bs) * (ip_S - load<SimdReal>(bllen + bs));
#endif // GMX_SIMD_HAVE_REAL
/*! \brief Determine the distances and right-hand side for the next iteration. */
-gmx_unused static void calc_dist_iter(
- int b0,
- int b1,
- const int * bla,
- const rvec * gmx_restrict xp,
- const real * gmx_restrict bllen,
- const real * gmx_restrict blc,
- const t_pbc * pbc,
- real wfac,
- real * gmx_restrict rhs,
- real * gmx_restrict sol,
- bool * bWarn)
+gmx_unused static void calc_dist_iter(int b0,
+ int b1,
+ gmx::ArrayRef<const AtomPair> atoms,
+ const rvec* gmx_restrict xp,
+ const real* gmx_restrict bllen,
+ const real* gmx_restrict blc,
+ const t_pbc* pbc,
+ real wfac,
+ real* gmx_restrict rhs,
+ real* gmx_restrict sol,
+ bool* bWarn)
{
- int b;
-
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- real len, len2, dlen2, mvb;
+ real len = bllen[b];
rvec dx;
-
- len = bllen[b];
if (pbc)
{
- pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
+ pbc_dx_aiuc(pbc, xp[atoms[b].index1], xp[atoms[b].index2], dx);
}
else
{
- rvec_sub(xp[bla[2*b]], xp[bla[2*b+1]], dx);
+ rvec_sub(xp[atoms[b].index1], xp[atoms[b].index2], dx);
}
- len2 = len*len;
- dlen2 = 2*len2 - ::norm2(dx);
- if (dlen2 < wfac*len2)
+ real len2 = len * len;
+ real dlen2 = 2 * len2 - ::norm2(dx);
+ if (dlen2 < wfac * len2)
{
/* not race free - see detailed comment in caller */
*bWarn = TRUE;
}
+ real mvb;
if (dlen2 > 0)
{
- mvb = blc[b]*(len - dlen2*gmx::invsqrt(dlen2));
+ mvb = blc[b] * (len - dlen2 * gmx::invsqrt(dlen2));
}
else
{
- mvb = blc[b]*len;
+ mvb = blc[b] * len;
}
- rhs[b] = mvb;
- sol[b] = mvb;
- } /* 20*ncons flops */
+ rhs[b] = mvb;
+ sol[b] = mvb;
+ } /* 20*ncons flops */
}
#if GMX_SIMD_HAVE_REAL
/*! \brief As calc_dist_iter(), but using SIMD intrinsics. */
-static void gmx_simdcall
-calc_dist_iter_simd(int b0,
- int b1,
- const int * bla,
- const rvec * gmx_restrict x,
- const real * gmx_restrict bllen,
- const real * gmx_restrict blc,
- const real * pbc_simd,
- real wfac,
- real * gmx_restrict rhs,
- real * gmx_restrict sol,
- bool * bWarn)
+static void gmx_simdcall calc_dist_iter_simd(int b0,
+ int b1,
+ gmx::ArrayRef<const AtomPair> atoms,
+ const rvec* gmx_restrict x,
+ const real* gmx_restrict bllen,
+ const real* gmx_restrict blc,
+ const real* pbc_simd,
+ real wfac,
+ real* gmx_restrict rhs,
+ real* gmx_restrict sol,
+ bool* bWarn)
{
- SimdReal min_S(GMX_REAL_MIN);
- SimdReal two_S(2.0);
- SimdReal wfac_S(wfac);
- SimdBool warn_S;
-
- int bs;
+ SimdReal min_S(GMX_REAL_MIN);
+ SimdReal two_S(2.0);
+ SimdReal wfac_S(wfac);
+ SimdBool warn_S;
assert(b0 % GMX_SIMD_REAL_WIDTH == 0);
/* Initialize all to FALSE */
warn_S = (two_S < setZero());
- for (bs = b0; bs < b1; bs += GMX_SIMD_REAL_WIDTH)
+ for (int bs = b0; bs < b1; bs += GMX_SIMD_REAL_WIDTH)
{
- SimdReal x0_S, y0_S, z0_S;
- SimdReal x1_S, y1_S, z1_S;
- SimdReal rx_S, ry_S, rz_S, n2_S;
- SimdReal len_S, len2_S, dlen2_S, lc_S, blc_S;
- alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset0[GMX_SIMD_REAL_WIDTH];
- alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset1[GMX_SIMD_REAL_WIDTH];
+ SimdReal x0_S, y0_S, z0_S;
+ SimdReal x1_S, y1_S, z1_S;
+ SimdReal rx_S, ry_S, rz_S, n2_S;
+ SimdReal len_S, len2_S, dlen2_S, lc_S, blc_S;
+ alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset0[GMX_SIMD_REAL_WIDTH];
+ alignas(GMX_SIMD_ALIGNMENT) std::int32_t offset1[GMX_SIMD_REAL_WIDTH];
for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
{
- offset0[i] = bla[bs*2 + i*2];
- offset1[i] = bla[bs*2 + i*2 + 1];
+ offset0[i] = atoms[bs + i].index1;
+ offset1[i] = atoms[bs + i].index2;
}
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset0, &x0_S, &y0_S, &z0_S);
- gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset1, &x1_S, &y1_S, &z1_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(x), offset0, &x0_S, &y0_S, &z0_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real*>(x), offset1, &x1_S, &y1_S, &z1_S);
rx_S = x0_S - x1_S;
ry_S = y0_S - y1_S;
pbc_correct_dx_simd(&rx_S, &ry_S, &rz_S, pbc_simd);
- n2_S = norm2(rx_S, ry_S, rz_S);
+ n2_S = norm2(rx_S, ry_S, rz_S);
- len_S = load<SimdReal>(bllen + bs);
- len2_S = len_S * len_S;
+ len_S = load<SimdReal>(bllen + bs);
+ len2_S = len_S * len_S;
dlen2_S = fms(two_S, len2_S, n2_S);
- warn_S = warn_S || (dlen2_S < (wfac_S * len2_S));
+ warn_S = warn_S || (dlen2_S < (wfac_S * len2_S));
/* Avoid 1/0 by taking the max with REAL_MIN.
* Note: when dlen2 is close to zero (90 degree constraint rotation),
*/
dlen2_S = max(dlen2_S, min_S);
- lc_S = fnma(dlen2_S, invsqrt(dlen2_S), len_S);
+ lc_S = fnma(dlen2_S, invsqrt(dlen2_S), len_S);
- blc_S = load<SimdReal>(blc + bs);
+ blc_S = load<SimdReal>(blc + bs);
- lc_S = blc_S * lc_S;
+ lc_S = blc_S * lc_S;
store(rhs + bs, lc_S);
store(sol + bs, lc_S);
#endif // GMX_SIMD_HAVE_REAL
//! Implements LINCS constraining.
-static void do_lincs(const rvec *x, rvec *xp, matrix box, t_pbc *pbc,
- Lincs *lincsd, int th,
- const real *invmass,
- const t_commrec *cr,
- bool bCalcDHDL,
- real wangle, bool *bWarn,
- real invdt, rvec * gmx_restrict v,
- bool bCalcVir, tensor vir_r_m_dr)
+static void do_lincs(ArrayRefWithPadding<const RVec> xPadded,
+ ArrayRefWithPadding<RVec> xpPadded,
+ const matrix box,
+ t_pbc* pbc,
+ Lincs* lincsd,
+ int th,
+ ArrayRef<const real> invmass,
+ const t_commrec* cr,
+ bool bCalcDHDL,
+ real wangle,
+ bool* bWarn,
+ real invdt,
+ ArrayRef<RVec> vRef,
+ bool bCalcVir,
+ tensor vir_r_m_dr)
{
- int b0, b1, b, i, j, n, iter;
- int *bla, *blnr, *blbnb;
- rvec *r;
- real *blc, *blmf, *bllen, *blcc, *rhs1, *rhs2, *sol, *blc_sol, *mlambda;
- int *nlocat;
-
- b0 = lincsd->task[th].b0;
- b1 = lincsd->task[th].b1;
-
- bla = lincsd->bla;
- r = lincsd->tmpv;
- blnr = lincsd->blnr;
- blbnb = lincsd->blbnb;
- blc = lincsd->blc;
- blmf = lincsd->blmf;
- bllen = lincsd->bllen;
- blcc = lincsd->tmpncc;
- rhs1 = lincsd->tmp1;
- rhs2 = lincsd->tmp2;
- sol = lincsd->tmp3;
- blc_sol = lincsd->tmp4;
- mlambda = lincsd->mlambda;
- nlocat = lincsd->nlocat;
+ const rvec* x = as_rvec_array(xPadded.paddedArrayRef().data());
+ rvec* xp = as_rvec_array(xpPadded.paddedArrayRef().data());
+ rvec* gmx_restrict v = as_rvec_array(vRef.data());
+
+ const int b0 = lincsd->task[th].b0;
+ const int b1 = lincsd->task[th].b1;
+
+ gmx::ArrayRef<const AtomPair> atoms = lincsd->atoms;
+ gmx::ArrayRef<gmx::RVec> r = lincsd->tmpv;
+ gmx::ArrayRef<const int> blnr = lincsd->blnr;
+ gmx::ArrayRef<const int> blbnb = lincsd->blbnb;
+ gmx::ArrayRef<const real> blc = lincsd->blc;
+ gmx::ArrayRef<const real> blmf = lincsd->blmf;
+ gmx::ArrayRef<const real> bllen = lincsd->bllen;
+ gmx::ArrayRef<real> blcc = lincsd->tmpncc;
+ gmx::ArrayRef<real> rhs1 = lincsd->tmp1;
+ gmx::ArrayRef<real> rhs2 = lincsd->tmp2;
+ gmx::ArrayRef<real> sol = lincsd->tmp3;
+ gmx::ArrayRef<real> blc_sol = lincsd->tmp4;
+ gmx::ArrayRef<real> mlambda = lincsd->mlambda;
+ gmx::ArrayRef<const int> nlocat = lincsd->nlocat;
#if GMX_SIMD_HAVE_REAL
* The only difference is that we always call pbc code, as with SIMD
* the overhead of pbc computation (when not needed) is small.
*/
- alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9*GMX_SIMD_REAL_WIDTH];
+ alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
/* Convert the pbc struct for SIMD */
set_pbc_simd(pbc, pbc_simd);
/* Compute normalized x i-j vectors, store in r.
* Compute the inner product of r and xp i-j and store in rhs1.
*/
- calc_dr_x_xp_simd(b0, b1, bla, x, xp, bllen, blc,
- pbc_simd,
- r, rhs1, sol);
+ calc_dr_x_xp_simd(
+ b0, b1, atoms, x, xp, bllen.data(), blc.data(), pbc_simd, as_rvec_array(r.data()), rhs1.data(), sol.data());
-#else // GMX_SIMD_HAVE_REAL
+#else // GMX_SIMD_HAVE_REAL
if (pbc)
{
/* Compute normalized i-j vectors */
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
rvec dx;
- real mvb;
-
- pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
+ pbc_dx_aiuc(pbc, x[atoms[b].index1], x[atoms[b].index2], dx);
unitv(dx, r[b]);
- pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
- mvb = blc[b]*(::iprod(r[b], dx) - bllen[b]);
- rhs1[b] = mvb;
- sol[b] = mvb;
+ pbc_dx_aiuc(pbc, xp[atoms[b].index1], xp[atoms[b].index2], dx);
+ real mvb = blc[b] * (::iprod(r[b], dx) - bllen[b]);
+ rhs1[b] = mvb;
+ sol[b] = mvb;
}
}
else
{
/* Compute normalized i-j vectors */
- for (b = b0; b < b1; b++)
- {
- real tmp0, tmp1, tmp2, rlen, mvb;
-
- i = bla[2*b];
- j = bla[2*b+1];
- tmp0 = x[i][0] - x[j][0];
- tmp1 = x[i][1] - x[j][1];
- tmp2 = x[i][2] - x[j][2];
- rlen = gmx::invsqrt(tmp0*tmp0 + tmp1*tmp1 + tmp2*tmp2);
- r[b][0] = rlen*tmp0;
- r[b][1] = rlen*tmp1;
- r[b][2] = rlen*tmp2;
+ for (int b = b0; b < b1; b++)
+ {
+ int i = atoms[b].index1;
+ int j = atoms[b].index2;
+ real tmp0 = x[i][0] - x[j][0];
+ real tmp1 = x[i][1] - x[j][1];
+ real tmp2 = x[i][2] - x[j][2];
+ real rlen = gmx::invsqrt(tmp0 * tmp0 + tmp1 * tmp1 + tmp2 * tmp2);
+ r[b][0] = rlen * tmp0;
+ r[b][1] = rlen * tmp1;
+ r[b][2] = rlen * tmp2;
/* 16 ncons flops */
- i = bla[2*b];
- j = bla[2*b+1];
- mvb = blc[b]*(r[b][0]*(xp[i][0] - xp[j][0]) +
- r[b][1]*(xp[i][1] - xp[j][1]) +
- r[b][2]*(xp[i][2] - xp[j][2]) - bllen[b]);
+ real mvb = blc[b]
+ * (r[b][0] * (xp[i][0] - xp[j][0]) + r[b][1] * (xp[i][1] - xp[j][1])
+ + r[b][2] * (xp[i][2] - xp[j][2]) - bllen[b]);
rhs1[b] = mvb;
sol[b] = mvb;
/* 10 flops */
/* Together: 26*ncons + 6*nrtot flops */
}
-#endif // GMX_SIMD_HAVE_REAL
+#endif // GMX_SIMD_HAVE_REAL
if (lincsd->bTaskDep)
{
}
/* Construct the (sparse) LINCS matrix */
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- for (n = blnr[b]; n < blnr[b+1]; n++)
+ for (int n = blnr[b]; n < blnr[b + 1]; n++)
{
- blcc[n] = blmf[n]*::iprod(r[b], r[blbnb[n]]);
+ blcc[n] = blmf[n] * gmx::dot(r[b], r[blbnb[n]]);
}
}
/* Together: 26*ncons + 6*nrtot flops */
- lincs_matrix_expand(lincsd, &lincsd->task[th], blcc, rhs1, rhs2, sol);
+ lincs_matrix_expand(*lincsd, lincsd->task[th], blcc, rhs1, rhs2, sol);
/* nrec*(ncons+2*nrtot) flops */
#if GMX_SIMD_HAVE_REAL
- for (b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
+ for (int b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
{
- SimdReal t1 = load<SimdReal>(blc + b);
- SimdReal t2 = load<SimdReal>(sol + b);
- store(mlambda + b, t1 * t2);
+ SimdReal t1 = load<SimdReal>(blc.data() + b);
+ SimdReal t2 = load<SimdReal>(sol.data() + b);
+ store(mlambda.data() + b, t1 * t2);
}
#else
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- mlambda[b] = blc[b]*sol[b];
+ mlambda[b] = blc[b] * sol[b];
}
-#endif // GMX_SIMD_HAVE_REAL
+#endif // GMX_SIMD_HAVE_REAL
/* Update the coordinates */
lincs_update_atoms(lincsd, th, 1.0, mlambda, r, invmass, xp);
real wfac;
- wfac = std::cos(DEG2RAD*wangle);
- wfac = wfac*wfac;
+ wfac = std::cos(gmx::c_deg2Rad * wangle);
+ wfac = wfac * wfac;
- for (iter = 0; iter < lincsd->nIter; iter++)
+ for (int iter = 0; iter < lincsd->nIter; iter++)
{
- if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints))
+ if ((lincsd->bCommIter && haveDDAtomOrdering(*cr) && cr->dd->constraints))
{
#pragma omp barrier
#pragma omp master
{
/* Communicate the corrected non-local coordinates */
- if (DOMAINDECOMP(cr))
+ if (haveDDAtomOrdering(*cr))
{
- dd_move_x_constraints(cr->dd, box, xp, nullptr, FALSE);
+ dd_move_x_constraints(cr->dd, box, xpPadded.unpaddedArrayRef(), ArrayRef<RVec>(), FALSE);
}
}
#pragma omp barrier
}
#if GMX_SIMD_HAVE_REAL
- calc_dist_iter_simd(b0, b1, bla, xp, bllen, blc, pbc_simd, wfac,
- rhs1, sol, bWarn);
+ calc_dist_iter_simd(
+ b0, b1, atoms, xp, bllen.data(), blc.data(), pbc_simd, wfac, rhs1.data(), sol.data(), bWarn);
#else
- calc_dist_iter(b0, b1, bla, xp, bllen, blc, pbc, wfac,
- rhs1, sol, bWarn);
+ calc_dist_iter(b0, b1, atoms, xp, bllen.data(), blc.data(), pbc, wfac, rhs1.data(), sol.data(), bWarn);
/* 20*ncons flops */
-#endif // GMX_SIMD_HAVE_REAL
+#endif // GMX_SIMD_HAVE_REAL
- lincs_matrix_expand(lincsd, &lincsd->task[th], blcc, rhs1, rhs2, sol);
+ lincs_matrix_expand(*lincsd, lincsd->task[th], blcc, rhs1, rhs2, sol);
/* nrec*(ncons+2*nrtot) flops */
#if GMX_SIMD_HAVE_REAL
- for (b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
+ for (int b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
{
- SimdReal t1 = load<SimdReal>(blc + b);
- SimdReal t2 = load<SimdReal>(sol + b);
+ SimdReal t1 = load<SimdReal>(blc.data() + b);
+ SimdReal t2 = load<SimdReal>(sol.data() + b);
SimdReal mvb = t1 * t2;
- store(blc_sol + b, mvb);
- store(mlambda + b, load<SimdReal>(mlambda + b) + mvb);
+ store(blc_sol.data() + b, mvb);
+ store(mlambda.data() + b, load<SimdReal>(mlambda.data() + b) + mvb);
}
#else
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- real mvb;
-
- mvb = blc[b]*sol[b];
- blc_sol[b] = mvb;
+ real mvb = blc[b] * sol[b];
+ blc_sol[b] = mvb;
mlambda[b] += mvb;
}
-#endif // GMX_SIMD_HAVE_REAL
+#endif // GMX_SIMD_HAVE_REAL
/* Update the coordinates */
lincs_update_atoms(lincsd, th, 1.0, blc_sol, r, invmass, xp);
/* 16 ncons flops */
}
- if (nlocat != nullptr && (bCalcDHDL || bCalcVir))
+ if (!nlocat.empty() && (bCalcDHDL || bCalcVir))
{
if (lincsd->bTaskDep)
{
}
/* Only account for local atoms */
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- mlambda[b] *= 0.5*nlocat[b];
+ mlambda[b] *= 0.5 * nlocat[b];
}
}
if (bCalcDHDL)
{
- real dhdl;
-
- dhdl = 0;
- for (b = b0; b < b1; b++)
+ real dhdl = 0;
+ for (int b = b0; b < b1; b++)
{
/* Note that this this is dhdl*dt^2, the dt^2 factor is corrected
* later after the contributions are reduced over the threads.
*/
- dhdl -= lincsd->mlambda[b]*lincsd->ddist[b];
+ dhdl -= lincsd->mlambda[b] * lincsd->ddist[b];
}
lincsd->task[th].dhdlambda = dhdl;
}
if (bCalcVir)
{
/* Constraint virial */
- for (b = b0; b < b1; b++)
+ for (int b = b0; b < b1; b++)
{
- real tmp0, tmp1;
-
- tmp0 = -bllen[b]*mlambda[b];
- for (i = 0; i < DIM; i++)
+ real tmp0 = -bllen[b] * mlambda[b];
+ for (int i = 0; i < DIM; i++)
{
- tmp1 = tmp0*r[b][i];
- for (j = 0; j < DIM; j++)
+ real tmp1 = tmp0 * r[b][i];
+ for (int j = 0; j < DIM; j++)
{
- vir_r_m_dr[i][j] -= tmp1*r[b][j];
+ vir_r_m_dr[i][j] -= tmp1 * r[b][j];
}
}
- } /* 22 ncons flops */
+ } /* 22 ncons flops */
}
/* Total:
}
/*! \brief Sets the elements in the LINCS matrix for task task. */
-static void set_lincs_matrix_task(Lincs *li,
- Task *li_task,
- const real *invmass,
- int *ncc_triangle,
- int *nCrossTaskTriangles)
+static void set_lincs_matrix_task(Lincs* li,
+ Task* li_task,
+ ArrayRef<const real> invmass,
+ int* ncc_triangle,
+ int* nCrossTaskTriangles)
{
- int i;
-
/* Construct the coupling coefficient matrix blmf */
li_task->ntriangle = 0;
*ncc_triangle = 0;
*nCrossTaskTriangles = 0;
- for (i = li_task->b0; i < li_task->b1; i++)
+ for (int i = li_task->b0; i < li_task->b1; i++)
{
- int a1, a2, n;
-
- a1 = li->bla[2*i];
- a2 = li->bla[2*i+1];
- for (n = li->blnr[i]; (n < li->blnr[i+1]); n++)
+ const int a1 = li->atoms[i].index1;
+ const int a2 = li->atoms[i].index2;
+ for (int n = li->blnr[i]; n < li->blnr[i + 1]; n++)
{
- int k, sign, center, end;
-
- k = li->blbnb[n];
+ const int k = li->blbnb[n];
/* If we are using multiple, independent tasks for LINCS,
* the calls to check_assign_connected should have
*/
assert(li->bTaskDep || (k >= li_task->b0 && k < li_task->b1));
- if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
+ int sign;
+ if (a1 == li->atoms[k].index1 || a2 == li->atoms[k].index2)
{
sign = -1;
}
{
sign = 1;
}
- if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
+ int center;
+ int end;
+ if (a1 == li->atoms[k].index1 || a1 == li->atoms[k].index2)
{
center = a1;
end = a2;
center = a2;
end = a1;
}
- li->blmf[n] = sign*invmass[center]*li->blc[i]*li->blc[k];
- li->blmf1[n] = sign*0.5;
+ li->blmf[n] = sign * invmass[center] * li->blc[i] * li->blc[k];
+ li->blmf1[n] = sign * 0.5;
if (li->ncg_triangle > 0)
{
- int nk, kk;
-
/* Look for constraint triangles */
- for (nk = li->blnr[k]; (nk < li->blnr[k+1]); nk++)
+ for (int nk = li->blnr[k]; nk < li->blnr[k + 1]; nk++)
{
- kk = li->blbnb[nk];
- if (kk != i && kk != k &&
- (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
+ const int kk = li->blbnb[nk];
+ if (kk != i && kk != k && (li->atoms[kk].index1 == end || li->atoms[kk].index2 == end))
{
/* Check if the constraints in this triangle actually
* belong to a different task. We still assign them
* here, since it's convenient for the triangle
* iterations, but we then need an extra barrier.
*/
- if (k < li_task->b0 || k >= li_task->b1 ||
- kk < li_task->b0 || kk >= li_task->b1)
+ if (k < li_task->b0 || k >= li_task->b1 || kk < li_task->b0 || kk >= li_task->b1)
{
(*nCrossTaskTriangles)++;
}
- if (li_task->ntriangle == 0 ||
- li_task->triangle[li_task->ntriangle - 1] < i)
+ if (li_task->ntriangle == 0 || li_task->triangle[li_task->ntriangle - 1] < i)
{
/* Add this constraint to the triangle list */
li_task->triangle[li_task->ntriangle] = i;
li_task->tri_bits[li_task->ntriangle] = 0;
li_task->ntriangle++;
- if (li->blnr[i+1] - li->blnr[i] > static_cast<int>(sizeof(li_task->tri_bits[0])*8 - 1))
+ if (li->blnr[i + 1] - li->blnr[i]
+ > static_cast<int>(sizeof(li_task->tri_bits[0]) * 8 - 1))
{
- gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %zu allowed for constraints participating in triangles",
- li->blnr[i+1] - li->blnr[i],
- sizeof(li_task->tri_bits[0])*8-1);
+ gmx_fatal(FARGS,
+ "A constraint is connected to %d constraints, this is "
+ "more than the %zu allowed for constraints participating "
+ "in triangles",
+ li->blnr[i + 1] - li->blnr[i],
+ sizeof(li_task->tri_bits[0]) * 8 - 1);
}
}
- li_task->tri_bits[li_task->ntriangle-1] |= (1 << (n - li->blnr[i]));
+ li_task->tri_bits[li_task->ntriangle - 1] |= (1 << (n - li->blnr[i]));
(*ncc_triangle)++;
}
}
}
/*! \brief Sets the elements in the LINCS matrix. */
-static void set_lincs_matrix(Lincs *li, real *invmass, real lambda)
+static void set_lincs_matrix(Lincs* li, ArrayRef<const real> invmass, real lambda)
{
- int i;
const real invsqrt2 = 0.7071067811865475244;
- for (i = 0; (i < li->nc); i++)
+ for (int i = 0; (i < li->nc); i++)
{
- int a1, a2;
-
- a1 = li->bla[2*i];
- a2 = li->bla[2*i+1];
- li->blc[i] = gmx::invsqrt(invmass[a1] + invmass[a2]);
- li->blc1[i] = invsqrt2;
+ const int a1 = li->atoms[i].index1;
+ const int a2 = li->atoms[i].index2;
+ li->blc[i] = gmx::invsqrt(invmass[a1] + invmass[a2]);
+ li->blc1[i] = invsqrt2;
}
/* Construct the coupling coefficient matrix blmf */
- int th, ntriangle = 0, ncc_triangle = 0, nCrossTaskTriangles = 0;
+ int ntriangle = 0, ncc_triangle = 0, nCrossTaskTriangles = 0;
#pragma omp parallel for reduction(+: ntriangle, ncc_triangle, nCrossTaskTriangles) num_threads(li->ntask) schedule(static)
- for (th = 0; th < li->ntask; th++)
+ for (int th = 0; th < li->ntask; th++)
{
try
{
- set_lincs_matrix_task(li, &li->task[th], invmass,
- &ncc_triangle, &nCrossTaskTriangles);
+ set_lincs_matrix_task(li, &li->task[th], invmass, &ncc_triangle, &nCrossTaskTriangles);
ntriangle += li->task[th].ntriangle;
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
li->ntriangle = ntriangle;
li->ncc_triangle = ncc_triangle;
if (debug)
{
- fprintf(debug, "The %d constraints participate in %d triangles\n",
- li->nc, li->ntriangle);
- fprintf(debug, "There are %d constraint couplings, of which %d in triangles\n",
- li->ncc, li->ncc_triangle);
+ fprintf(debug, "The %d constraints participate in %d triangles\n", li->nc, li->ntriangle);
+ fprintf(debug, "There are %d constraint couplings, of which %d in triangles\n", li->ncc, li->ncc_triangle);
if (li->ntriangle > 0 && li->ntask > 1)
{
- fprintf(debug, "%d constraint triangles contain constraints assigned to different tasks\n",
+ fprintf(debug,
+ "%d constraint triangles contain constraints assigned to different tasks\n",
nCrossTaskTriangles);
}
}
}
//! Finds all triangles of atoms that share constraints to a central atom.
-static int count_triangle_constraints(const InteractionLists &ilist,
- const t_blocka &at2con)
+static int count_triangle_constraints(const InteractionLists& ilist, const ListOfLists<int>& at2con)
{
- int ncon1, ncon_tot;
- int c0, n1, c1, ac1, n2, c2;
- int ncon_triangle;
-
- ncon1 = ilist[F_CONSTR].size()/3;
- ncon_tot = ncon1 + ilist[F_CONSTRNC].size()/3;
+ const int ncon1 = ilist[F_CONSTR].size() / 3;
+ const int ncon_tot = ncon1 + ilist[F_CONSTRNC].size() / 3;
gmx::ArrayRef<const int> ia1 = ilist[F_CONSTR].iatoms;
gmx::ArrayRef<const int> ia2 = ilist[F_CONSTRNC].iatoms;
- ncon_triangle = 0;
- for (c0 = 0; c0 < ncon_tot; c0++)
+ int ncon_triangle = 0;
+ for (int c0 = 0; c0 < ncon_tot; c0++)
{
bool bTriangle = FALSE;
- const int *iap = constr_iatomptr(ia1, ia2, c0);
+ const int* iap = constr_iatomptr(ia1, ia2, c0);
const int a00 = iap[1];
const int a01 = iap[2];
- for (n1 = at2con.index[a01]; n1 < at2con.index[a01+1]; n1++)
+ for (const int c1 : at2con[a01])
{
- c1 = at2con.a[n1];
if (c1 != c0)
{
- const int *iap = constr_iatomptr(ia1, ia2, c1);
+ const int* iap = constr_iatomptr(ia1, ia2, c1);
const int a10 = iap[1];
const int a11 = iap[2];
+ int ac1;
if (a10 == a01)
{
ac1 = a11;
{
ac1 = a10;
}
- for (n2 = at2con.index[ac1]; n2 < at2con.index[ac1+1]; n2++)
+ for (const int c2 : at2con[ac1])
{
- c2 = at2con.a[n2];
if (c2 != c0 && c2 != c1)
{
- const int *iap = constr_iatomptr(ia1, ia2, c2);
+ const int* iap = constr_iatomptr(ia1, ia2, c2);
const int a20 = iap[1];
const int a21 = iap[2];
if (a20 == a00 || a21 == a00)
}
//! Finds sequences of sequential constraints.
-static bool more_than_two_sequential_constraints(const InteractionLists &ilist,
- const t_blocka &at2con)
+static bool more_than_two_sequential_constraints(const InteractionLists& ilist, const ListOfLists<int>& at2con)
{
- int ncon1, ncon_tot, c;
- bool bMoreThanTwoSequentialConstraints;
-
- ncon1 = ilist[F_CONSTR].size()/3;
- ncon_tot = ncon1 + ilist[F_CONSTRNC].size()/3;
+ const int ncon1 = ilist[F_CONSTR].size() / 3;
+ const int ncon_tot = ncon1 + ilist[F_CONSTRNC].size() / 3;
gmx::ArrayRef<const int> ia1 = ilist[F_CONSTR].iatoms;
gmx::ArrayRef<const int> ia2 = ilist[F_CONSTRNC].iatoms;
- bMoreThanTwoSequentialConstraints = FALSE;
- for (c = 0; c < ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
+ for (int c = 0; c < ncon_tot; c++)
{
- const int *iap = constr_iatomptr(ia1, ia2, c);
+ const int* iap = constr_iatomptr(ia1, ia2, c);
const int a1 = iap[1];
const int a2 = iap[2];
/* Check if this constraint has constraints connected at both atoms */
- if (at2con.index[a1+1] - at2con.index[a1] > 1 &&
- at2con.index[a2+1] - at2con.index[a2] > 1)
+ if (at2con[a1].ssize() > 1 && at2con[a2].ssize() > 1)
{
- bMoreThanTwoSequentialConstraints = TRUE;
+ return true;
}
}
- return bMoreThanTwoSequentialConstraints;
+ return false;
}
-Lincs *init_lincs(FILE *fplog, const gmx_mtop_t &mtop,
- int nflexcon_global, ArrayRef<const t_blocka> at2con,
- bool bPLINCS, int nIter, int nProjOrder)
+Lincs* init_lincs(FILE* fplog,
+ const gmx_mtop_t& mtop,
+ int nflexcon_global,
+ ArrayRef<const ListOfLists<int>> atomToConstraintsPerMolType,
+ bool bPLINCS,
+ int nIter,
+ int nProjOrder,
+ ObservablesReducerBuilder* observablesReducerBuilder)
{
// TODO this should become a unique_ptr
- Lincs *li;
- bool bMoreThanTwoSeq;
+ Lincs* li;
+ bool bMoreThanTwoSeq;
if (fplog)
{
- fprintf(fplog, "\nInitializing%s LINear Constraint Solver\n",
- bPLINCS ? " Parallel" : "");
+ fprintf(fplog, "\nInitializing%s LINear Constraint Solver\n", bPLINCS ? " Parallel" : "");
}
li = new Lincs;
- li->ncg =
- gmx_mtop_ftype_count(mtop, F_CONSTR) +
- gmx_mtop_ftype_count(mtop, F_CONSTRNC);
+ li->ncg = gmx_mtop_ftype_count(mtop, F_CONSTR) + gmx_mtop_ftype_count(mtop, F_CONSTRNC);
li->ncg_flex = nflexcon_global;
li->nIter = nIter;
li->max_connect = 0;
for (size_t mt = 0; mt < mtop.moltype.size(); mt++)
{
+ const auto& at2con = atomToConstraintsPerMolType[mt];
for (int a = 0; a < mtop.moltype[mt].atoms.nr; a++)
{
- li->max_connect = std::max(li->max_connect,
- at2con[mt].index[a + 1] - at2con[mt].index[a]);
+ li->max_connect = std::max(li->max_connect, int(at2con[a].ssize()));
}
}
li->ncg_triangle = 0;
bMoreThanTwoSeq = FALSE;
- for (const gmx_molblock_t &molb : mtop.molblock)
+ for (const gmx_molblock_t& molb : mtop.molblock)
{
- const gmx_moltype_t &molt = mtop.moltype[molb.type];
+ const gmx_moltype_t& molt = mtop.moltype[molb.type];
+ const auto& at2con = atomToConstraintsPerMolType[molb.type];
- li->ncg_triangle +=
- molb.nmol*
- count_triangle_constraints(molt.ilist, at2con[molb.type]);
+ li->ncg_triangle += molb.nmol * count_triangle_constraints(molt.ilist, at2con);
- if (!bMoreThanTwoSeq &&
- more_than_two_sequential_constraints(molt.ilist, at2con[molb.type]))
+ if (!bMoreThanTwoSeq && more_than_two_sequential_constraints(molt.ilist, at2con))
{
bMoreThanTwoSeq = TRUE;
}
if (debug && bPLINCS)
{
- fprintf(debug, "PLINCS communication before each iteration: %d\n",
- static_cast<int>(li->bCommIter));
+ fprintf(debug, "PLINCS communication before each iteration: %d\n", static_cast<int>(li->bCommIter));
}
/* LINCS can run on any number of threads.
* The current constraint to task assignment code can create independent
* tasks only when not more than two constraints are connected sequentially.
*/
- li->ntask = gmx_omp_nthreads_get(emntLINCS);
+ li->ntask = gmx_omp_nthreads_get(ModuleMultiThread::Lincs);
li->bTaskDep = (li->ntask > 1 && bMoreThanTwoSeq);
if (debug)
{
- fprintf(debug, "LINCS: using %d threads, tasks are %sdependent\n",
- li->ntask, li->bTaskDep ? "" : "in");
+ fprintf(debug, "LINCS: using %d threads, tasks are %sdependent\n", li->ntask, li->bTaskDep ? "" : "in");
}
if (li->ntask == 1)
{
fprintf(fplog, "The number of constraints is %d\n", li->ncg);
if (bPLINCS)
{
- fprintf(fplog, "There are constraints between atoms in different decomposition domains,\n"
+ fprintf(fplog,
+ "There are constraints between atoms in different decomposition domains,\n"
"will communicate selected coordinates each lincs iteration\n");
}
if (li->ncg_triangle > 0)
"%d constraints are involved in constraint triangles,\n"
"will apply an additional matrix expansion of order %d for couplings\n"
"between constraints inside triangles\n",
- li->ncg_triangle, li->nOrder);
+ li->ncg_triangle,
+ li->nOrder);
}
}
+ if (observablesReducerBuilder)
+ {
+ ObservablesReducerBuilder::CallbackFromBuilder callbackFromBuilder =
+ [li](ObservablesReducerBuilder::CallbackToRequireReduction c, gmx::ArrayRef<double> v) {
+ li->callbackToRequireReduction = std::move(c);
+ li->rmsdReductionBuffer = v;
+ };
+
+ // Make the callback that runs after reduction.
+ ObservablesReducerBuilder::CallbackAfterReduction callbackAfterReduction = [li](gmx::Step /*step*/) {
+ if (li->rmsdReductionBuffer[0] > 0)
+ {
+ li->constraintRmsDeviation =
+ std::sqrt(li->rmsdReductionBuffer[1] / li->rmsdReductionBuffer[0]);
+ }
+ };
+
+ const int reductionValuesRequired = 2;
+ observablesReducerBuilder->addSubscriber(reductionValuesRequired,
+ std::move(callbackFromBuilder),
+ std::move(callbackAfterReduction));
+ }
+
return li;
}
-void done_lincs(Lincs *li)
+void done_lincs(Lincs* li)
{
delete li;
}
/*! \brief Sets up the work division over the threads. */
-static void lincs_thread_setup(Lincs *li, int natoms)
+static void lincs_thread_setup(Lincs* li, int natoms)
{
- Task *li_m;
- int th;
- gmx_bitmask_t *atf;
- int a;
+ li->atf.resize(natoms);
- if (natoms > li->atf_nalloc)
- {
- li->atf_nalloc = over_alloc_large(natoms);
- srenew(li->atf, li->atf_nalloc);
- }
+ gmx::ArrayRef<gmx_bitmask_t> atf = li->atf;
- atf = li->atf;
/* Clear the atom flags */
- for (a = 0; a < natoms; a++)
+ for (gmx_bitmask_t& mask : atf)
{
- bitmask_clear(&atf[a]);
+ bitmask_clear(&mask);
}
if (li->ntask > BITMASK_SIZE)
gmx_fatal(FARGS, "More than %d threads is not supported for LINCS.", BITMASK_SIZE);
}
- for (th = 0; th < li->ntask; th++)
+ for (int th = 0; th < li->ntask; th++)
{
- Task *li_task;
- int b;
-
- li_task = &li->task[th];
+ const Task* li_task = &li->task[th];
/* For each atom set a flag for constraints from each */
- for (b = li_task->b0; b < li_task->b1; b++)
+ for (int b = li_task->b0; b < li_task->b1; b++)
{
- bitmask_set_bit(&atf[li->bla[b*2 ]], th);
- bitmask_set_bit(&atf[li->bla[b*2 + 1]], th);
+ bitmask_set_bit(&atf[li->atoms[b].index1], th);
+ bitmask_set_bit(&atf[li->atoms[b].index2], th);
}
}
#pragma omp parallel for num_threads(li->ntask) schedule(static)
- for (th = 0; th < li->ntask; th++)
+ for (int th = 0; th < li->ntask; th++)
{
try
{
- Task *li_task;
- gmx_bitmask_t mask;
- int b;
+ Task* li_task;
+ gmx_bitmask_t mask;
+ int b;
li_task = &li->task[th];
- if (li_task->b1 - li_task->b0 > li_task->ind_nalloc)
- {
- li_task->ind_nalloc = over_alloc_large(li_task->b1-li_task->b0);
- srenew(li_task->ind, li_task->ind_nalloc);
- srenew(li_task->ind_r, li_task->ind_nalloc);
- }
-
bitmask_init_low_bits(&mask, th);
- li_task->nind = 0;
- li_task->nind_r = 0;
+ li_task->updateConstraintIndices1.clear();
+ li_task->updateConstraintIndices2.clear();
+ li_task->updateConstraintIndicesRest.clear();
for (b = li_task->b0; b < li_task->b1; b++)
{
/* We let the constraint with the lowest thread index
* operate on atoms with constraints from multiple threads.
*/
- if (bitmask_is_disjoint(atf[li->bla[b*2]], mask) &&
- bitmask_is_disjoint(atf[li->bla[b*2+1]], mask))
+ if (bitmask_is_disjoint(atf[li->atoms[b].index1], mask)
+ && bitmask_is_disjoint(atf[li->atoms[b].index2], mask))
{
/* Add the constraint to the local atom update index */
- li_task->ind[li_task->nind++] = b;
+ li_task->updateConstraintIndices1.push_back(b);
}
else
{
- /* Add the constraint to the rest block */
- li_task->ind_r[li_task->nind_r++] = b;
+ /* Store the constraint to the rest block */
+ li_task->updateConstraintIndicesRest.push_back(b);
}
}
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
- /* We need to copy all constraints which have not be assigned
- * to a thread to a separate list which will be handled by one thread.
- */
- li_m = &li->task[li->ntask];
-
- li_m->nind = 0;
- for (th = 0; th < li->ntask; th++)
+ if (li->bTaskDep)
{
- Task *li_task;
- int b;
+ /* Assign the rest constraints to a second thread task or a master rest task */
- li_task = &li->task[th];
+ /* Clear the atom flags */
+ for (gmx_bitmask_t& mask : atf)
+ {
+ bitmask_clear(&mask);
+ }
- if (li_m->nind + li_task->nind_r > li_m->ind_nalloc)
+ for (int th = 0; th < li->ntask; th++)
{
- li_m->ind_nalloc = over_alloc_large(li_m->nind+li_task->nind_r);
- srenew(li_m->ind, li_m->ind_nalloc);
+ const Task* li_task = &li->task[th];
+
+ /* For each atom set a flag for constraints from each */
+ for (int b : li_task->updateConstraintIndicesRest)
+ {
+ bitmask_set_bit(&atf[li->atoms[b].index1], th);
+ bitmask_set_bit(&atf[li->atoms[b].index2], th);
+ }
+ }
+
+#pragma omp parallel for num_threads(li->ntask) schedule(static)
+ for (int th = 0; th < li->ntask; th++)
+ {
+ try
+ {
+ Task& li_task = li->task[th];
+
+ gmx_bitmask_t mask;
+ bitmask_init_low_bits(&mask, th);
+
+ for (int& b : li_task.updateConstraintIndicesRest)
+ {
+ /* We let the constraint with the lowest thread index
+ * operate on atoms with constraints from multiple threads.
+ */
+ if (bitmask_is_disjoint(atf[li->atoms[b].index1], mask)
+ && bitmask_is_disjoint(atf[li->atoms[b].index2], mask))
+ {
+ li_task.updateConstraintIndices2.push_back(b);
+ // mark the entry in updateConstraintIndicesRest as invalid, so we do not assign it again
+ b = -1;
+ }
+ }
+ }
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
- for (b = 0; b < li_task->nind_r; b++)
+ /* We need to copy all constraints which have not been assigned
+ * to a thread to a separate list which will be handled by one thread.
+ */
+ Task* li_m = &li->task[li->ntask];
+
+ li->haveSecondUpdateTask = false;
+ li_m->updateConstraintIndices1.clear();
+ for (int th = 0; th < li->ntask; th++)
{
- li_m->ind[li_m->nind++] = li_task->ind_r[b];
+ const Task& li_task = li->task[th];
+
+ for (int constraint : li_task.updateConstraintIndicesRest)
+ {
+ if (constraint >= 0)
+ {
+ li_m->updateConstraintIndices1.push_back(constraint);
+ }
+ else
+ {
+ li->haveSecondUpdateTask = true;
+ }
+ }
+
+ if (debug)
+ {
+ fprintf(debug,
+ "LINCS thread %d: %zu constraints, %zu constraints\n",
+ th,
+ li_task.updateConstraintIndices1.size(),
+ li_task.updateConstraintIndices2.size());
+ }
}
if (debug)
{
- fprintf(debug, "LINCS thread %d: %d constraints\n",
- th, li_task->nind);
+ fprintf(debug, "LINCS thread r: %zu constraints\n", li_m->updateConstraintIndices1.size());
}
}
-
- if (debug)
- {
- fprintf(debug, "LINCS thread r: %d constraints\n",
- li_m->nind);
- }
-}
-
-/*! \brief There is no realloc with alignment, so here we make one for reals.
- * Note that this function does not preserve the contents of the memory.
- */
-static void resize_real_aligned(real **ptr, int nelem)
-{
- sfree_aligned(*ptr);
- snew_aligned(*ptr, nelem, align_bytes);
}
//! Assign a constraint.
-static void assign_constraint(Lincs *li,
- int constraint_index,
- int a1, int a2,
- real lenA, real lenB,
- const t_blocka *at2con)
+static void assign_constraint(Lincs* li,
+ int constraint_index,
+ int a1,
+ int a2,
+ real lenA,
+ real lenB,
+ const ListOfLists<int>& at2con)
{
int con;
/* Make an mapping of local topology constraint index to LINCS index */
li->con_index[constraint_index] = con;
- li->bllen0[con] = lenA;
- li->ddist[con] = lenB - lenA;
+ li->bllen0[con] = lenA;
+ li->ddist[con] = lenB - lenA;
/* Set the length to the topology A length */
- li->bllen[con] = lenA;
- li->bla[2*con] = a1;
- li->bla[2*con+1] = a2;
+ li->bllen[con] = lenA;
+ li->atoms[con].index1 = a1;
+ li->atoms[con].index2 = a2;
/* Make space in the constraint connection matrix for constraints
* connected to both end of the current constraint.
*/
- li->ncc +=
- at2con->index[a1 + 1] - at2con->index[a1] - 1 +
- at2con->index[a2 + 1] - at2con->index[a2] - 1;
+ li->ncc += at2con[a1].ssize() - 1 + at2con[a2].ssize() - 1;
li->blnr[con + 1] = li->ncc;
/*! \brief Check if constraint with topology index constraint_index is connected
* to other constraints, and if so add those connected constraints to our task. */
-static void check_assign_connected(Lincs *li,
- const t_iatom *iatom,
- const t_idef &idef,
- bool bDynamics,
- int a1, int a2,
- const t_blocka *at2con)
+static void check_assign_connected(Lincs* li,
+ gmx::ArrayRef<const int> iatom,
+ const InteractionDefinitions& idef,
+ bool bDynamics,
+ int a1,
+ int a2,
+ const ListOfLists<int>& at2con)
{
/* Currently this function only supports constraint groups
* in which all constraints share at least one atom
* connected constraints. We need to assign those
* to the same task.
*/
- int end;
-
- for (end = 0; end < 2; end++)
+ for (int end = 0; end < 2; end++)
{
- int a, k;
-
- a = (end == 0 ? a1 : a2);
+ const int a = (end == 0 ? a1 : a2);
- for (k = at2con->index[a]; k < at2con->index[a + 1]; k++)
+ for (const int cc : at2con[a])
{
- int cc;
-
- cc = at2con->a[k];
/* Check if constraint cc has not yet been assigned */
if (li->con_index[cc] == -1)
{
- int type;
- real lenA, lenB;
-
- type = iatom[cc*3];
- lenA = idef.iparams[type].constr.dA;
- lenB = idef.iparams[type].constr.dB;
+ const int type = iatom[cc * 3];
+ const real lenA = idef.iparams[type].constr.dA;
+ const real lenB = idef.iparams[type].constr.dB;
if (bDynamics || lenA != 0 || lenB != 0)
{
- assign_constraint(li, cc, iatom[3*cc + 1], iatom[3*cc + 2], lenA, lenB, at2con);
+ assign_constraint(li, cc, iatom[3 * cc + 1], iatom[3 * cc + 2], lenA, lenB, at2con);
}
}
}
/*! \brief Check if constraint with topology index constraint_index is involved
* in a constraint triangle, and if so add the other two constraints
* in the triangle to our task. */
-static void check_assign_triangle(Lincs *li,
- const t_iatom *iatom,
- const t_idef &idef,
- bool bDynamics,
- int constraint_index,
- int a1, int a2,
- const t_blocka *at2con)
+static void check_assign_triangle(Lincs* li,
+ gmx::ArrayRef<const int> iatom,
+ const InteractionDefinitions& idef,
+ bool bDynamics,
+ int constraint_index,
+ int a1,
+ int a2,
+ const ListOfLists<int>& at2con)
{
- int nca, cc[32], ca[32], k;
+ int nca, cc[32], ca[32];
int c_triangle[2] = { -1, -1 };
nca = 0;
- for (k = at2con->index[a1]; k < at2con->index[a1 + 1]; k++)
+ for (const int c : at2con[a1])
{
- int c;
-
- c = at2con->a[k];
if (c != constraint_index)
{
int aa1, aa2;
- aa1 = iatom[c*3 + 1];
- aa2 = iatom[c*3 + 2];
+ aa1 = iatom[c * 3 + 1];
+ aa2 = iatom[c * 3 + 2];
if (aa1 != a1)
{
cc[nca] = c;
}
}
- for (k = at2con->index[a2]; k < at2con->index[a2 + 1]; k++)
+ for (const int c : at2con[a2])
{
- int c;
-
- c = at2con->a[k];
if (c != constraint_index)
{
int aa1, aa2, i;
- aa1 = iatom[c*3 + 1];
- aa2 = iatom[c*3 + 2];
+ aa1 = iatom[c * 3 + 1];
+ aa2 = iatom[c * 3 + 2];
if (aa1 != a2)
{
for (i = 0; i < nca; i++)
int i, type;
real lenA, lenB;
- i = c_triangle[end]*3;
+ i = c_triangle[end] * 3;
type = iatom[i];
lenA = idef.iparams[type].constr.dA;
lenB = idef.iparams[type].constr.dB;
}
//! Sets matrix indices.
-static void set_matrix_indices(Lincs *li,
- const Task *li_task,
- const t_blocka *at2con,
- bool bSortMatrix)
+static void set_matrix_indices(Lincs* li, const Task& li_task, const ListOfLists<int>& at2con, bool bSortMatrix)
{
- int b;
-
- for (b = li_task->b0; b < li_task->b1; b++)
+ for (int b = li_task.b0; b < li_task.b1; b++)
{
- int a1, a2, i, k;
-
- a1 = li->bla[b*2];
- a2 = li->bla[b*2 + 1];
+ const int a1 = li->atoms[b].index1;
+ const int a2 = li->atoms[b].index2;
- i = li->blnr[b];
- for (k = at2con->index[a1]; k < at2con->index[a1 + 1]; k++)
+ int i = li->blnr[b];
+ for (const int constraint : at2con[a1])
{
- int concon;
-
- concon = li->con_index[at2con->a[k]];
+ const int concon = li->con_index[constraint];
if (concon != b)
{
li->blbnb[i++] = concon;
}
}
- for (k = at2con->index[a2]; k < at2con->index[a2 + 1]; k++)
+ for (const int constraint : at2con[a2])
{
- int concon;
-
- concon = li->con_index[at2con->a[k]];
+ const int concon = li->con_index[constraint];
if (concon != b)
{
li->blbnb[i++] = concon;
if (bSortMatrix)
{
/* Order the blbnb matrix to optimize memory access */
- std::sort(&(li->blbnb[li->blnr[b]]), &(li->blbnb[li->blnr[b+1]]));
+ std::sort(li->blbnb.begin() + li->blnr[b], li->blbnb.begin() + li->blnr[b + 1]);
}
}
}
-void set_lincs(const t_idef &idef,
- const t_mdatoms &md,
- bool bDynamics,
- const t_commrec *cr,
- Lincs *li)
+void set_lincs(const InteractionDefinitions& idef,
+ const int numAtoms,
+ ArrayRef<const real> invmass,
+ const real lambda,
+ bool bDynamics,
+ const t_commrec* cr,
+ Lincs* li)
{
- int natoms;
- t_blocka at2con;
- t_iatom *iatom;
- int i, ncc_alloc_old, ncon_tot;
-
li->nc_real = 0;
li->nc = 0;
li->ncc = 0;
/* Zero the thread index ranges.
* Otherwise without local constraints we could return with old ranges.
*/
- for (i = 0; i < li->ntask; i++)
+ for (int i = 0; i < li->ntask; i++)
{
- li->task[i].b0 = 0;
- li->task[i].b1 = 0;
- li->task[i].nind = 0;
+ li->task[i].b0 = 0;
+ li->task[i].b1 = 0;
+ li->task[i].updateConstraintIndices1.clear();
}
if (li->ntask > 1)
{
- li->task[li->ntask].nind = 0;
+ li->task[li->ntask].updateConstraintIndices1.clear();
}
/* This is the local topology, so there are only F_CONSTR constraints */
- if (idef.il[F_CONSTR].nr == 0)
+ if (idef.il[F_CONSTR].empty())
{
/* There are no constraints,
* we do not need to fill any data structures.
fprintf(debug, "Building the LINCS connectivity\n");
}
- if (DOMAINDECOMP(cr))
+ int natoms;
+ if (haveDDAtomOrdering(*cr))
{
if (cr->dd->constraints)
{
int start;
- dd_get_constraint_range(cr->dd, &start, &natoms);
+ dd_get_constraint_range(*cr->dd, &start, &natoms);
}
else
{
}
else
{
- natoms = md.homenr;
+ natoms = numAtoms;
}
- at2con = make_at2con(natoms, idef.il, idef.iparams,
- flexibleConstraintTreatment(bDynamics));
+ const ListOfLists<int> at2con =
+ make_at2con(natoms, idef.il, idef.iparams, flexibleConstraintTreatment(bDynamics));
- ncon_tot = idef.il[F_CONSTR].nr/3;
+ const int ncon_tot = idef.il[F_CONSTR].size() / 3;
/* Ensure we have enough padding for aligned loads for each thread */
- if (ncon_tot + li->ntask*simd_width > li->nc_alloc || li->nc_alloc == 0)
- {
- int old_alloc = li->nc_alloc;
- li->nc_alloc = over_alloc_dd(ncon_tot + li->ntask*simd_width);
- srenew(li->con_index, li->nc_alloc);
- resize_real_aligned(&li->bllen0, li->nc_alloc);
- resize_real_aligned(&li->ddist, li->nc_alloc);
- srenew(li->bla, 2*li->nc_alloc);
- resize_real_aligned(&li->blc, li->nc_alloc);
- resize_real_aligned(&li->blc1, li->nc_alloc);
- srenew(li->blnr, li->nc_alloc + 1);
- resize_real_aligned(&li->bllen, li->nc_alloc);
- srenew(li->tmpv, li->nc_alloc);
- /* Need to clear the SIMD padding */
- for (int i = old_alloc; i < li->nc_alloc; i++)
- {
- clear_rvec(li->tmpv[i]);
- }
- if (DOMAINDECOMP(cr))
- {
- srenew(li->nlocat, li->nc_alloc);
- }
- resize_real_aligned(&li->tmp1, li->nc_alloc);
- resize_real_aligned(&li->tmp2, li->nc_alloc);
- resize_real_aligned(&li->tmp3, li->nc_alloc);
- resize_real_aligned(&li->tmp4, li->nc_alloc);
- resize_real_aligned(&li->mlambda, li->nc_alloc);
- }
-
- iatom = idef.il[F_CONSTR].iatoms;
-
- ncc_alloc_old = li->ncc_alloc;
- li->blnr[0] = li->ncc;
+ const int numEntries = ncon_tot + li->ntask * simd_width;
+ li->con_index.resize(numEntries);
+ li->bllen0.resize(numEntries);
+ li->ddist.resize(numEntries);
+ li->atoms.resize(numEntries);
+ li->blc.resize(numEntries);
+ li->blc1.resize(numEntries);
+ li->blnr.resize(numEntries + 1);
+ li->bllen.resize(numEntries);
+ li->tmpv.resizeWithPadding(numEntries);
+ if (haveDDAtomOrdering(*cr))
+ {
+ li->nlocat.resize(numEntries);
+ }
+ li->tmp1.resize(numEntries);
+ li->tmp2.resize(numEntries);
+ li->tmp3.resize(numEntries);
+ li->tmp4.resize(numEntries);
+ li->mlambda.resize(numEntries);
+
+ gmx::ArrayRef<const int> iatom = idef.il[F_CONSTR].iatoms;
+
+ li->blnr[0] = li->ncc;
/* Assign the constraints for li->ntask LINCS tasks.
* We target a uniform distribution of constraints over the tasks.
* (e.g. because we are doing EM) we get imbalance, but since that doesn't
* happen during normal MD, that's ok.
*/
- int ncon_assign, ncon_target, con, th;
/* Determine the number of constraints we need to assign here */
- ncon_assign = ncon_tot;
+ int ncon_assign = ncon_tot;
if (!bDynamics)
{
/* With energy minimization, flexible constraints are ignored
/* Set the target constraint count per task to exactly uniform,
* this might be overridden below.
*/
- ncon_target = (ncon_assign + li->ntask - 1)/li->ntask;
+ int ncon_target = (ncon_assign + li->ntask - 1) / li->ntask;
/* Mark all constraints as unassigned by setting their index to -1 */
- for (con = 0; con < ncon_tot; con++)
+ for (int con = 0; con < ncon_tot; con++)
{
li->con_index[con] = -1;
}
- con = 0;
- for (th = 0; th < li->ntask; th++)
+ int con = 0;
+ for (int th = 0; th < li->ntask; th++)
{
- Task *li_task;
+ Task* li_task;
li_task = &li->task[th];
* There are several ways to round here, we choose the one
* that alternates block sizes, which helps with Intel HT.
*/
- ncon_target = ((ncon_assign*(th + 1))/li->ntask - li->nc_real + GMX_SIMD_REAL_WIDTH - 1) & ~(GMX_SIMD_REAL_WIDTH - 1);
+ ncon_target = ((ncon_assign * (th + 1)) / li->ntask - li->nc_real + GMX_SIMD_REAL_WIDTH - 1)
+ & ~(GMX_SIMD_REAL_WIDTH - 1);
}
-#endif // GMX_SIMD==2 && GMX_SIMD_HAVE_REAL
+#endif // GMX_SIMD==2 && GMX_SIMD_HAVE_REAL
/* Continue filling the arrays where we left off with the previous task,
* including padding for SIMD.
*/
li_task->b0 = li->nc;
+ gmx::ArrayRef<const t_iparams> iparams = idef.iparams;
+
while (con < ncon_tot && li->nc - li_task->b0 < ncon_target)
{
if (li->con_index[con] == -1)
int type, a1, a2;
real lenA, lenB;
- type = iatom[3*con];
- a1 = iatom[3*con + 1];
- a2 = iatom[3*con + 2];
- lenA = idef.iparams[type].constr.dA;
- lenB = idef.iparams[type].constr.dB;
+ type = iatom[3 * con];
+ a1 = iatom[3 * con + 1];
+ a2 = iatom[3 * con + 2];
+ lenA = iparams[type].constr.dA;
+ lenB = iparams[type].constr.dB;
/* Skip the flexible constraints when not doing dynamics */
if (bDynamics || lenA != 0 || lenB != 0)
{
- assign_constraint(li, con, a1, a2, lenA, lenB, &at2con);
+ assign_constraint(li, con, a1, a2, lenA, lenB, at2con);
if (li->ntask > 1 && !li->bTaskDep)
{
/* We can generate independent tasks. Check if we
* need to assign connected constraints to our task.
*/
- check_assign_connected(li, iatom, idef, bDynamics,
- a1, a2, &at2con);
+ check_assign_connected(li, iatom, idef, bDynamics, a1, a2, at2con);
}
if (li->ntask > 1 && li->ncg_triangle > 0)
{
/* Ensure constraints in one triangle are assigned
* to the same task.
*/
- check_assign_triangle(li, iatom, idef, bDynamics,
- con, a1, a2, &at2con);
+ check_assign_triangle(li, iatom, idef, bDynamics, con, a1, a2, at2con);
}
}
}
*/
int i, last;
- li->nc = ((li_task->b1 + simd_width - 1)/simd_width)*simd_width;
+ li->nc = ((li_task->b1 + simd_width - 1) / simd_width) * simd_width;
last = li_task->b1 - 1;
for (i = li_task->b1; i < li->nc; i++)
{
- li->bla[i*2 ] = li->bla[last*2 ];
- li->bla[i*2 + 1] = li->bla[last*2 + 1];
- li->bllen0[i] = li->bllen0[last];
- li->ddist[i] = li->ddist[last];
- li->bllen[i] = li->bllen[last];
- li->blnr[i + 1] = li->blnr[last + 1];
+ li->atoms[i] = li->atoms[last];
+ li->bllen0[i] = li->bllen0[last];
+ li->ddist[i] = li->ddist[last];
+ li->bllen[i] = li->bllen[last];
+ li->blnr[i + 1] = li->blnr[last + 1];
}
}
if (debug)
{
- fprintf(debug, "LINCS task %d constraints %d - %d\n",
- th, li_task->b0, li_task->b1);
+ fprintf(debug, "LINCS task %d constraints %d - %d\n", th, li_task->b0, li_task->b1);
}
}
/* Without DD we order the blbnb matrix to optimize memory access.
* With DD the overhead of sorting is more than the gain during access.
*/
- bSortMatrix = !DOMAINDECOMP(cr);
+ bSortMatrix = !haveDDAtomOrdering(*cr);
- if (li->ncc > li->ncc_alloc)
- {
- li->ncc_alloc = over_alloc_small(li->ncc);
- srenew(li->blbnb, li->ncc_alloc);
- }
+ li->blbnb.resize(li->ncc);
#pragma omp parallel for num_threads(li->ntask) schedule(static)
- for (th = 0; th < li->ntask; th++)
+ for (int th = 0; th < li->ntask; th++)
{
try
{
- Task *li_task;
+ Task& li_task = li->task[th];
- li_task = &li->task[th];
-
- if (li->ncg_triangle > 0 &&
- li_task->b1 - li_task->b0 > li_task->tri_alloc)
+ if (li->ncg_triangle > 0)
{
/* This is allocating too much, but it is difficult to improve */
- li_task->tri_alloc = over_alloc_dd(li_task->b1 - li_task->b0);
- srenew(li_task->triangle, li_task->tri_alloc);
- srenew(li_task->tri_bits, li_task->tri_alloc);
+ li_task.triangle.resize(li_task.b1 - li_task.b0);
+ li_task.tri_bits.resize(li_task.b1 - li_task.b0);
}
- set_matrix_indices(li, li_task, &at2con, bSortMatrix);
+ set_matrix_indices(li, li_task, at2con, bSortMatrix);
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
- done_blocka(&at2con);
-
if (cr->dd == nullptr)
{
/* Since the matrix is static, we should free some memory */
- li->ncc_alloc = li->ncc;
- srenew(li->blbnb, li->ncc_alloc);
+ li->blbnb.resize(li->ncc);
}
- if (li->ncc_alloc > ncc_alloc_old)
- {
- srenew(li->blmf, li->ncc_alloc);
- srenew(li->blmf1, li->ncc_alloc);
- srenew(li->tmpncc, li->ncc_alloc);
- }
+ li->blmf.resize(li->ncc);
+ li->blmf1.resize(li->ncc);
+ li->tmpncc.resize(li->ncc);
gmx::ArrayRef<const int> nlocat_dd = dd_constraints_nlocalatoms(cr->dd);
if (!nlocat_dd.empty())
}
else
{
- li->nlocat = nullptr;
+ li->nlocat.clear();
}
if (debug)
{
- fprintf(debug, "Number of constraints is %d, padded %d, couplings %d\n",
- li->nc_real, li->nc, li->ncc);
+ fprintf(debug, "Number of constraints is %d, padded %d, couplings %d\n", li->nc_real, li->nc, li->ncc);
}
if (li->ntask > 1)
{
- lincs_thread_setup(li, md.nr);
+ lincs_thread_setup(li, numAtoms);
}
- set_lincs_matrix(li, md.invmass, md.lambda);
+ set_lincs_matrix(li, invmass, lambda);
}
//! Issues a warning when LINCS constraints cannot be satisfied.
-static void lincs_warning(gmx_domdec_t *dd, const rvec *x, rvec *xprime, t_pbc *pbc,
- int ncons, const int *bla, real *bllen, real wangle,
- int maxwarn, int *warncount)
+static void lincs_warning(gmx_domdec_t* dd,
+ ArrayRef<const RVec> x,
+ ArrayRef<const RVec> xprime,
+ t_pbc* pbc,
+ int ncons,
+ gmx::ArrayRef<const AtomPair> atoms,
+ gmx::ArrayRef<const real> bllen,
+ real wangle,
+ int maxwarn,
+ int* warncount)
{
- int b, i, j;
- rvec v0, v1;
- real wfac, d0, d1, cosine;
-
- wfac = std::cos(DEG2RAD*wangle);
+ real wfac = std::cos(gmx::c_deg2Rad * wangle);
fprintf(stderr,
"bonds that rotated more than %g degrees:\n"
" atom 1 atom 2 angle previous, current, constraint length\n",
wangle);
- for (b = 0; b < ncons; b++)
+ for (int b = 0; b < ncons; b++)
{
- i = bla[2*b];
- j = bla[2*b+1];
+ const int i = atoms[b].index1;
+ const int j = atoms[b].index2;
+ rvec v0;
+ rvec v1;
if (pbc)
{
pbc_dx_aiuc(pbc, x[i], x[j], v0);
rvec_sub(x[i], x[j], v0);
rvec_sub(xprime[i], xprime[j], v1);
}
- d0 = norm(v0);
- d1 = norm(v1);
- cosine = ::iprod(v0, v1)/(d0*d1);
+ real d0 = norm(v0);
+ real d1 = norm(v1);
+ real cosine = ::iprod(v0, v1) / (d0 * d1);
if (cosine < wfac)
{
fprintf(stderr,
" %6d %6d %5.1f %8.4f %8.4f %8.4f\n",
- ddglatnr(dd, i), ddglatnr(dd, j),
- RAD2DEG*std::acos(cosine), d0, d1, bllen[b]);
+ ddglatnr(dd, i),
+ ddglatnr(dd, j),
+ gmx::c_rad2Deg * std::acos(cosine),
+ d0,
+ d1,
+ bllen[b]);
if (!std::isfinite(d1))
{
gmx_fatal(FARGS, "Bond length not finite.");
}
if (*warncount > maxwarn)
{
- too_many_constraint_warnings(econtLINCS, *warncount);
+ too_many_constraint_warnings(ConstraintAlgorithm::Lincs, *warncount);
}
}
-//! Determine how well the constraints have been satisfied.
-static void cconerr(const Lincs *lincsd,
- rvec *x, t_pbc *pbc,
- real *ncons_loc, real *ssd, real *max, int *imax)
+//! Status information about how well LINCS satisfied the constraints in this domain
+struct LincsDeviations
{
- const int *bla, *nlocat;
- const real *bllen;
- real ma, ssd2;
- int count, im, task;
+ //! The maximum over all bonds in this domain of the relative deviation in bond lengths
+ real maxDeviation = 0;
+ //! Sum over all bonds in this domain of the squared relative deviation
+ real sumSquaredDeviation = 0;
+ //! Index of bond with max deviation
+ int indexOfMaxDeviation = -1;
+ //! Number of bonds constrained in this domain
+ int numConstraints = 0;
+};
- bla = lincsd->bla;
- bllen = lincsd->bllen;
- nlocat = lincsd->nlocat;
+//! Determine how well the constraints have been satisfied.
+static LincsDeviations makeLincsDeviations(const Lincs& lincsd, ArrayRef<const RVec> x, const t_pbc* pbc)
+{
+ LincsDeviations result;
+ const ArrayRef<const AtomPair> atoms = lincsd.atoms;
+ const ArrayRef<const real> bllen = lincsd.bllen;
+ const ArrayRef<const int> nlocat = lincsd.nlocat;
- ma = 0;
- ssd2 = 0;
- im = 0;
- count = 0;
- for (task = 0; task < lincsd->ntask; task++)
+ for (int task = 0; task < lincsd.ntask; task++)
{
- int b;
-
- for (b = lincsd->task[task].b0; b < lincsd->task[task].b1; b++)
+ for (int b = lincsd.task[task].b0; b < lincsd.task[task].b1; b++)
{
- real len, d, r2;
rvec dx;
-
if (pbc)
{
- pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
+ pbc_dx_aiuc(pbc, x[atoms[b].index1], x[atoms[b].index2], dx);
}
else
{
- rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
+ rvec_sub(x[atoms[b].index1], x[atoms[b].index2], dx);
}
- r2 = ::norm2(dx);
- len = r2*gmx::invsqrt(r2);
- d = std::abs(len/bllen[b]-1);
- if (d > ma && (nlocat == nullptr || nlocat[b]))
+ real r2 = ::norm2(dx);
+ real len = r2 * gmx::invsqrt(r2);
+ real d = std::abs(len / bllen[b] - 1.0_real);
+ if (d > result.maxDeviation && (nlocat.empty() || nlocat[b]))
{
- ma = d;
- im = b;
+ result.maxDeviation = d;
+ result.indexOfMaxDeviation = b;
}
- if (nlocat == nullptr)
+ if (nlocat.empty())
{
- ssd2 += d*d;
- count++;
+ result.sumSquaredDeviation += d * d;
+ result.numConstraints++;
}
else
{
- ssd2 += nlocat[b]*d*d;
- count += nlocat[b];
+ result.sumSquaredDeviation += nlocat[b] * d * d;
+ result.numConstraints += nlocat[b];
}
}
}
- *ncons_loc = (nlocat ? 0.5 : 1)*count;
- *ssd = (nlocat ? 0.5 : 1)*ssd2;
- *max = ma;
- *imax = im;
+ if (!nlocat.empty())
+ {
+ result.numConstraints /= 2;
+ result.sumSquaredDeviation *= 0.5;
+ }
+ return result;
}
-bool constrain_lincs(bool computeRmsd,
- const t_inputrec &ir,
- int64_t step,
- Lincs *lincsd, const t_mdatoms &md,
- const t_commrec *cr,
- const gmx_multisim_t *ms,
- const rvec *x, rvec *xprime, rvec *min_proj,
- matrix box, t_pbc *pbc,
- real lambda, real *dvdlambda,
- real invdt, rvec *v,
- bool bCalcVir, tensor vir_r_m_dr,
- ConstraintVariable econq,
- t_nrnb *nrnb,
- int maxwarn, int *warncount)
+bool constrain_lincs(bool computeRmsd,
+ const t_inputrec& ir,
+ int64_t step,
+ Lincs* lincsd,
+ ArrayRef<const real> invmass,
+ const t_commrec* cr,
+ const gmx_multisim_t* ms,
+ ArrayRefWithPadding<const RVec> xPadded,
+ ArrayRefWithPadding<RVec> xprimePadded,
+ ArrayRef<RVec> min_proj,
+ const matrix box,
+ t_pbc* pbc,
+ const bool hasMassPerturbed,
+ real lambda,
+ real* dvdlambda,
+ real invdt,
+ ArrayRef<RVec> v,
+ bool bCalcVir,
+ tensor vir_r_m_dr,
+ ConstraintVariable econq,
+ t_nrnb* nrnb,
+ int maxwarn,
+ int* warncount)
{
- gmx_bool bCalcDHDL;
- char buf2[22], buf3[STRLEN];
- int i, p_imax;
- real ncons_loc, p_ssd, p_max = 0;
- rvec dx;
- bool bOK, bWarn;
-
- bOK = TRUE;
+ bool bOK = TRUE;
/* This boolean should be set by a flag passed to this routine.
* We can also easily check if any constraint length is changed,
* if not dH/dlambda=0 and we can also set the boolean to FALSE.
*/
- bCalcDHDL = (ir.efep != efepNO && dvdlambda != nullptr);
+ bool bCalcDHDL = (ir.efep != FreeEnergyPerturbationType::No && dvdlambda != nullptr);
if (lincsd->nc == 0 && cr->dd == nullptr)
{
- if (computeRmsd)
- {
- lincsd->rmsdData = {{0}};
- }
-
return bOK;
}
+ ArrayRef<const RVec> x = xPadded.unpaddedArrayRef();
+ ArrayRef<RVec> xprime = xprimePadded.unpaddedArrayRef();
+
if (econq == ConstraintVariable::Positions)
{
/* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
* also with efep!=fepNO.
*/
- if (ir.efep != efepNO)
+ if (ir.efep != FreeEnergyPerturbationType::No)
{
- if (md.nMassPerturbed && lincsd->matlam != md.lambda)
+ if (hasMassPerturbed && lincsd->matlam != lambda)
{
- set_lincs_matrix(lincsd, md.invmass, md.lambda);
+ set_lincs_matrix(lincsd, invmass, lambda);
}
- for (i = 0; i < lincsd->nc; i++)
+ for (int i = 0; i < lincsd->nc; i++)
{
- lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
+ lincsd->bllen[i] = lincsd->bllen0[i] + lambda * lincsd->ddist[i];
}
}
/* Set the flexible constraint lengths to the old lengths */
if (pbc != nullptr)
{
- for (i = 0; i < lincsd->nc; i++)
+ for (int i = 0; i < lincsd->nc; i++)
{
if (lincsd->bllen[i] == 0)
{
- pbc_dx_aiuc(pbc, x[lincsd->bla[2*i]], x[lincsd->bla[2*i+1]], dx);
+ rvec dx;
+ pbc_dx_aiuc(pbc, x[lincsd->atoms[i].index1], x[lincsd->atoms[i].index2], dx);
lincsd->bllen[i] = norm(dx);
}
}
}
else
{
- for (i = 0; i < lincsd->nc; i++)
+ for (int i = 0; i < lincsd->nc; i++)
{
if (lincsd->bllen[i] == 0)
{
- lincsd->bllen[i] =
- std::sqrt(distance2(x[lincsd->bla[2*i]],
- x[lincsd->bla[2*i+1]]));
+ lincsd->bllen[i] = std::sqrt(
+ distance2(x[lincsd->atoms[i].index1], x[lincsd->atoms[i].index2]));
}
}
}
}
- if (debug)
+ const bool printDebugOutput = ((debug != nullptr) && lincsd->nc > 0);
+ if (printDebugOutput)
{
- cconerr(lincsd, xprime, pbc,
- &ncons_loc, &p_ssd, &p_max, &p_imax);
+ LincsDeviations deviations = makeLincsDeviations(*lincsd, xprime, pbc);
+ fprintf(debug, " Rel. Constraint Deviation: RMS MAX between atoms\n");
+ fprintf(debug,
+ " Before LINCS %.6f %.6f %6d %6d\n",
+ std::sqrt(deviations.sumSquaredDeviation / deviations.numConstraints),
+ deviations.maxDeviation,
+ ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index1),
+ ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index2));
}
/* This bWarn var can be updated by multiple threads
* at the same time. But as we only need to detect
* if a warning occurred or not, this is not an issue.
*/
- bWarn = FALSE;
+ bool bWarn = FALSE;
/* The OpenMP parallel region of constrain_lincs for coords */
#pragma omp parallel num_threads(lincsd->ntask)
clear_mat(lincsd->task[th].vir_r_m_dr);
- do_lincs(x, xprime, box, pbc, lincsd, th,
- md.invmass, cr,
+ do_lincs(xPadded,
+ xprimePadded,
+ box,
+ pbc,
+ lincsd,
+ th,
+ invmass,
+ cr,
bCalcDHDL,
- ir.LincsWarnAngle, &bWarn,
- invdt, v, bCalcVir,
+ ir.LincsWarnAngle,
+ &bWarn,
+ invdt,
+ v,
+ bCalcVir,
th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
- if (debug && lincsd->nc > 0)
- {
- fprintf(debug, " Rel. Constraint Deviation: RMS MAX between atoms\n");
- fprintf(debug, " Before LINCS %.6f %.6f %6d %6d\n",
- std::sqrt(p_ssd/ncons_loc), p_max,
- ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
- ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
- }
- if (computeRmsd || debug)
+ if (computeRmsd || printDebugOutput || bWarn)
{
- cconerr(lincsd, xprime, pbc,
- &ncons_loc, &p_ssd, &p_max, &p_imax);
- lincsd->rmsdData[0] = ncons_loc;
- lincsd->rmsdData[1] = p_ssd;
- }
- else
- {
- lincsd->rmsdData = {{0}};
- }
- if (debug && lincsd->nc > 0)
- {
- fprintf(debug,
- " After LINCS %.6f %.6f %6d %6d\n\n",
- std::sqrt(p_ssd/ncons_loc), p_max,
- ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
- ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
- }
+ LincsDeviations deviations = makeLincsDeviations(*lincsd, xprime, pbc);
- if (bWarn)
- {
- if (maxwarn < INT_MAX)
+ if (computeRmsd)
{
- cconerr(lincsd, xprime, pbc,
- &ncons_loc, &p_ssd, &p_max, &p_imax);
- if (isMultiSim(ms))
+ if (lincsd->callbackToRequireReduction.has_value())
{
- sprintf(buf3, " in simulation %d", ms->sim);
+ // This is reduced across domains in compute_globals and
+ // reported to the log file.
+ lincsd->rmsdReductionBuffer[0] = deviations.numConstraints;
+ lincsd->rmsdReductionBuffer[1] = deviations.sumSquaredDeviation;
+
+ // Call the ObservablesReducer via the callback it
+ // gave us for the purpose.
+ ObservablesReducerStatus status =
+ lincsd->callbackToRequireReduction.value()(ReductionRequirement::Soon);
+ GMX_RELEASE_ASSERT(status == ObservablesReducerStatus::ReadyToReduce,
+ "The LINCS RMSD is computed after observables have been "
+ "reduced, please reorder them.");
}
else
{
- buf3[0] = 0;
+ // Compute the deviation directly
+ lincsd->constraintRmsDeviation =
+ std::sqrt(deviations.sumSquaredDeviation / deviations.numConstraints);
+ }
+ }
+ if (printDebugOutput)
+ {
+ fprintf(debug,
+ " After LINCS %.6f %.6f %6d %6d\n\n",
+ std::sqrt(deviations.sumSquaredDeviation / deviations.numConstraints),
+ deviations.maxDeviation,
+ ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index1),
+ ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index2));
+ }
+
+ if (bWarn)
+ {
+ if (maxwarn < INT_MAX)
+ {
+ std::string simMesg;
+ if (isMultiSim(ms))
+ {
+ simMesg += gmx::formatString(" in simulation %d", ms->simulationIndex_);
+ }
+ fprintf(stderr,
+ "\nStep %" PRId64
+ ", time %g (ps) LINCS WARNING%s\n"
+ "relative constraint deviation after LINCS:\n"
+ "rms %.6f, max %.6f (between atoms %d and %d)\n",
+ step,
+ ir.init_t + step * ir.delta_t,
+ simMesg.c_str(),
+ std::sqrt(deviations.sumSquaredDeviation / deviations.numConstraints),
+ deviations.maxDeviation,
+ ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index1),
+ ddglatnr(cr->dd, lincsd->atoms[deviations.indexOfMaxDeviation].index2));
+
+ lincs_warning(
+ cr->dd, x, xprime, pbc, lincsd->nc, lincsd->atoms, lincsd->bllen, ir.LincsWarnAngle, maxwarn, warncount);
}
- fprintf(stderr,
- "\nStep %s, time %g (ps) LINCS WARNING%s\n"
- "relative constraint deviation after LINCS:\n"
- "rms %.6f, max %.6f (between atoms %d and %d)\n",
- gmx_step_str(step, buf2), ir.init_t+step*ir.delta_t,
- buf3,
- std::sqrt(p_ssd/ncons_loc), p_max,
- ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
- ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
-
- lincs_warning(cr->dd, x, xprime, pbc,
- lincsd->nc, lincsd->bla, lincsd->bllen,
- ir.LincsWarnAngle, maxwarn, warncount);
+ bOK = (deviations.maxDeviation < 0.5);
}
- bOK = (p_max < 0.5);
}
if (lincsd->ncg_flex)
{
- for (i = 0; (i < lincsd->nc); i++)
+ for (int i = 0; (i < lincsd->nc); i++)
{
if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
{
{
int th = gmx_omp_get_thread_num();
- do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
- md.invmass, econq, bCalcDHDL,
- bCalcVir, th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
+ do_lincsp(xPadded,
+ xprimePadded,
+ min_proj,
+ pbc,
+ lincsd,
+ th,
+ invmass,
+ econq,
+ bCalcDHDL,
+ bCalcVir,
+ th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
}
{
/* dhdlambda contains dH/dlambda*dt^2, correct for this */
/* TODO This should probably use invdt, so that sd integrator scaling works properly */
- dhdlambda /= ir.delta_t*ir.delta_t;
+ dhdlambda /= ir.delta_t * ir.delta_t;
}
*dvdlambda += dhdlambda;
}
if (bCalcVir && lincsd->ntask > 1)
{
- for (i = 1; i < lincsd->ntask; i++)
+ for (int i = 1; i < lincsd->ntask; i++)
{
m_add(vir_r_m_dr, lincsd->task[i].vir_r_m_dr, vir_r_m_dr);
}
/* count assuming nit=1 */
inc_nrnb(nrnb, eNR_LINCS, lincsd->nc_real);
- inc_nrnb(nrnb, eNR_LINCSMAT, (2+lincsd->nOrder)*lincsd->ncc);
+ inc_nrnb(nrnb, eNR_LINCSMAT, (2 + lincsd->nOrder) * lincsd->ncc);
if (lincsd->ntriangle > 0)
{
- inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder*lincsd->ncc_triangle);
+ inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder * lincsd->ncc_triangle);
}
- if (v)
+ if (!v.empty())
{
- inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc_real*2);
+ inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc_real * 2);
}
if (bCalcVir)
{
return bOK;
}
-} // namespace gmx
+} // namespace gmx