Merge branch release-5-1

[alexxy/gromacs.git] / src / gromacs / mdlib / clincs.cpp
diff --git a/src/gromacs/mdlib/clincs.cpp b/src/gromacs/mdlib/clincs.cpp

index aab220b83a8c5471b4f2daf3843b1347a00f0631..5a83907139614c665b90c54c27206b6805fb63ba 100644 (file)
--- a/src/gromacs/mdlib/clincs.cpp
+++ b/src/gromacs/mdlib/clincs.cpp
@@ -40,40 +40,42 @@
  #include "config.h"
  
  #include <assert.h>
-#include <math.h>
  #include <stdlib.h>
  
+#include <cmath>
+
  #include <algorithm>
  
  #include "gromacs/domdec/domdec.h"
-#include "gromacs/fileio/gmxfio.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/copyrite.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/fileio/copyrite.h"
+#include "gromacs/gmxlib/gmx_omp_nthreads.h"
  #include "gromacs/legacyheaders/nrnb.h"
  #include "gromacs/legacyheaders/types/commrec.h"
  #include "gromacs/math/units.h"
  #include "gromacs/math/vec.h"
-#include "gromacs/pbcutil/pbc-simd.h"
+#include "gromacs/mdlib/constr.h"
+#include "gromacs/mdlib/mdrun.h"
  #include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pbcutil/pbc-simd.h"
  #include "gromacs/simd/simd.h"
  #include "gromacs/simd/simd_math.h"
  #include "gromacs/simd/vector_operations.h"
  #include "gromacs/topology/block.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/utility/bitmask.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxomp.h"
  #include "gromacs/utility/smalloc.h"
  
  /* MSVC 2010 produces buggy SIMD PBC code, disable SIMD for MSVC <= 2010 */
-#if defined GMX_SIMD_HAVE_REAL && !(defined _MSC_VER && _MSC_VER < 1700) && !defined(__ICL)
-#define LINCS_SIMD
+#if GMX_SIMD_HAVE_REAL && !(defined _MSC_VER && _MSC_VER < 1700) && !defined(__ICL)
+#    define LINCS_SIMD
  #endif
  
  
-#if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256)
+#if GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256
  
  // This was originally work-in-progress for augmenting the SIMD module with
  // masked load/store operations. Instead, that turned into and extended SIMD
@@ -134,7 +136,7 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_double_t   row0,
  }
  
  
-#    ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
+#    if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
  #        define gmx_hack_simd4_load3_r(mem)      _mm256_maskload_pd((mem), _mm_castsi128_ps(_mm256_set_epi32(0, 0, -1, -1, -1, -1, -1, -1)))
  #        define gmx_hack_simd4_store3_r(mem, x)   _mm256_maskstore_pd((mem), _mm_castsi128_ps(_mm256_set_epi32(0, 0, -1, -1, -1, -1, -1, -1)), (x))
  #    else
@@ -194,7 +196,7 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_float_t   row0,
      a[6] = _mm256_extractf128_ps(row2, 1);
      a[7] = _mm256_extractf128_ps(row3, 1);
  }
-#ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
+#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
  #        define gmx_hack_simd4_load3_r(mem)      _mm_maskload_ps((mem), _mm_castsi256_pd(_mm_set_epi32(0, -1, -1, -1)))
  #        define gmx_hack_simd4_store3_r(mem, x)   _mm_maskstore_ps((mem), _mm_castsi256_pd(_mm_set_epi32(0, -1, -1, -1)), (x))
  #else
@@ -206,7 +208,7 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_float_t   row0,
  
  #endif /* AVX */
  
-#ifdef GMX_SIMD_HAVE_REAL
+#if GMX_SIMD_HAVE_REAL
  /*! \brief Store differences between indexed rvecs in SIMD registers.
   *
   * Returns SIMD register with the difference vectors:
@@ -227,7 +229,7 @@ gmx_hack_simd_gather_rvec_dist_pair_index(const rvec      *v,
                                            gmx_simd_real_t *dy,
                                            gmx_simd_real_t *dz)
  {
-#if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256)
+#if GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256
      int              i;
      gmx_simd4_real_t d[GMX_SIMD_REAL_WIDTH];
      gmx_simd_real_t  tmp;
@@ -279,7 +281,7 @@ gmx_simd_store_vec_to_rvec(gmx_simd_real_t  x,
                             real gmx_unused *buf,
                             rvec            *v)
  {
-#if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256)
+#if GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256
      int              i;
      gmx_simd4_real_t s4[GMX_SIMD_REAL_WIDTH];
      gmx_simd_real_t  zero = gmx_simd_setzero_r();
@@ -385,8 +387,9 @@ static const int simd_width = GMX_SIMD_REAL_WIDTH;
  #else
  static const int simd_width = 1;
  #endif
-/* We can't use small memory alignments on many systems, so use min 64 bytes*/
-static const int align_bytes = std::max<int>(64, simd_width*sizeof(real));
+/* Align to 128 bytes, consistent with the current implementation of
+   AlignedAllocator, which currently forces 128 byte alignment. */
+static const int align_bytes = 128;
  
  real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
  {
@@ -1458,8 +1461,12 @@ void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda)
  #pragma omp parallel for reduction(+: ntriangle, ncc_triangle) num_threads(li->ntask) schedule(static)
      for (th = 0; th < li->ntask; th++)
      {
-        set_lincs_matrix_task(li, &li->task[th], invmass, &ncc_triangle);
-        ntriangle = li->task[th].ntriangle;
+        try
+        {
+            set_lincs_matrix_task(li, &li->task[th], invmass, &ncc_triangle);
+            ntriangle = li->task[th].ntriangle;
+        }
+        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
      }
      li->ntriangle    = ntriangle;
      li->ncc_triangle = ncc_triangle;
@@ -1478,7 +1485,8 @@ void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda)
      li->matlam = lambda;
  }
  
-static int count_triangle_constraints(t_ilist *ilist, t_blocka *at2con)
+static int count_triangle_constraints(const t_ilist  *ilist,
+                                      const t_blocka *at2con)
  {
      int      ncon1, ncon_tot;
      int      c0, a00, a01, n1, c1, a10, a11, ac1, n2, c2, a20, a21;
@@ -1576,8 +1584,8 @@ static int int_comp(const void *a, const void *b)
      return (*(int *)a) - (*(int *)b);
  }
  
-gmx_lincsdata_t init_lincs(FILE *fplog, gmx_mtop_t *mtop,
-                           int nflexcon_global, t_blocka *at2con,
+gmx_lincsdata_t init_lincs(FILE *fplog, const gmx_mtop_t *mtop,
+                           int nflexcon_global, const t_blocka *at2con,
                             gmx_bool bPLINCS, int nIter, int nProjOrder)
  {
      struct gmx_lincsdata *li;
@@ -1673,9 +1681,13 @@ gmx_lincsdata_t init_lincs(FILE *fplog, gmx_mtop_t *mtop,
  #pragma omp parallel for num_threads(li->ntask)
      for (th = 0; th < li->ntask; th++)
      {
-        /* Per thread SIMD load buffer for loading 2 simd_width rvecs */
-        snew_aligned(li->task[th].simd_buf, 2*simd_width*DIM,
-                     align_bytes);
+        try
+        {
+            /* Per thread SIMD load buffer for loading 2 simd_width rvecs */
+            snew_aligned(li->task[th].simd_buf, 2*simd_width*DIM,
+                         align_bytes);
+        }
+        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
      }
  
      if (bPLINCS || li->ncg_triangle > 0)
@@ -1752,40 +1764,44 @@ static void lincs_thread_setup(struct gmx_lincsdata *li, int natoms)
  #pragma omp parallel for num_threads(li->ntask) schedule(static)
      for (th = 0; th < li->ntask; th++)
      {
-        lincs_task_t  *li_task;
-        gmx_bitmask_t  mask;
-        int            b;
-
-        li_task = &li->task[th];
-
-        if (li_task->b1 - li_task->b0 > li_task->ind_nalloc)
+        try
          {
-            li_task->ind_nalloc = over_alloc_large(li_task->b1-li_task->b0);
-            srenew(li_task->ind, li_task->ind_nalloc);
-            srenew(li_task->ind_r, li_task->ind_nalloc);
-        }
+            lincs_task_t  *li_task;
+            gmx_bitmask_t  mask;
+            int            b;
  
-        bitmask_init_low_bits(&mask, th);
+            li_task = &li->task[th];
  
-        li_task->nind   = 0;
-        li_task->nind_r = 0;
-        for (b = li_task->b0; b < li_task->b1; b++)
-        {
-            /* We let the constraint with the lowest thread index
-             * operate on atoms with constraints from multiple threads.
-             */
-            if (bitmask_is_disjoint(atf[li->bla[b*2]], mask) &&
-                bitmask_is_disjoint(atf[li->bla[b*2+1]], mask))
+            if (li_task->b1 - li_task->b0 > li_task->ind_nalloc)
              {
-                /* Add the constraint to the local atom update index */
-                li_task->ind[li_task->nind++] = b;
+                li_task->ind_nalloc = over_alloc_large(li_task->b1-li_task->b0);
+                srenew(li_task->ind, li_task->ind_nalloc);
+                srenew(li_task->ind_r, li_task->ind_nalloc);
              }
-            else
+
+            bitmask_init_low_bits(&mask, th);
+
+            li_task->nind   = 0;
+            li_task->nind_r = 0;
+            for (b = li_task->b0; b < li_task->b1; b++)
              {
-                /* Add the constraint to the rest block */
-                li_task->ind_r[li_task->nind_r++] = b;
+                /* We let the constraint with the lowest thread index
+                 * operate on atoms with constraints from multiple threads.
+                 */
+                if (bitmask_is_disjoint(atf[li->bla[b*2]], mask) &&
+                    bitmask_is_disjoint(atf[li->bla[b*2+1]], mask))
+                {
+                    /* Add the constraint to the local atom update index */
+                    li_task->ind[li_task->nind++] = b;
+                }
+                else
+                {
+                    /* Add the constraint to the rest block */
+                    li_task->ind_r[li_task->nind_r++] = b;
+                }
              }
          }
+        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
      }
  
      /* We need to copy all constraints which have not be assigned
@@ -2066,8 +2082,10 @@ static void set_matrix_indices(struct gmx_lincsdata *li,
      }
  }
  
-void set_lincs(t_idef *idef, t_mdatoms *md,
-               gmx_bool bDynamics, t_commrec *cr,
+void set_lincs(const t_idef         *idef,
+               const t_mdatoms      *md,
+               gmx_bool              bDynamics,
+               t_commrec            *cr,
                 struct gmx_lincsdata *li)
  {
      int          natoms, nflexcon;
@@ -2306,20 +2324,24 @@ void set_lincs(t_idef *idef, t_mdatoms *md,
  #pragma omp parallel for num_threads(li->ntask) schedule(static)
      for (th = 0; th < li->ntask; th++)
      {
-        lincs_task_t *li_task;
+        try
+        {
+            lincs_task_t *li_task;
  
-        li_task = &li->task[th];
+            li_task = &li->task[th];
  
-        if (li->ncg_triangle > 0 &&
-            li_task->b1 - li_task->b0 > li_task->tri_alloc)
-        {
-            /* This is allocating too much, but it is difficult to improve */
-            li_task->tri_alloc = over_alloc_dd(li_task->b1 - li_task->b0);
-            srenew(li_task->triangle, li_task->tri_alloc);
-            srenew(li_task->tri_bits, li_task->tri_alloc);
-        }
+            if (li->ncg_triangle > 0 &&
+                li_task->b1 - li_task->b0 > li_task->tri_alloc)
+            {
+                /* This is allocating too much, but it is difficult to improve */
+                li_task->tri_alloc = over_alloc_dd(li_task->b1 - li_task->b0);
+                srenew(li_task->triangle, li_task->tri_alloc);
+                srenew(li_task->tri_bits, li_task->tri_alloc);
+            }
  
-        set_matrix_indices(li, li_task, &at2con, bSortMatrix);
+            set_matrix_indices(li, li_task, &at2con, bSortMatrix);
+        }
+        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
      }
  
      done_blocka(&at2con);
@@ -2417,7 +2439,7 @@ static void lincs_warning(FILE *fplog,
              {
                  fprintf(fplog, "%s", buf);
              }
-            if (!gmx_isfinite(d1))
+            if (!std::isfinite(d1))
              {
                  gmx_fatal(FARGS, "Bond length not finite.");
              }
@@ -2601,16 +2623,20 @@ gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner,
          /* The OpenMP parallel region of constrain_lincs for coords */
  #pragma omp parallel num_threads(lincsd->ntask)
          {
-            int th = gmx_omp_get_thread_num();
+            try
+            {
+                int th = gmx_omp_get_thread_num();
  
-            clear_mat(lincsd->task[th].vir_r_m_dr);
+                clear_mat(lincsd->task[th].vir_r_m_dr);
  
-            do_lincs(x, xprime, box, pbc, lincsd, th,
-                     md->invmass, cr,
-                     bCalcDHDL,
-                     ir->LincsWarnAngle, &bWarn,
-                     invdt, v, bCalcVir,
-                     th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
+                do_lincs(x, xprime, box, pbc, lincsd, th,
+                         md->invmass, cr,
+                         bCalcDHDL,
+                         ir->LincsWarnAngle, &bWarn,
+                         invdt, v, bCalcVir,
+                         th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
+            }
+            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          }
  
          if (bLog && fplog && lincsd->nc > 0)
@@ -2702,11 +2728,15 @@ gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner,
          /* The OpenMP parallel region of constrain_lincs for derivatives */
  #pragma omp parallel num_threads(lincsd->ntask)
          {
-            int th = gmx_omp_get_thread_num();
+            try
+            {
+                int th = gmx_omp_get_thread_num();
  
-            do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
-                      md->invmass, econq, bCalcDHDL,
-                      bCalcVir, th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
+                do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
+                          md->invmass, econq, bCalcDHDL,
+                          bCalcVir, th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr);
+            }
+            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          }
      }