Separated bonded and Ewald correction threading

author Berk Hess <hess@n176-p33.kthopen.kth.se>

Fri, 27 Feb 2015 09:19:31 +0000 (10:19 +0100)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Wed, 8 Jul 2015 10:50:11 +0000 (12:50 +0200)
author Berk Hess <hess@n176-p33.kthopen.kth.se>
Fri, 27 Feb 2015 09:19:31 +0000 (10:19 +0100)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Wed, 8 Jul 2015 10:50:11 +0000 (12:50 +0200)
diff --git a/src/gromacs/legacyheaders/types/forcerec.h b/src/gromacs/legacyheaders/types/forcerec.h

index 33c44a6442059f0aaf6e522bae9b8bdb2ee3a032..c0d8aa99192648668186357c03be75126fb60a33 100644 (file)
--- a/src/gromacs/legacyheaders/types/forcerec.h
+++ b/src/gromacs/legacyheaders/types/forcerec.h
@@ -60,6 +60,7 @@ extern "C" {
  /* Abstract type for PME that is defined only in the routine that use them. */
  struct gmx_pme_t;
  struct nonbonded_verlet_t;
+struct bonded_threading_t;
  
  /* Structure describing the data in a single table */
  typedef struct
@@ -185,7 +186,7 @@ typedef struct {
  /* Forward declaration of type for managing Ewald tables */
  struct gmx_ewald_tab_t;
  
-typedef struct f_thread_t f_thread_t;
+typedef struct ewald_corr_thread_t ewald_corr_thread_t;
  
  typedef struct {
      interaction_const_t *ic;
@@ -468,18 +469,14 @@ typedef struct {
      real userreal3;
      real userreal4;
  
-    /* Thread local force and energy data */
-    /* FIXME move to bonded_thread_data_t */
-    int         nthreads;
-    int         red_ashift;
-    int         red_nblock;
-    f_thread_t *f_t;
+    /* Pointer to struct for managing threading of bonded force calculation */
+    struct bonded_threading_t *bonded_threading;
  
-    /* Maximum thread count for uniform distribution of bondeds over threads */
-    int   bonded_max_nthread_uniform;
-
-    /* Exclusion load distribution over the threads */
-    int  *excl_load;
+    /* Ewald correction thread local virial and energy data */
+    int                  nthread_ewc;
+    ewald_corr_thread_t *ewc_t;
+    /* Ewald charge correction load distribution over the threads */
+    int                 *excl_load;
  } t_forcerec;
  
  /* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
diff --git a/src/gromacs/listed-forces/listed-forces.cpp b/src/gromacs/listed-forces/listed-forces.cpp

index 3f68624adddcea65c36c0ae5bb72e319f9182611..1fa6afccd2635c5f5a678dc3d9779f975f4b2b8b 100644 (file)
--- a/src/gromacs/listed-forces/listed-forces.cpp
+++ b/src/gromacs/listed-forces/listed-forces.cpp
@@ -60,13 +60,13 @@
  #include "gromacs/listed-forces/bonded.h"
  #include "gromacs/listed-forces/position-restraints.h"
  #include "gromacs/math/vec.h"
-#include "gromacs/mdlib/forcerec-threading.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/simd/simd.h"
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/utility/smalloc.h"
  
+#include "listed-internal.h"
  #include "pairs.h"
  
  namespace
@@ -405,14 +405,18 @@ void calc_listed(const gmx_multisim_t *ms,
                   t_fcdata *fcd, int *global_atom_index,
                   int force_flags)
  {
-    gmx_bool      bCalcEnerVir;
-    int           i;
-    real          dvdl[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
-                                                        of lambda, which will be thrown away in the end*/
-    const  t_pbc *pbc_null;
-    int           thread;
+    struct bonded_threading_t *bt;
+    gmx_bool                   bCalcEnerVir;
+    int                        i;
+    /* The dummy array is to have a place to store the dhdl at other values
+       of lambda, which will be thrown away in the end */
+    real                       dvdl[efptNR];
+    const  t_pbc              *pbc_null;
+    int                        thread;
+
+    bt = fr->bonded_threading;
  
-    assert(fr->nthreads == idef->nthreads);
+    assert(bt->nthreads == idef->nthreads);
  
      bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
  
@@ -485,8 +489,8 @@ void calc_listed(const gmx_multisim_t *ms,
      }
  
      wallcycle_sub_start(wcycle, ewcsLISTED);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-    for (thread = 0; thread < fr->nthreads; thread++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+    for (thread = 0; thread < bt->nthreads; thread++)
      {
          int                ftype;
          real              *epot, v;
@@ -505,14 +509,14 @@ void calc_listed(const gmx_multisim_t *ms,
          }
          else
          {
-            zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
-                               fr->red_nblock, 1<<fr->red_ashift);
-
-            ft     = fr->f_t[thread].f;
-            fshift = fr->f_t[thread].fshift;
-            epot   = fr->f_t[thread].ener;
-            grpp   = &fr->f_t[thread].grpp;
-            dvdlt  = fr->f_t[thread].dvdl;
+            zero_thread_forces(&bt->f_t[thread], fr->natoms_force,
+                               bt->red_nblock, 1<<bt->red_ashift);
+
+            ft     = bt->f_t[thread].f;
+            fshift = bt->f_t[thread].fshift;
+            epot   = bt->f_t[thread].ener;
+            grpp   = &bt->f_t[thread].grpp;
+            dvdlt  = bt->f_t[thread].dvdl;
          }
          /* Loop over all bonded force types to calculate the bonded forces */
          for (ftype = 0; (ftype < F_NRE); ftype++)
@@ -530,13 +534,13 @@ void calc_listed(const gmx_multisim_t *ms,
      }
      wallcycle_sub_stop(wcycle, ewcsLISTED);
  
-    if (fr->nthreads > 1)
+    if (bt->nthreads > 1)
      {
          wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
          reduce_thread_forces(fr->natoms_force, f, fr->fshift,
                               enerd->term, &enerd->grpp, dvdl,
-                             fr->nthreads, fr->f_t,
-                             fr->red_nblock, 1<<fr->red_ashift,
+                             bt->nthreads, bt->f_t,
+                             bt->red_nblock, 1<<bt->red_ashift,
                               bCalcEnerVir,
                               force_flags & GMX_FORCE_DHDL);
          wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
diff --git a/src/gromacs/listed-forces/listed-internal.h b/src/gromacs/listed-forces/listed-internal.h

index 7e2982bc1dae7c000e412464b26c0f7bd80eee13..a6fa29c174f8f0e80160af86c960f6448d27c30a 100644 (file)
--- a/src/gromacs/listed-forces/listed-internal.h
+++ b/src/gromacs/listed-forces/listed-internal.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -43,6 +43,40 @@
  #ifndef GMX_LISTED_FORCES_LISTED_INTERNAL_H
  #define GMX_LISTED_FORCES_LISTED_INTERNAL_H
  
+#include "gromacs/legacyheaders/types/forcerec.h"
+#include "gromacs/math/vectypes.h"
+#include "gromacs/topology/idef.h"
+#include "gromacs/utility/bitmask.h"
+
+/*! \internal \brief struct with output for bonded forces, used per thread */
+typedef struct
+{
+    rvec             *f;            /**< Force array */
+    int               f_nalloc;     /**< Allocation size of f */
+    gmx_bitmask_t     red_mask;     /**< Mask for marking which parts of f are filled */
+    rvec             *fshift;       /**< Shift force array, size SHIFTS */
+    real              ener[F_NRE];  /**< Energy array */
+    gmx_grppairener_t grpp;         /**< Group pair energy data for pairs */
+    real              dvdl[efptNR]; /**< Free-energy dV/dl output */
+}
+f_thread_t;
+
+/*! \internal \brief struct contain all data for bonded force threading */
+struct bonded_threading_t
+{
+    /* Thread local force and energy data */
+    int         nthreads;   /**< Number of threads to be used for bondeds */
+    int         red_ashift; /**< Size of force reduction blocks in bits */
+    int         red_nblock; /**< The number of force blocks to reduce */
+    f_thread_t *f_t;        /**< Force/enegry data per thread, size nthreads */
+
+    /* There are two different ways to distribute the bonded force calculation
+     * over the threads. We dedice which to use based on the number of threads.
+     */
+    int bonded_max_nthread_uniform; /**< Maximum thread count for uniform distribution of bondeds over threads */
+};
+
+
  /*! \brief Returns the global topology atom number belonging to local
   * atom index i.
   *
diff --git a/src/gromacs/listed-forces/manage-threading.cpp b/src/gromacs/listed-forces/manage-threading.cpp

index b703d3d8ce0ff5162ac8f7c75d7991ff4cda3aee..183b819befd4c2db6552cc8b9389e8cfcd19a283 100644 (file)
--- a/src/gromacs/listed-forces/manage-threading.cpp
+++ b/src/gromacs/listed-forces/manage-threading.cpp
@@ -53,12 +53,13 @@
  
  #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
  #include "gromacs/listed-forces/listed-forces.h"
-#include "gromacs/mdlib/forcerec-threading.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/smalloc.h"
  #include "gromacs/utility/stringutil.h"
  
+#include "listed-internal.h"
+
  /*! \brief struct for passing all data required for a function type */
  typedef struct {
      int      ftype; /**< the function type index */
@@ -343,65 +344,68 @@ const int maxBlockBits = BITMASK_SIZE;
  
  void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
  {
-    int t;
-    int ctot, c, b;
+    bonded_threading_t *bt;
+    int                 t;
+    int                 ctot, c, b;
+
+    bt = fr->bonded_threading;
  
-    assert(fr->nthreads >= 1);
+    assert(bt->nthreads >= 1);
  
      /* Divide the bonded interaction over the threads */
      divide_bondeds_over_threads(idef,
-                                fr->nthreads,
-                                fr->bonded_max_nthread_uniform);
+                                bt->nthreads,
+                                bt->bonded_max_nthread_uniform);
  
-    if (fr->nthreads == 1)
+    if (bt->nthreads == 1)
      {
-        fr->red_nblock = 0;
+        bt->red_nblock = 0;
  
          return;
      }
  
-    fr->red_ashift = 6;
-    while (fr->natoms_force > (int)(maxBlockBits*(1U<<fr->red_ashift)))
+    bt->red_ashift = 6;
+    while (fr->natoms_force > (int)(maxBlockBits*(1U<<bt->red_ashift)))
      {
-        fr->red_ashift++;
+        bt->red_ashift++;
      }
      if (debug)
      {
          fprintf(debug, "bonded force buffer block atom shift %d bits\n",
-                fr->red_ashift);
+                bt->red_ashift);
      }
  
      /* Determine to which blocks each thread's bonded force calculation
       * contributes. Store this is a mask for each thread.
       */
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-    for (t = 1; t < fr->nthreads; t++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+    for (t = 1; t < bt->nthreads; t++)
      {
-        calc_bonded_reduction_mask(&fr->f_t[t].red_mask,
-                                   idef, fr->red_ashift, t, fr->nthreads);
+        calc_bonded_reduction_mask(&bt->f_t[t].red_mask,
+                                   idef, bt->red_ashift, t, bt->nthreads);
      }
  
      /* Determine the maximum number of blocks we need to reduce over */
-    fr->red_nblock = 0;
+    bt->red_nblock = 0;
      ctot           = 0;
-    for (t = 0; t < fr->nthreads; t++)
+    for (t = 0; t < bt->nthreads; t++)
      {
          c = 0;
          for (b = 0; b < maxBlockBits; b++)
          {
-            if (bitmask_is_set(fr->f_t[t].red_mask, b))
+            if (bitmask_is_set(bt->f_t[t].red_mask, b))
              {
-                fr->red_nblock = std::max(fr->red_nblock, b+1);
+                bt->red_nblock = std::max(bt->red_nblock, b+1);
                  c++;
              }
          }
          if (debug)
          {
  #if BITMASK_SIZE <= 64 //move into bitmask when it is C++
-            std::string flags = gmx::formatString("%x", fr->f_t[t].red_mask);
+            std::string flags = gmx::formatString("%x", bt->f_t[t].red_mask);
  #else
-            std::string flags = gmx::formatAndJoin(fr->f_t[t].red_mask,
-                                                   fr->f_t[t].red_mask+BITMASK_ALEN,
+            std::string flags = gmx::formatAndJoin(bt->f_t[t].red_mask,
+                                                   bt->f_t[t].red_mask+BITMASK_ALEN,
                                                     "", gmx::StringFormatter("%x"));
  #endif
              fprintf(debug, "thread %d flags %s count %d\n",
@@ -412,38 +416,43 @@ void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
      if (debug)
      {
          fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
-                fr->red_nblock, 1<<fr->red_ashift);
+                bt->red_nblock, 1<<bt->red_ashift);
          fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
-                ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
-                ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
+                ctot*(1<<bt->red_ashift)/(double)fr->natoms_force,
+                ctot*(1<<bt->red_ashift)/(double)(fr->natoms_force*bt->nthreads));
      }
  }
  
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp)
+void init_bonded_threading(FILE *fplog, int nenergrp,
+                           struct bonded_threading_t **bt_ptr)
  {
+    bonded_threading_t *bt;
+
+    snew(bt, 1);
+
      /* These thread local data structures are used for bondeds only */
-    fr->nthreads = gmx_omp_nthreads_get(emntBonded);
+    bt->nthreads = gmx_omp_nthreads_get(emntBonded);
  
-    if (fr->nthreads > 1)
+    if (bt->nthreads > 1)
      {
          int t;
  
-        snew(fr->f_t, fr->nthreads);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-        for (t = 0; t < fr->nthreads; t++)
+        snew(bt->f_t, bt->nthreads);
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+        for (t = 0; t < bt->nthreads; t++)
          {
              /* Thread 0 uses the global force and energy arrays */
              if (t > 0)
              {
                  int i;
  
-                fr->f_t[t].f        = NULL;
-                fr->f_t[t].f_nalloc = 0;
-                snew(fr->f_t[t].fshift, SHIFTS);
-                fr->f_t[t].grpp.nener = nenergrp*nenergrp;
+                bt->f_t[t].f        = NULL;
+                bt->f_t[t].f_nalloc = 0;
+                snew(bt->f_t[t].fshift, SHIFTS);
+                bt->f_t[t].grpp.nener = nenergrp*nenergrp;
                  for (i = 0; i < egNR; i++)
                  {
-                    snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
+                    snew(bt->f_t[t].grpp.ener[i], bt->f_t[t].grpp.nener);
                  }
              }
          }
@@ -457,16 +466,18 @@ void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp)
  
          if ((ptr = getenv("GMX_BONDED_NTHREAD_UNIFORM")) != NULL)
          {
-            sscanf(ptr, "%d", &fr->bonded_max_nthread_uniform);
+            sscanf(ptr, "%d", &bt->bonded_max_nthread_uniform);
              if (fplog != NULL)
              {
                  fprintf(fplog, "\nMax threads for uniform bonded distribution set to %d by env.var.\n",
-                        fr->bonded_max_nthread_uniform);
+                        bt->bonded_max_nthread_uniform);
              }
          }
          else
          {
-            fr->bonded_max_nthread_uniform = max_nthread_uniform;
+            bt->bonded_max_nthread_uniform = max_nthread_uniform;
          }
      }
+
+    *bt_ptr = bt;
  }
diff --git a/src/gromacs/listed-forces/manage-threading.h b/src/gromacs/listed-forces/manage-threading.h

index 2096ec63ae1b719f7ebebb5011675cd079d5a28a..c8718da6eb5d71266a3a7e108fa16559ace548b6 100644 (file)
--- a/src/gromacs/listed-forces/manage-threading.h
+++ b/src/gromacs/listed-forces/manage-threading.h
@@ -60,8 +60,13 @@ extern "C" {
   */
  void setup_bonded_threading(t_forcerec *fr, t_idef *idef);
  
-/*! \brief Initialize the bonded threading data structures */
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp);
+/*! \brief Initialize the bonded threading data structures
+ *
+ * Allocates and initializes a bonded threading data structure.
+ * A pointer to this struct is returned as \p *bb_ptr.
+ */
+void init_bonded_threading(FILE *fplog, int nenergrp,
+                           struct bonded_threading_t **bt_ptr);
  
  #ifdef __cplusplus
  }
diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp

index 20ecfe36bdc27f6cb3586aed03692ddd9a9e0bcc..e27e347587d05bdebe58eb1f7063116d067e9c8c 100644 (file)
--- a/src/gromacs/mdlib/force.cpp
+++ b/src/gromacs/mdlib/force.cpp
@@ -113,34 +113,22 @@ void ns(FILE              *fp,
      }
  }
  
-static void reduce_thread_forces(int n, rvec *f,
-                                 tensor vir_q, tensor vir_lj,
-                                 real *Vcorr_q, real *Vcorr_lj,
-                                 real *dvdl_q, real *dvdl_lj,
-                                 int nthreads, f_thread_t *f_t)
+static void reduce_thread_energies(tensor vir_q, tensor vir_lj,
+                                   real *Vcorr_q, real *Vcorr_lj,
+                                   real *dvdl_q, real *dvdl_lj,
+                                   int nthreads,
+                                   ewald_corr_thread_t *ewc_t)
  {
-    int t, i;
-    int nthreads_loop gmx_unused;
+    int t;
  
-    // cppcheck-suppress unreadVariable
-    nthreads_loop = gmx_omp_nthreads_get(emntBonded);
-    /* This reduction can run over any number of threads */
-#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
-    for (i = 0; i < n; i++)
-    {
-        for (t = 1; t < nthreads; t++)
-        {
-            rvec_inc(f[i], f_t[t].f[i]);
-        }
-    }
      for (t = 1; t < nthreads; t++)
      {
-        *Vcorr_q  += f_t[t].Vcorr_q;
-        *Vcorr_lj += f_t[t].Vcorr_lj;
-        *dvdl_q   += f_t[t].dvdl[efptCOUL];
-        *dvdl_lj  += f_t[t].dvdl[efptVDW];
-        m_add(vir_q, f_t[t].vir_q, vir_q);
-        m_add(vir_lj, f_t[t].vir_lj, vir_lj);
+        *Vcorr_q  += ewc_t[t].Vcorr_q;
+        *Vcorr_lj += ewc_t[t].Vcorr_lj;
+        *dvdl_q   += ewc_t[t].dvdl[efptCOUL];
+        *dvdl_lj  += ewc_t[t].dvdl[efptVDW];
+        m_add(vir_q, ewc_t[t].vir_q, vir_q);
+        m_add(vir_lj, ewc_t[t].vir_lj, vir_lj);
      }
  }
  
@@ -440,17 +428,14 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                      gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                  }
  
-                nthreads = gmx_omp_nthreads_get(emntBonded);
+                nthreads = fr->nthread_ewc;
  #pragma omp parallel for num_threads(nthreads) schedule(static)
                  for (t = 0; t < nthreads; t++)
                  {
-                    int     i;
-                    rvec   *fnv;
                      tensor *vir_q, *vir_lj;
                      real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
                      if (t == 0)
                      {
-                        fnv       = fr->f_novirsum;
                          vir_q     = &fr->vir_el_recip;
                          vir_lj    = &fr->vir_lj_recip;
                          Vcorrt_q  = &Vcorr_q;
@@ -460,23 +445,23 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                      }
                      else
                      {
-                        fnv       = fr->f_t[t].f;
-                        vir_q     = &fr->f_t[t].vir_q;
-                        vir_lj    = &fr->f_t[t].vir_lj;
-                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
-                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
-                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
-                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
-                        for (i = 0; i < fr->natoms_force; i++)
-                        {
-                            clear_rvec(fnv[i]);
-                        }
+                        vir_q     = &fr->ewc_t[t].vir_q;
+                        vir_lj    = &fr->ewc_t[t].vir_lj;
+                        Vcorrt_q  = &fr->ewc_t[t].Vcorr_q;
+                        Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
+                        dvdlt_q   = &fr->ewc_t[t].dvdl[efptCOUL];
+                        dvdlt_lj  = &fr->ewc_t[t].dvdl[efptVDW];
                          clear_mat(*vir_q);
                          clear_mat(*vir_lj);
                      }
                      *dvdlt_q  = 0;
                      *dvdlt_lj = 0;
  
+                    /* Threading is only supported with the Verlet cut-off
+                     * scheme and then only single particle forces (no
+                     * exclusion forces) are calculated, so we can store
+                     * the forces in the normal, single fr->f_novirsum array.
+                     */
                      ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                         cr, t, fr,
                                         md->chargeA, md->chargeB,
@@ -488,19 +473,18 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                                         excl, x, bSB ? boxs : box, mu_tot,
                                         ir->ewald_geometry,
                                         ir->epsilon_surface,
-                                       fnv, *vir_q, *vir_lj,
+                                       fr->f_novirsum, *vir_q, *vir_lj,
                                         Vcorrt_q, Vcorrt_lj,
                                         lambda[efptCOUL], lambda[efptVDW],
                                         dvdlt_q, dvdlt_lj);
                  }
                  if (nthreads > 1)
                  {
-                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
-                                         fr->vir_el_recip, fr->vir_lj_recip,
-                                         &Vcorr_q, &Vcorr_lj,
-                                         &dvdl_long_range_correction_q,
-                                         &dvdl_long_range_correction_lj,
-                                         nthreads, fr->f_t);
+                    reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
+                                           &Vcorr_q, &Vcorr_lj,
+                                           &dvdl_long_range_correction_q,
+                                           &dvdl_long_range_correction_lj,
+                                           nthreads, fr->ewc_t);
                  }
                  wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
              }
diff --git a/src/gromacs/mdlib/forcerec-threading.h b/src/gromacs/mdlib/forcerec-threading.h

index b40a18ba32ff37607fd32f8fd21b1363286337cf..070b476ec56d5861ad2dd60e8368c9f9c4167884 100644 (file)
--- a/src/gromacs/mdlib/forcerec-threading.h
+++ b/src/gromacs/mdlib/forcerec-threading.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -36,19 +36,11 @@
  #ifndef GMX_MDLIB_FORCEREC_THREADING_H
  #define GMX_MDLIB_FORCEREC_THREADING_H
  
-#include "gromacs/utility/bitmask.h"
-
  #ifdef __cplusplus
  extern "C" {
  #endif
  
-struct f_thread_t {
-    rvec             *f;
-    int               f_nalloc;
-    gmx_bitmask_t     red_mask; /* Mask for marking which parts of f are filled */
-    rvec             *fshift;
-    real              ener[F_NRE];
-    gmx_grppairener_t grpp;
+struct ewald_corr_thread_t {
      real              Vcorr_q;
      real              Vcorr_lj;
      real              dvdl[efptNR];
diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp

index 7de43327b5d2a1e5456f216aef92012ba9f20f47..b04b09a16baed48e9a6ec1ebe9ffc45ea7ebb0e5 100644 (file)
--- a/src/gromacs/mdlib/forcerec.cpp
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -3212,9 +3212,12 @@ void init_forcerec(FILE              *fp,
      }
  
      /* Initialize the thread working data for bonded interactions */
-    init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
+    init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
+                          &fr->bonded_threading);
  
-    snew(fr->excl_load, fr->nthreads+1);
+    fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
+    snew(fr->ewc_t, fr->nthread_ewc);
+    snew(fr->excl_load, fr->nthread_ewc + 1);
  
      /* fr->ic is used both by verlet and group kernels (to some extent) now */
      init_interaction_const(fp, &fr->ic, fr);
@@ -3285,9 +3288,9 @@ void forcerec_set_excl_load(t_forcerec           *fr,
      fr->excl_load[0] = 0;
      n                = 0;
      i                = 0;
-    for (t = 1; t <= fr->nthreads; t++)
+    for (t = 1; t <= fr->nthread_ewc; t++)
      {
-        ntarget = (ntot*t)/fr->nthreads;
+        ntarget = (ntot*t)/fr->nthread_ewc;
          while (i < top->excls.nr && n < ntarget)
          {
              for (j = ind[i]; j < ind[i+1]; j++)
author	Berk Hess <hess@n176-p33.kthopen.kth.se>
	Fri, 27 Feb 2015 09:19:31 +0000 (10:19 +0100)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Wed, 8 Jul 2015 10:50:11 +0000 (12:50 +0200)
src/gromacs/legacyheaders/types/forcerec.h		patch \| blob \| history
src/gromacs/listed-forces/listed-forces.cpp		patch \| blob \| history
src/gromacs/listed-forces/listed-internal.h		patch \| blob \| history
src/gromacs/listed-forces/manage-threading.cpp		patch \| blob \| history
src/gromacs/listed-forces/manage-threading.h		patch \| blob \| history
src/gromacs/mdlib/force.cpp		patch \| blob \| history
src/gromacs/mdlib/forcerec-threading.h		patch \| blob \| history
src/gromacs/mdlib/forcerec.cpp		patch \| blob \| history