Separated bonded and Ewald correction threading
author: Berk Hess <hess@n176-p33.kthopen.kth.se>
        Fri, 27 Feb 2015 09:19:31 +0000 (10:19 +0100)
committer: Gerrit Code Review <gerrit@gerrit.gromacs.org>
        Wed, 8 Jul 2015 10:50:11 +0000 (12:50 +0200)
The bonded and Ewald correction threading code used the same data
structures. These have now been separated, and the bonded threading
data is now opaque, defined in listed-internal.h.
This change is only code refactoring, except for the removal of
the thread-local Ewald correction force array and reduction, which
were never needed.
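
In outline (a condensed sketch assembled from the hunks below, not new
API): t_forcerec now holds only an opaque pointer for bonded threading,
the full definition moves to the module-private listed-internal.h, and
the Ewald correction keeps just its small per-thread energy/virial data:

    /* listed-internal.h (module private): full definition */
    struct bonded_threading_t
    {
        int         nthreads;   /* threads used for bondeds */
        int         red_ashift; /* log2 of the reduction block size */
        int         red_nblock; /* number of force blocks to reduce */
        f_thread_t *f_t;        /* per-thread force/energy buffers */
        int         bonded_max_nthread_uniform;
    };

    /* forcerec.h (public): opaque pointer, plus Ewald correction
     * thread data that no longer carries per-atom force buffers */
    struct bonded_threading_t *bonded_threading;
    int                        nthread_ewc;
    ewald_corr_thread_t       *ewc_t;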

Change-Id: I162255107aa9e153a4c8ff41d786867d6fbed444

src/gromacs/legacyheaders/types/forcerec.h
src/gromacs/listed-forces/listed-forces.cpp
src/gromacs/listed-forces/listed-internal.h
src/gromacs/listed-forces/manage-threading.cpp
src/gromacs/listed-forces/manage-threading.h
src/gromacs/mdlib/force.cpp
src/gromacs/mdlib/forcerec-threading.h
src/gromacs/mdlib/forcerec.cpp

index 33c44a6442059f0aaf6e522bae9b8bdb2ee3a032..c0d8aa99192648668186357c03be75126fb60a33 100644 (file)
@@ -60,6 +60,7 @@ extern "C" {
 /* Abstract type for PME that is defined only in the routines that use it. */
 struct gmx_pme_t;
 struct nonbonded_verlet_t;
+struct bonded_threading_t;
 
 /* Structure describing the data in a single table */
 typedef struct
@@ -185,7 +186,7 @@ typedef struct {
 /* Forward declaration of type for managing Ewald tables */
 struct gmx_ewald_tab_t;
 
-typedef struct f_thread_t f_thread_t;
+typedef struct ewald_corr_thread_t ewald_corr_thread_t;
 
 typedef struct {
     interaction_const_t *ic;
@@ -468,18 +469,14 @@ typedef struct {
     real userreal3;
     real userreal4;
 
-    /* Thread local force and energy data */
-    /* FIXME move to bonded_thread_data_t */
-    int         nthreads;
-    int         red_ashift;
-    int         red_nblock;
-    f_thread_t *f_t;
+    /* Pointer to struct for managing threading of bonded force calculation */
+    struct bonded_threading_t *bonded_threading;
 
-    /* Maximum thread count for uniform distribution of bondeds over threads */
-    int   bonded_max_nthread_uniform;
-
-    /* Exclusion load distribution over the threads */
-    int  *excl_load;
+    /* Ewald correction thread local virial and energy data */
+    int                  nthread_ewc;
+    ewald_corr_thread_t *ewc_t;
+    /* Ewald charge correction load distribution over the threads */
+    int                 *excl_load;
 } t_forcerec;
 
 /* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
index 3f68624adddcea65c36c0ae5bb72e319f9182611..1fa6afccd2635c5f5a678dc3d9779f975f4b2b8b 100644 (file)
 #include "gromacs/listed-forces/bonded.h"
 #include "gromacs/listed-forces/position-restraints.h"
 #include "gromacs/math/vec.h"
-#include "gromacs/mdlib/forcerec-threading.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "listed-internal.h"
 #include "pairs.h"
 
 namespace
@@ -405,14 +405,18 @@ void calc_listed(const gmx_multisim_t *ms,
                  t_fcdata *fcd, int *global_atom_index,
                  int force_flags)
 {
-    gmx_bool      bCalcEnerVir;
-    int           i;
-    real          dvdl[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
-                                                        of lambda, which will be thrown away in the end*/
-    const  t_pbc *pbc_null;
-    int           thread;
+    struct bonded_threading_t *bt;
+    gmx_bool                   bCalcEnerVir;
+    int                        i;
+    /* The dummy array is to have a place to store the dhdl at other values
+       of lambda, which will be thrown away in the end */
+    real                       dvdl[efptNR];
+    const  t_pbc              *pbc_null;
+    int                        thread;
+
+    bt = fr->bonded_threading;
 
-    assert(fr->nthreads == idef->nthreads);
+    assert(bt->nthreads == idef->nthreads);
 
     bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
 
@@ -485,8 +489,8 @@ void calc_listed(const gmx_multisim_t *ms,
     }
 
     wallcycle_sub_start(wcycle, ewcsLISTED);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-    for (thread = 0; thread < fr->nthreads; thread++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+    for (thread = 0; thread < bt->nthreads; thread++)
     {
         int                ftype;
         real              *epot, v;
@@ -505,14 +509,14 @@ void calc_listed(const gmx_multisim_t *ms,
         }
         else
         {
-            zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
-                               fr->red_nblock, 1<<fr->red_ashift);
-
-            ft     = fr->f_t[thread].f;
-            fshift = fr->f_t[thread].fshift;
-            epot   = fr->f_t[thread].ener;
-            grpp   = &fr->f_t[thread].grpp;
-            dvdlt  = fr->f_t[thread].dvdl;
+            zero_thread_forces(&bt->f_t[thread], fr->natoms_force,
+                               bt->red_nblock, 1<<bt->red_ashift);
+
+            ft     = bt->f_t[thread].f;
+            fshift = bt->f_t[thread].fshift;
+            epot   = bt->f_t[thread].ener;
+            grpp   = &bt->f_t[thread].grpp;
+            dvdlt  = bt->f_t[thread].dvdl;
         }
         /* Loop over all bonded force types to calculate the bonded forces */
         for (ftype = 0; (ftype < F_NRE); ftype++)
@@ -530,13 +534,13 @@ void calc_listed(const gmx_multisim_t *ms,
     }
     wallcycle_sub_stop(wcycle, ewcsLISTED);
 
-    if (fr->nthreads > 1)
+    if (bt->nthreads > 1)
     {
         wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
         reduce_thread_forces(fr->natoms_force, f, fr->fshift,
                              enerd->term, &enerd->grpp, dvdl,
-                             fr->nthreads, fr->f_t,
-                             fr->red_nblock, 1<<fr->red_ashift,
+                             bt->nthreads, bt->f_t,
+                             bt->red_nblock, 1<<bt->red_ashift,
                              bCalcEnerVir,
                              force_flags & GMX_FORCE_DHDL);
         wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
index 7e2982bc1dae7c000e412464b26c0f7bd80eee13..a6fa29c174f8f0e80160af86c960f6448d27c30a 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #ifndef GMX_LISTED_FORCES_LISTED_INTERNAL_H
 #define GMX_LISTED_FORCES_LISTED_INTERNAL_H
 
+#include "gromacs/legacyheaders/types/forcerec.h"
+#include "gromacs/math/vectypes.h"
+#include "gromacs/topology/idef.h"
+#include "gromacs/utility/bitmask.h"
+
+/*! \internal \brief struct with output for bonded forces, used per thread */
+typedef struct
+{
+    rvec             *f;            /**< Force array */
+    int               f_nalloc;     /**< Allocation size of f */
+    gmx_bitmask_t     red_mask;     /**< Mask for marking which parts of f are filled */
+    rvec             *fshift;       /**< Shift force array, size SHIFTS */
+    real              ener[F_NRE];  /**< Energy array */
+    gmx_grppairener_t grpp;         /**< Group pair energy data for pairs */
+    real              dvdl[efptNR]; /**< Free-energy dV/dl output */
+}
+f_thread_t;
+
+/*! \internal \brief struct containing all data for bonded force threading */
+struct bonded_threading_t
+{
+    /* Thread local force and energy data */
+    int         nthreads;   /**< Number of threads to be used for bondeds */
+    int         red_ashift; /**< Log2 of the force reduction block size */
+    int         red_nblock; /**< The number of force blocks to reduce */
+    f_thread_t *f_t;        /**< Force/energy data per thread, size nthreads */
+
+    /* There are two different ways to distribute the bonded force calculation
+     * over the threads. We decide which to use based on the number of threads.
+     */
+    int bonded_max_nthread_uniform; /**< Maximum thread count for uniform distribution of bondeds over threads */
+};
+
+
 /*! \brief Returns the global topology atom number belonging to local
  * atom index i.
  *
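
The struct above supports a block-sparse force reduction: atoms are
grouped into blocks of 2^red_ashift, each thread marks in red_mask which
blocks its bonded interactions touch, and the reduction visits only
marked blocks. A self-contained sketch under simplifying assumptions (a
single 64-bit mask word and double[3] forces instead of gmx_bitmask_t
and rvec; natoms a multiple of the block size; reduce_marked_blocks is
a hypothetical helper, not GROMACS API):

    #include <stdint.h>

    /* Add one thread's forces into fglobal, visiting only the
     * 2^ashift-atom blocks whose bit is set in the thread's mask.
     * Each thread sets bit (a >> ashift) for every atom a it writes. */
    static void reduce_marked_blocks(double (*fglobal)[3],
                                     const double (*fthread)[3],
                                     uint64_t mask, int nblock, int ashift)
    {
        for (int b = 0; b < nblock; b++)
        {
            if (mask & (1ULL << b))
            {
                for (int a = b << ashift; a < ((b + 1) << ashift); a++)
                {
                    fglobal[a][0] += fthread[a][0];
                    fglobal[a][1] += fthread[a][1];
                    fglobal[a][2] += fthread[a][2];
                }
            }
        }
    }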
index b703d3d8ce0ff5162ac8f7c75d7991ff4cda3aee..183b819befd4c2db6552cc8b9389e8cfcd19a283 100644 (file)
 
 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 #include "gromacs/listed-forces/listed-forces.h"
-#include "gromacs/mdlib/forcerec-threading.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/stringutil.h"
 
+#include "listed-internal.h"
+
 /*! \brief struct for passing all data required for a function type */
 typedef struct {
     int      ftype; /**< the function type index */
@@ -343,65 +344,68 @@ const int maxBlockBits = BITMASK_SIZE;
 
 void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
 {
-    int t;
-    int ctot, c, b;
+    bonded_threading_t *bt;
+    int                 t;
+    int                 ctot, c, b;
+
+    bt = fr->bonded_threading;
 
-    assert(fr->nthreads >= 1);
+    assert(bt->nthreads >= 1);
 
     /* Divide the bonded interaction over the threads */
     divide_bondeds_over_threads(idef,
-                                fr->nthreads,
-                                fr->bonded_max_nthread_uniform);
+                                bt->nthreads,
+                                bt->bonded_max_nthread_uniform);
 
-    if (fr->nthreads == 1)
+    if (bt->nthreads == 1)
     {
-        fr->red_nblock = 0;
+        bt->red_nblock = 0;
 
         return;
     }
 
-    fr->red_ashift = 6;
-    while (fr->natoms_force > (int)(maxBlockBits*(1U<<fr->red_ashift)))
+    bt->red_ashift = 6;
+    while (fr->natoms_force > (int)(maxBlockBits*(1U<<bt->red_ashift)))
     {
-        fr->red_ashift++;
+        bt->red_ashift++;
     }
     if (debug)
     {
         fprintf(debug, "bonded force buffer block atom shift %d bits\n",
-                fr->red_ashift);
+                bt->red_ashift);
     }
 
     /* Determine to which blocks each thread's bonded force calculation
     * contributes. Store this in a mask for each thread.
      */
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-    for (t = 1; t < fr->nthreads; t++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+    for (t = 1; t < bt->nthreads; t++)
     {
-        calc_bonded_reduction_mask(&fr->f_t[t].red_mask,
-                                   idef, fr->red_ashift, t, fr->nthreads);
+        calc_bonded_reduction_mask(&bt->f_t[t].red_mask,
+                                   idef, bt->red_ashift, t, bt->nthreads);
     }
 
     /* Determine the maximum number of blocks we need to reduce over */
-    fr->red_nblock = 0;
+    bt->red_nblock = 0;
     ctot           = 0;
-    for (t = 0; t < fr->nthreads; t++)
+    for (t = 0; t < bt->nthreads; t++)
     {
         c = 0;
         for (b = 0; b < maxBlockBits; b++)
         {
-            if (bitmask_is_set(fr->f_t[t].red_mask, b))
+            if (bitmask_is_set(bt->f_t[t].red_mask, b))
             {
-                fr->red_nblock = std::max(fr->red_nblock, b+1);
+                bt->red_nblock = std::max(bt->red_nblock, b+1);
                 c++;
             }
         }
         if (debug)
         {
 #if BITMASK_SIZE <= 64 //move into bitmask when it is C++
-            std::string flags = gmx::formatString("%x", fr->f_t[t].red_mask);
+            std::string flags = gmx::formatString("%x", bt->f_t[t].red_mask);
 #else
-            std::string flags = gmx::formatAndJoin(fr->f_t[t].red_mask,
-                                                   fr->f_t[t].red_mask+BITMASK_ALEN,
+            std::string flags = gmx::formatAndJoin(bt->f_t[t].red_mask,
+                                                   bt->f_t[t].red_mask+BITMASK_ALEN,
                                                    "", gmx::StringFormatter("%x"));
 #endif
             fprintf(debug, "thread %d flags %s count %d\n",
@@ -412,38 +416,43 @@ void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
     if (debug)
     {
         fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
-                fr->red_nblock, 1<<fr->red_ashift);
+                bt->red_nblock, 1<<bt->red_ashift);
         fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
-                ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
-                ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
+                ctot*(1<<bt->red_ashift)/(double)fr->natoms_force,
+                ctot*(1<<bt->red_ashift)/(double)(fr->natoms_force*bt->nthreads));
     }
 }
 
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp)
+void init_bonded_threading(FILE *fplog, int nenergrp,
+                           struct bonded_threading_t **bt_ptr)
 {
+    bonded_threading_t *bt;
+
+    snew(bt, 1);
+
     /* These thread local data structures are used for bondeds only */
-    fr->nthreads = gmx_omp_nthreads_get(emntBonded);
+    bt->nthreads = gmx_omp_nthreads_get(emntBonded);
 
-    if (fr->nthreads > 1)
+    if (bt->nthreads > 1)
     {
         int t;
 
-        snew(fr->f_t, fr->nthreads);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-        for (t = 0; t < fr->nthreads; t++)
+        snew(bt->f_t, bt->nthreads);
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+        for (t = 0; t < bt->nthreads; t++)
         {
             /* Thread 0 uses the global force and energy arrays */
             if (t > 0)
             {
                 int i;
 
-                fr->f_t[t].f        = NULL;
-                fr->f_t[t].f_nalloc = 0;
-                snew(fr->f_t[t].fshift, SHIFTS);
-                fr->f_t[t].grpp.nener = nenergrp*nenergrp;
+                bt->f_t[t].f        = NULL;
+                bt->f_t[t].f_nalloc = 0;
+                snew(bt->f_t[t].fshift, SHIFTS);
+                bt->f_t[t].grpp.nener = nenergrp*nenergrp;
                 for (i = 0; i < egNR; i++)
                 {
-                    snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
+                    snew(bt->f_t[t].grpp.ener[i], bt->f_t[t].grpp.nener);
                 }
             }
         }
@@ -457,16 +466,18 @@ void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp)
 
         if ((ptr = getenv("GMX_BONDED_NTHREAD_UNIFORM")) != NULL)
         {
-            sscanf(ptr, "%d", &fr->bonded_max_nthread_uniform);
+            sscanf(ptr, "%d", &bt->bonded_max_nthread_uniform);
             if (fplog != NULL)
             {
                 fprintf(fplog, "\nMax threads for uniform bonded distribution set to %d by env.var.\n",
-                        fr->bonded_max_nthread_uniform);
+                        bt->bonded_max_nthread_uniform);
             }
         }
         else
         {
-            fr->bonded_max_nthread_uniform = max_nthread_uniform;
+            bt->bonded_max_nthread_uniform = max_nthread_uniform;
         }
     }
+
+    *bt_ptr = bt;
 }
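
With the new out-pointer signature, the caller allocates and receives
the opaque struct in one call; a minimal usage sketch (the real call
site is the init_forcerec() hunk further down, where nenergrp is
mtop->groups.grps[egcENER].nr):

    struct bonded_threading_t *bt = NULL;

    /* Allocates the struct, sets nthreads from the OpenMP module and,
     * for nthreads > 1, the per-thread force/energy buffers. */
    init_bonded_threading(fplog, nenergrp, &bt);
    fr->bonded_threading = bt;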
index 2096ec63ae1b719f7ebebb5011675cd079d5a28a..c8718da6eb5d71266a3a7e108fa16559ace548b6 100644 (file)
@@ -60,8 +60,13 @@ extern "C" {
  */
 void setup_bonded_threading(t_forcerec *fr, t_idef *idef);
 
-/*! \brief Initialize the bonded threading data structures */
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp);
+/*! \brief Initialize the bonded threading data structures
+ *
+ * Allocates and initializes a bonded threading data structure.
+ * A pointer to this struct is returned as \p *bt_ptr.
+ */
+void init_bonded_threading(FILE *fplog, int nenergrp,
+                           struct bonded_threading_t **bt_ptr);
 
 #ifdef __cplusplus
 }
index 20ecfe36bdc27f6cb3586aed03692ddd9a9e0bcc..e27e347587d05bdebe58eb1f7063116d067e9c8c 100644 (file)
@@ -113,34 +113,22 @@ void ns(FILE              *fp,
     }
 }
 
-static void reduce_thread_forces(int n, rvec *f,
-                                 tensor vir_q, tensor vir_lj,
-                                 real *Vcorr_q, real *Vcorr_lj,
-                                 real *dvdl_q, real *dvdl_lj,
-                                 int nthreads, f_thread_t *f_t)
+static void reduce_thread_energies(tensor vir_q, tensor vir_lj,
+                                   real *Vcorr_q, real *Vcorr_lj,
+                                   real *dvdl_q, real *dvdl_lj,
+                                   int nthreads,
+                                   ewald_corr_thread_t *ewc_t)
 {
-    int t, i;
-    int nthreads_loop gmx_unused;
+    int t;
 
-    // cppcheck-suppress unreadVariable
-    nthreads_loop = gmx_omp_nthreads_get(emntBonded);
-    /* This reduction can run over any number of threads */
-#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
-    for (i = 0; i < n; i++)
-    {
-        for (t = 1; t < nthreads; t++)
-        {
-            rvec_inc(f[i], f_t[t].f[i]);
-        }
-    }
     for (t = 1; t < nthreads; t++)
     {
-        *Vcorr_q  += f_t[t].Vcorr_q;
-        *Vcorr_lj += f_t[t].Vcorr_lj;
-        *dvdl_q   += f_t[t].dvdl[efptCOUL];
-        *dvdl_lj  += f_t[t].dvdl[efptVDW];
-        m_add(vir_q, f_t[t].vir_q, vir_q);
-        m_add(vir_lj, f_t[t].vir_lj, vir_lj);
+        *Vcorr_q  += ewc_t[t].Vcorr_q;
+        *Vcorr_lj += ewc_t[t].Vcorr_lj;
+        *dvdl_q   += ewc_t[t].dvdl[efptCOUL];
+        *dvdl_lj  += ewc_t[t].dvdl[efptVDW];
+        m_add(vir_q, ewc_t[t].vir_q, vir_q);
+        m_add(vir_lj, ewc_t[t].vir_lj, vir_lj);
     }
 }
 
@@ -440,17 +428,14 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                     gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                 }
 
-                nthreads = gmx_omp_nthreads_get(emntBonded);
+                nthreads = fr->nthread_ewc;
 #pragma omp parallel for num_threads(nthreads) schedule(static)
                 for (t = 0; t < nthreads; t++)
                 {
-                    int     i;
-                    rvec   *fnv;
                     tensor *vir_q, *vir_lj;
                     real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
                     if (t == 0)
                     {
-                        fnv       = fr->f_novirsum;
                         vir_q     = &fr->vir_el_recip;
                         vir_lj    = &fr->vir_lj_recip;
                         Vcorrt_q  = &Vcorr_q;
@@ -460,23 +445,23 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                     }
                     else
                     {
-                        fnv       = fr->f_t[t].f;
-                        vir_q     = &fr->f_t[t].vir_q;
-                        vir_lj    = &fr->f_t[t].vir_lj;
-                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
-                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
-                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
-                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
-                        for (i = 0; i < fr->natoms_force; i++)
-                        {
-                            clear_rvec(fnv[i]);
-                        }
+                        vir_q     = &fr->ewc_t[t].vir_q;
+                        vir_lj    = &fr->ewc_t[t].vir_lj;
+                        Vcorrt_q  = &fr->ewc_t[t].Vcorr_q;
+                        Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
+                        dvdlt_q   = &fr->ewc_t[t].dvdl[efptCOUL];
+                        dvdlt_lj  = &fr->ewc_t[t].dvdl[efptVDW];
                         clear_mat(*vir_q);
                         clear_mat(*vir_lj);
                     }
                     *dvdlt_q  = 0;
                     *dvdlt_lj = 0;
 
+                    /* Threading is only supported with the Verlet cut-off
+                     * scheme and then only single particle forces (no
+                     * exclusion forces) are calculated, so we can store
+                     * the forces in the normal, single fr->f_novirsum array.
+                     */
                     ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                        cr, t, fr,
                                        md->chargeA, md->chargeB,
@@ -488,19 +473,18 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                                        excl, x, bSB ? boxs : box, mu_tot,
                                        ir->ewald_geometry,
                                        ir->epsilon_surface,
-                                       fnv, *vir_q, *vir_lj,
+                                       fr->f_novirsum, *vir_q, *vir_lj,
                                        Vcorrt_q, Vcorrt_lj,
                                        lambda[efptCOUL], lambda[efptVDW],
                                        dvdlt_q, dvdlt_lj);
                 }
                 if (nthreads > 1)
                 {
-                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
-                                         fr->vir_el_recip, fr->vir_lj_recip,
-                                         &Vcorr_q, &Vcorr_lj,
-                                         &dvdl_long_range_correction_q,
-                                         &dvdl_long_range_correction_lj,
-                                         nthreads, fr->f_t);
+                    reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
+                                           &Vcorr_q, &Vcorr_lj,
+                                           &dvdl_long_range_correction_q,
+                                           &dvdl_long_range_correction_lj,
+                                           nthreads, fr->ewc_t);
                 }
                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
             }
index b40a18ba32ff37607fd32f8fd21b1363286337cf..070b476ec56d5861ad2dd60e8368c9f9c4167884 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #ifndef GMX_MDLIB_FORCEREC_THREADING_H
 #define GMX_MDLIB_FORCEREC_THREADING_H
 
-#include "gromacs/utility/bitmask.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-struct f_thread_t {
-    rvec             *f;
-    int               f_nalloc;
-    gmx_bitmask_t     red_mask; /* Mask for marking which parts of f are filled */
-    rvec             *fshift;
-    real              ener[F_NRE];
-    gmx_grppairener_t grpp;
+struct ewald_corr_thread_t {
     real              Vcorr_q;
     real              Vcorr_lj;
     real              dvdl[efptNR];
index 7de43327b5d2a1e5456f216aef92012ba9f20f47..b04b09a16baed48e9a6ec1ebe9ffc45ea7ebb0e5 100644 (file)
@@ -3212,9 +3212,12 @@ void init_forcerec(FILE              *fp,
     }
 
     /* Initialize the thread working data for bonded interactions */
-    init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
+    init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
+                          &fr->bonded_threading);
 
-    snew(fr->excl_load, fr->nthreads+1);
+    fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
+    snew(fr->ewc_t, fr->nthread_ewc);
+    snew(fr->excl_load, fr->nthread_ewc + 1);
 
     /* fr->ic is used both by verlet and group kernels (to some extent) now */
     init_interaction_const(fp, &fr->ic, fr);
@@ -3285,9 +3288,9 @@ void forcerec_set_excl_load(t_forcerec           *fr,
     fr->excl_load[0] = 0;
     n                = 0;
     i                = 0;
-    for (t = 1; t <= fr->nthreads; t++)
+    for (t = 1; t <= fr->nthread_ewc; t++)
     {
-        ntarget = (ntot*t)/fr->nthreads;
+        ntarget = (ntot*t)/fr->nthread_ewc;
         while (i < top->excls.nr && n < ntarget)
         {
             for (j = ind[i]; j < ind[i+1]; j++)
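
The loop above implements a weighted static partition: thread t owns a
contiguous range of exclusion-list entries whose accumulated pair count
reaches floor(ntot*t/nthread_ewc). The same scheme in isolation
(split_weighted is a hypothetical stand-in, not GROMACS API):

    /* Partition nitems weighted items into nthreads contiguous ranges;
     * bounds[t]..bounds[t+1] is the range for thread t and each range
     * carries roughly ntot/nthreads units of work. */
    static void split_weighted(int nitems, const int *weight, int nthreads,
                               int *bounds /* size nthreads+1 */)
    {
        int ntot = 0;
        for (int i = 0; i < nitems; i++)
        {
            ntot += weight[i];
        }
        bounds[0] = 0;
        int i = 0, n = 0;
        for (int t = 1; t <= nthreads; t++)
        {
            const int ntarget = (ntot*t)/nthreads;
            while (i < nitems && n < ntarget)
            {
                n += weight[i++];
            }
            bounds[t] = i;
        }
    }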