Merge branch release-4-6
[alexxy/gromacs.git] / src / gromacs / gmxlib / bondfree.c
index f03f8738f3a6beb0a17df84d9c2ba8f671102406..ae984652fa819bf7f541c09f1a1f01cb6c2dcc19 100644 (file)
@@ -38,6 +38,7 @@
 #endif
 
 #include <math.h>
+#include <assert.h>
 #include "physics.h"
 #include "vec.h"
 #include "maths.h"
@@ -3757,6 +3758,73 @@ real tab_dihs(int nbonds,
     return vtot;
 }
 
+/* Return if this is a potential calculated in bondfree.c,
+ * i.e. an interaction that actually calculates a potential and
+ * works on multiple atoms (not e.g. a connection or a position restraint).
+ */
+static gmx_inline gmx_bool ftype_is_bonded_potential(int ftype)
+{
+    return
+        (interaction_function[ftype].flags & IF_BOND) &&
+        !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
+        (ftype < F_GB12 || ftype > F_GB14);
+}
+
+static void divide_bondeds_over_threads(t_idef *idef, int nthreads)
+{
+    int ftype;
+    int nat1;
+    int t;
+    int il_nr_thread;
+
+    idef->nthreads = nthreads;
+
+    if (F_NRE*(nthreads+1) > idef->il_thread_division_nalloc)
+    {
+        idef->il_thread_division_nalloc = F_NRE*(nthreads+1);
+        snew(idef->il_thread_division, idef->il_thread_division_nalloc);
+    }
+
+    for (ftype = 0; ftype < F_NRE; ftype++)
+    {
+        if (ftype_is_bonded_potential(ftype))
+        {
+            nat1 = interaction_function[ftype].nratoms + 1;
+
+            for (t = 0; t <= nthreads; t++)
+            {
+                /* Divide the interactions equally over the threads.
+                 * When the different types of bonded interactions
+                 * are distributed roughly equally over the threads,
+                 * this should lead to well localized output into
+                 * the force buffer on each thread.
+                 * If this is not the case, a more advanced scheme
+                 * (not implemented yet) will do better.
+                 */
+                il_nr_thread = (((idef->il[ftype].nr/nat1)*t)/nthreads)*nat1;
+
+                /* Ensure that distance restraint pairs with the same label
+                 * end up on the same thread.
+                 * This is slighlty tricky code, since the next for iteration
+                 * may have an initial il_nr_thread lower than the final value
+                 * in the previous iteration, but this will anyhow be increased
+                 * to the approriate value again by this while loop.
+                 */
+                while (ftype == F_DISRES &&
+                       il_nr_thread > 0 &&
+                       il_nr_thread < idef->il[ftype].nr &&
+                       idef->iparams[idef->il[ftype].iatoms[il_nr_thread]].disres.label ==
+                       idef->iparams[idef->il[ftype].iatoms[il_nr_thread-nat1]].disres.label)
+                {
+                    il_nr_thread += nat1;
+                }
+
+                idef->il_thread_division[ftype*(nthreads+1)+t] = il_nr_thread;
+            }
+        }
+    }
+}
+
 static unsigned
 calc_bonded_reduction_mask(const t_idef *idef,
                            int shift,
@@ -3769,9 +3837,7 @@ calc_bonded_reduction_mask(const t_idef *idef,
 
     for (ftype = 0; ftype < F_NRE; ftype++)
     {
-        if (interaction_function[ftype].flags & IF_BOND &&
-            !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
-            (ftype<F_GB12 || ftype>F_GB14))
+        if (ftype_is_bonded_potential(ftype))
         {
             nb = idef->il[ftype].nr;
             if (nb > 0)
@@ -3781,8 +3847,8 @@ calc_bonded_reduction_mask(const t_idef *idef,
                 /* Divide this interaction equally over the threads.
                  * This is not stored: should match division in calc_bonds.
                  */
-                nb0 = (((nb/nat1)* t   )/nt)*nat1;
-                nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
+                nb0 = idef->il_thread_division[ftype*(nt+1)+t];
+                nb1 = idef->il_thread_division[ftype*(nt+1)+t+1];
 
                 for (i = nb0; i < nb1; i += nat1)
                 {
@@ -3798,14 +3864,18 @@ calc_bonded_reduction_mask(const t_idef *idef,
     return mask;
 }
 
-void init_bonded_thread_force_reduction(t_forcerec   *fr,
-                                        const t_idef *idef)
+void setup_bonded_threading(t_forcerec   *fr, t_idef *idef)
 {
 #define MAX_BLOCK_BITS 32
     int t;
     int ctot, c, b;
 
-    if (fr->nthreads <= 1)
+    assert(fr->nthreads >= 1);
+
+    /* Divide the bonded interaction over the threads */
+    divide_bondeds_over_threads(idef, fr->nthreads);
+
+    if (fr->nthreads == 1)
     {
         fr->red_nblock = 0;
 
@@ -4029,14 +4099,14 @@ static real calc_one_bond(FILE *fplog, int thread,
                           rvec x[], rvec f[], rvec fshift[],
                           t_forcerec *fr,
                           const t_pbc *pbc, const t_graph *g,
-                          gmx_enerdata_t gmx_unused *enerd, gmx_grppairener_t *grpp,
+                          gmx_grppairener_t *grpp,
                           t_nrnb *nrnb,
                           real *lambda, real *dvdl,
                           const t_mdatoms *md, t_fcdata *fcd,
                           gmx_bool bCalcEnerVir,
                           int *global_atom_index, gmx_bool bPrintSepPot)
 {
-    int      ind, nat1, nbonds, efptFTYPE;
+    int      nat1, nbonds, efptFTYPE;
     real     v = 0;
     t_iatom *iatoms;
     int      nb0, nbn;
@@ -4050,170 +4120,88 @@ static real calc_one_bond(FILE *fplog, int thread,
         efptFTYPE = efptBONDED;
     }
 
-    if (interaction_function[ftype].flags & IF_BOND &&
-        !(ftype == F_CONNBONDS || ftype == F_POSRES))
-    {
-        ind       = interaction_function[ftype].nrnb_ind;
-        nat1      = interaction_function[ftype].nratoms + 1;
-        nbonds    = idef->il[ftype].nr/nat1;
-        iatoms    = idef->il[ftype].iatoms;
+    nat1      = interaction_function[ftype].nratoms + 1;
+    nbonds    = idef->il[ftype].nr/nat1;
+    iatoms    = idef->il[ftype].iatoms;
 
-        nb0 = ((nbonds* thread   )/(fr->nthreads))*nat1;
-        nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
+    nb0 = idef->il_thread_division[ftype*(idef->nthreads+1)+thread];
+    nbn = idef->il_thread_division[ftype*(idef->nthreads+1)+thread+1] - nb0;
 
-        if (!IS_LISTED_LJ_C(ftype))
+    if (!IS_LISTED_LJ_C(ftype))
+    {
+        if (ftype == F_CMAP)
         {
-            if (ftype == F_CMAP)
-            {
-                v = cmap_dihs(nbn, iatoms+nb0,
-                              idef->iparams, &idef->cmap_grid,
-                              (const rvec*)x, f, fshift,
-                              pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
-                              md, fcd, global_atom_index);
-            }
+            v = cmap_dihs(nbn, iatoms+nb0,
+                          idef->iparams, &idef->cmap_grid,
+                          (const rvec*)x, f, fshift,
+                          pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
+                          md, fcd, global_atom_index);
+        }
 #ifdef SIMD_BONDEDS
-            else if (ftype == F_ANGLES &&
-                     !bCalcEnerVir && fr->efep == efepNO)
-            {
-                /* No energies, shift forces, dvdl */
-                angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
-                                   idef->iparams,
-                                   (const rvec*)x, f,
-                                   pbc, g, lambda[efptFTYPE], md, fcd,
-                                   global_atom_index);
-                v = 0;
-            }
+        else if (ftype == F_ANGLES &&
+                 !bCalcEnerVir && fr->efep == efepNO)
+        {
+            /* No energies, shift forces, dvdl */
+            angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
+                               idef->iparams,
+                               (const rvec*)x, f,
+                               pbc, g, lambda[efptFTYPE], md, fcd,
+                               global_atom_index);
+            v = 0;
+        }
 #endif
-            else if (ftype == F_PDIHS &&
-                     !bCalcEnerVir && fr->efep == efepNO)
-            {
-                /* No energies, shift forces, dvdl */
+        else if (ftype == F_PDIHS &&
+                 !bCalcEnerVir && fr->efep == efepNO)
+        {
+            /* No energies, shift forces, dvdl */
 #ifndef SIMD_BONDEDS
-                pdihs_noener
+            pdihs_noener
 #else
-                pdihs_noener_simd
+            pdihs_noener_simd
 #endif
-                    (nbn, idef->il[ftype].iatoms+nb0,
-                    idef->iparams,
-                    (const rvec*)x, f,
-                    pbc, g, lambda[efptFTYPE], md, fcd,
-                    global_atom_index);
-                v = 0;
-            }
-            else
-            {
-                v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
-                                                      idef->iparams,
-                                                      (const rvec*)x, f, fshift,
-                                                      pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
-                                                      md, fcd, global_atom_index);
-            }
-            if (bPrintSepPot)
-            {
-                fprintf(fplog, "  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
-                        interaction_function[ftype].longname,
-                        nbonds/nat1, v, lambda[efptFTYPE]);
-            }
+                (nbn, idef->il[ftype].iatoms+nb0,
+                 idef->iparams,
+                 (const rvec*)x, f,
+                 pbc, g, lambda[efptFTYPE], md, fcd,
+                 global_atom_index);
+            v = 0;
         }
         else
         {
-            v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
-                                    pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
-
-            if (bPrintSepPot)
-            {
-                fprintf(fplog, "  %-5s + %-15s #%4d                  dVdl %12.5e\n",
-                        interaction_function[ftype].longname,
-                        interaction_function[F_LJ14].longname, nbonds/nat1, dvdl[efptVDW]);
-                fprintf(fplog, "  %-5s + %-15s #%4d                  dVdl %12.5e\n",
-                        interaction_function[ftype].longname,
-                        interaction_function[F_COUL14].longname, nbonds/nat1, dvdl[efptCOUL]);
-            }
+            v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
+                                                  idef->iparams,
+                                                  (const rvec*)x, f, fshift,
+                                                  pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
+                                                  md, fcd, global_atom_index);
         }
-        if (ind != -1 && thread == 0)
+        if (bPrintSepPot)
         {
-            inc_nrnb(nrnb, ind, nbonds);
+            fprintf(fplog, "  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
+                    interaction_function[ftype].longname,
+                    nbonds, v, lambda[efptFTYPE]);
         }
     }
-
-    return v;
-}
-
-/* WARNING!  THIS FUNCTION MUST EXACTLY TRACK THE calc
-   function, or horrible things will happen when doing free energy
-   calculations!  In a good coding world, this would not be a
-   different function, but for speed reasons, it needs to be made a
-   separate function.  TODO for 5.0 - figure out a way to reorganize
-   to reduce duplication.
- */
-
-static real calc_one_bond_foreign(FILE gmx_unused *fplog, int ftype, const t_idef *idef,
-                                  rvec x[], rvec f[], t_forcerec *fr,
-                                  const t_pbc *pbc, const t_graph *g,
-                                  gmx_grppairener_t *grpp, t_nrnb *nrnb,
-                                  real *lambda, real *dvdl,
-                                  const t_mdatoms *md, t_fcdata *fcd,
-                                  int *global_atom_index, gmx_bool gmx_unused bPrintSepPot)
-{
-    int      ind, nat1, nbonds, efptFTYPE, nbonds_np;
-    real     v = 0;
-    t_iatom *iatoms;
-
-    if (IS_RESTRAINT_TYPE(ftype))
-    {
-        efptFTYPE = efptRESTRAINT;
-    }
     else
     {
-        efptFTYPE = efptBONDED;
+        v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
+                                pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
+        
+        if (bPrintSepPot)
+        {
+            fprintf(fplog, "  %-5s + %-15s #%4d                  dVdl %12.5e\n",
+                    interaction_function[ftype].longname,
+                    interaction_function[F_LJ14].longname, nbonds, dvdl[efptVDW]);
+            fprintf(fplog, "  %-5s + %-15s #%4d                  dVdl %12.5e\n",
+                    interaction_function[ftype].longname,
+                    interaction_function[F_COUL14].longname, nbonds, dvdl[efptCOUL]);
+        }
     }
 
-    if (ftype < F_GB12 || ftype > F_GB14)
+    if (thread == 0)
     {
-        if (interaction_function[ftype].flags & IF_BOND &&
-            !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
-        {
-            ind       = interaction_function[ftype].nrnb_ind;
-            nat1      = interaction_function[ftype].nratoms+1;
-            nbonds_np = idef->il[ftype].nr_nonperturbed;
-            nbonds    = idef->il[ftype].nr - nbonds_np;
-            iatoms    = idef->il[ftype].iatoms + nbonds_np;
-            if (nbonds > 0)
-            {
-                if (!IS_LISTED_LJ_C(ftype))
-                {
-                    if (ftype == F_CMAP)
-                    {
-                        v = cmap_dihs(nbonds, iatoms,
-                                      idef->iparams, &idef->cmap_grid,
-                                      (const rvec*)x, f, fr->fshift,
-                                      pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
-                                      global_atom_index);
-                    }
-                    else
-                    {
-                        v =     interaction_function[ftype].ifunc(nbonds, iatoms,
-                                                                  idef->iparams,
-                                                                  (const rvec*)x, f, fr->fshift,
-                                                                  pbc, g, lambda[efptFTYPE], &dvdl[efptFTYPE],
-                                                                  md, fcd, global_atom_index);
-                    }
-                }
-                else
-                {
-                    v = do_nonbonded_listed(ftype, nbonds, iatoms,
-                                            idef->iparams,
-                                            (const rvec*)x, f, fr->fshift,
-                                            pbc, g, lambda, dvdl,
-                                            md, fr, grpp, global_atom_index);
-                }
-                if (ind != -1)
-                {
-                    inc_nrnb(nrnb, ind, nbonds/nat1);
-                }
-            }
-        }
+        inc_nrnb(nrnb, interaction_function[ftype].nrnb_ind, nbonds);
     }
+
     return v;
 }
 
@@ -4238,6 +4226,8 @@ void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
     char          buf[22];
     int           thread;
 
+    assert(fr->nthreads == idef->nthreads);
+
     bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
 
     for (i = 0; i < efptNR; i++)
@@ -4285,13 +4275,12 @@ void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
     for (thread = 0; thread < fr->nthreads; thread++)
     {
-        int                ftype, nbonds, ind, nat1;
+        int                ftype;
         real              *epot, v;
         /* thread stuff */
         rvec              *ft, *fshift;
         real              *dvdlt;
         gmx_grppairener_t *grpp;
-        int                nb0, nbn;
 
         if (thread == 0)
         {
@@ -4315,17 +4304,14 @@ void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
         /* Loop over all bonded force types to calculate the bonded forces */
         for (ftype = 0; (ftype < F_NRE); ftype++)
         {
-            if (idef->il[ftype].nr > 0 &&
-                (interaction_function[ftype].flags & IF_BOND) &&
-                (ftype < F_GB12 || ftype > F_GB14) &&
-                !(ftype == F_CONNBONDS || ftype == F_POSRES))
+            if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
             {
                 v = calc_one_bond(fplog, thread, ftype, idef, x,
-                                  ft, fshift, fr, pbc_null, g, enerd, grpp,
+                                  ft, fshift, fr, pbc_null, g, grpp,
                                   nrnb, lambda, dvdlt,
                                   md, fcd, bCalcEnerVir,
                                   global_atom_index, bPrintSepPot);
-                epot[ftype]        += v;
+                epot[ftype] += v;
             }
         }
     }
@@ -4365,12 +4351,12 @@ void calc_bonds_lambda(FILE *fplog,
                        t_fcdata *fcd,
                        int *global_atom_index)
 {
-    int           i, ftype, nbonds_np, nbonds, ind, nat;
-    real          v, dr, dr2;
+    int           i, ftype, nr_nonperturbed, nr;
+    real          v;
     real          dvdl_dum[efptNR];
-    rvec         *f, *fshift_orig;
+    rvec         *f, *fshift;
     const  t_pbc *pbc_null;
-    t_iatom      *iatom_fe;
+    t_idef       idef_fe;
 
     if (fr->bMolPBC)
     {
@@ -4381,21 +4367,43 @@ void calc_bonds_lambda(FILE *fplog,
         pbc_null = NULL;
     }
 
+    /* Copy the whole idef, so we can modify the contents locally */
+    idef_fe          = *idef;
+    idef_fe.nthreads = 1;
+    snew(idef_fe.il_thread_division, F_NRE*(idef_fe.nthreads+1));
+
+    /* We already have the forces, so we use temp buffers here */
     snew(f, fr->natoms_force);
-    /* We want to preserve the fshift array in forcerec */
-    fshift_orig = fr->fshift;
-    snew(fr->fshift, SHIFTS);
+    snew(fshift, SHIFTS);
 
-    /* Loop over all bonded force types to calculate the bonded forces */
+    /* Loop over all bonded force types to calculate the bonded energies */
     for (ftype = 0; (ftype < F_NRE); ftype++)
     {
-        v = calc_one_bond_foreign(fplog, ftype, idef, x,
-                                  f, fr, pbc_null, g, grpp, nrnb, lambda, dvdl_dum,
-                                  md, fcd, global_atom_index, FALSE);
-        epot[ftype] += v;
+        if (ftype_is_bonded_potential(ftype))
+        {
+            /* Set the work range of thread 0 to the perturbed bondeds only */
+            nr_nonperturbed                       = idef->il[ftype].nr_nonperturbed;
+            nr                                    = idef->il[ftype].nr;
+            idef_fe.il_thread_division[ftype*2+0] = nr_nonperturbed;
+            idef_fe.il_thread_division[ftype*2+1] = nr;
+
+            /* This is only to get the flop count correct */
+            idef_fe.il[ftype].nr = nr - nr_nonperturbed;
+
+            if (nr - nr_nonperturbed > 0)
+            {
+                v = calc_one_bond(fplog, 0, ftype, &idef_fe,
+                                  x, f, fshift, fr, pbc_null, g,
+                                  grpp, nrnb, lambda, dvdl_dum,
+                                  md, fcd, TRUE,
+                                  global_atom_index, FALSE);
+                epot[ftype] += v;
+            }
+        }
     }
 
-    sfree(fr->fshift);
-    fr->fshift = fshift_orig;
+    sfree(fshift);
     sfree(f);
+
+    sfree(idef_fe.il_thread_division);
 }