Avoid PME tuning decreasing rcoulomb

[alexxy/gromacs.git] / src / programs / mdrun / pme_loadbal.c
diff --git a/src/programs/mdrun/pme_loadbal.c b/src/programs/mdrun/pme_loadbal.c

index e13b09d82e81ed06b89b96bfbfa73ab19c63a712..d6fd68f37269d10692d625172706519c053c2cf3 100644 (file)
--- a/src/programs/mdrun/pme_loadbal.c
+++ b/src/programs/mdrun/pme_loadbal.c
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -36,7 +36,8 @@
  #include <config.h>
  #endif
  
-#include "smalloc.h"
+#include "gromacs/utility/smalloc.h"
+#include "types/commrec.h"
  #include "network.h"
  #include "calcgrid.h"
  #include "pme.h"
@@ -57,9 +58,9 @@ typedef struct {
      real      spacing;         /* (largest) PME grid spacing                   */
      ivec      grid;            /* the PME grid dimensions                      */
      real      grid_efficiency; /* ineffiency factor for non-uniform grids <= 1 */
-    real      ewaldcoeff;      /* the Ewald coefficient                        */
+    real      ewaldcoeff_q;    /* Electrostatic Ewald coefficient            */
+    real      ewaldcoeff_lj;   /* LJ Ewald coefficient, only for the call to send_switchgrid */
      gmx_pme_t pmedata;         /* the data structure used in the PME code      */
-
      int       count;           /* number of times this setup has been timed    */
      double    cycles;          /* the fastest time for this setup in cycles    */
  } pme_setup_t;
@@ -156,19 +157,20 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
      pme_lb->n = 1;
      snew(pme_lb->setup, pme_lb->n);
  
-    pme_lb->rcut_vdw              = ic->rvdw;
-    pme_lb->rcut_coulomb_start    = ir->rcoulomb;
-    pme_lb->nstcalclr_start       = ir->nstcalclr;
-
-    pme_lb->cur                   = 0;
-    pme_lb->setup[0].rcut_coulomb = ic->rcoulomb;
-    pme_lb->setup[0].rlist        = ic->rlist;
-    pme_lb->setup[0].rlistlong    = ic->rlistlong;
-    pme_lb->setup[0].nstcalclr    = ir->nstcalclr;
-    pme_lb->setup[0].grid[XX]     = ir->nkx;
-    pme_lb->setup[0].grid[YY]     = ir->nky;
-    pme_lb->setup[0].grid[ZZ]     = ir->nkz;
-    pme_lb->setup[0].ewaldcoeff   = ic->ewaldcoeff;
+    pme_lb->rcut_vdw                 = ic->rvdw;
+    pme_lb->rcut_coulomb_start       = ir->rcoulomb;
+    pme_lb->nstcalclr_start          = ir->nstcalclr;
+
+    pme_lb->cur                      = 0;
+    pme_lb->setup[0].rcut_coulomb    = ic->rcoulomb;
+    pme_lb->setup[0].rlist           = ic->rlist;
+    pme_lb->setup[0].rlistlong       = ic->rlistlong;
+    pme_lb->setup[0].nstcalclr       = ir->nstcalclr;
+    pme_lb->setup[0].grid[XX]        = ir->nkx;
+    pme_lb->setup[0].grid[YY]        = ir->nky;
+    pme_lb->setup[0].grid[ZZ]        = ir->nkz;
+    pme_lb->setup[0].ewaldcoeff_q    = ic->ewaldcoeff_q;
+    pme_lb->setup[0].ewaldcoeff_lj   = ic->ewaldcoeff_lj;
  
      pme_lb->setup[0].pmedata  = pmedata;
  
@@ -202,8 +204,8 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
      *pme_lb_p = pme_lb;
  }
  
-static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
-                                            int                  pme_order,
+static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t  pme_lb,
+                                            int                   pme_order,
                                              const gmx_domdec_t   *dd)
  {
      pme_setup_t *set;
@@ -259,6 +261,15 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
      while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing || !grid_ok);
  
      set->rcut_coulomb = pme_lb->cut_spacing*sp;
+    if (set->rcut_coulomb < pme_lb->rcut_coulomb_start)
+    {
+        /* This is unlikely, but can happen when e.g. continuing from
+         * a checkpoint after equilibration where the box shrank a lot.
+         * We want to avoid rcoulomb getting smaller than rvdw
+         * and there might be more issues with decreasing rcoulomb.
+         */
+        set->rcut_coulomb = pme_lb->rcut_coulomb_start;
+    }
  
      if (pme_lb->cutoff_scheme == ecutsVERLET)
      {
@@ -314,8 +325,11 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
          set->grid_efficiency *= (set->grid[d]*sp)/norm(pme_lb->box_start[d]);
      }
      /* The Ewald coefficient is inversly proportional to the cut-off */
-    set->ewaldcoeff =
-        pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut_coulomb/set->rcut_coulomb;
+    set->ewaldcoeff_q =
+        pme_lb->setup[0].ewaldcoeff_q*pme_lb->setup[0].rcut_coulomb/set->rcut_coulomb;
+    /* We set ewaldcoeff_lj in set, even when LJ-PME is not used */
+    set->ewaldcoeff_lj =
+        pme_lb->setup[0].ewaldcoeff_lj*pme_lb->setup[0].rcut_coulomb/set->rcut_coulomb;
  
      set->count   = 0;
      set->cycles  = 0;
@@ -372,7 +386,7 @@ static int pme_loadbal_end(pme_load_balancing_t pme_lb)
  }
  
  static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
-                                  gmx_large_int_t step,
+                                  gmx_int64_t step,
                                    pme_load_balancing_t pme_lb)
  {
      char buf[STRLEN], sbuf[22];
@@ -432,7 +446,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
                            interaction_const_t *ic,
                            nonbonded_verlet_t  *nbv,
                            gmx_pme_t           *pmedata,
-                          gmx_large_int_t      step)
+                          gmx_int64_t          step)
  {
      gmx_bool     OK;
      pme_setup_t *set;
@@ -656,11 +670,32 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
  
      set = &pme_lb->setup[pme_lb->cur];
  
-    ic->rcoulomb   = set->rcut_coulomb;
-    ic->rlist      = set->rlist;
-    ic->rlistlong  = set->rlistlong;
-    ir->nstcalclr  = set->nstcalclr;
-    ic->ewaldcoeff = set->ewaldcoeff;
+    ic->rcoulomb     = set->rcut_coulomb;
+    ic->rlist        = set->rlist;
+    ic->rlistlong    = set->rlistlong;
+    ir->nstcalclr    = set->nstcalclr;
+    ic->ewaldcoeff_q = set->ewaldcoeff_q;
+    /* TODO: centralize the code that sets the potentials shifts */
+    if (ic->coulomb_modifier == eintmodPOTSHIFT)
+    {
+        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
+    }
+    if (EVDW_PME(ic->vdwtype))
+    {
+        /* We have PME for both Coulomb and VdW, set rvdw equal to rcoulomb */
+        ic->rvdw            = set->rcut_coulomb;
+        ic->ewaldcoeff_lj   = set->ewaldcoeff_lj;
+        if (ic->vdw_modifier == eintmodPOTSHIFT)
+        {
+            real crc2;
+
+            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
+            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
+            ic->sh_invrc6             = -ic->dispersion_shift.cpot;
+            crc2                      = sqr(ic->ewaldcoeff_lj*ic->rvdw);
+            ic->sh_lj_ewald           = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
+        }
+    }
  
      bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
      if (pme_lb->cutoff_scheme == ecutsVERLET &&
@@ -685,19 +720,14 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
          {
              gmx_barrier(cr);
          }
-#endif /* GMX_THREAD_MPI */
-    }
-    else
-    {
-        init_interaction_const_tables(NULL, ic, bUsesSimpleTables,
-                                      rtab);
+#endif  /* GMX_THREAD_MPI */
      }
  
-    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1)
-    {
-        init_interaction_const_tables(NULL, ic, bUsesSimpleTables,
-                                      rtab);
-    }
+    /* Usually we won't need the simple tables with GPUs.
+     * But we do with hybrid acceleration and with free energy.
+     * To avoid bugs, we always re-initialize the simple tables here.
+     */
+    init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab);
  
      if (cr->duty & DUTY_PME)
      {
@@ -715,7 +745,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
      else
      {
          /* Tell our PME-only node to switch grid */
-        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff);
+        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff_q, set->ewaldcoeff_lj);
      }
  
      if (debug)
@@ -766,7 +796,7 @@ static void print_pme_loadbal_setting(FILE              *fplog,
              name,
              setup->rcut_coulomb, pme_loadbal_rlist(setup),
              setup->grid[XX], setup->grid[YY], setup->grid[ZZ],
-            setup->spacing, 1/setup->ewaldcoeff);
+            setup->spacing, 1/setup->ewaldcoeff_q);
  }
  
  static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
@@ -809,8 +839,8 @@ static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
      {
          md_print_warn(cr, fplog,
                        "NOTE: PME load balancing increased the non-bonded workload by more than 50%%.\n"
-                      "      For better performance use (more) PME nodes (mdrun -npme),\n"
-                      "      or in case you are beyond the scaling limit, use less nodes in total.\n");
+                      "      For better performance, use (more) PME ranks (mdrun -npme),\n"
+                      "      or if you are beyond the scaling limit, use fewer total ranks (or nodes).\n");
      }
      else
      {