Convert ewald module to C++
author    Mark Abraham <mark.j.abraham@gmail.com>
Sun, 12 Oct 2014 18:21:18 +0000 (20:21 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Tue, 26 May 2015 14:01:34 +0000 (16:01 +0200)
Used std::min/max/pow, added some static casts, eliminated unused
variables, defined the Doxygen module, added some Doxygen comments, and
initialized some tensors directly with zero. Removed all the extern "C"
ifdef guards, since this module and all its callers now compile as C++!
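
A minimal, self-contained sketch of the flavour of those conversions
(illustrative only, not code from this change; "real" here stands in
for the precision-dependent GROMACS typedef):

    #include <algorithm>
    #include <cmath>

    typedef float real;

    // was: nmax = max(nx, max(ny, nz));   (macro from legacyheaders/macros.h)
    static int maxGridDimension(int nx, int ny, int nz)
    {
        return std::max(nx, std::max(ny, nz));
    }

    // was: cpot = -pow(rvdw, -6.0);   the double overload is now chosen explicitly
    static real dispersionShiftPotential(real rvdw)
    {
        return static_cast<real>(-std::pow(static_cast<double>(rvdw), -6.0));
    }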

Reworked the implementations of gmx_pme_pp_init(),
gmx_pme_send_force_vir_ener() and gmx_pme_recv_coeffs_coords() to
handle unused-variable warnings better. Such functions are only ever
useful when MPI is configured, but the old implementations had
variables whose values had to propagate across ifdef-ed regions. Now
more of each function body sits behind an ifdef, and the unused
parameters are marked as such only on the code path where they really
are unused.
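
A minimal sketch of that pattern (illustrative only; the function name,
types and the plain (void) casts are simplified stand-ins for the real
GROMACS code, while GMX_MPI is the usual configuration macro):

    #include <cstdio>

    struct gmx_pme_pp; // opaque, as in pme-internal.h

    static void sendForceSketch(gmx_pme_pp *pme_pp, const double *f, int natoms)
    {
    #ifdef GMX_MPI
        // With MPI configured, the whole body uses every parameter.
        std::printf("packing %d forces for the peer held in %p\n",
                    natoms, static_cast<void *>(pme_pp));
        (void)f; // the real code would pack f and send it over MPI here
    #else
        // Only on this path are the parameters unused, so only here are
        // they marked as such, instead of propagating values across
        // ifdef-ed regions.
        (void)pme_pp;
        (void)f;
        (void)natoms;
    #endif
    }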

Simplified the logic in long-range-correction.cpp so that both humans
and the clang static analyzer can understand what is going on.
Fortunately, the death of the group scheme will eliminate much of the
horror of the ewald_LRcorrection() function, because calc_excl_corr
will always be false.

Replaced use of undocumented conjmul(a,b) with cmul(a, conjugate(b))
since the latter two are static functions in gmxcomplex.h.
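
For reference, a small self-contained check of the identity behind that
replacement, using std::complex in place of t_complex (illustrative
only, not code from this change):

    #include <complex>
    #include <cstdio>

    int main()
    {
        const std::complex<double> a(1.0, 2.0), b(3.0, -4.0);

        // What the removed conjmul(a, b) computed:
        //   re = a.re*b.re + a.im*b.im,  im = a.im*b.re - a.re*b.im
        const std::complex<double> oldConjmul(a.real()*b.real() + a.imag()*b.imag(),
                                              a.imag()*b.real() - a.real()*b.imag());

        // The replacement, cmul(a, conjugate(b)), is a * conj(b):
        const std::complex<double> viaConjugate = a * std::conj(b);

        std::printf("conjmul(a,b): %g%+gi   cmul(a, conjugate(b)): %g%+gi\n",
                    oldConjmul.real(), oldConjmul.imag(),
                    viaConjugate.real(), viaConjugate.imag());
        return 0;
    }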

Renamed tabulate_eir() as tabulateStructureFactors().

Removed mpi_type in favour of the existing GMX_MPI_REAL.
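
A minimal sketch of what that looks like in use, assuming an MPI build;
the local typedef, the macro definitions and the helper function here
are illustrative stand-ins rather than the GROMACS code:

    #include <mpi.h>

    #ifdef GMX_DOUBLE
    typedef double real;
    #define GMX_MPI_REAL MPI_DOUBLE
    #else
    typedef float real;
    #define GMX_MPI_REAL MPI_FLOAT
    #endif

    // was (roughly): a locally stored mpi_type variable passed to MPI calls
    static void sendReals(real *buf, int n, int dest, MPI_Comm comm)
    {
        // The precision-matched datatype macro is used directly instead.
        MPI_Send(buf, n, GMX_MPI_REAL, dest, 0, comm);
    }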

Replaced some macros with constants.

Converted comments into basic Doxygen, and added a suppression in
pme-internal.h because Doxygen seems to treat struct fields as public
documentation no matter what you tell it.

Change-Id: Ic9f219cc73dbb586c58a78cdfcd17e2bc1fd7a19

27 files changed:
src/gromacs/ewald/CMakeLists.txt
src/gromacs/ewald/calculate-spline-moduli.cpp [moved from src/gromacs/ewald/calculate-spline-moduli.c with 98% similarity]
src/gromacs/ewald/calculate-spline-moduli.h
src/gromacs/ewald/ewald.cpp [moved from src/gromacs/ewald/ewald.c with 90% similarity]
src/gromacs/ewald/ewald.h
src/gromacs/ewald/long-range-correction.cpp [moved from src/gromacs/ewald/long-range-correction.c with 97% similarity]
src/gromacs/ewald/long-range-correction.h
src/gromacs/ewald/pme-gather.cpp
src/gromacs/ewald/pme-gather.h
src/gromacs/ewald/pme-grid.cpp [moved from src/gromacs/ewald/pme-grid.c with 98% similarity]
src/gromacs/ewald/pme-grid.h
src/gromacs/ewald/pme-internal.h
src/gromacs/ewald/pme-load-balancing.cpp [moved from src/gromacs/ewald/pme-load-balancing.c with 79% similarity]
src/gromacs/ewald/pme-load-balancing.h
src/gromacs/ewald/pme-only.cpp [moved from src/gromacs/ewald/pme-only.c with 98% similarity]
src/gromacs/ewald/pme-pp.cpp [moved from src/gromacs/ewald/pme-pp.c with 80% similarity]
src/gromacs/ewald/pme-redistribute.cpp [moved from src/gromacs/ewald/pme-redistribute.c with 97% similarity]
src/gromacs/ewald/pme-redistribute.h
src/gromacs/ewald/pme-solve.cpp [moved from src/gromacs/ewald/pme-solve.c with 98% similarity]
src/gromacs/ewald/pme-solve.h
src/gromacs/ewald/pme-spline-work.cpp [moved from src/gromacs/ewald/pme-spline-work.c with 98% similarity]
src/gromacs/ewald/pme-spline-work.h
src/gromacs/ewald/pme-spread.cpp
src/gromacs/ewald/pme-spread.h
src/gromacs/ewald/pme.cpp [moved from src/gromacs/ewald/pme.c with 95% similarity]
src/gromacs/ewald/pme.h
src/programs/mdrun/md.cpp

index 5378331deffab16d369a92c52c5abcacf0690bb7..866d0688509695c461e560d2039c2d5d0213ddda 100644 (file)
@@ -32,7 +32,7 @@
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
-file(GLOB EWALD_SOURCES *.c *cpp)
+file(GLOB EWALD_SOURCES *.cpp)
 set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${EWALD_SOURCES} PARENT_SCOPE)
 
 if (BUILD_TESTING)
similarity index 98%
rename from src/gromacs/ewald/calculate-spline-moduli.c
rename to src/gromacs/ewald/calculate-spline-moduli.cpp
index e2f9248b4a02c7afbf9b9888b34e3567009cf375..52d91f05c71de00cc7011b3bfb79aa047fa97416 100644 (file)
 
 #include <math.h>
 
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/legacyheaders/macros.h"
+#include <algorithm>
+
 #include "gromacs/math/vec.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "pme-internal.h"
+
 static void make_dft_mod(real *mod, real *data, int ndata)
 {
     int  i, j;
@@ -74,7 +76,7 @@ static void make_dft_mod(real *mod, real *data, int ndata)
 void make_bspline_moduli(splinevec bsp_mod,
                          int nx, int ny, int nz, int order)
 {
-    int   nmax = max(nx, max(ny, nz));
+    int   nmax = std::max(nx, std::max(ny, nz));
     real *data, *ddata, *bsp_data;
     int   i, k, l;
     real  div;
index adcdd47601baaa1447761f09c854f1608468f7cd..3e33e6a0532959f370e3e92f1575aa444771b108 100644 (file)
 #ifndef GMX_EWALD_CALCULATE_SPLINE_MODULI_H
 #define GMX_EWALD_CALCULATE_SPLINE_MODULI_H
 
-#include "gromacs/ewald/pme-internal.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
 
 /* Calulate plain SPME B-spline interpolation */
 void make_bspline_moduli(splinevec bsp_mod,
@@ -51,8 +47,4 @@ void make_bspline_moduli(splinevec bsp_mod,
 void make_p3m_bspline_moduli(splinevec bsp_mod,
                              int nx, int ny, int nz, int order);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
similarity index 90%
rename from src/gromacs/ewald/ewald.c
rename to src/gromacs/ewald/ewald.cpp
index eda483e35d2ab0dac086a909c934d3e31905fa5c..cdd67142e1125fa2b21a61aa973f6a75256882b9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \internal \file
+ *
+ * \brief This file contains function definitions necessary for
+ * computing energies and forces for the plain-Ewald long-ranged part,
+ * and the correction for overall system charge for all Ewald-family
+ * methods.
+ *
+ * \author David van der Spoel <david.vanderspoel@icm.uu.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
 #include "gmxpre.h"
 
 #include "ewald.h"
 #include <math.h>
 #include <stdio.h>
 
-#include "gromacs/legacyheaders/macros.h"
+#include <cstdlib>
+
+#include <algorithm>
+
 #include "gromacs/legacyheaders/types/commrec.h"
 #include "gromacs/legacyheaders/types/inputrec.h"
 #include "gromacs/math/gmxcomplex.h"
@@ -58,8 +72,6 @@ struct gmx_ewald_tab_t
 
 void init_ewald_tab(struct gmx_ewald_tab_t **et, const t_inputrec *ir, FILE *fp)
 {
-    int n;
-
     snew(*et, 1);
     if (fp)
     {
@@ -69,24 +81,14 @@ void init_ewald_tab(struct gmx_ewald_tab_t **et, const t_inputrec *ir, FILE *fp)
     (*et)->nx       = ir->nkx+1;
     (*et)->ny       = ir->nky+1;
     (*et)->nz       = ir->nkz+1;
-    (*et)->kmax     = max((*et)->nx, max((*et)->ny, (*et)->nz));
+    (*et)->kmax     = std::max((*et)->nx, std::max((*et)->ny, (*et)->nz));
     (*et)->eir      = NULL;
     (*et)->tab_xy   = NULL;
     (*et)->tab_qxyz = NULL;
 }
 
-/* the other routines are in complex.h */
-static t_complex conjmul(t_complex a, t_complex b)
-{
-    t_complex c;
-
-    c.re = a.re*b.re + a.im*b.im;
-    c.im = a.im*b.re - a.re*b.im;
-
-    return c;
-}
-
-static void tabulate_eir(int natom, rvec x[], int kmax, cvec **eir, rvec lll)
+//! Make tables for the structure factor parts
+static void tabulateStructureFactors(int natom, rvec x[], int kmax, cvec **eir, rvec lll)
 {
     int  i, j, m;
 
@@ -161,8 +163,7 @@ real do_ewald(t_inputrec *ir,
     clear_mat(lrvir);
 
     calc_lll(box, lll);
-    /* make tables for the structure factor parts */
-    tabulate_eir(natoms, x, et->kmax, et->eir, lll);
+    tabulateStructureFactors(natoms, x, et->kmax, et->eir, lll);
 
     for (q = 0; q < (bFreeEnergy ? 2 : 1); q++)
     {
@@ -201,7 +202,7 @@ real do_ewald(t_inputrec *ir,
                 {
                     for (n = 0; n < natoms; n++)
                     {
-                        et->tab_xy[n] = conjmul(et->eir[ix][n][XX], et->eir[-iy][n][YY]);
+                        et->tab_xy[n] = cmul(et->eir[ix][n][XX], conjugate(et->eir[-iy][n][YY]));
                     }
                 }
                 for (iz = lowiz; iz < et->nz; iz++)
@@ -222,8 +223,8 @@ real do_ewald(t_inputrec *ir,
                     {
                         for (n = 0; n < natoms; n++)
                         {
-                            et->tab_qxyz[n] = rcmul(charge[n], conjmul(et->tab_xy[n],
-                                                                       et->eir[-iz][n][ZZ]));
+                            et->tab_qxyz[n] = rcmul(charge[n], cmul(et->tab_xy[n],
+                                                                    conjugate(et->eir[-iz][n][ZZ])));
                         }
                     }
 
index 227f786e66dfb3465dfff2185fca4b1623f206dd..6cb8deece429e44cc166f87ffc220730dc490dc2 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \libinternal \defgroup module_ewald Ewald-family treatments of long-ranged forces
+ * \ingroup group_mdrun
+ *
+ * \brief Computes energies and forces for long-ranged interactions
+ * using the Ewald decomposition. Includes plain Ewald, PME, P3M for
+ * Coulomb, PME for Lennard-Jones, load-balancing for PME, and
+ * supporting code.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Erik Lindahl <erik@kth.se>
+ * \author Roland Schulz <roland@rschulz.eu>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \author Christan Wennberg <cwennberg@kth.se>
+ */
+
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * computing energies and forces for the plain-Ewald long-ranged part,
+ * and the correction for overall system charge for all Ewald-family
+ * methods.
+ *
+ * \author David van der Spoel <david.vanderspoel@icm.uu.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
 
 #ifndef GMX_EWALD_EWALD_H
 #define GMX_EWALD_EWALD_H
 #include "gromacs/math/vectypes.h"
 #include "gromacs/utility/real.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* Forward declaration of type for managing Ewald tables */
 struct gmx_ewald_tab_t;
 
@@ -77,8 +100,4 @@ real
 ewald_charge_correction(t_commrec *cr, t_forcerec *fr, real lambda, matrix box,
                         real *dvdlambda, tensor vir);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
similarity index 97%
rename from src/gromacs/ewald/long-range-correction.c
rename to src/gromacs/ewald/long-range-correction.cpp
index 5a5b96ca3458b54180472d98840e99a837d39aa0..0dd94130e8e7726ae989be27398f33e9eca46529 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -79,16 +79,17 @@ void ewald_LRcorrection(int start, int end,
     double      Vexcl_q, dvdl_excl_q, dvdl_excl_lj; /* Necessary for precision */
     double      Vexcl_lj;
     real        one_4pi_eps;
-    real        v, vc, qiA, qiB, dr2, rinv, enercorr;
+    real        v, vc, qiA, qiB, dr2, rinv;
     real        Vself_q[2], Vself_lj[2], Vdipole[2], rinv2, ewc_q = fr->ewaldcoeff_q, ewcdr;
     real        ewc_lj = fr->ewaldcoeff_lj, ewc_lj2 = ewc_lj * ewc_lj;
     real        c6Ai   = 0, c6Bi = 0, c6A = 0, c6B = 0, ewcdr2, ewcdr4, c6L = 0, rinv6;
     rvec        df, dx, mutot[2], dipcorrA, dipcorrB;
-    tensor      dxdf_q, dxdf_lj;
-    real        vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
+    tensor      dxdf_q = {{0}}, dxdf_lj = {{0}};
+    real        vol    = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
     real        L1_q, L1_lj, dipole_coeff, qqA, qqB, qqL, vr0_q, vr0_lj = 0;
     gmx_bool    bMolPBC      = fr->bMolPBC;
     gmx_bool    bDoingLBRule = (fr->ljpme_combination_rule == eljpmeLB);
+    gmx_bool    bNeedLongRangeCorrection;
 
     /* This routine can be made faster by using tables instead of analytical interactions
      * However, that requires a thorough verification that they are correct in all cases.
@@ -151,12 +152,8 @@ void ewald_LRcorrection(int start, int end,
         fprintf(debug, "mutot   = %8.3f  %8.3f  %8.3f\n",
                 mutot[0][XX], mutot[0][YY], mutot[0][ZZ]);
     }
-    clear_mat(dxdf_q);
-    if (EVDW_PME(fr->vdwtype))
-    {
-        clear_mat(dxdf_lj);
-    }
-    if ((calc_excl_corr || dipole_coeff != 0) && !bHaveChargeOrTypePerturbed)
+    bNeedLongRangeCorrection = (calc_excl_corr || dipole_coeff != 0);
+    if (bNeedLongRangeCorrection && !bHaveChargeOrTypePerturbed)
     {
         for (i = start; (i < end); i++)
         {
@@ -199,9 +196,6 @@ void ewald_LRcorrection(int start, int end,
                         }
                         if (qqA != 0.0 || c6A != 0.0)
                         {
-                            real fscal;
-
-                            fscal = 0;
                             rvec_sub(x[i], x[k], dx);
                             if (bMolPBC)
                             {
@@ -228,7 +222,7 @@ void ewald_LRcorrection(int start, int end,
                                 rinv2             = rinv*rinv;
                                 if (qqA != 0.0)
                                 {
-                                    real dr;
+                                    real dr, fscal;
 
                                     dr       = 1.0/rinv;
                                     ewcdr    = ewc_q*dr;
@@ -270,6 +264,8 @@ void ewald_LRcorrection(int start, int end,
 
                                 if (c6A != 0.0)
                                 {
+                                    real fscal;
+
                                     rinv6     = rinv2*rinv2*rinv2;
                                     ewcdr2    = ewc_lj2*dr2;
                                     ewcdr4    = ewcdr2*ewcdr2;
@@ -317,7 +313,7 @@ void ewald_LRcorrection(int start, int end,
             }
         }
     }
-    else if (calc_excl_corr || dipole_coeff != 0)
+    else if (bNeedLongRangeCorrection)
     {
         for (i = start; (i < end); i++)
         {
@@ -361,7 +357,6 @@ void ewald_LRcorrection(int start, int end,
                         {
                             real fscal;
 
-                            fscal = 0;
                             qqL   = L1_q*qqA + lambda_q*qqB;
                             if (EVDW_PME(fr->vdwtype))
                             {
index 3832894d0d5d37f7056e879d1c3a5caf461885ad..bf05f4a8589fe70e3647a42d44a89b5807fc2020 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * computing energies and forces for the PME long-ranged part (Coulomb
+ * and LJ).
+ *
+ * \author Erik Lindahl <erik@kth.se>
+ * \author Berk Hess <hess@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
 
 #ifndef GMX_EWALD_LONG_RANGE_CORRECTION_H
 #define GMX_EWALD_LONG_RANGE_CORRECTION_H
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /*! \brief Calculate long-range Ewald correction terms.
  *
  * For the group cutoff scheme (only), calculates the correction to
@@ -74,8 +82,4 @@ ewald_LRcorrection(int start, int end,
                    real lambda_q, real lambda_lj,
                    real *dvdlambda_q, real *dvdlambda_lj);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
index 46d63660a086f1e199b98651c8228ea1d356671f..5b28cac160c9873ffb93063b804e988410d0726b 100644 (file)
 
 #include "gmxpre.h"
 
-#include "pme-gather.h"
-
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-simd.h"
-#include "gromacs/ewald/pme-spline-work.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "pme-internal.h"
+#include "pme-simd.h"
+#include "pme-spline-work.h"
+
 #define DO_FSPLINE(order)                      \
     for (ithx = 0; (ithx < order); ithx++)              \
     {                                              \
@@ -79,17 +78,17 @@ void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
                        real scale)
 {
     /* sum forces for local particles */
-    int                     nn, n, ithx, ithy, ithz, i0, j0, k0;
-    int                     index_x, index_xy;
-    int                     nx, ny, nz, pny, pnz;
-    int                 *   idxptr;
-    real                    tx, ty, dx, dy, coefficient;
-    real                    fx, fy, fz, gval;
-    real                    fxy1, fz1;
-    real                   *thx, *thy, *thz, *dthx, *dthy, *dthz;
-    int                     norder;
-    real                    rxx, ryx, ryy, rzx, rzy, rzz;
-    int                     order;
+    int    nn, n, ithx, ithy, ithz, i0, j0, k0;
+    int    index_x, index_xy;
+    int    nx, ny, nz, pny, pnz;
+    int   *idxptr;
+    real   tx, ty, dx, dy, coefficient;
+    real   fx, fy, fz, gval;
+    real   fxy1, fz1;
+    real  *thx, *thy, *thz, *dthx, *dthy, *dthz;
+    int    norder;
+    real   rxx, ryx, ryy, rzx, rzy, rzz;
+    int    order;
 
 #ifdef PME_SIMD4_SPREAD_GATHER
     // cppcheck-suppress unreadVariable cppcheck seems not to analyze code from pme-simd4.h
@@ -158,7 +157,7 @@ void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
 #define PME_GATHER_F_SIMD4_ALIGNED
 #define PME_ORDER 4
 #endif
-#include "gromacs/ewald/pme-simd4.h"
+#include "pme-simd4.h"
 #else
                     DO_FSPLINE(4);
 #endif
@@ -167,7 +166,7 @@ void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
 #ifdef PME_SIMD4_SPREAD_GATHER
 #define PME_GATHER_F_SIMD4_ALIGNED
 #define PME_ORDER 5
-#include "gromacs/ewald/pme-simd4.h"
+#include "pme-simd4.h"
 #else
                     DO_FSPLINE(5);
 #endif
index faf0e4ea78dff99096383fdf9f378bf189a976cb..5a47fee3576706f8c9ac43c33cbd4ced1c563787 100644 (file)
 #ifndef GMX_EWALD_PME_GATHER_H
 #define GMX_EWALD_PME_GATHER_H
 
-#include "gromacs/ewald/pme-internal.h"
 #include "gromacs/utility/real.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
 
 void
 gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
@@ -52,8 +49,4 @@ real
 gather_energy_bsplines(struct gmx_pme_t *pme, real *grid,
                        pme_atomcomm_t *atc);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
similarity index 98%
rename from src/gromacs/ewald/pme-grid.c
rename to src/gromacs/ewald/pme-grid.cpp
index 5bc7bd1f0871861b1d2e4b4594ad421574e71ac3..dcdca578bd779b37c268c3147e575f9a56d882cd 100644 (file)
 
 #include "config.h"
 
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-simd.h"
+#include <cstdlib>
+
 #include "gromacs/ewald/pme.h"
-#include "gromacs/legacyheaders/macros.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/timing/cyclecounter.h"
 #include "gromacs/utility/smalloc.h"
@@ -55,6 +54,8 @@
 #include "gromacs/utility/futil.h"
 #endif
 
+#include "pme-simd.h"
+
 /* GMX_CACHE_SEP should be a multiple of the SIMD and SIMD4 register size
  * to preserve alignment.
  */
@@ -234,7 +235,7 @@ int copy_pmegrid_to_fftgrid(struct gmx_pme_t *pme, real *pmegrid, real *fftgrid,
 {
     ivec    local_fft_ndata, local_fft_offset, local_fft_size;
     ivec    local_pme_size;
-    int     i, ix, iy, iz;
+    int     ix, iy, iz;
     int     pmeidx, fftidx;
 
     /* Dimensions should be identical for A/B grid, so we just use A here */
@@ -371,13 +372,12 @@ int copy_fftgrid_to_pmegrid(struct gmx_pme_t *pme, const real *fftgrid, real *pm
 
 void wrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid)
 {
-    int     nx, ny, nz, pnx, pny, pnz, ny_x, overlap, ix, iy, iz;
+    int     nx, ny, nz, pny, pnz, ny_x, overlap, ix, iy, iz;
 
     nx = pme->nkx;
     ny = pme->nky;
     nz = pme->nkz;
 
-    pnx = pme->pmegrid_nx;
     pny = pme->pmegrid_ny;
     pnz = pme->pmegrid_nz;
 
@@ -432,13 +432,12 @@ void wrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid)
 
 void unwrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid)
 {
-    int     nx, ny, nz, pnx, pny, pnz, ny_x, overlap, ix;
+    int     nx, ny, nz, pny, pnz, ny_x, overlap, ix;
 
     nx = pme->nkx;
     ny = pme->nky;
     nz = pme->nkz;
 
-    pnx = pme->pmegrid_nx;
     pny = pme->pmegrid_ny;
     pnz = pme->pmegrid_nz;
 
@@ -625,7 +624,7 @@ static void make_subgrid_division(const ivec n, int ovl, int nthread,
     env = getenv("GMX_PME_THREAD_DIVISION");
     if (env != NULL)
     {
-        sscanf(env, "%d %d %d", &nsub[XX], &nsub[YY], &nsub[ZZ]);
+        sscanf(env, "%20d %20d %20d", &nsub[XX], &nsub[YY], &nsub[ZZ]);
     }
 
     if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
@@ -642,7 +641,7 @@ void pmegrids_init(pmegrids_t *grids,
                    int overlap_x,
                    int overlap_y)
 {
-    ivec n, n_base, g0, g1;
+    ivec n, n_base;
     int  t, x, y, z, d, i, tfac;
     int  max_comm_lines = -1;
 
index 00c25375e9036bfddac72396999eec57a5ae3003..d594553bed4a4ecfd0f6b1190045c39d669d80b6 100644 (file)
 
 #include "config.h"
 
-#include "gromacs/ewald/pme-internal.h"
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
 
 #ifdef GMX_MPI
 void
@@ -101,8 +98,4 @@ reuse_pmegrids(const pmegrids_t *oldgrid, pmegrids_t *newgrid);
 void
 dump_local_fftgrid(struct gmx_pme_t *pme, const real *fftgrid);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
index f9e50aa5ff875d43a229a6abd986395ca433670a..f3fcb83bf1d2646b91043c8236b93b0fe4d7ac0d 100644 (file)
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \internal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * computing energies and forces for the PME long-ranged part (Coulomb
+ * and LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
 
 /* TODO This file is a temporary holding area for stuff local to the
  * PME code, before it acquires some more normal ewald/file.c and
 #include "gromacs/timing/walltime_accounting.h"
 #include "gromacs/utility/gmxmpi.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+//@{
+//! Grid indices for A state for charge and Lennard-Jones C6
+#define PME_GRID_QA    0
+#define PME_GRID_C6A   2
+//@}
 
-#define PME_GRID_QA    0 /* Gridindex for A-state for Q */
-#define PME_GRID_C6A   2 /* Gridindex for A-state for LJ */
+//@{
+/*! \brief Flags that indicate the number of PME grids in use */
 #define DO_Q           2 /* Electrostatic grids have index q<2 */
 #define DO_Q_AND_LJ    4 /* non-LB LJ grids have index 2 <= q < 4 */
 #define DO_Q_AND_LJ_LB 9 /* With LB rules we need a total of 2+7 grids */
+//@}
 
-/* Pascal triangle coefficients scaled with (1/2)^6 for LJ-PME with LB-rules */
+/*! \brief Pascal triangle coefficients scaled with (1/2)^6 for LJ-PME with LB-rules */
 static const real lb_scale_factor[] = {
     1.0/64, 6.0/64, 15.0/64, 20.0/64,
     15.0/64, 6.0/64, 1.0/64
 };
 
-/* Pascal triangle coefficients used in solve_pme_lj_yzx, only need to do 4 calculations due to symmetry */
+/*! \brief Pascal triangle coefficients used in solve_pme_lj_yzx, only need to do 4 calculations due to symmetry */
 static const real lb_scale_factor_symm[] = { 2.0/64, 12.0/64, 30.0/64, 20.0/64 };
 
-/* We only define a maximum to be able to use local arrays without allocation.
+/*! \brief We only define a maximum to be able to use local arrays without allocation.
  * An order larger than 12 should never be needed, even for test cases.
  * If needed it can be changed here.
  */
 #define PME_ORDER_MAX 12
 
+/*! \brief As gmx_pme_init, but takes most settings, except the grid, from pme_src */
 int gmx_pme_reinit(struct gmx_pme_t **pmedata,
                    t_commrec *        cr,
                    struct gmx_pme_t * pme_src,
                    const t_inputrec * ir,
                    ivec               grid_size);
-/* As gmx_pme_init, but takes most settings, except the grid, from pme_src */
 
 /* The following three routines are for PME/PP node splitting in pme_pp.c */
 
-/* Abstract type for PME <-> PP communication */
+/*! \brief Abstract type for PME <-> PP communication */
 typedef struct gmx_pme_pp *gmx_pme_pp_t;
 
-/* Internal datastructures */
+/* Temporary suppression until these structs become opaque and don't live in
+ * a header that is included by other headers. Also, until then I have no
+ * idea what some of the names mean. */
+
+//! @cond Doxygen_Suppress
+
+/*! \brief Data structure for grid communication */
 typedef struct {
     int send_index0;
     int send_nindex;
@@ -101,6 +120,7 @@ typedef struct {
     int recv_size;   /* Receive buffer width, used with OpenMP */
 } pme_grid_comm_t;
 
+/*! \brief Data structure for grid overlap communication */
 typedef struct {
 #ifdef GMX_MPI
     MPI_Comm         mpi_comm;
@@ -116,14 +136,17 @@ typedef struct {
     real            *recvbuf;
 } pme_overlap_t;
 
+/*! \brief Data structure for organizing particle allocation to threads */
 typedef struct {
     int *n;      /* Cumulative counts of the number of particles per thread */
     int  nalloc; /* Allocation size of i */
     int *i;      /* Particle indices ordered on thread index (n) */
 } thread_plist_t;
 
+/*! \brief Helper typedef for spline vectors */
 typedef real *splinevec[DIM];
 
+/*! \brief Data structure for B-spline interpolation */
 typedef struct {
     int      *thread_one;
     int       n;
@@ -134,6 +157,7 @@ typedef struct {
     real     *ptr_dtheta_z;
 } splinedata_t;
 
+/*! \brief Data structure for coordinating transfer between PP and PME ranks*/
 typedef struct {
     int      dimind;        /* The index of the dimension, 0=x, 1=y */
     int      nslab;
@@ -172,6 +196,7 @@ typedef struct {
     splinedata_t   *spline;
 } pme_atomcomm_t;
 
+/*! \brief Data structure for a single PME grid */
 typedef struct {
     ivec  ci;     /* The spatial location of this grid         */
     ivec  n;      /* The used size of *grid, including order-1 */
@@ -181,6 +206,7 @@ typedef struct {
     real *grid;   /* The grid local thread, size n             */
 } pmegrid_t;
 
+/*! \brief Data structures for PME grids */
 typedef struct {
     pmegrid_t  grid;         /* The full node grid (non thread-local)            */
     int        nthread;      /* The number of threads operating on this grid     */
@@ -191,10 +217,13 @@ typedef struct {
     ivec       nthread_comm; /* The number of threads to communicate with        */
 } pmegrids_t;
 
+/*! \brief Data structure for spline-interpolation working buffers */
 struct pme_spline_work;
 
+/*! \brief Data structure for working buffers */
 struct pme_solve_work_t;
 
+/*! \brief Master PME data structure */
 typedef struct gmx_pme_t {
     int           ndecompdim; /* The number of decomposition dimensions */
     int           nodeid;     /* Our nodeid in mpi->mpi_comm */
@@ -287,14 +316,10 @@ typedef struct gmx_pme_t {
     real *   sum_qgrid_dd_tmp;
 } t_gmx_pme_t;
 
-void gmx_pme_check_restrictions(int pme_order,
-                                int nkx, int nky, int nkz,
-                                int nnodes_major,
-                                int nnodes_minor,
-                                gmx_bool bUseThreads,
-                                gmx_bool bFatal,
-                                gmx_bool *bValidSettings);
-/* Check restrictions on pme_order and the PME grid nkx,nky,nkz.
+//! @endcond
+
+/*! \brief Check restrictions on pme_order and the PME grid nkx,nky,nkz.
+ *
  * With bFatal=TRUE, a fatal error is generated on violation,
  * bValidSettings=NULL can be passed.
  * With bFatal=FALSE, *bValidSettings reports the validity of the settings.
@@ -302,14 +327,21 @@ void gmx_pme_check_restrictions(int pme_order,
  * If at calling you bUseThreads is unknown, pass TRUE for conservative
  * checking.
  */
+void gmx_pme_check_restrictions(int pme_order,
+                                int nkx, int nky, int nkz,
+                                int nnodes_major,
+                                int nnodes_minor,
+                                gmx_bool bUseThreads,
+                                gmx_bool bFatal,
+                                gmx_bool *bValidSettings);
 
+/*! \brief Initialize the PME-only side of the PME <-> PP communication */
 gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr);
-/* Initialize the PME-only side of the PME <-> PP communication */
 
+/*! \brief Tell our PME-only node to switch to a new grid size */
 void gmx_pme_send_switchgrid(t_commrec *cr, ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj);
-/* Tell our PME-only node to switch to a new grid size */
 
-/* Return values for gmx_pme_recv_q_x */
+/*! \brief Return values for gmx_pme_recv_q_x */
 enum {
     pmerecvqxX,            /* calculate PME mesh interactions for new x    */
     pmerecvqxFINISH,       /* the simulation should finish, we should quit */
@@ -317,7 +349,15 @@ enum {
     pmerecvqxRESETCOUNTERS /* reset the cycle and flop counters            */
 };
 
-int gmx_pme_recv_coeffs_coords(gmx_pme_pp_t pme_pp,
+/*! \brief Called by PME-only ranks to receive coefficients and coordinates
+ *
+ * The return value is used to control further processing, with meanings:
+ * pmerecvqxX:             all parameters set, chargeA and chargeB can be NULL
+ * pmerecvqxFINISH:        no parameters set
+ * pmerecvqxSWITCHGRID:    only grid_size and *ewaldcoeff are set
+ * pmerecvqxRESETCOUNTERS: *step is set
+ */
+int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
                                int *natoms,
                                real **chargeA, real **chargeB,
                                real **sqrt_c6A, real **sqrt_c6B,
@@ -329,23 +369,12 @@ int gmx_pme_recv_coeffs_coords(gmx_pme_pp_t pme_pp,
                                gmx_bool *bEnerVir, int *pme_flags,
                                gmx_int64_t *step,
                                ivec grid_size, real *ewaldcoeff_q, real *ewaldcoeff_lj);
-;
-/* With return value:
- * pmerecvqxX:             all parameters set, chargeA and chargeB can be NULL
- * pmerecvqxFINISH:        no parameters set
- * pmerecvqxSWITCHGRID:    only grid_size and *ewaldcoeff are set
- * pmerecvqxRESETCOUNTERS: *step is set
- */
 
-void gmx_pme_send_force_vir_ener(gmx_pme_pp_t pme_pp,
+/*! \brief Send the PME mesh force, virial and energy to the PP-only nodes */
+void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
                                  rvec *f, matrix vir_q, real energy_q,
                                  matrix vir_lj, real energy_lj,
                                  real dvdlambda_q, real dvdlambda_lj,
                                  float cycles);
-/* Send the PME mesh force, virial and energy to the PP-only nodes */
-
-#ifdef __cplusplus
-}
-#endif
 
 #endif
similarity index 79%
rename from src/gromacs/ewald/pme-load-balancing.c
rename to src/gromacs/ewald/pme-load-balancing.cpp
index 758a76a38ebc5ccb4165b85d4378d3a5de83c499..73327ca167786669d196766db6a3baf08b05fbb8 100644 (file)
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \internal \file
+ *
+ * \brief This file contains function definitions necessary for
+ * managing automatic load balance of PME calculations (Coulomb and
+ * LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
 #include "gmxpre.h"
 
 #include "pme-load-balancing.h"
 
 #include "config.h"
 
+#include <cmath>
+
+#include <algorithm>
+
 #include "gromacs/domdec/domdec.h"
-#include "gromacs/ewald/pme-internal.h"
 #include "gromacs/legacyheaders/calcgrid.h"
 #include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/macros.h"
 #include "gromacs/legacyheaders/md_logging.h"
 #include "gromacs/legacyheaders/network.h"
 #include "gromacs/legacyheaders/sim_util.h"
 #include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/smalloc.h"
 
-/* Parameters and setting for one PP-PME setup */
-typedef struct {
-    real              rcut_coulomb;    /* Coulomb cut-off                              */
-    real              rlist;           /* pair-list cut-off                            */
-    real              rlistlong;       /* LR pair-list cut-off                         */
-    int               nstcalclr;       /* frequency of evaluating long-range forces for group scheme */
-    real              spacing;         /* (largest) PME grid spacing                   */
-    ivec              grid;            /* the PME grid dimensions                      */
-    real              grid_efficiency; /* ineffiency factor for non-uniform grids <= 1 */
-    real              ewaldcoeff_q;    /* Electrostatic Ewald coefficient            */
-    real              ewaldcoeff_lj;   /* LJ Ewald coefficient, only for the call to send_switchgrid */
-    struct gmx_pme_t *pmedata;         /* the data structure used in the PME code      */
-    int               count;           /* number of times this setup has been timed    */
-    double            cycles;          /* the fastest time for this setup in cycles    */
-} pme_setup_t;
-
-/* In the initial scan, step by grids that are at least a factor 0.8 coarser */
-#define PME_LB_GRID_SCALE_FAC  0.8
-/* In the initial scan, try to skip grids with uneven x/y/z spacing,
+#include "pme-internal.h"
+
+/*! \brief Parameters and settings for one PP-PME setup */
+struct pme_setup_t {
+    real              rcut_coulomb;    /**< Coulomb cut-off                              */
+    real              rlist;           /**< pair-list cut-off                            */
+    real              rlistlong;       /**< LR pair-list cut-off                         */
+    int               nstcalclr;       /**< frequency of evaluating long-range forces for group scheme */
+    real              spacing;         /**< (largest) PME grid spacing                   */
+    ivec              grid;            /**< the PME grid dimensions                      */
+    real              grid_efficiency; /**< inefficiency factor for non-uniform grids <= 1 */
+    real              ewaldcoeff_q;    /**< Electrostatic Ewald coefficient            */
+    real              ewaldcoeff_lj;   /**< LJ Ewald coefficient, only for the call to send_switchgrid */
+    struct gmx_pme_t *pmedata;         /**< the data structure used in the PME code      */
+    int               count;           /**< number of times this setup has been timed    */
+    double            cycles;          /**< the fastest time for this setup in cycles    */
+};
+
+/*! \brief In the initial scan, step by grids that are at least a factor 0.8 coarser */
+const real gridScaleFactor = 0.8;
+
+/*! \brief In the initial scan, try to skip grids with uneven x/y/z spacing,
  * checking if the "efficiency" is more than 5% worse than the previous grid.
  */
-#define PME_LB_GRID_EFFICIENCY_REL_FAC  1.05
-/* Rerun up till 12% slower setups than the fastest up till now */
-#define PME_LB_SLOW_FAC  1.12
-/* If setups get more than 2% faster, do another round to avoid
+const real relativeEfficiencyFactor = 1.05;
+/*! \brief Rerun setups up to 12% slower than the fastest so far */
+const real maxRelativeSlowdownAccepted = 1.12;
+/*! \brief If setups get more than 2% faster, do another round to avoid
  * choosing a slower setup due to acceleration or fluctuations.
  */
-#define PME_LB_ACCEL_TOL 1.02
+const real maxFluctuationAccepted = 1.02;
 
-enum {
+/*! \brief Enumeration whose values describe the effect limiting the load balancing */
+enum epmelb {
     epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimPMEGRID, epmelblimNR
 };
 
+/*! \brief Descriptive strings matching ::epmelb */
 const char *pmelblim_str[epmelblimNR] =
 { "no", "box size", "domain decompostion", "PME grid restriction" };
 
-struct pme_load_balancing {
-    int          nstage;             /* the current maximum number of stages */
-
-    real         cut_spacing;        /* the minimum cutoff / PME grid spacing ratio */
-    real         rcut_vdw;           /* Vdw cutoff (does not change) */
-    real         rcut_coulomb_start; /* Initial electrostatics cutoff */
-    int          nstcalclr_start;    /* Initial electrostatics cutoff */
-    real         rbuf_coulomb;       /* the pairlist buffer size */
-    real         rbuf_vdw;           /* the pairlist buffer size */
-    matrix       box_start;          /* the initial simulation box */
-    int          n;                  /* the count of setup as well as the allocation size */
-    pme_setup_t *setup;              /* the PME+cutoff setups */
-    int          cur;                /* the current setup */
-    int          fastest;            /* fastest setup up till now */
-    int          start;              /* start of setup range to consider in stage>0 */
-    int          end;                /* end   of setup range to consider in stage>0 */
-    int          elimited;           /* was the balancing limited, uses enum above */
-    int          cutoff_scheme;      /* Verlet or group cut-offs */
-
-    int          stage;              /* the current stage */
+struct pme_load_balancing_t {
+    int          nstage;             /**< the current maximum number of stages */
+
+    real         cut_spacing;        /**< the minimum cutoff / PME grid spacing ratio */
+    real         rcut_vdw;           /**< Vdw cutoff (does not change) */
+    real         rcut_coulomb_start; /**< Initial electrostatics cutoff */
+    int          nstcalclr_start;    /**< Initial electrostatics cutoff */
+    real         rbuf_coulomb;       /**< the pairlist buffer size */
+    real         rbuf_vdw;           /**< the pairlist buffer size */
+    matrix       box_start;          /**< the initial simulation box */
+    int          n;                  /**< the count of setup as well as the allocation size */
+    pme_setup_t *setup;              /**< the PME+cutoff setups */
+    int          cur;                /**< the current setup */
+    int          fastest;            /**< fastest setup up till now */
+    int          start;              /**< start of setup range to consider in stage>0 */
+    int          end;                /**< end   of setup range to consider in stage>0 */
+    int          elimited;           /**< was the balancing limited, uses enum above */
+    int          cutoff_scheme;      /**< Verlet or group cut-offs */
+
+    int          stage;              /**< the current stage */
 };
 
-void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
+void pme_loadbal_init(pme_load_balancing_t **pme_lb_p,
                       const t_inputrec *ir, matrix box,
                       const interaction_const_t *ic,
                       struct gmx_pme_t *pmedata)
 {
-    pme_load_balancing_t pme_lb;
-    real                 spm, sp;
-    int                  d;
+    pme_load_balancing_t *pme_lb;
+    real                  spm, sp;
+    int                   d;
 
     snew(pme_lb, 1);
 
@@ -208,7 +224,8 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
     *pme_lb_p = pme_lb;
 }
 
-static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t  pme_lb,
+/*! \brief Try to increase the cutoff during load balancing */
+static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t *pme_lb,
                                             int                   pme_order,
                                             const gmx_domdec_t   *dd)
 {
@@ -285,8 +302,8 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t  pme_lb,
     {
         tmpr_coulomb          = set->rcut_coulomb + pme_lb->rbuf_coulomb;
         tmpr_vdw              = pme_lb->rcut_vdw + pme_lb->rbuf_vdw;
-        set->rlist            = min(tmpr_coulomb, tmpr_vdw);
-        set->rlistlong        = max(tmpr_coulomb, tmpr_vdw);
+        set->rlist            = std::min(tmpr_coulomb, tmpr_vdw);
+        set->rlistlong        = std::max(tmpr_coulomb, tmpr_vdw);
 
         /* Set the long-range update frequency */
         if (set->rlist == set->rlistlong)
@@ -346,6 +363,7 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t  pme_lb,
     return TRUE;
 }
 
+/*! \brief Print the PME grid */
 static void print_grid(FILE *fp_err, FILE *fp_log,
                        const char *pre,
                        const char *desc,
@@ -376,7 +394,8 @@ static void print_grid(FILE *fp_err, FILE *fp_log,
     }
 }
 
-static int pme_loadbal_end(pme_load_balancing_t pme_lb)
+/*! \brief Return the index of the last setup used in PME load balancing */
+static int pme_loadbal_end(pme_load_balancing_t *pme_lb)
 {
     /* In the initial stage only n is set; end is not set yet */
     if (pme_lb->end > 0)
@@ -389,9 +408,10 @@ static int pme_loadbal_end(pme_load_balancing_t pme_lb)
     }
 }
 
+/*! \brief Print descriptive string about what limits PME load balancing */
 static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
                                   gmx_int64_t step,
-                                  pme_load_balancing_t pme_lb)
+                                  pme_load_balancing_t *pme_lb)
 {
     char buf[STRLEN], sbuf[22];
 
@@ -409,13 +429,16 @@ static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
     }
 }
 
-static void switch_to_stage1(pme_load_balancing_t pme_lb)
+/*! \brief Switch load balancing to stage 1
+ *
+ * In this stage, only reasonably fast setups are run again. */
+static void switch_to_stage1(pme_load_balancing_t *pme_lb)
 {
     pme_lb->start = 0;
     while (pme_lb->start+1 < pme_lb->n &&
            (pme_lb->setup[pme_lb->start].count == 0 ||
             pme_lb->setup[pme_lb->start].cycles >
-            pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC))
+            pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted))
     {
         pme_lb->start++;
     }
@@ -427,7 +450,7 @@ static void switch_to_stage1(pme_load_balancing_t pme_lb)
     pme_lb->end = pme_lb->n;
     if (pme_lb->setup[pme_lb->end-1].count > 0 &&
         pme_lb->setup[pme_lb->end-1].cycles >
-        pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
+        pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted)
     {
         pme_lb->end--;
     }
@@ -440,7 +463,7 @@ static void switch_to_stage1(pme_load_balancing_t pme_lb)
     pme_lb->cur = pme_lb->start - 1;
 }
 
-gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
+gmx_bool pme_load_balance(pme_load_balancing_t      *pme_lb,
                           t_commrec                 *cr,
                           FILE                      *fp_err,
                           FILE                      *fp_log,
@@ -492,7 +515,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
     }
     else
     {
-        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
+        if (cycles*maxFluctuationAccepted < set->cycles &&
             pme_lb->stage == pme_lb->nstage - 1)
         {
             /* The performance went up a lot (due to e.g. DD load balancing).
@@ -506,11 +529,11 @@ gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
                         "Increased the number stages to %d"
                         " and ignoring the previous performance\n",
                         set->grid[XX], set->grid[YY], set->grid[ZZ],
-                        cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
+                        cycles*1e-6, set->cycles*1e-6, maxFluctuationAccepted,
                         pme_lb->nstage);
             }
         }
-        set->cycles = min(set->cycles, cycles);
+        set->cycles = std::min(set->cycles, cycles);
     }
 
     if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
@@ -537,10 +560,10 @@ gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
     cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;
 
     /* Check in stage 0 if we should stop scanning grids.
-     * Stop when the time is more than SLOW_FAC longer than the fastest.
+     * Stop when the time is more than maxRelativeSlowdownAccepted times that of the fastest.
      */
     if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
-        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
+        cycles > pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted)
     {
         pme_lb->n = pme_lb->cur + 1;
         /* Done with scanning, go to stage 1 */
@@ -612,10 +635,10 @@ gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
                !(pme_lb->setup[pme_lb->cur].grid[XX]*
                  pme_lb->setup[pme_lb->cur].grid[YY]*
                  pme_lb->setup[pme_lb->cur].grid[ZZ] <
-                 gridsize_start*PME_LB_GRID_SCALE_FAC
+                 gridsize_start*gridScaleFactor
                  &&
                  pme_lb->setup[pme_lb->cur].grid_efficiency <
-                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
+                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*relativeEfficiencyFactor));
     }
 
     if (pme_lb->stage > 0 && pme_lb->end == 1)
@@ -642,7 +665,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
         }
         while (pme_lb->stage == pme_lb->nstage - 1 &&
                pme_lb->setup[pme_lb->cur].count > 0 &&
-               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);
+               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*maxRelativeSlowdownAccepted);
 
         if (pme_lb->stage == pme_lb->nstage)
         {
@@ -691,13 +714,13 @@ gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
         ic->ewaldcoeff_lj   = set->ewaldcoeff_lj;
         if (ic->vdw_modifier == eintmodPOTSHIFT)
         {
-            real crc2;
+            real       crc2;
 
-            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
-            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
+            ic->dispersion_shift.cpot = -std::pow(static_cast<double>(ic->rvdw), -6.0);
+            ic->repulsion_shift.cpot  = -std::pow(static_cast<double>(ic->rvdw), -12.0);
             ic->sh_invrc6             = -ic->dispersion_shift.cpot;
             crc2                      = sqr(ic->ewaldcoeff_lj*ic->rvdw);
-            ic->sh_lj_ewald           = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
+            ic->sh_lj_ewald           = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*std::pow(static_cast<double>(ic->rvdw), -6.0);
         }
     }
 
@@ -761,16 +784,18 @@ gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
     return TRUE;
 }
 
-void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n)
+void restart_pme_loadbal(pme_load_balancing_t *pme_lb, int n)
 {
     pme_lb->nstage += n;
 }
 
+/*! \brief Return product of the number of PME grid points in each dimension */
 static int pme_grid_points(const pme_setup_t *setup)
 {
     return setup->grid[XX]*setup->grid[YY]*setup->grid[ZZ];
 }
 
+/*! \brief Return the largest short-range list cut-off radius */
 static real pme_loadbal_rlist(const pme_setup_t *setup)
 {
     /* With the group cut-off scheme we can have twin-range either
@@ -787,8 +812,9 @@ static real pme_loadbal_rlist(const pme_setup_t *setup)
     }
 }
 
+/*! \brief Print one load-balancing setting */
 static void print_pme_loadbal_setting(FILE              *fplog,
-                                      char              *name,
+                                      const char        *name,
                                       const pme_setup_t *setup)
 {
     fprintf(fplog,
@@ -799,15 +825,18 @@ static void print_pme_loadbal_setting(FILE              *fplog,
             setup->spacing, 1/setup->ewaldcoeff_q);
 }
 
-static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
-                                       t_commrec           *cr,
-                                       FILE                *fplog,
-                                       gmx_bool             bNonBondedOnGPU)
+/*! \brief Print all load-balancing settings */
+static void print_pme_loadbal_settings(pme_load_balancing_t *pme_lb,
+                                       t_commrec            *cr,
+                                       FILE                 *fplog,
+                                       gmx_bool              bNonBondedOnGPU)
 {
-    double pp_ratio, grid_ratio;
+    double     pp_ratio, grid_ratio;
+    real       pp_ratio_temporary;
 
-    pp_ratio   = pow(pme_loadbal_rlist(&pme_lb->setup[pme_lb->cur])/pme_loadbal_rlist(&pme_lb->setup[0]), 3.0);
-    grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
+    pp_ratio_temporary = pme_loadbal_rlist(&pme_lb->setup[pme_lb->cur])/pme_loadbal_rlist(&pme_lb->setup[0]);
+    pp_ratio           = std::pow(static_cast<double>(pp_ratio_temporary), 3.0);
+    grid_ratio         = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
         (double)pme_grid_points(&pme_lb->setup[0]);
 
     fprintf(fplog, "\n");
@@ -848,9 +877,10 @@ static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
     }
 }
 
-void pme_loadbal_done(pme_load_balancing_t pme_lb,
-                      t_commrec *cr, FILE *fplog,
-                      gmx_bool bNonBondedOnGPU)
+void pme_loadbal_done(pme_load_balancing_t *pme_lb,
+                      t_commrec            *cr,
+                      FILE                 *fplog,
+                      gmx_bool              bNonBondedOnGPU)
 {
     if (fplog != NULL && (pme_lb->cur > 0 || pme_lb->elimited != epmelblimNO))
     {
index ebd6b72cdefabf509d2c2c6669e68b03a5fb5df4..721ead76f72497dbd9f77a54b5975fd3bb3c04ec 100644 (file)
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * managing automatic load balance of PME calculations (Coulomb and
+ * LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
 
 #ifndef GMX_EWALD_PME_LOAD_BALANCING_H
 #define GMX_EWALD_PME_LOAD_BALANCING_H
 #include "gromacs/legacyheaders/types/interaction_const.h"
 #include "gromacs/legacyheaders/types/state.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct pme_load_balancing *pme_load_balancing_t;
+/*! \brief Object to manage PME load balancing */
+struct pme_load_balancing_t;
 
-/* Initialze the PP-PME load balacing data and infrastructure */
-void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
-                      const t_inputrec *ir, matrix box,
+/*! \brief Initialize the PP-PME load balancing data and infrastructure */
+void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
+                      const t_inputrec          *ir,
+                      matrix                     box,
                       const interaction_const_t *ic,
-                      struct gmx_pme_t *pmedata);
+                      struct gmx_pme_t          *pmedata);
 
-/* Try to adjust the PME grid and Coulomb cut-off.
+/*! \brief Try to adjust the PME grid and Coulomb cut-off.
+ *
  * The adjustment is done to generate a different non-bonded PP and PME load.
  * With separate PME nodes (PP and PME on different processes) or with
  * a GPU (PP on GPU, PME on CPU), PP and PME run on different resources
@@ -64,9 +73,10 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
  * times and acquiring enough statistics, the best performing setup is chosen.
  * Here we try to take into account fluctuations and changes due to external
  * factors as well as DD load balancing.
- * Returns TRUE the load balancing continues, FALSE is the balancing is done.
+ *
+ * \return TRUE if the load balancing continues, FALSE if the balancing is done.
  */
-gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
+gmx_bool pme_load_balance(pme_load_balancing_t      *pme_lb,
                           t_commrec                 *cr,
                           FILE                      *fp_err,
                           FILE                      *fp_log,
@@ -78,16 +88,12 @@ gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
                           struct gmx_pme_t **        pmedata,
                           gmx_int64_t                step);
 
-/* Restart the PME load balancing discarding all timings gathered up till now */
-void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n);
+/*! \brief Restart the PME load balancing discarding all timings gathered up till now */
+void restart_pme_loadbal(pme_load_balancing_t *pme_lb, int n);
 
-/* Finish the PME load balancing and print the settings when fplog!=NULL */
-void pme_loadbal_done(pme_load_balancing_t pme_lb,
+/*! \brief Finish the PME load balancing and print the settings when fplog!=NULL */
+void pme_loadbal_done(pme_load_balancing_t *pme_lb,
                       t_commrec *cr, FILE *fplog,
                       gmx_bool bNonBondedOnGPU);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
similarity index 98%
rename from src/gromacs/ewald/pme-only.c
rename to src/gromacs/ewald/pme-only.cpp
index 8868711ef81d9af7b1c1548b042c0bb1a3f9d658..e5b61f0dde0167566ed53536ec41988fcfd6bfdf 100644 (file)
 #include <stdlib.h>
 #include <string.h>
 
-#include "gromacs/ewald/pme-internal.h"
 #include "gromacs/ewald/pme.h"
 #include "gromacs/fft/parallel_3dfft.h"
 #include "gromacs/fileio/pdbio.h"
-#include "gromacs/legacyheaders/macros.h"
 #include "gromacs/legacyheaders/network.h"
 #include "gromacs/legacyheaders/nrnb.h"
 #include "gromacs/legacyheaders/txtdump.h"
@@ -86,6 +84,8 @@
 #include "gromacs/utility/gmxomp.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "pme-internal.h"
+
 static void reset_pmeonly_counters(gmx_wallcycle_t wcycle,
                                    gmx_walltime_accounting_t walltime_accounting,
                                    t_nrnb *nrnb, t_inputrec *ir,
@@ -165,7 +165,7 @@ int gmx_pmeonly(struct gmx_pme_t *pme,
     int                count;
     gmx_bool           bEnerVir;
     int                pme_flags;
-    gmx_int64_t        step, step_rel;
+    gmx_int64_t        step;
     ivec               grid_switch;
 
     /* This data will only use with PME tuning, i.e. switching PME grids */
@@ -218,8 +218,6 @@ int gmx_pmeonly(struct gmx_pme_t *pme,
             break;
         }
 
-        step_rel = step - ir->init_step;
-
         if (count == 0)
         {
             wallcycle_start(wcycle, ewcRUN);
similarity index 80%
rename from src/gromacs/ewald/pme-pp.c
rename to src/gromacs/ewald/pme-pp.cpp
index cd1e4dbeb81125c65158010ed9ebed45d7c5ae6a..d2fddb3bfae570ff3e0d01adbf6f4f044b4fa629 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \internal \file
+ *
+ * \brief This file contains function definitions necessary for
+ * managing the offload of long-ranged PME work to a separate MPI rank,
+ * for computing energies and forces (Coulomb and LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
 
 #include "gmxpre.h"
 
@@ -44,7 +53,6 @@
 #include <string.h>
 
 #include "gromacs/domdec/domdec.h"
-#include "gromacs/ewald/pme-internal.h"
 #include "gromacs/ewald/pme.h"
 #include "gromacs/legacyheaders/network.h"
 #include "gromacs/legacyheaders/sighandler.h"
 #include "gromacs/utility/gmxmpi.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "pme-internal.h"
+
+/*! \brief MPI Tags used to separate communication of different types of quantities */
 enum {
     eCommType_ChargeA, eCommType_ChargeB, eCommType_SQRTC6A, eCommType_SQRTC6B,
     eCommType_SigmaA, eCommType_SigmaB, eCommType_NR, eCommType_COORD,
     eCommType_CNB
 };
 
-/* Some parts of the code(gmx_pme_send_q, gmx_pme_recv_q_x) assume
+//@{
+/*! \brief Flags used to coordinate PP-PME communication and computation phases
+ *
+ * Some parts of the code (gmx_pme_send_q, gmx_pme_recv_q_x) assume
  * that the six first flags are exactly in this order.
  * If more PP_PME_...-flags are to be introduced be aware of some of
  * the PME-specific flags in pme.h. Currently, they are also passed
@@ -84,67 +98,81 @@ enum {
 
 #define PME_PP_SIGSTOP        (1<<0)
 #define PME_PP_SIGSTOPNSS     (1<<1)
+//@}
 
-typedef struct gmx_pme_pp {
+/*! \brief Master PP-PME communication data structure */
+struct gmx_pme_pp {
 #ifdef GMX_MPI
-    MPI_Comm     mpi_comm_mysim;
+    MPI_Comm     mpi_comm_mysim; /**< MPI communicator for this simulation */
 #endif
-    int          nnode;        /* The number of PP node to communicate with  */
-    int         *node;         /* The PP node ranks                          */
-    int          node_peer;    /* The peer PP node rank                      */
-    int         *nat;          /* The number of atom for each PP node        */
-    int          flags_charge; /* The flags sent along with the last charges */
+    int          nnode;          /**< The number of PP nodes to communicate with */
+    int         *node;           /**< The PP node ranks                          */
+    int          node_peer;      /**< The peer PP node rank                      */
+    int         *nat;            /**< The number of atoms for each PP node       */
+    int          flags_charge;   /**< The flags sent along with the last charges */
+    //@{
+    /**< Per-atom A- and B-state charge and LJ parameters transferred to the PME ranks  */
     real        *chargeA;
     real        *chargeB;
     real        *sqrt_c6A;
     real        *sqrt_c6B;
     real        *sigmaA;
     real        *sigmaB;
-    rvec        *x;
-    rvec        *f;
-    int          nalloc;
+    //@}
+    rvec        *x;             /**< Vector of atom coordinates to transfer to PME ranks */
+    rvec        *f;             /**< Vector of atom forces received from PME ranks */
+    int          nalloc;        /**< Allocation size of transfer vectors (>= \p nat) */
 #ifdef GMX_MPI
+    //@{
+    /**< Vectors of MPI objects used in non-blocking communication between multiple PP ranks per PME rank */
     MPI_Request *req;
     MPI_Status  *stat;
+    //@}
 #endif
-} t_gmx_pme_pp;
+};
 
+/*! \brief Helper struct for PP-PME communication of parameters */
 typedef struct gmx_pme_comm_n_box {
-    int             natoms;
-    matrix          box;
-    int             maxshift_x;
-    int             maxshift_y;
-    real            lambda_q;
-    real            lambda_lj;
-    int             flags;
-    gmx_int64_t     step;
-    ivec            grid_size;    /* For PME grid tuning */
-    real            ewaldcoeff_q; /* For PME grid tuning */
+    int             natoms;     /**< Number of atoms */
+    matrix          box;        /**< Box */
+    int             maxshift_x; /**< Maximum shift in x direction */
+    int             maxshift_y; /**< Maximum shift in y direction */
+    real            lambda_q;   /**< Free-energy lambda for electrostatics */
+    real            lambda_lj;  /**< Free-energy lambda for Lennard-Jones */
+    int             flags;      /**< Control flags */
+    gmx_int64_t     step;       /**< MD integration step number */
+    //@{
+    /*! \brief Used in PME grid tuning */
+    ivec            grid_size;
+    real            ewaldcoeff_q;
     real            ewaldcoeff_lj;
+    //@}
 } gmx_pme_comm_n_box_t;
 
+/*! \brief Helper struct for PP-PME communication of virial and energy */
 typedef struct {
+    //@{
+    /*! \brief Virial, energy, and derivative of potential w.r.t. lambda for charge and Lennard-Jones */
     matrix          vir_q;
     matrix          vir_lj;
     real            energy_q;
     real            energy_lj;
     real            dvdlambda_q;
     real            dvdlambda_lj;
-    float           cycles;
-    gmx_stop_cond_t stop_cond;
+    //@}
+    float           cycles;     /**< Counter of CPU cycles used */
+    gmx_stop_cond_t stop_cond;  /**< Flag used in responding to an external signal to terminate */
 } gmx_pme_comm_vir_ene_t;
 
-
-
-
-gmx_pme_pp_t gmx_pme_pp_init(t_commrec gmx_unused *cr)
+gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr)
 {
     struct gmx_pme_pp *pme_pp;
-    int                rank;
 
     snew(pme_pp, 1);
 
 #ifdef GMX_MPI
+    int rank;
+
     pme_pp->mpi_comm_mysim = cr->mpi_comm_mysim;
     MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
     get_pme_ddnodes(cr, rank, &pme_pp->nnode, &pme_pp->node, &pme_pp->node_peer);
@@ -153,12 +181,16 @@ gmx_pme_pp_t gmx_pme_pp_init(t_commrec gmx_unused *cr)
     snew(pme_pp->stat, eCommType_NR*pme_pp->nnode);
     pme_pp->nalloc       = 0;
     pme_pp->flags_charge = 0;
+#else
+    GMX_UNUSED_VALUE(cr);
 #endif
 
     return pme_pp;
 }
 
-/* This should be faster with a real non-blocking MPI implementation */
+/*! \brief Block to wait for communication to PME ranks to complete
+ *
+ * This should be faster with a real non-blocking MPI implementation */
 /* #define GMX_PME_DELAYED_WAIT */
 
 static void gmx_pme_send_coeffs_coords_wait(gmx_domdec_t gmx_unused *dd)
@@ -172,6 +204,7 @@ static void gmx_pme_send_coeffs_coords_wait(gmx_domdec_t gmx_unused *dd)
 #endif
 }
 
+/*! \brief Send data to PME ranks */
 static void gmx_pme_send_coeffs_coords(t_commrec *cr, int flags,
                                        real gmx_unused *chargeA, real gmx_unused *chargeB,
                                        real gmx_unused *c6A, real gmx_unused *c6B,
@@ -403,42 +436,42 @@ void gmx_pme_send_resetcounters(t_commrec gmx_unused *cr, gmx_int64_t gmx_unused
 #endif
 }
 
-int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp          *pme_pp,
-                               int                        *natoms,
-                               real                      **chargeA,
-                               real                      **chargeB,
-                               real                      **sqrt_c6A,
-                               real                      **sqrt_c6B,
-                               real                      **sigmaA,
-                               real                      **sigmaB,
-                               matrix gmx_unused           box,
-                               rvec                      **x,
-                               rvec                      **f,
-                               int gmx_unused             *maxshift_x,
-                               int gmx_unused             *maxshift_y,
-                               gmx_bool gmx_unused        *bFreeEnergy_q,
-                               gmx_bool gmx_unused        *bFreeEnergy_lj,
-                               real gmx_unused            *lambda_q,
-                               real gmx_unused            *lambda_lj,
-                               gmx_bool gmx_unused        *bEnerVir,
-                               int                        *pme_flags,
-                               gmx_int64_t gmx_unused     *step,
-                               ivec gmx_unused             grid_size,
-                               real gmx_unused            *ewaldcoeff_q,
-                               real gmx_unused            *ewaldcoeff_lj)
+int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
+                               int               *natoms,
+                               real             **chargeA,
+                               real             **chargeB,
+                               real             **sqrt_c6A,
+                               real             **sqrt_c6B,
+                               real             **sigmaA,
+                               real             **sigmaB,
+                               matrix             box,
+                               rvec             **x,
+                               rvec             **f,
+                               int               *maxshift_x,
+                               int               *maxshift_y,
+                               gmx_bool          *bFreeEnergy_q,
+                               gmx_bool          *bFreeEnergy_lj,
+                               real              *lambda_q,
+                               real              *lambda_lj,
+                               gmx_bool          *bEnerVir,
+                               int               *pme_flags,
+                               gmx_int64_t       *step,
+                               ivec               grid_size,
+                               real              *ewaldcoeff_q,
+                               real              *ewaldcoeff_lj)
 {
-    gmx_pme_comm_n_box_t cnb;
-    int                  nat = 0, q, messages, sender;
-    real                *charge_pp;
-
-    messages = 0;
+    int                  nat = 0, status;
 
-    /* avoid compiler warning about unused variable without MPI support */
-    cnb.flags  = 0;
     *pme_flags = 0;
 #ifdef GMX_MPI
+    gmx_pme_comm_n_box_t cnb;
+    int                  messages;
+
+    cnb.flags  = 0;
+    messages   = 0;
     do
     {
+
         /* Receive the send count, box and time step from the peer PP node */
         MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
                  pme_pp->node_peer, eCommType_CNB,
@@ -474,7 +507,7 @@ int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp          *pme_pp,
         if (cnb.flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
         {
             /* Receive the send counts from the other PP nodes */
-            for (sender = 0; sender < pme_pp->nnode; sender++)
+            for (int sender = 0; sender < pme_pp->nnode; sender++)
             {
                 if (pme_pp->node[sender] == pme_pp->node_peer)
                 {
@@ -492,7 +525,7 @@ int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp          *pme_pp,
             messages = 0;
 
             nat = 0;
-            for (sender = 0; sender < pme_pp->nnode; sender++)
+            for (int sender = 0; sender < pme_pp->nnode; sender++)
             {
                 nat += pme_pp->nat[sender];
             }
@@ -533,8 +566,10 @@ int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp          *pme_pp,
             *maxshift_y = cnb.maxshift_y;
 
             /* Receive the charges in place */
-            for (q = 0; q < eCommType_NR; q++)
+            for (int q = 0; q < eCommType_NR; q++)
             {
+                real *charge_pp;
+
                 if (!(cnb.flags & (PP_PME_CHARGE<<q)))
                 {
                     continue;
@@ -550,7 +585,7 @@ int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp          *pme_pp,
                     default: gmx_incons("Wrong eCommType");
                 }
                 nat = 0;
-                for (sender = 0; sender < pme_pp->nnode; sender++)
+                for (int sender = 0; sender < pme_pp->nnode; sender++)
                 {
                     if (pme_pp->nat[sender] > 0)
                     {
@@ -609,7 +644,7 @@ int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp          *pme_pp,
 
             /* Receive the coordinates in place */
             nat = 0;
-            for (sender = 0; sender < pme_pp->nnode; sender++)
+            for (int sender = 0; sender < pme_pp->nnode; sender++)
             {
                 if (pme_pp->nat[sender] > 0)
                 {
@@ -633,8 +668,24 @@ int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp          *pme_pp,
         messages = 0;
     }
     while (!(cnb.flags & (PP_PME_COORD | PP_PME_FINISH)));
+    status = ((cnb.flags & PP_PME_FINISH) ? pmerecvqxFINISH : pmerecvqxX);
 
     *step = cnb.step;
+#else
+    GMX_UNUSED_VALUE(box);
+    GMX_UNUSED_VALUE(maxshift_x);
+    GMX_UNUSED_VALUE(maxshift_y);
+    GMX_UNUSED_VALUE(bFreeEnergy_q);
+    GMX_UNUSED_VALUE(bFreeEnergy_lj);
+    GMX_UNUSED_VALUE(lambda_q);
+    GMX_UNUSED_VALUE(lambda_lj);
+    GMX_UNUSED_VALUE(bEnerVir);
+    GMX_UNUSED_VALUE(step);
+    GMX_UNUSED_VALUE(grid_size);
+    GMX_UNUSED_VALUE(ewaldcoeff_q);
+    GMX_UNUSED_VALUE(ewaldcoeff_lj);
+
+    status = pmerecvqxX;
 #endif
 
     *natoms   = nat;
@@ -647,9 +698,9 @@ int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp          *pme_pp,
     *x        = pme_pp->x;
     *f        = pme_pp->f;
 
-    return ((cnb.flags & PP_PME_FINISH) ? pmerecvqxFINISH : pmerecvqxX);
+    return status;
 }
-
+/*! \brief Receive virial and energy from PME rank */
 static void receive_virial_energy(t_commrec *cr,
                                   matrix vir_q, real *energy_q,
                                   matrix vir_lj, real *energy_lj,
@@ -738,26 +789,24 @@ void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
                                  real dvdlambda_q, real dvdlambda_lj,
                                  float cycles)
 {
+#ifdef GMX_MPI
     gmx_pme_comm_vir_ene_t cve;
-    int                    messages, ind_start, ind_end, receiver;
-
+    int                    messages, ind_start, ind_end;
     cve.cycles = cycles;
 
     /* Now the evaluated forces have to be transferred to the PP nodes */
     messages = 0;
     ind_end  = 0;
-    for (receiver = 0; receiver < pme_pp->nnode; receiver++)
+    for (int receiver = 0; receiver < pme_pp->nnode; receiver++)
     {
         ind_start = ind_end;
         ind_end   = ind_start + pme_pp->nat[receiver];
-#ifdef GMX_MPI
         if (MPI_Isend(f[ind_start], (ind_end-ind_start)*sizeof(rvec), MPI_BYTE,
                       pme_pp->node[receiver], 0,
                       pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0)
         {
             gmx_comm("MPI_Isend failed in do_pmeonly");
         }
-#endif
     }
 
     /* send virial and energy to our last PP node */
@@ -777,12 +826,22 @@ void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
         fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n",
                 pme_pp->node_peer);
     }
-#ifdef GMX_MPI
     MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
               pme_pp->node_peer, 1,
               pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
 
     /* Wait for the forces to arrive */
     MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
+#else
+    gmx_call("MPI not enabled");
+    GMX_UNUSED_VALUE(pme_pp);
+    GMX_UNUSED_VALUE(f);
+    GMX_UNUSED_VALUE(vir_q);
+    GMX_UNUSED_VALUE(energy_q);
+    GMX_UNUSED_VALUE(vir_lj);
+    GMX_UNUSED_VALUE(energy_lj);
+    GMX_UNUSED_VALUE(dvdlambda_q);
+    GMX_UNUSED_VALUE(dvdlambda_lj);
+    GMX_UNUSED_VALUE(cycles);
 #endif
 }
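
gmx_pme_pp_init(), gmx_pme_recv_coeffs_coords() and gmx_pme_send_force_vir_ener() above keep the whole MPI body behind one #ifdef GMX_MPI and mark each parameter as unused only on the stub path. A minimal, self-contained sketch of that pattern; the GMX_UNUSED_VALUE definition below is an illustrative stand-in for the GROMACS macro:

    #include <cstdio>

    // Stand-in for the GROMACS macro that silences unused-parameter warnings
    // by evaluating the value and discarding it.
    #define GMX_UNUSED_VALUE(value) (void)(value)

    static int send_data(const double *buf, int count)
    {
    #ifdef GMX_MPI
        // Real path: variables that exist only with MPI are declared here,
        // so nothing needs to propagate across ifdef-ed regions.
        int messages = 0;
        /* ... the MPI_Isend(buf, count, ...) / MPI_Waitall(...) calls would go here ... */
        return messages;
    #else
        // Stub path: never reached in a build without MPI, so just mark the
        // parameters as used and report the misuse.
        GMX_UNUSED_VALUE(buf);
        GMX_UNUSED_VALUE(count);
        std::fprintf(stderr, "send_data() called without MPI support\n");
        return -1;
    #endif
    }

    int main()
    {
        double buf[3] = {0.0, 1.0, 2.0};
        return send_data(buf, 3) == 0 ? 0 : 1;   // exercises the stub path when GMX_MPI is unset
    }
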
similarity index 97%
rename from src/gromacs/ewald/pme-redistribute.c
rename to src/gromacs/ewald/pme-redistribute.cpp
index 38611b65a6f079e9b9ca63329bbd6472df134e61..44bbdaeb60c26afd428aeac6b7c501af31e35dec 100644 (file)
 
 #include "config.h"
 
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-simd.h"
-#include "gromacs/legacyheaders/macros.h"
+#include <algorithm>
+
 #include "gromacs/legacyheaders/types/commrec.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/utility/gmxmpi.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "pme-internal.h"
+#include "pme-simd.h"
+
 static void pme_calc_pidx(int start, int end,
                           matrix recipbox, rvec x[],
                           pme_atomcomm_t *atc, int *count)
@@ -151,7 +153,7 @@ static void realloc_splinevec(splinevec th, real **ptr_z, int nalloc)
 
 static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
 {
-    int i, d;
+    int i;
 
     srenew(spline->ind, atc->nalloc);
     /* Initialize the index to identity so it works without threads */
@@ -168,7 +170,7 @@ static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
 
 void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
 {
-    int nalloc_old, i, j, nalloc_tpl;
+    int nalloc_old, i;
 
     /* We have to avoid a NULL pointer for atc->x to avoid
      * possible fatal errors in MPI routines.
@@ -176,7 +178,7 @@ void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
     if (atc->n > atc->nalloc || atc->nalloc == 0)
     {
         nalloc_old  = atc->nalloc;
-        atc->nalloc = over_alloc_dd(max(atc->n, 1));
+        atc->nalloc = over_alloc_dd(std::max(atc->n, 1));
 
         if (atc->nslab > 1)
         {
@@ -259,7 +261,7 @@ static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
     commnode  = atc->node_dest;
     buf_index = atc->buf_index;
 
-    nnodes_comm = min(2*atc->maxshift, atc->nslab-1);
+    nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
 
     nsend = 0;
     for (i = 0; i < nnodes_comm; i++)
@@ -363,7 +365,7 @@ void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
     commnode  = atc->node_dest;
     buf_index = atc->buf_index;
 
-    nnodes_comm = min(2*atc->maxshift, atc->nslab-1);
+    nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
 
     local_pos = atc->count[atc->nodeid];
     buf_pos   = 0;
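
The min()/max() macro calls above become std::min()/std::max(), which need <algorithm> and arguments of identical type (hence the plain int literal in std::max(atc->n, 1)). A small self-contained illustration:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        int n        = 0;
        int maxshift = 3;
        int nslab    = 5;

        // Both arguments must have the same type: 1 (not 1L or 1.0) alongside an int.
        int nalloc      = std::max(n, 1);
        int nnodes_comm = std::min(2*maxshift, nslab - 1);

        std::printf("nalloc=%d nnodes_comm=%d\n", nalloc, nnodes_comm);
        return 0;
    }
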
index 782c00f931bac49d50fc377597ce40dc14f0902d..385828fcbff0608bf82e8a445109dd275509275d 100644 (file)
 #ifndef GMX_EWALD_PME_REDISTRIBUTE_H
 #define GMX_EWALD_PME_REDISTRIBUTE_H
 
-#include "gromacs/ewald/pme-internal.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
 
 void
 pme_realloc_atomcomm_things(pme_atomcomm_t *atc);
@@ -53,8 +49,4 @@ void
 do_redist_pos_coeffs(struct gmx_pme_t *pme, t_commrec *cr, int start, int homenr,
                      gmx_bool bFirst, rvec x[], real *data);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
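
The deleted blocks here are the usual C-linkage guards; they only matter while a header may also be included from C translation units, so a header consumed purely by C++ code can drop them. A generic sketch of the retired pattern, with an illustrative function name:

    // widget.h -- guard needed only while both C and C++ sources include this header
    #ifdef __cplusplus
    extern "C" {
    #endif

    void widget_frob(int level);   // C linkage, so a C caller can link against it

    #ifdef __cplusplus
    }
    #endif
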
similarity index 98%
rename from src/gromacs/ewald/pme-solve.c
rename to src/gromacs/ewald/pme-solve.cpp
index 04250befa0695a4691d79a355a46bd2c3202b806..2a14b3b48fc61cae11672b8f4fdc7fffcb96123d 100644 (file)
 
 #include <math.h>
 
-#include "gromacs/ewald/pme-internal.h"
 #include "gromacs/fft/parallel_3dfft.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/simd/simd_math.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "pme-internal.h"
+
 #ifdef GMX_SIMD_HAVE_REAL
 /* Turn on arbitrary width SIMD intrinsics for PME solve */
 #    define PME_SIMD_SOLVE
@@ -194,9 +195,7 @@ void get_pme_ener_vir_lj(struct pme_solve_work_t *work, int nthread,
 gmx_inline static void calc_exponentials_q(int gmx_unused start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned)
 {
     {
-        const gmx_simd_real_t two = gmx_simd_set1_r(2.0);
         gmx_simd_real_t       f_simd;
-        gmx_simd_real_t       lu;
         gmx_simd_real_t       tmp_d1, d_inv, tmp_r, tmp_e;
         int                   kx;
         f_simd = gmx_simd_set1_r(f);
@@ -288,7 +287,7 @@ int solve_pme_yzx(struct gmx_pme_t *pme, t_complex *grid,
     /* do recip sum over local cells in grid */
     /* y major, z middle, x minor or continuous */
     t_complex               *p0;
-    int                      kx, ky, kz, maxkx, maxky, maxkz;
+    int                      kx, ky, kz, maxkx, maxky;
     int                      nx, ny, nz, iyz0, iyz1, iyz, iy, iz, kxstart, kxend;
     real                     mx, my, mz;
     real                     factor = M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
@@ -327,7 +326,6 @@ int solve_pme_yzx(struct gmx_pme_t *pme, t_complex *grid,
 
     maxkx = (nx+1)/2;
     maxky = (ny+1)/2;
-    maxkz = nz/2+1;
 
     work  = &pme->solve_work[thread];
     mhx   = work->mhx;
@@ -539,7 +537,7 @@ int solve_pme_lj_yzx(struct gmx_pme_t *pme, t_complex **grid, gmx_bool bLB,
     /* do recip sum over local cells in grid */
     /* y major, z middle, x minor or continuous */
     int                      ig, gcount;
-    int                      kx, ky, kz, maxkx, maxky, maxkz;
+    int                      kx, ky, kz, maxkx, maxky;
     int                      nx, ny, nz, iy, iyz0, iyz1, iyz, iz, kxstart, kxend;
     real                     mx, my, mz;
     real                     factor = M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
@@ -550,7 +548,6 @@ int solve_pme_lj_yzx(struct gmx_pme_t *pme, t_complex **grid, gmx_bool bLB,
     real                     rxx, ryx, ryy, rzx, rzy, rzz;
     real                    *mhx, *mhy, *mhz, *m2, *denom, *tmp1, *tmp2;
     real                     mhxk, mhyk, mhzk, m2k;
-    real                     mk;
     struct pme_solve_work_t *work;
     real                     corner_fac;
     ivec                     complex_order;
@@ -574,7 +571,6 @@ int solve_pme_lj_yzx(struct gmx_pme_t *pme, t_complex **grid, gmx_bool bLB,
 
     maxkx = (nx+1)/2;
     maxky = (ny+1)/2;
-    maxkz = nz/2+1;
 
     work  = &pme->solve_work[thread];
     mhx   = work->mhx;
index e6f3d3f33fb7b7cf411392aa0c58a5e23699cce1..6b26a58f1bc37df9b0ac46c5475d0b663b22ae5a 100644 (file)
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 struct pme_solve_work_t;
 struct gmx_pme_t;
 
@@ -82,8 +78,4 @@ int solve_pme_lj_yzx(struct gmx_pme_t *pme, t_complex **grid, gmx_bool bLB,
                      real ewaldcoeff, real vol,
                      gmx_bool bEnerVir, int nthread, int thread);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
similarity index 98%
rename from src/gromacs/ewald/pme-spline-work.c
rename to src/gromacs/ewald/pme-spline-work.cpp
index 01459eda2a79db8dda1d84048f17e09a53858cde..795f419d0e088f2b2e622b68d680961f2557dc9c 100644 (file)
 
 #include "pme-spline-work.h"
 
-#include "gromacs/ewald/pme-simd.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/utility/real.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "pme-simd.h"
+
 struct pme_spline_work *make_pme_spline_work(int gmx_unused order)
 {
     struct pme_spline_work *work;
index ed07c2a65c9dec53a3a270340f54862b75f326ee..2ca38b1baee4ff997cca20751bd9f43cf4793159 100644 (file)
 #ifndef GMX_EWALD_PME_SPLINE_WORK_H
 #define GMX_EWALD_PME_SPLINE_WORK_H
 
-#include "gromacs/ewald/pme-simd.h"
 #include "gromacs/simd/simd.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-simd.h"
 
 struct pme_spline_work
 {
@@ -54,8 +51,4 @@ struct pme_spline_work
 
 struct pme_spline_work *make_pme_spline_work(int order);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
index 5a687a49218b4e8b95e71a9761388b80b4a6cfd4..f23262a5686bd827dd6f134c338cd77190c233ee 100644 (file)
 
 #include <algorithm>
 
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-simd.h"
-#include "gromacs/ewald/pme-spline-work.h"
-#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/ewald/pme.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "pme-internal.h"
+#include "pme-simd.h"
+#include "pme-spline-work.h"
+
 /* TODO consider split of pme-spline from this file */
 
 static void calc_interpolation_idx(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
@@ -368,7 +369,7 @@ static void spread_coefficients_bsplines_thread(pmegrid_t
 #define PME_SPREAD_SIMD4_ALIGNED
 #define PME_ORDER 4
 #endif
-#include "gromacs/ewald/pme-simd4.h"
+#include "pme-simd4.h"
 #else
                     DO_BSPLINE(4);
 #endif
@@ -377,7 +378,7 @@ static void spread_coefficients_bsplines_thread(pmegrid_t
 #ifdef PME_SIMD4_SPREAD_GATHER
 #define PME_SPREAD_SIMD4_ALIGNED
 #define PME_ORDER 5
-#include "gromacs/ewald/pme-simd4.h"
+#include "pme-simd4.h"
 #else
                     DO_BSPLINE(5);
 #endif
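
The spread kernels above pick a code path by defining PME_ORDER (and PME_SPREAD_SIMD4_ALIGNED) and then textually including pme-simd4.h, so one source fragment is expanded once per interpolation order. A minimal, self-contained sketch of that multiple-inclusion technique; the file and macro names below are hypothetical:

    // kernel_body.h -- no include guard on purpose; compiled once per value of ORDER
    {
        double sum = 0;
        for (int k = 0; k < ORDER; k++)   // ORDER is a compile-time constant here
        {
            sum += w[k];
        }
        std::printf("order %d: sum = %g\n", ORDER, sum);
    }
    #undef ORDER

    // main.cpp
    #include <cstdio>

    int main()
    {
        const double w[5] = {0.1, 0.2, 0.3, 0.25, 0.15};

    #define ORDER 4
    #include "kernel_body.h"

    #define ORDER 5
    #include "kernel_body.h"

        return 0;
    }
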
index 66ca50e4224154118cb49a9a09cd3a1bb075ea3b..557d1ad9c092fa462f6c79006ea840f76a94d784 100644 (file)
 #ifndef GMX_EWALD_PME_SPREAD_H
 #define GMX_EWALD_PME_SPREAD_H
 
-#include "gromacs/ewald/pme-internal.h"
 #include "gromacs/utility/real.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
 
 void
 spread_on_grid(struct gmx_pme_t *pme,
@@ -50,8 +47,4 @@ spread_on_grid(struct gmx_pme_t *pme,
                gmx_bool bCalcSplines, gmx_bool bSpread,
                real *fftgrid, gmx_bool bDoSplines, int grid_index);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
similarity index 95%
rename from src/gromacs/ewald/pme.c
rename to src/gromacs/ewald/pme.cpp
index 91cff2c4977a6fc5d30c412aeb07cf505372012d..c872e52ca6a83a224216e45c1a5e51cf21b38681 100644 (file)
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \internal \file
+ *
+ * \brief This file contains function definitions necessary for
+ * computing energies and forces for the PME long-ranged part (Coulomb
+ * and LJ).
+ *
+ * \author Erik Lindahl <erik@kth.se>
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
 /* IMPORTANT FOR DEVELOPERS:
  *
  * Triclinic pme stuff isn't entirely trivial, and we've experienced
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
 
-#include "gromacs/ewald/calculate-spline-moduli.h"
-#include "gromacs/ewald/pme-gather.h"
-#include "gromacs/ewald/pme-grid.h"
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-redistribute.h"
-#include "gromacs/ewald/pme-solve.h"
-#include "gromacs/ewald/pme-spline-work.h"
-#include "gromacs/ewald/pme-spread.h"
-#include "gromacs/fft/fft.h"
 #include "gromacs/fft/parallel_3dfft.h"
-#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/fileio/pdbio.h"
+#include "gromacs/legacyheaders/network.h"
 #include "gromacs/legacyheaders/nrnb.h"
 #include "gromacs/legacyheaders/types/commrec.h"
 #include "gromacs/legacyheaders/types/enums.h"
 #include "gromacs/utility/real.h"
 #include "gromacs/utility/smalloc.h"
 
-#ifdef GMX_DOUBLE
-#define mpi_type MPI_DOUBLE
-#else
-#define mpi_type MPI_FLOAT
-#endif
+#include "calculate-spline-moduli.h"
+#include "pme-gather.h"
+#include "pme-grid.h"
+#include "pme-internal.h"
+#include "pme-redistribute.h"
+#include "pme-solve.h"
+#include "pme-spline-work.h"
+#include "pme-spread.h"
 
-/* GMX_CACHE_SEP should be a multiple of the SIMD and SIMD4 register size
- * to preserve alignment.
+/*! \brief Number of bytes in a cache line.
+ *
+ * Must also be a multiple of the SIMD and SIMD4 register size, to
+ * preserve alignment.
  */
-#define GMX_CACHE_SEP 64
-
+const int gmxCacheLineSize = 64;
 
+//! Set up coordinate communication
 static void setup_coordinate_communication(pme_atomcomm_t *atc)
 {
     int nslab, n, i;
@@ -140,7 +150,7 @@ static void setup_coordinate_communication(pme_atomcomm_t *atc)
 
 int gmx_pme_destroy(FILE *log, struct gmx_pme_t **pmedata)
 {
-    int thread, i;
+    int i;
 
     if (NULL != log)
     {
@@ -170,13 +180,14 @@ int gmx_pme_destroy(FILE *log, struct gmx_pme_t **pmedata)
     return 0;
 }
 
+/*! \brief Round \p n up to the next multiple of \p f */
 static int mult_up(int n, int f)
 {
     return ((n + f - 1)/f)*f;
 }
 
-// TODO change this name to reflect what it actually does
-static double pme_load_imbalance(struct gmx_pme_t *pme)
+/*! \brief Return estimate of the load imbalance from the PME grid not being a good match for the number of PME ranks */
+static double estimate_pme_load_imbalance(struct gmx_pme_t *pme)
 {
     int    nma, nmi;
     double n1, n2, n3;
@@ -193,10 +204,11 @@ static double pme_load_imbalance(struct gmx_pme_t *pme)
     return (n1 + n2 + 3*n3)/(double)(6*pme->nkx*pme->nky*pme->nkz);
 }
 
+/*! \brief Initialize atom communication data structure */
 static void init_atomcomm(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
                           int dimind, gmx_bool bSpread)
 {
-    int nk, k, s, thread;
+    int thread;
 
     atc->dimind    = dimind;
     atc->nslab     = 1;
@@ -244,14 +256,15 @@ static void init_atomcomm(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
     {
         if (atc->nthread > 1)
         {
-            snew(atc->thread_plist[thread].n, atc->nthread+2*GMX_CACHE_SEP);
-            atc->thread_plist[thread].n += GMX_CACHE_SEP;
+            snew(atc->thread_plist[thread].n, atc->nthread+2*gmxCacheLineSize);
+            atc->thread_plist[thread].n += gmxCacheLineSize;
         }
         snew(atc->spline[thread].thread_one, pme->nthread);
         atc->spline[thread].thread_one[thread] = 1;
     }
 }
 
+/*! \brief Initialize the grid-overlap communication data structure */
 static void
 init_overlap_comm(pme_overlap_t *  ol,
                   int              norder,
@@ -263,9 +276,7 @@ init_overlap_comm(pme_overlap_t *  ol,
                   int              ndata,
                   int              commplainsize)
 {
-    int              lbnd, rbnd, maxlr, b, i;
-    int              exten;
-    int              nn, nk;
+    int              b, i;
     pme_grid_comm_t *pgc;
     gmx_bool         bCont;
     int              fft_start, fft_end, send_index1, recv_index1;
@@ -353,9 +364,9 @@ init_overlap_comm(pme_overlap_t *  ol,
             fft_end   += ndata;
         }
         send_index1       = ol->s2g1[nodeid];
-        send_index1       = min(send_index1, fft_end);
+        send_index1       = std::min(send_index1, fft_end);
         pgc->send_index0  = fft_start;
-        pgc->send_nindex  = max(0, send_index1 - pgc->send_index0);
+        pgc->send_nindex  = std::max(0, send_index1 - pgc->send_index0);
         ol->send_size    += pgc->send_nindex;
 
         /* We always start receiving to the first index of our slab */
@@ -366,9 +377,9 @@ init_overlap_comm(pme_overlap_t *  ol,
         {
             recv_index1 -= ndata;
         }
-        recv_index1      = min(recv_index1, fft_end);
+        recv_index1      = std::min(recv_index1, fft_end);
         pgc->recv_index0 = fft_start;
-        pgc->recv_nindex = max(0, recv_index1 - pgc->recv_index0);
+        pgc->recv_nindex = std::max(0, recv_index1 - pgc->recv_index0);
     }
 
 #ifdef GMX_MPI
@@ -441,6 +452,7 @@ void gmx_pme_check_restrictions(int pme_order,
     return;
 }
 
+/*! \brief Return \p enumerator divided by \p denominator, rounded up to the next integer */
 static int div_round_up(int enumerator, int denominator)
 {
     return (enumerator + denominator - 1)/denominator;
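
mult_up() above and div_round_up() here are the standard integer round-up idioms; a quick self-contained check of what they compute:

    #include <cassert>

    // Round n up to the next multiple of f (as in mult_up() above).
    static int mult_up(int n, int f)
    {
        return ((n + f - 1)/f)*f;
    }

    // Integer division rounded up (as in div_round_up() above).
    static int div_round_up(int enumerator, int denominator)
    {
        return (enumerator + denominator - 1)/denominator;
    }

    int main()
    {
        assert(mult_up(10, 4) == 12);       // rounds up to the next multiple of 4
        assert(mult_up(12, 4) == 12);       // already a multiple: unchanged
        assert(div_round_up(10, 4) == 3);   // 10/4 rounded up
        assert(div_round_up(8, 4)  == 2);   // exact division unchanged
        return 0;
    }
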
@@ -606,7 +618,7 @@ int gmx_pme_init(struct gmx_pme_t **pmedata,
         double imbal;
 
 #ifdef GMX_MPI
-        MPI_Type_contiguous(DIM, mpi_type, &(pme->rvec_mpi));
+        MPI_Type_contiguous(DIM, GMX_MPI_REAL, &(pme->rvec_mpi));
         MPI_Type_commit(&(pme->rvec_mpi));
 #endif
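
The rvec datatype above is now built from the existing GMX_MPI_REAL, which tracks the configured precision of real. A minimal self-contained MPI sketch of committing such a contiguous 3-vector type, hard-wired here to MPI_DOUBLE for simplicity:

    #include <mpi.h>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);

        // A 3-vector of reals, analogous to rvec (DIM is 3).
        MPI_Datatype rvec_mpi;
        MPI_Type_contiguous(3, MPI_DOUBLE, &rvec_mpi);
        MPI_Type_commit(&rvec_mpi);

        // rvec_mpi can now be used as the datatype argument of MPI_Send/MPI_Recv
        // to transfer whole coordinate vectors at a time.

        MPI_Type_free(&rvec_mpi);
        MPI_Finalize();
        return 0;
    }
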
 
@@ -616,7 +628,7 @@ int gmx_pme_init(struct gmx_pme_t **pmedata,
          * (unless the coefficient distribution is inhomogeneous).
          */
 
-        imbal = pme_load_imbalance(pme);
+        imbal = estimate_pme_load_imbalance(pme);
         if (imbal >= 1.2 && pme->nodeid_major == 0 && pme->nodeid_minor == 0)
         {
             fprintf(stderr,
@@ -851,6 +863,7 @@ void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V
     *V = gather_energy_bsplines(pme, grid->grid.grid, atc);
 }
 
+/*! \brief Calculate initial Lorentz-Berthelot coefficients for LJ-PME */
 static void
 calc_initial_lb_coeffs(struct gmx_pme_t *pme, real *local_c6, real *local_sigma)
 {
@@ -865,6 +878,7 @@ calc_initial_lb_coeffs(struct gmx_pme_t *pme, real *local_c6, real *local_sigma)
     }
 }
 
+/*! \brief Calculate next Lorentz-Berthelot coefficients for LJ-PME */
 static void
 calc_next_lb_coeffs(struct gmx_pme_t *pme, real *local_sigma)
 {
@@ -879,27 +893,25 @@ calc_next_lb_coeffs(struct gmx_pme_t *pme, real *local_sigma)
 int gmx_pme_do(struct gmx_pme_t *pme,
                int start,       int homenr,
                rvec x[],        rvec f[],
-               real *chargeA,   real *chargeB,
-               real *c6A,       real *c6B,
-               real *sigmaA,    real *sigmaB,
-               matrix box, t_commrec *cr,
+               real chargeA[],  real chargeB[],
+               real c6A[],      real c6B[],
+               real sigmaA[],   real sigmaB[],
+               matrix box,      t_commrec *cr,
                int  maxshift_x, int maxshift_y,
                t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
-               matrix vir_q,      real ewaldcoeff_q,
+               matrix vir_q,    real ewaldcoeff_q,
                matrix vir_lj,   real ewaldcoeff_lj,
                real *energy_q,  real *energy_lj,
-               real lambda_q, real lambda_lj,
+               real lambda_q,   real lambda_lj,
                real *dvdlambda_q, real *dvdlambda_lj,
                int flags)
 {
-    int                  d, i, j, k, ntot, npme, grid_index, max_grid_index;
-    int                  nx, ny, nz;
-    int                  n_d, local_ny;
+    int                  d, i, j, npme, grid_index, max_grid_index;
+    int                  n_d;
     pme_atomcomm_t      *atc        = NULL;
     pmegrids_t          *pmegrid    = NULL;
     real                *grid       = NULL;
-    real                *ptr;
-    rvec                *x_d, *f_d;
+    rvec                *f_d;
     real                *coefficient = NULL;
     real                 energy_AB[4];
     matrix               vir_AB[4];
@@ -1142,8 +1154,8 @@ int gmx_pme_do(struct gmx_pme_t *pme,
 
                     if (pme->nodeid == 0)
                     {
-                        ntot  = pme->nkx*pme->nky*pme->nkz;
-                        npme  = ntot*log((real)ntot)/log(2.0);
+                        real ntot = pme->nkx*pme->nky*pme->nkz;
+                        npme  = static_cast<int>(ntot*log(ntot)/log(2.0));
                         inc_nrnb(nrnb, eNR_FFT, 2*npme);
                     }
 
@@ -1298,7 +1310,6 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                 /* Unpack structure */
                 pmegrid    = &pme->pmegrid[grid_index];
                 fftgrid    = pme->fftgrid[grid_index];
-                cfftgrid   = pme->cfftgrid[grid_index];
                 pfft_setup = pme->pfft_setup[grid_index];
                 calc_next_lb_coeffs(pme, local_sigma);
                 grid = pmegrid->grid.grid;
@@ -1398,7 +1409,6 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                     /* Unpack structure */
                     pmegrid    = &pme->pmegrid[grid_index];
                     fftgrid    = pme->fftgrid[grid_index];
-                    cfftgrid   = pme->cfftgrid[grid_index];
                     pfft_setup = pme->pfft_setup[grid_index];
                     grid       = pmegrid->grid.grid;
                     calc_next_lb_coeffs(pme, local_sigma);
@@ -1423,8 +1433,8 @@ int gmx_pme_do(struct gmx_pme_t *pme,
 
                             if (pme->nodeid == 0)
                             {
-                                ntot  = pme->nkx*pme->nky*pme->nkz;
-                                npme  = ntot*log((real)ntot)/log(2.0);
+                                real ntot = pme->nkx*pme->nky*pme->nkz;
+                                npme  = static_cast<int>(ntot*log(ntot)/log(2.0));
                                 inc_nrnb(nrnb, eNR_FFT, 2*npme);
                             }
                             wallcycle_start(wcycle, ewcPME_SPREADGATHER);
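
The init_atomcomm() hunk earlier in this file allocates each per-thread count array with gmxCacheLineSize elements of padding on both sides, presumably so counters updated by different threads do not share a cache line. A minimal self-contained sketch of that padding idea; the allocation here uses calloc instead of snew():

    #include <cstdlib>

    const int cacheLineSize = 64;   // padding in array elements, as used above

    int main()
    {
        const int nthread = 4;
        int      *counts[nthread];

        for (int t = 0; t < nthread; t++)
        {
            // Allocate the counters plus padding on each side, then advance the
            // pointer so each thread's working range is well separated from the
            // data written by every other thread.
            int *raw  = static_cast<int *>(std::calloc(nthread + 2*cacheLineSize, sizeof(int)));
            counts[t] = raw + cacheLineSize;
        }

        /* ... each thread t would update counts[t][0..nthread-1] here ... */

        for (int t = 0; t < nthread; t++)
        {
            std::free(counts[t] - cacheLineSize);   // free the original allocation
        }
        return 0;
    }
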
index 0c67c22d47be9108fdb95370229aa60dabe41e73..d613b9c67606cd0af60a0c635fea16d00cae3bc2 100644 (file)
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * computing energies and forces for the PME long-ranged part (Coulomb
+ * and LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
 
 #ifndef GMX_EWALD_PME_H
 #define GMX_EWALD_PME_H
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 enum {
     GMX_SUM_GRID_FORWARD, GMX_SUM_GRID_BACKWARD
 };
 
+/*! \brief Initialize \p pmedata
+ *
+ * \return 0 on success; a non-zero value is an error code.
+ */
 int gmx_pme_init(struct gmx_pme_t **pmedata, t_commrec *cr,
                  int nnodes_major, int nnodes_minor,
                  t_inputrec *ir, int homenr,
                  gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
                  gmx_bool bReproducible, int nthread);
-/* Initialize the pme data structures resepectively.
- * Return value 0 indicates all well, non zero is an error code.
- */
 
-int gmx_pme_destroy(FILE *log, struct gmx_pme_t **pmedata);
-/* Destroy the pme data structures resepectively.
- * Return value 0 indicates all well, non zero is an error code.
+/*! \brief Destroy the PME data structures.
+ *
+ * \return 0 on success; a non-zero value is an error code.
  */
+int gmx_pme_destroy(FILE *log, struct gmx_pme_t **pmedata);
 
+//@{
+/*! \brief Flag values that control what gmx_pme_do() will calculate
+ *
+ * These can be combined with bitwise-OR if more than one thing is required.
+ */
 #define GMX_PME_SPREAD        (1<<0)
 #define GMX_PME_SOLVE         (1<<1)
 #define GMX_PME_CALC_F        (1<<2)
@@ -87,7 +100,15 @@ int gmx_pme_destroy(FILE *log, struct gmx_pme_t **pmedata);
 #define GMX_PME_DO_LJ         (1<<14)
 
 #define GMX_PME_DO_ALL_F  (GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F)
+//@}
 
+/*! \brief Do a PME calculation for the long range electrostatics and/or LJ.
+ *
+ * The meaning of \p flags is defined above, and determines which
+ * parts of the calculation are performed.
+ *
+ * \return 0 on success; a non-zero value is an error code.
+ */
 int gmx_pme_do(struct gmx_pme_t *pme,
                int start,       int homenr,
                rvec x[],        rvec f[],
@@ -100,31 +121,29 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                matrix vir_q,    real ewaldcoeff_q,
                matrix vir_lj,   real ewaldcoeff_lj,
                real *energy_q,  real *energy_lj,
-               real lambda_q, real lambda_lj,
+               real lambda_q,   real lambda_lj,
                real *dvdlambda_q, real *dvdlambda_lj,
                int flags);
-/* Do a PME calculation for the long range electrostatics and/or LJ.
- * flags, defined above, determine which parts of the calculation are performed.
- * Return value 0 indicates all well, non zero is an error code.
- */
 
+/*! \brief Called on the nodes that do PME exclusively (as slaves) */
 int gmx_pmeonly(struct gmx_pme_t *pme,
                 t_commrec *cr,     t_nrnb *mynrnb,
                 gmx_wallcycle_t wcycle,
                 gmx_walltime_accounting_t walltime_accounting,
                 real ewaldcoeff_q, real ewaldcoeff_lj,
                 t_inputrec *ir);
-/* Called on the nodes that do PME exclusively (as slaves)
- */
 
-void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V);
-/* Calculate the PME grid energy V for n charges with a potential
- * in the pme struct determined before with a call to gmx_pme_do
- * with at least GMX_PME_SPREAD and GMX_PME_SOLVE specified.
- * Note that the charges are not spread on the grid in the pme struct.
- * Currently does not work in parallel or with free energy.
+/*! \brief Calculate the PME grid energy V for n charges.
+ *
+ * The potential (stored in \p pme) must have been computed already with a
+ * call to gmx_pme_do() with at least GMX_PME_SPREAD and GMX_PME_SOLVE
+ * specified. Note that the charges are not spread on the grid in the
+ * pme struct. Currently does not work in parallel or with free
+ * energy.
  */
+void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V);
 
+/*! \brief Send the charges and maxshift to our PME-only node. */
 void gmx_pme_send_parameters(t_commrec *cr,
                              const interaction_const_t *ic,
                              gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
@@ -132,30 +151,25 @@ void gmx_pme_send_parameters(t_commrec *cr,
                              real *sqrt_c6A, real *sqrt_c6B,
                              real *sigmaA, real *sigmaB,
                              int maxshift_x, int maxshift_y);
-/* Send the charges and maxshift to out PME-only node. */
 
+/*! \brief Send the coordinates to our PME-only node and request a PME calculation */
 void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
                               gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
                               real lambda_q, real lambda_lj,
                               gmx_bool bEnerVir, int pme_flags,
                               gmx_int64_t step);
-/* Send the coordinates to our PME-only node and request a PME calculation */
 
+/*! \brief Tell our PME-only node to finish */
 void gmx_pme_send_finish(t_commrec *cr);
-/* Tell our PME-only node to finish */
 
+/*! \brief Tell our PME-only node to reset all cycle and flop counters */
 void gmx_pme_send_resetcounters(t_commrec *cr, gmx_int64_t step);
-/* Tell our PME-only node to reset all cycle and flop counters */
 
+/*! \brief PP nodes receive the long range forces from the PME nodes */
 void gmx_pme_receive_f(t_commrec *cr,
                        rvec f[], matrix vir_q, real *energy_q,
                        matrix vir_lj, real *energy_lj,
                        real *dvdlambda_q, real *dvdlambda_lj,
                        float *pme_cycles);
-/* PP nodes receive the long range forces from the PME nodes */
-
-#ifdef __cplusplus
-}
-#endif
 
 #endif
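
As the documentation added above notes, the GMX_PME_* values combine with bitwise OR. A small self-contained illustration using the values shown in this header (only the first three flags and the composite are reproduced):

    #include <cstdio>

    // Values copied from the header above; the remaining flags are omitted here.
    #define GMX_PME_SPREAD        (1<<0)
    #define GMX_PME_SOLVE         (1<<1)
    #define GMX_PME_CALC_F        (1<<2)
    #define GMX_PME_DO_ALL_F  (GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F)

    int main()
    {
        // Ask for spreading and solving but no force gathering, e.g. when only
        // the grid potential is needed (as gmx_pme_calc_energy() requires).
        int flags = GMX_PME_SPREAD | GMX_PME_SOLVE;

        std::printf("spread requested: %s\n", (flags & GMX_PME_SPREAD) ? "yes" : "no");
        std::printf("forces requested: %s\n", (flags & GMX_PME_CALC_F) ? "yes" : "no");
        std::printf("full force calculation: %s\n", (flags == GMX_PME_DO_ALL_F) ? "yes" : "no");
        return 0;
    }
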
index de011d5cb3a1a6653b990819982d99dd5ab0e659..a10d7f6b91d89eadb9c6cca022a7b24b3cd0007e 100644 (file)
@@ -226,9 +226,9 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
                                                                           simulation stops. If equal to zero, don't
                                                                           communicate any more between multisims.*/
     /* PME load balancing data for GPU kernels */
-    pme_load_balancing_t pme_loadbal = NULL;
-    double               cycles_pmes;
-    gmx_bool             bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
+    pme_load_balancing_t *pme_loadbal = NULL;
+    double                cycles_pmes;
+    gmx_bool              bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
 
     /* Interactive MD */
     gmx_bool          bIMDstep = FALSE;