The Verlet buffer size for CPUs was always calculated for 4x4.
With 2-wide SIMD the estimate should be for 4x2, which results
in a slightly larger list buffer.
grompp now always sets rlist for a 4x4 list setup; mdrun anyhow
redetermines rlist at run time (added a note for this in grompp).
Fixes #1757.
Change-Id: If4bf9ad17b82b22c9d9f7c1dd3f88e66f2314df4
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "coulomb.h"
#include "calc_verletbuf.h"
#include "../mdlib/nbnxn_consts.h"
+#include "../mdlib/nbnxn_simd.h"
#ifdef GMX_NBNXN_SIMD
/* The include below sets the SIMD instruction type (precision+width)
int n; /* #atoms of this type in the system */
} verletbuf_atomtype_t;
-void verletbuf_get_list_setup(gmx_bool bGPU,
+void verletbuf_get_list_setup(gmx_bool gmx_unused bSIMD,
+ gmx_bool bGPU,
verletbuf_list_setup_t *list_setup)
{
- list_setup->cluster_size_i = NBNXN_CPU_CLUSTER_I_SIZE;
-
if (bGPU)
{
- list_setup->cluster_size_j = NBNXN_GPU_CLUSTER_SIZE;
+ list_setup->cluster_size_i = NBNXN_GPU_CLUSTER_SIZE;
+ list_setup->cluster_size_j = NBNXN_GPU_CLUSTER_SIZE;
}
else
{
-#ifndef GMX_NBNXN_SIMD
- list_setup->cluster_size_j = NBNXN_CPU_CLUSTER_I_SIZE;
-#else
- list_setup->cluster_size_j = GMX_SIMD_REAL_WIDTH;
+ list_setup->cluster_size_i = NBNXN_CPU_CLUSTER_I_SIZE;
+ list_setup->cluster_size_j = NBNXN_CPU_CLUSTER_I_SIZE;
+#ifdef GMX_NBNXN_SIMD
+ if (bSIMD)
+ {
+ list_setup->cluster_size_j = GMX_SIMD_REAL_WIDTH;
#ifdef GMX_NBNXN_SIMD_2XNN
- /* We assume the smallest cluster size to be on the safe side */
- list_setup->cluster_size_j /= 2;
+ /* We assume the smallest cluster size to be on the safe side */
+ list_setup->cluster_size_j /= 2;
#endif
+ }
#endif
}
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
/* Sets the pair-list setup assumed for the current Gromacs configuration.
* The setup with smallest cluster sizes is return, such that the Verlet
* buffer size estimated with this setup will be conservative.
+ * bSIMD tells whether to take SIMD into account, when supported.
+ * bGPU tells to estimate for GPU kernels (bSIMD is ignored with bGPU=TRUE)
*/
-void verletbuf_get_list_setup(gmx_bool bGPU,
+void verletbuf_get_list_setup(gmx_bool bSIMD,
+ gmx_bool bGPU,
verletbuf_list_setup_t *list_setup);
&ls, &n_nonlin_vsite, &rlist_1x1);
/* Set the pair-list buffer size in ir */
- verletbuf_get_list_setup(FALSE, &ls);
+ verletbuf_get_list_setup(FALSE, FALSE, &ls);
calc_verlet_buffer_size(mtop, det(box), ir, buffer_temp,
&ls, &n_nonlin_vsite, &ir->rlist);
ls.cluster_size_i, ls.cluster_size_j,
ir->rlist, ir->rlist-max(ir->rvdw, ir->rcoulomb));
+ printf("Note that mdrun will redetermine rlist based on the actual pair-list setup\n");
+
if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC, box))
{
gmx_fatal(FARGS, "The pair-list cut-off (%g nm) is longer than half the shortest box vector or longer than the smallest box diagonal element (%g nm). Increase the box size or decrease nstlist or increase verlet-buffer-tolerance.", ir->rlistlong, sqrt(max_cutoff2(ir->ePBC, box)));
ir->nstlist = nstlist_cmdline;
}
- verletbuf_get_list_setup(bGPU, &ls);
+ verletbuf_get_list_setup(TRUE, bGPU, &ls);
/* Allow rlist to make the list a given factor larger than the list
* would be with nstlist=10.
* calc_verlet_buffer_size gives the same results for 4x8 and 4x4
* and 4x2 gives a larger buffer than 4x4, this is ok.
*/
- verletbuf_get_list_setup(bUseGPU, &ls);
+ verletbuf_get_list_setup(TRUE, bUseGPU, &ls);
calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new);
{
verletbuf_list_setup_t ls;
- verletbuf_get_list_setup(FALSE, &ls);
+ verletbuf_get_list_setup(TRUE, FALSE, &ls);
calc_verlet_buffer_size(mtop, box_vol, ir, -1, &ls, NULL, &ir->rlist);
}
else