From 5148ccdcc6dc76fa0d10e1fd9c6797be53c553e2 Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Fri, 26 Jun 2015 09:21:22 +0200 Subject: [PATCH] Fixed Verlet buffer issue with 2-wide SIMD The Verlet buffer size for CPUs was always calculated for 4x4. With 2-wide SIMD the estimate should be for 4x2, which results in a slighly larger list buffer. grompp now always sets rlist for a 4x4 list setup; mdrun anyhow redetermines rlist at run time (added a note for this in grompp). Fixes #1757. Change-Id: If4bf9ad17b82b22c9d9f7c1dd3f88e66f2314df4 --- src/gromacs/gmxpreprocess/calc_verletbuf.c | 26 +++++++++++++--------- src/gromacs/gmxpreprocess/calc_verletbuf.h | 7 ++++-- src/gromacs/gmxpreprocess/grompp.c | 4 +++- src/programs/mdrun/runner.c | 6 ++--- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/gromacs/gmxpreprocess/calc_verletbuf.c b/src/gromacs/gmxpreprocess/calc_verletbuf.c index 0387dd1483..28f2d91664 100644 --- a/src/gromacs/gmxpreprocess/calc_verletbuf.c +++ b/src/gromacs/gmxpreprocess/calc_verletbuf.c @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -49,6 +49,7 @@ #include "coulomb.h" #include "calc_verletbuf.h" #include "../mdlib/nbnxn_consts.h" +#include "../mdlib/nbnxn_simd.h" #ifdef GMX_NBNXN_SIMD /* The include below sets the SIMD instruction type (precision+width) @@ -117,25 +118,28 @@ typedef struct int n; /* #atoms of this type in the system */ } verletbuf_atomtype_t; -void verletbuf_get_list_setup(gmx_bool bGPU, +void verletbuf_get_list_setup(gmx_bool gmx_unused bSIMD, + gmx_bool bGPU, verletbuf_list_setup_t *list_setup) { - list_setup->cluster_size_i = NBNXN_CPU_CLUSTER_I_SIZE; - if (bGPU) { - list_setup->cluster_size_j = NBNXN_GPU_CLUSTER_SIZE; + list_setup->cluster_size_i = NBNXN_GPU_CLUSTER_SIZE; + list_setup->cluster_size_j = NBNXN_GPU_CLUSTER_SIZE; } else { -#ifndef GMX_NBNXN_SIMD - list_setup->cluster_size_j = NBNXN_CPU_CLUSTER_I_SIZE; -#else - list_setup->cluster_size_j = GMX_SIMD_REAL_WIDTH; + list_setup->cluster_size_i = NBNXN_CPU_CLUSTER_I_SIZE; + list_setup->cluster_size_j = NBNXN_CPU_CLUSTER_I_SIZE; +#ifdef GMX_NBNXN_SIMD + if (bSIMD) + { + list_setup->cluster_size_j = GMX_SIMD_REAL_WIDTH; #ifdef GMX_NBNXN_SIMD_2XNN - /* We assume the smallest cluster size to be on the safe side */ - list_setup->cluster_size_j /= 2; + /* We assume the smallest cluster size to be on the safe side */ + list_setup->cluster_size_j /= 2; #endif + } #endif } } diff --git a/src/gromacs/gmxpreprocess/calc_verletbuf.h b/src/gromacs/gmxpreprocess/calc_verletbuf.h index 65ef492350..d54ecd437c 100644 --- a/src/gromacs/gmxpreprocess/calc_verletbuf.h +++ b/src/gromacs/gmxpreprocess/calc_verletbuf.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -63,8 +63,11 @@ static const real verlet_buffer_ratio_NVE_T0 = 0.10; /* Sets the pair-list setup assumed for the current Gromacs configuration. * The setup with smallest cluster sizes is return, such that the Verlet * buffer size estimated with this setup will be conservative. + * bSIMD tells if to take into account SIMD, when supported. + * bGPU tells to estimate for GPU kernels (bSIMD is ignored with bGPU=TRUE) */ -void verletbuf_get_list_setup(gmx_bool bGPU, +void verletbuf_get_list_setup(gmx_bool bSIMD, + gmx_bool bGPU, verletbuf_list_setup_t *list_setup); diff --git a/src/gromacs/gmxpreprocess/grompp.c b/src/gromacs/gmxpreprocess/grompp.c index b63ce1020f..53bdb699c3 100644 --- a/src/gromacs/gmxpreprocess/grompp.c +++ b/src/gromacs/gmxpreprocess/grompp.c @@ -1374,7 +1374,7 @@ static void set_verlet_buffer(const gmx_mtop_t *mtop, &ls, &n_nonlin_vsite, &rlist_1x1); /* Set the pair-list buffer size in ir */ - verletbuf_get_list_setup(FALSE, &ls); + verletbuf_get_list_setup(FALSE, FALSE, &ls); calc_verlet_buffer_size(mtop, det(box), ir, buffer_temp, &ls, &n_nonlin_vsite, &ir->rlist); @@ -1392,6 +1392,8 @@ static void set_verlet_buffer(const gmx_mtop_t *mtop, ls.cluster_size_i, ls.cluster_size_j, ir->rlist, ir->rlist-max(ir->rvdw, ir->rcoulomb)); + printf("Note that mdrun will redetermine rlist based on the actual pair-list setup\n"); + if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC, box)) { gmx_fatal(FARGS, "The pair-list cut-off (%g nm) is longer than half the shortest box vector or longer than the smallest box diagonal element (%g nm). Increase the box size or decrease nstlist or increase verlet-buffer-tolerance.", ir->rlistlong, sqrt(max_cutoff2(ir->ePBC, box))); diff --git a/src/programs/mdrun/runner.c b/src/programs/mdrun/runner.c index 744e9f501e..b951255d7c 100644 --- a/src/programs/mdrun/runner.c +++ b/src/programs/mdrun/runner.c @@ -602,7 +602,7 @@ static void increase_nstlist(FILE *fp, t_commrec *cr, ir->nstlist = nstlist_cmdline; } - verletbuf_get_list_setup(bGPU, &ls); + verletbuf_get_list_setup(TRUE, bGPU, &ls); /* Allow rlist to make the list a given factor larger than the list * would be with nstlist=10. @@ -727,7 +727,7 @@ static void prepare_verlet_scheme(FILE *fplog, * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 * and 4x2 gives a larger buffer than 4x4, this is ok. */ - verletbuf_get_list_setup(bUseGPU, &ls); + verletbuf_get_list_setup(TRUE, bUseGPU, &ls); calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new); @@ -828,7 +828,7 @@ static void convert_to_verlet_scheme(FILE *fplog, { verletbuf_list_setup_t ls; - verletbuf_get_list_setup(FALSE, &ls); + verletbuf_get_list_setup(TRUE, FALSE, &ls); calc_verlet_buffer_size(mtop, box_vol, ir, -1, &ls, NULL, &ir->rlist); } else -- 2.22.0