* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "config.h"
#include <math.h>
#include <string.h>
#include <assert.h>
-#include "sysstuff.h"
-#include "gromacs/utility/smalloc.h"
-#include "types/commrec.h"
-#include "macros.h"
+#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/legacyheaders/macros.h"
#include "gromacs/math/utilities.h"
-#include "vec.h"
-#include "pbc.h"
+#include "gromacs/math/vec.h"
#include "nbnxn_consts.h"
/* nbnxn_internal.h included gromacs/simd/macros.h */
#include "nbnxn_internal.h"
-#ifdef GMX_NBNXN_SIMD
+#ifdef GMX_SIMD
#include "gromacs/simd/vector_operations.h"
#endif
#include "nbnxn_atomdata.h"
#include "nbnxn_search.h"
-#include "gmx_omp_nthreads.h"
-#include "nrnb.h"
-#include "ns.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/ns.h"
-#include "gromacs/fileio/gmxfio.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/utility/smalloc.h"
#ifdef NBNXN_SEARCH_BB_SIMD4
/* Always use 4-wide SIMD for bounding box calculations */
{
rvec size;
+ if (n == 0)
+ {
+ /* To avoid zero density we use a minimum of 1 atom */
+ n = 1;
+ }
+
rvec_sub(corner1, corner0, size);
return n/(size[XX]*size[YY]*size[ZZ]);
if (n > grid->na_sc)
{
+ assert(atom_density > 0);
+
/* target cell length */
if (grid->bSimple)
{
int cxy_start, int cxy_end,
int *sort_work)
{
- int cxy;
- int cx, cy, cz = -1, c = -1, ncz;
- int na, ash, na_c, ind, a;
- int subdiv_z, sub_z, na_z, ash_z;
- int subdiv_y, sub_y, na_y, ash_y;
- int subdiv_x, sub_x, na_x, ash_x;
+ int cxy;
+ int cx, cy, cz = -1, c = -1, ncz;
+ int na, ash, na_c, ind, a;
+ int subdiv_z, sub_z, na_z, ash_z;
+ int subdiv_y, sub_y, na_y, ash_y;
+ int subdiv_x, sub_x, na_x, ash_x;
- /* cppcheck-suppress unassignedVariable */
nbnxn_bb_t bb_work_array[2], *bb_work_aligned;
bb_work_aligned = (nbnxn_bb_t *)(((size_t)(bb_work_array+1)) & (~((size_t)15)));
}
/* Sort the super-cell columns along z into the sub-cells. */
-#pragma omp parallel for num_threads(nbs->nthread_max) schedule(static)
- for (thread = 0; thread < nbs->nthread_max; thread++)
+#pragma omp parallel for num_threads(nthread) schedule(static)
+ for (thread = 0; thread < nthread; thread++)
{
if (grid->bSimple)
{
nbs->ePBC = ePBC;
copy_mat(box, nbs->box);
- if (atom_density >= 0)
+ /* Avoid zero density */
+ if (atom_density > 0)
{
grid->atom_density = atom_density;
}
* for the local atoms (dd_zone=0).
*/
nbs->natoms_nonlocal = a1 - nmoved;
+
+ if (debug)
+ {
+ fprintf(debug, "natoms_local = %5d atom_density = %5.1f\n",
+ nbs->natoms_local, grid->atom_density);
+ }
}
else
{
nbs->natoms_nonlocal = max(nbs->natoms_nonlocal, a1);
}
+ /* We always use the home zone (grid[0]) for setting the cell size,
+ * since determining densities for non-local zones is difficult.
+ */
nc_max_grid = set_grid_size_xy(nbs, grid,
dd_zone, n-nmoved, corner0, corner1,
nbs->grid[0].atom_density);
cj_ind_start = nbl_ci->cj_ind_start;
cj_ind_end = nbl_ci->cj_ind_end;
- /* In worst case we have alternating energy groups and create npair lists */
- nri_max = nbl->na_ci*(cj_ind_end - cj_ind_start);
+ /* In the worst case we have alternating energy groups
+ * and create one list per atom pair, which means we need the size
+ * of a cluster pair (na_ci*na_cj) times the number of cj's.
+ */
+ nri_max = nbl->na_ci*nbl->na_cj*(cj_ind_end - cj_ind_start);
if (nlist->nri + nri_max > nlist->maxnri)
{
nlist->maxnri = over_alloc_large(nlist->nri + nri_max);
* Note that the charge has been set to zero,
* but we need to avoid 0/0, as perturbed atoms
* can be on top of each other.
- * (and the LJ parameters have not been zeroed)
*/
nbl->cj[cj_ind].excl &= ~(1U << (i*nbl->na_cj + j));
}
if (nlist->nrj > nlist->jindex[nri])
{
+ /* Actually add this new, non-empty, list */
nlist->nri++;
nlist->jindex[nlist->nri] = nlist->nrj;
}
cj4_ind_start = nbl_sci->cj4_ind_start;
cj4_ind_end = nbl_sci->cj4_ind_end;
- /* No energy groups (yet), so we split lists in max_nrj_fep pairs */
- nri_max = nbl->na_sc*(1 + ((cj4_ind_end - cj4_ind_start)*NBNXN_GPU_JGROUP_SIZE)/max_nrj_fep);
+ /* Here we process one super-cell, max #atoms na_sc, versus a list
+ * of cj4 entries, each with max NBNXN_GPU_JGROUP_SIZE cj's, each
+ * of size na_cj atoms.
+ * On the GPU we don't support energy groups (yet).
+ * So for each of the na_sc i-atoms, we need at most one FEP list
+ * for every max_nrj_fep j-atoms.
+ */
+ nri_max = nbl->na_sc*nbl->na_cj*(1 + ((cj4_ind_end - cj4_ind_start)*NBNXN_GPU_JGROUP_SIZE)/max_nrj_fep);
if (nlist->nri + nri_max > nlist->maxnri)
{
nlist->maxnri = over_alloc_large(nlist->nri + nri_max);
if (nlist->nrj > nlist->jindex[nri])
{
+ /* Actually add this new, non-empty, list */
nlist->nri++;
nlist->jindex[nlist->nri] = nlist->nrj;
}