* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "nbnxn_atomdata.h"
+#include "config.h"
+
+#include <assert.h>
#include <math.h>
+#include <stdlib.h>
#include <string.h>
-#include <assert.h>
-#include "smalloc.h"
-#include "macros.h"
-#include "vec.h"
-#include "nbnxn_consts.h"
-#include "nbnxn_internal.h"
-#include "nbnxn_atomdata.h"
-#include "nbnxn_search.h"
+
+#include "thread_mpi/atomic.h"
+
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_internal.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/pbcutil/ishift.h"
#include "gromacs/utility/gmxomp.h"
-#include "gmx_omp_nthreads.h"
+#include "gromacs/utility/smalloc.h"
/* Default nbnxn allocation routine, allocates NBNXN_MEM_ALIGN byte aligned */
void nbnxn_alloc_aligned(void **ptr, size_t nbytes)
}
}
-/* Stores the LJ parameter data in a format convenient for the SIMD kernels */
-static void set_ljparam_simd_data(nbnxn_atomdata_t *nbat)
+/* Stores the LJ parameter data in a format convenient for different kernels */
+static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD)
{
int nt, i, j;
real c6, c12;
nt = nbat->ntype;
- /* nbfp_s4 stores two parameters using a stride of 4,
- * because this would suit x86 SIMD single-precision
- * quad-load intrinsics. There's a slight inefficiency in
- * allocating and initializing nbfp_s4 when it might not
- * be used, but introducing the conditional code is not
- * really worth it. */
- nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4));
- for (i = 0; i < nt; i++)
+ if (bSIMD)
{
- for (j = 0; j < nt; j++)
+ /* nbfp_s4 stores two parameters using a stride of 4,
+ * because this would suit x86 SIMD single-precision
+ * quad-load intrinsics. There's a slight inefficiency in
+ * allocating and initializing nbfp_s4 when it might not
+ * be used, but introducing the conditional code is not
+ * really worth it. */
+ nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4));
+ for (i = 0; i < nt; i++)
{
- nbat->nbfp_s4[(i*nt+j)*4+0] = nbat->nbfp[(i*nt+j)*2+0];
- nbat->nbfp_s4[(i*nt+j)*4+1] = nbat->nbfp[(i*nt+j)*2+1];
- nbat->nbfp_s4[(i*nt+j)*4+2] = 0;
- nbat->nbfp_s4[(i*nt+j)*4+3] = 0;
+ for (j = 0; j < nt; j++)
+ {
+ nbat->nbfp_s4[(i*nt+j)*4+0] = nbat->nbfp[(i*nt+j)*2+0];
+ nbat->nbfp_s4[(i*nt+j)*4+1] = nbat->nbfp[(i*nt+j)*2+1];
+ nbat->nbfp_s4[(i*nt+j)*4+2] = 0;
+ nbat->nbfp_s4[(i*nt+j)*4+3] = 0;
+ }
}
}
+ /* We use combination rule data for SIMD combination rule kernels
+ * and with LJ-PME kernels. We then only need parameters per atom type,
+ * not per pair of atom types.
+ */
switch (nbat->comb_rule)
{
case ljcrGEOM:
for (i = 0; i < nt; i++)
{
- /* Copy the diagonal from the nbfp matrix */
+ /* Store the sqrt of the diagonal from the nbfp matrix */
nbat->nbfp_comb[i*2 ] = sqrt(nbat->nbfp[(i*nt+i)*2 ]);
nbat->nbfp_comb[i*2+1] = sqrt(nbat->nbfp[(i*nt+i)*2+1]);
}
int i, j, nth;
real c6, c12, tol;
char *ptr;
- gmx_bool simple, bCombGeom, bCombLB;
+ gmx_bool simple, bCombGeom, bCombLB, bSIMD;
if (alloc == NULL)
{
gmx_incons("Unknown enbnxninitcombrule");
}
- if (simple)
- {
- set_ljparam_simd_data(nbat);
- }
+ bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
+ nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
+
+ set_lj_parameter_data(nbat, bSIMD);
nbat->natoms = 0;
nbat->type = NULL;
{
int pack_x;
- switch (nb_kernel_type)
+ if (bSIMD)
{
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
- pack_x = max(NBNXN_CPU_CLUSTER_I_SIZE,
- nbnxn_kernel_to_cj_size(nb_kernel_type));
- switch (pack_x)
- {
- case 4:
- nbat->XFormat = nbatX4;
- break;
- case 8:
- nbat->XFormat = nbatX8;
- break;
- default:
- gmx_incons("Unsupported packing width");
- }
- break;
- default:
- nbat->XFormat = nbatXYZ;
- break;
+ pack_x = max(NBNXN_CPU_CLUSTER_I_SIZE,
+ nbnxn_kernel_to_cj_size(nb_kernel_type));
+ switch (pack_x)
+ {
+ case 4:
+ nbat->XFormat = nbatX4;
+ break;
+ case 8:
+ nbat->XFormat = nbatX8;
+ break;
+ default:
+ gmx_incons("Unsupported packing width");
+ }
+ }
+ else
+ {
+ nbat->XFormat = nbatXYZ;
}
nbat->FFormat = nbat->XFormat;
}
}
-/* Sets the atom type and LJ data in nbnxn_atomdata_t */
+/* Sets the atom type in nbnxn_atomdata_t */
static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t *nbat,
int ngrid,
const nbnxn_search_t nbs,
copy_int_to_nbat_int(nbs->a+ash, grid->cxy_na[i], ncz*grid->na_sc,
type, nbat->ntype-1, nbat->type+ash);
+ }
+ }
+}
- if (nbat->comb_rule != ljcrNONE)
+/* Sets the LJ combination rule parameters in nbnxn_atomdata_t */
+static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t *nbat,
+ int ngrid,
+ const nbnxn_search_t nbs)
+{
+ int g, i, ncz, ash;
+ const nbnxn_grid_t *grid;
+
+ if (nbat->comb_rule != ljcrNONE)
+ {
+ for (g = 0; g < ngrid; g++)
+ {
+ grid = &nbs->grid[g];
+
+ /* Loop over all columns and copy and fill */
+ for (i = 0; i < grid->ncx*grid->ncy; i++)
{
+ ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i];
+ ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc;
+
if (nbat->XFormat == nbatX4)
{
copy_lj_to_nbat_lj_comb_x4(nbat->nbfp_comb,
}
}
+/* Set the charges of perturbed atoms in nbnxn_atomdata_t to 0.
+ * This is to automatically remove the RF/PME self term in the nbnxn kernels.
+ * Part of the zero interactions are still calculated in the normal kernels.
+ * All perturbed interactions are calculated in the free energy kernel,
+ * using the original charge and LJ data, not nbnxn_atomdata_t.
+ */
+static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat,
+                                    int ngrid,
+                                    const nbnxn_search_t nbs)
+{
+    real *q;
+    int stride_q, g, nsubc, c_offset, c, i, ind;
+    const nbnxn_grid_t *grid;
+
+    /* Locate the charge storage: with the XYZQ format the charge is
+     * interleaved after the three coordinates of each atom, otherwise
+     * charges live in the separate, contiguous nbat->q array.
+     */
+    if (nbat->XFormat == nbatXYZQ)
+    {
+        q = nbat->x + ZZ + 1;
+        stride_q = STRIDE_XYZQ;
+    }
+    else
+    {
+        q = nbat->q;
+        stride_q = 1;
+    }
+
+    for (g = 0; g < ngrid; g++)
+    {
+        grid = &nbs->grid[g];
+        /* Simple grids have one cluster per cell; GPU (super-cell)
+         * grids pack GPU_NSUBCELL clusters into each cell.
+         */
+        if (grid->bSimple)
+        {
+            nsubc = 1;
+        }
+        else
+        {
+            nsubc = GPU_NSUBCELL;
+        }
+
+        c_offset = grid->cell0*grid->na_sc;
+
+        /* Loop over all columns and copy and fill */
+        for (c = 0; c < grid->nc*nsubc; c++)
+        {
+            /* Does this cluster contain perturbed particles? */
+            if (grid->fep[c] != 0)
+            {
+                for (i = 0; i < grid->na_c; i++)
+                {
+                    /* Is this a perturbed particle? grid->fep[c] is a
+                     * bitmask over the atoms in cluster c.
+                     */
+                    if (grid->fep[c] & (1 << i))
+                    {
+                        ind = c_offset + c*grid->na_c + i;
+                        /* Set atom type and charge to non-interacting */
+                        nbat->type[ind] = nbat->ntype - 1;
+                        q[ind*stride_q] = 0;
+                    }
+                }
+            }
+        }
+    }
+}
+
/* Copies the energy group indices to a reordered and packed array */
static void copy_egp_to_nbat_egps(const int *a, int na, int na_round,
int na_c, int bit_shift,
int g, i, ncz, ash;
const nbnxn_grid_t *grid;
+ if (nbat->nenergrp == 1)
+ {
+ return;
+ }
+
for (g = 0; g < ngrid; g++)
{
grid = &nbs->grid[g];
nbnxn_atomdata_set_charges(nbat, ngrid, nbs, mdatoms->chargeA);
- if (nbat->nenergrp > 1)
+ if (nbs->bFEP)
{
- nbnxn_atomdata_set_energygroups(nbat, ngrid, nbs, atinfo);
+ nbnxn_atomdata_mask_fep(nbat, ngrid, nbs);
}
+
+ /* This must be done after masking types for FEP */
+ nbnxn_atomdata_set_ljcombparams(nbat, ngrid, nbs);
+
+ nbnxn_atomdata_set_energygroups(nbat, ngrid, nbs, atinfo);
}
/* Copies the shift vector array to nbnxn_atomdata_t */