* kernel type 4xn.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include "typedefs.h"
+#include "config.h"
-#ifdef GMX_NBNXN_SIMD_4XN
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_simd.h"
-#ifdef GMX_NBNXN_HALF_WIDTH_SIMD
-#define GMX_USE_HALF_WIDTH_SIMD_HERE
-#endif
+#ifdef GMX_NBNXN_SIMD_4XN
-#include "gromacs/simd/macros.h"
#include "gromacs/simd/vector_operations.h"
+
#if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
#error "unsupported SIMD width"
#endif
#define GMX_SIMD_J_UNROLL_SIZE 1
#include "nbnxn_kernel_simd_4xn.h"
-#include "../nbnxn_kernel_common.h"
-#include "gmx_omp_nthreads.h"
-#include "types/force_flags.h"
-#include "gmx_fatal.h"
+
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/types/force_flags.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+#include "gromacs/utility/fatalerror.h"
/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
*/
enum {
- coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
+ coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
+};
+
+/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
+ */
+enum {
+ vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
};
/* Declare and define the kernel function pointer lookup tables.
* The minor index of the array goes over both the LJ combination rules,
- * which is only supported by plain cut-off, and the LJ switch functions.
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
*/
-static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
+static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
{
{
nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
},
{
nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
},
{
nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
},
{
nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
},
{
nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
},
};
-static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
+static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
{
{
nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
},
{
nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
},
{
nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
},
{
nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
},
{
nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
},
};
-static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
+static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
{
{
nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
},
{
nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
},
{
nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
},
{
nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
},
{
nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
},
};
#else /* GMX_NBNXN_SIMD_4XN */
-#include "gmx_fatal.h"
+#include "gromacs/utility/fatalerror.h"
#endif /* GMX_NBNXN_SIMD_4XN */
{
int nnbl;
nbnxn_pairlist_t **nbl;
- int coult, ljtreatment = 0;
+ int coulkt, vdwkt = 0;
int nb;
nnbl = nbl_list->nnbl;
if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
{
- coult = coultRF;
+ coulkt = coulktRF;
}
else
{
{
if (ic->rcoulomb == ic->rvdw)
{
- coult = coultTAB;
+ coulkt = coulktTAB;
}
else
{
- coult = coultTAB_TWIN;
+ coulkt = coulktTAB_TWIN;
}
}
else
{
if (ic->rcoulomb == ic->rvdw)
{
- coult = coultEWALD;
+ coulkt = coulktEWALD;
}
else
{
- coult = coultEWALD_TWIN;
+ coulkt = coulktEWALD_TWIN;
}
}
}
- switch (ic->vdw_modifier)
+ if (ic->vdwtype == evdwCUT)
+ {
+ switch (ic->vdw_modifier)
+ {
+ case eintmodNONE:
+ case eintmodPOTSHIFT:
+ switch (nbat->comb_rule)
+ {
+ case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
+ case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
+ case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
+ default: gmx_incons("Unknown combination rule");
+ }
+ break;
+ case eintmodFORCESWITCH:
+ vdwkt = vdwktLJFORCESWITCH;
+ break;
+ case eintmodPOTSWITCH:
+ vdwkt = vdwktLJPOTSWITCH;
+ break;
+ default:
+ gmx_incons("Unsupported VdW interaction modifier");
+ }
+ }
+ else if (ic->vdwtype == evdwPME)
+ {
+ if (ic->ljpme_comb_rule == eljpmeLB)
+ {
+ gmx_incons("The nbnxn SIMD kernels don't suport LJ-PME with LB");
+ }
+ vdwkt = vdwktLJEWALDCOMBGEOM;
+ }
+ else
{
- case eintmodNONE:
- case eintmodPOTSHIFT:
- ljtreatment = nbat->comb_rule;
- break;
- /* Switch functions follow after cut-off combination rule kernels */
- case eintmodFORCESWITCH:
- ljtreatment = ljcrNR;
- break;
- case eintmodPOTSWITCH:
- ljtreatment = ljcrNR + 1;
- break;
- default:
- gmx_incons("Unsupported VdW interaction modifier");
+ gmx_incons("Unsupported VdW interaction type");
}
#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
if (!(force_flags & GMX_FORCE_ENERGY))
{
/* Don't calculate energies */
- p_nbk_noener[coult][ljtreatment](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p);
+ p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shift_vec,
+ out->f,
+ fshift_p);
}
else if (out->nV == 1)
{
out->Vvdw[0] = 0;
out->Vc[0] = 0;
- p_nbk_ener[coult][ljtreatment](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->Vvdw,
- out->Vc);
+ p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shift_vec,
+ out->f,
+ fshift_p,
+ out->Vvdw,
+ out->Vc);
}
else
{
out->VSc[i] = 0;
}
- p_nbk_energrp[coult][ljtreatment](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->VSvdw,
- out->VSc);
+ p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shift_vec,
+ out->f,
+ fshift_p,
+ out->VSvdw,
+ out->VSc);
reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
out->VSvdw, out->VSc,