#undef gmx_calc_rsq_pr
#undef gmx_sum4_pr
+#undef gmx_pmecorrF_pr
+#undef gmx_pmecorrV_pr
+
/* By defining GMX_MM128_HERE or GMX_MM256_HERE before including this file
* the same intrinsics, with defines, can be compiled for either 128 or 256
#define gmx_calc_rsq_pr gmx_mm_calc_rsq_ps
#define gmx_sum4_pr gmx_mm_sum4_ps
+#define gmx_pmecorrF_pr gmx_mm_pmecorrF_ps
+#define gmx_pmecorrV_pr gmx_mm_pmecorrV_ps
+
#else /* ifndef GMX_DOUBLE */
#include "gmx_x86_simd_double.h"
#define gmx_calc_rsq_pr gmx_mm_calc_rsq_pd
#define gmx_sum4_pr gmx_mm_sum4_pd
+#define gmx_pmecorrF_pr gmx_mm_pmecorrF_pd
+#define gmx_pmecorrV_pr gmx_mm_pmecorrV_pd
+
#endif /* ifndef GMX_DOUBLE */
#endif /* GMX_MM128_HERE */
#define gmx_calc_rsq_pr gmx_mm256_calc_rsq_ps
#define gmx_sum4_pr gmx_mm256_sum4_ps
+#define gmx_pmecorrF_pr gmx_mm256_pmecorrF_ps
+#define gmx_pmecorrV_pr gmx_mm256_pmecorrV_ps
+
#else
#include "gmx_x86_simd_double.h"
#define gmx_calc_rsq_pr gmx_mm256_calc_rsq_pd
#define gmx_sum4_pr gmx_mm256_sum4_pd
+#define gmx_pmecorrF_pr gmx_mm256_pmecorrF_pd
+#define gmx_pmecorrV_pr gmx_mm256_pmecorrV_pd
+
#endif
#endif /* GMX_MM256_HERE */
#endif
"CUDA 8x8x8", "plain C 8x8x8" };
+enum { ewaldexclTable, ewaldexclAnalytical };
+
/* Atom locality indicator: local, non-local, all, used for calls to:
gridding, pair-search, force calculation, x/f buffer operations */
enum { eatLocal = 0, eatNonlocal = 1, eatAll };
nbnxn_pairlist_set_t nbl_lists; /* pair list(s) */
nbnxn_atomdata_t *nbat; /* atom data */
int kernel_type; /* non-bonded kernel - see enum above */
+ int ewald_excl; /* Ewald exclusion - see enum above */
} nonbonded_verlet_group_t;
/* non-bonded data structure with Verlet-type cut-off */
eNR_NBKERNEL_ALLVSALLGB,
eNR_NBNXN_DIST2,
- eNR_NBNXN_LJ_RF, eNR_NBNXN_LJ_RF_E,
- eNR_NBNXN_LJ_TAB, eNR_NBNXN_LJ_TAB_E,
- eNR_NBNXN_LJ, eNR_NBNXN_LJ_E,
- eNR_NBNXN_RF, eNR_NBNXN_RF_E,
- eNR_NBNXN_TAB, eNR_NBNXN_TAB_E,
+ eNR_NBNXN_LJ_RF, eNR_NBNXN_LJ_RF_E,
+ eNR_NBNXN_LJ_TAB, eNR_NBNXN_LJ_TAB_E,
+ eNR_NBNXN_LJ_EWALD, eNR_NBNXN_LJ_EWALD_E,
+ eNR_NBNXN_LJ, eNR_NBNXN_LJ_E,
+ eNR_NBNXN_RF, eNR_NBNXN_RF_E,
+ eNR_NBNXN_TAB, eNR_NBNXN_TAB_E,
+ eNR_NBNXN_EWALD, eNR_NBNXN_EWALD_E,
eNR_NB14,
eNR_BORN_RADII_STILL, eNR_BORN_RADII_HCT_OBC,
eNR_BORN_CHAINRULE,
*/
{ "NxN RF Elec. + VdW [F]", 38 }, /* nbnxn kernel LJ+RF, no ener */
{ "NxN RF Elec. + VdW [V&F]", 54 },
- { "NxN CSTab Elec. + VdW [F]", 41 }, /* nbnxn kernel LJ+tab, no en */
- { "NxN CSTab Elec. + VdW [V&F]", 59 },
+ { "NxN QSTab Elec. + VdW [F]", 41 }, /* nbnxn kernel LJ+tab, no en */
+ { "NxN QSTab Elec. + VdW [V&F]", 59 },
+ { "NxN Ewald Elec. + VdW [F]", 66 }, /* nbnxn kernel LJ+Ewald, no en */
+ { "NxN Ewald Elec. + VdW [V&F]", 107 },
{ "NxN VdW [F]", 33 }, /* nbnxn kernel LJ, no ener */
{ "NxN VdW [V&F]", 43 },
{ "NxN RF Electrostatics [F]", 31 }, /* nbnxn kernel RF, no ener */
{ "NxN RF Electrostatics [V&F]", 36 },
- { "NxN CSTab Elec. [F]", 34 }, /* nbnxn kernel tab, no ener */
- { "NxN CSTab Elec. [V&F]", 41 },
+ { "NxN QSTab Elec. [F]", 34 }, /* nbnxn kernel tab, no ener */
+ { "NxN QSTab Elec. [V&F]", 41 },
+ { "NxN Ewald Elec. [F]", 61 }, /* nbnxn kernel Ewald, no ener */
+ { "NxN Ewald Elec. [V&F]", 84 },
{ "1,4 nonbonded interactions", 90 },
{ "Born radii (Still)", 47 },
{ "Born radii (HCT/OBC)", 183 },
if (out)
{
- fprintf(out," NB=Group-cutoff nonbonded kernels NxN=N-by-N tile Verlet kernels\n");
- fprintf(out," RF=Reaction-Field VdW=Van der Waals CSTab=Cubic-spline table\n");
+ fprintf(out," NB=Group-cutoff nonbonded kernels NxN=N-by-N cluster Verlet kernels\n");
+ fprintf(out," RF=Reaction-Field VdW=Van der Waals QSTab=quadratic-spline table\n");
fprintf(out," W3=SPC/TIP3p W4=TIP4p (single or pairs)\n");
fprintf(out," V&F=Potential and force V=Potential only F=Force only\n\n");
static void pick_nbnxn_kernel_cpu(FILE *fp,
const t_commrec *cr,
const gmx_cpuid_t cpuid_info,
- int *kernel_type)
+ int *kernel_type,
+ int *ewald_excl)
{
*kernel_type = nbk4x4_PlainC;
+ *ewald_excl = ewaldexclTable;
#ifdef GMX_X86_SSE2
{
gmx_fatal(FARGS,"You requested AVX-256 nbnxn kernels, but GROMACS was built without AVX support");
#endif
}
+
+    /* Analytical Ewald exclusion correction is only an option in the
+     * x86 SIMD kernels. The analytical correction is faster than the
+     * tabulated correction in single precision on Bulldozer and
+     * slightly faster on Sandy Bridge.
+     */
+#if (defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE
+ *ewald_excl = ewaldexclAnalytical;
+#endif
+ if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
+ {
+ *ewald_excl = ewaldexclTable;
+ }
+ if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
+ {
+ *ewald_excl = ewaldexclAnalytical;
+ }
+
}
#endif /* GMX_X86_SSE2 */
}
const gmx_hw_info_t *hwinfo,
gmx_bool use_cpu_acceleration,
gmx_bool *bUseGPU,
- int *kernel_type)
+ int *kernel_type,
+ int *ewald_excl)
{
gmx_bool bEmulateGPU, bGPU;
char gpu_err_str[STRLEN];
assert(kernel_type);
*kernel_type = nbkNotSet;
+ *ewald_excl = ewaldexclTable;
/* if bUseGPU == NULL we don't want a GPU (e.g. hybrid mode kernel selection) */
bGPU = (bUseGPU != NULL) && hwinfo->bCanUseGPU;
{
if (use_cpu_acceleration)
{
- pick_nbnxn_kernel_cpu(fp,cr,hwinfo->cpuid_info,kernel_type);
+ pick_nbnxn_kernel_cpu(fp,cr,hwinfo->cpuid_info,
+ kernel_type,ewald_excl);
}
else
{
{
pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
&nbv->bUseGPU,
- &nbv->grp[i].kernel_type);
+ &nbv->grp[i].kernel_type,
+ &nbv->grp[i].ewald_excl);
}
else /* non-local */
{
/* Use GPU for local, select a CPU kernel for non-local */
pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
NULL,
- &nbv->grp[i].kernel_type);
+ &nbv->grp[i].kernel_type,
+ &nbv->grp[i].ewald_excl);
bHybridGPURun = TRUE;
}
#undef CALC_COUL_TAB
+/* Analytical Ewald exclusion interaction electrostatics kernels */
+#define CALC_COUL_EWALD
+
+/* Single cut-off: rcoulomb = rvdw */
+#include "nbnxn_kernel_x86_simd_includes.h"
+
+/* Twin cut-off: rcoulomb >= rvdw */
+#define VDW_CUTOFF_CHECK
+#include "nbnxn_kernel_x86_simd_includes.h"
+#undef VDW_CUTOFF_CHECK
+
+#undef CALC_COUL_EWALD
+
typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t *nbl,
const nbnxn_atomdata_t *nbat,
real *f,
real *fshift);
-enum { coultRF, coultTAB, coultTAB_TWIN, coultNR };
-
+enum { coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR };
+#define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_ener
static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
-{ { nbnxn_kernel_x86_simd128_rf_comb_geom_ener,
- nbnxn_kernel_x86_simd128_rf_comb_lb_ener,
- nbnxn_kernel_x86_simd128_rf_comb_none_ener },
- { nbnxn_kernel_x86_simd128_tab_comb_geom_ener,
- nbnxn_kernel_x86_simd128_tab_comb_lb_ener,
- nbnxn_kernel_x86_simd128_tab_twin_comb_none_ener },
- { nbnxn_kernel_x86_simd128_tab_twin_comb_geom_ener,
- nbnxn_kernel_x86_simd128_tab_twin_comb_lb_ener,
- nbnxn_kernel_x86_simd128_tab_twin_comb_none_ener } };
-
+{ { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
+ { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
+ { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
+ { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
+ { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
+#undef NBK_FN
+
+#define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_energrp
static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
-{ { nbnxn_kernel_x86_simd128_rf_comb_geom_energrp,
- nbnxn_kernel_x86_simd128_rf_comb_lb_energrp,
- nbnxn_kernel_x86_simd128_rf_comb_none_energrp },
- { nbnxn_kernel_x86_simd128_tab_comb_geom_energrp,
- nbnxn_kernel_x86_simd128_tab_comb_lb_energrp,
- nbnxn_kernel_x86_simd128_tab_comb_none_energrp },
- { nbnxn_kernel_x86_simd128_tab_twin_comb_geom_energrp,
- nbnxn_kernel_x86_simd128_tab_twin_comb_lb_energrp,
- nbnxn_kernel_x86_simd128_tab_twin_comb_none_energrp } };
-
+{ { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
+ { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
+ { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
+ { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
+ { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
+#undef NBK_FN
+
+#define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_noener
static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
-{ { nbnxn_kernel_x86_simd128_rf_comb_geom_noener,
- nbnxn_kernel_x86_simd128_rf_comb_lb_noener,
- nbnxn_kernel_x86_simd128_rf_comb_none_noener },
- { nbnxn_kernel_x86_simd128_tab_comb_geom_noener,
- nbnxn_kernel_x86_simd128_tab_comb_lb_noener,
- nbnxn_kernel_x86_simd128_tab_comb_none_noener },
- { nbnxn_kernel_x86_simd128_tab_twin_comb_geom_noener,
- nbnxn_kernel_x86_simd128_tab_twin_comb_lb_noener,
- nbnxn_kernel_x86_simd128_tab_twin_comb_none_noener } };
+{ { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
+ { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
+ { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
+ { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
+ { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
+#undef NBK_FN
static void reduce_group_energies(int ng,int ng_2log,
nbnxn_kernel_x86_simd128(nbnxn_pairlist_set_t *nbl_list,
const nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
+ int ewald_excl,
rvec *shift_vec,
int force_flags,
int clearF,
}
else
{
- if (ic->rcoulomb == ic->rvdw)
+ if (ewald_excl == ewaldexclTable)
{
- coult = coultTAB;
+ if (ic->rcoulomb == ic->rvdw)
+ {
+ coult = coultTAB;
+ }
+ else
+ {
+ coult = coultTAB_TWIN;
+ }
}
else
{
- coult = coultTAB_TWIN;
+ if (ic->rcoulomb == ic->rvdw)
+ {
+ coult = coultEWALD;
+ }
+ else
+ {
+ coult = coultEWALD_TWIN;
+ }
}
}
nbnxn_kernel_x86_simd128(nbnxn_pairlist_set_t *nbl_list,
const nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
+ int ewald_excl,
rvec *shift_vec,
int force_flags,
int clearF,
#undef CALC_COUL_TAB
+/* Analytical Ewald exclusion interaction electrostatics kernels */
+#define CALC_COUL_EWALD
+
+/* Single cut-off: rcoulomb = rvdw */
+#include "nbnxn_kernel_x86_simd_includes.h"
+
+/* Twin cut-off: rcoulomb >= rvdw */
+#define VDW_CUTOFF_CHECK
+#include "nbnxn_kernel_x86_simd_includes.h"
+#undef VDW_CUTOFF_CHECK
+
+#undef CALC_COUL_EWALD
+
typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t *nbl,
const nbnxn_atomdata_t *nbat,
real *f,
real *fshift);
-enum { coultRF, coultTAB, coultTAB_TWIN, coultNR };
-
+enum { coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR };
+#define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd256_##elec##_comb_##ljcomb##_ener
static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
-{ { nbnxn_kernel_x86_simd256_rf_comb_geom_ener,
- nbnxn_kernel_x86_simd256_rf_comb_lb_ener,
- nbnxn_kernel_x86_simd256_rf_comb_none_ener },
- { nbnxn_kernel_x86_simd256_tab_comb_geom_ener,
- nbnxn_kernel_x86_simd256_tab_comb_lb_ener,
- nbnxn_kernel_x86_simd256_tab_twin_comb_none_ener },
- { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_ener,
- nbnxn_kernel_x86_simd256_tab_twin_comb_lb_ener,
- nbnxn_kernel_x86_simd256_tab_twin_comb_none_ener } };
-
+{ { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
+ { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
+ { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
+ { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
+ { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
+#undef NBK_FN
+
+#define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd256_##elec##_comb_##ljcomb##_energrp
static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
-{ { nbnxn_kernel_x86_simd256_rf_comb_geom_energrp,
- nbnxn_kernel_x86_simd256_rf_comb_lb_energrp,
- nbnxn_kernel_x86_simd256_rf_comb_none_energrp },
- { nbnxn_kernel_x86_simd256_tab_comb_geom_energrp,
- nbnxn_kernel_x86_simd256_tab_comb_lb_energrp,
- nbnxn_kernel_x86_simd256_tab_comb_none_energrp },
- { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_energrp,
- nbnxn_kernel_x86_simd256_tab_twin_comb_lb_energrp,
- nbnxn_kernel_x86_simd256_tab_twin_comb_none_energrp } };
-
+{ { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
+ { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
+ { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
+ { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
+ { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
+#undef NBK_FN
+
+#define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd256_##elec##_comb_##ljcomb##_noener
static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
-{ { nbnxn_kernel_x86_simd256_rf_comb_geom_noener,
- nbnxn_kernel_x86_simd256_rf_comb_lb_noener,
- nbnxn_kernel_x86_simd256_rf_comb_none_noener },
- { nbnxn_kernel_x86_simd256_tab_comb_geom_noener,
- nbnxn_kernel_x86_simd256_tab_comb_lb_noener,
- nbnxn_kernel_x86_simd256_tab_comb_none_noener },
- { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_noener,
- nbnxn_kernel_x86_simd256_tab_twin_comb_lb_noener,
- nbnxn_kernel_x86_simd256_tab_twin_comb_none_noener } };
+{ { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
+ { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
+ { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
+ { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
+ { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
+#undef NBK_FN
static void reduce_group_energies(int ng,int ng_2log,
nbnxn_kernel_x86_simd256(nbnxn_pairlist_set_t *nbl_list,
const nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
+ int ewald_excl,
rvec *shift_vec,
int force_flags,
int clearF,
}
else
{
- if (ic->rcoulomb == ic->rvdw)
+ if (ewald_excl == ewaldexclTable)
{
- coult = coultTAB;
+ if (ic->rcoulomb == ic->rvdw)
+ {
+ coult = coultTAB;
+ }
+ else
+ {
+ coult = coultTAB_TWIN;
+ }
}
else
{
- coult = coultTAB_TWIN;
+ if (ic->rcoulomb == ic->rvdw)
+ {
+ coult = coultEWALD;
+ }
+ else
+ {
+ coult = coultEWALD_TWIN;
+ }
}
}
nbnxn_kernel_x86_simd256(nbnxn_pairlist_set_t *nbl_list,
const nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
+ int ewald_excl,
rvec *shift_vec,
int force_flags,
int clearF,
#define EXCL_FORCES
#endif
-#if !(defined CHECK_EXCLS || defined CALC_ENERGIES) && defined GMX_X86_SSE4_1 && !defined COUNT_PAIRS && !(defined __GNUC__ && (defined CALC_COUL_TAB || (defined CALC_COUL_RF && defined GMX_MM128_HERE)))
+#if !(defined CHECK_EXCLS || defined CALC_ENERGIES || defined CALC_COUL_EWALD) && defined GMX_X86_SSE4_1 && !defined COUNT_PAIRS && !(defined __GNUC__ && (defined CALC_COUL_TAB || (defined CALC_COUL_RF && defined GMX_MM128_HERE)))
/* Without exclusions and energies we only need to mask the cut-off,
* this is faster with blendv (only available with SSE4.1 and later).
* With gcc and PME or RF in 128-bit, blendv is slower;
gmx_mm_pr fsub_SSE2;
gmx_mm_pr fsub_SSE3;
#endif
+#ifdef CALC_COUL_EWALD
+ gmx_mm_pr brsq_SSE0,brsq_SSE1,brsq_SSE2,brsq_SSE3;
+ gmx_mm_pr ewcorr_SSE0,ewcorr_SSE1,ewcorr_SSE2,ewcorr_SSE3;
+#endif
+
/* frcoul = (1/r - fsub)*r */
gmx_mm_pr frcoul_SSE0;
gmx_mm_pr frcoul_SSE1;
gmx_mm_pr ctabv_SSE1;
gmx_mm_pr ctabv_SSE2;
gmx_mm_pr ctabv_SSE3;
+#endif
+#endif
+#if defined CALC_ENERGIES && (defined CALC_COUL_EWALD || defined CALC_COUL_TAB)
/* The potential (PME mesh) we need to subtract from 1/r */
gmx_mm_pr vc_sub_SSE0;
gmx_mm_pr vc_sub_SSE1;
gmx_mm_pr vc_sub_SSE2;
gmx_mm_pr vc_sub_SSE3;
#endif
-#endif
#ifdef CALC_ENERGIES
/* Electrostatic potential */
gmx_mm_pr vcoul_SSE0;
#endif
#endif
+#ifdef CALC_COUL_EWALD
+ brsq_SSE0 = gmx_mul_pr(beta2_SSE,gmx_and_pr(rsq_SSE0,wco_SSE0));
+ brsq_SSE1 = gmx_mul_pr(beta2_SSE,gmx_and_pr(rsq_SSE1,wco_SSE1));
+ brsq_SSE2 = gmx_mul_pr(beta2_SSE,gmx_and_pr(rsq_SSE2,wco_SSE2));
+ brsq_SSE3 = gmx_mul_pr(beta2_SSE,gmx_and_pr(rsq_SSE3,wco_SSE3));
+ ewcorr_SSE0 = gmx_mul_pr(gmx_pmecorrF_pr(brsq_SSE0),beta_SSE);
+ ewcorr_SSE1 = gmx_mul_pr(gmx_pmecorrF_pr(brsq_SSE1),beta_SSE);
+ ewcorr_SSE2 = gmx_mul_pr(gmx_pmecorrF_pr(brsq_SSE2),beta_SSE);
+ ewcorr_SSE3 = gmx_mul_pr(gmx_pmecorrF_pr(brsq_SSE3),beta_SSE);
+ frcoul_SSE0 = gmx_mul_pr(qq_SSE0,gmx_add_pr(rinv_ex_SSE0,gmx_mul_pr(ewcorr_SSE0,brsq_SSE0)));
+ frcoul_SSE1 = gmx_mul_pr(qq_SSE1,gmx_add_pr(rinv_ex_SSE1,gmx_mul_pr(ewcorr_SSE1,brsq_SSE1)));
+ frcoul_SSE2 = gmx_mul_pr(qq_SSE2,gmx_add_pr(rinv_ex_SSE2,gmx_mul_pr(ewcorr_SSE2,brsq_SSE2)));
+ frcoul_SSE3 = gmx_mul_pr(qq_SSE3,gmx_add_pr(rinv_ex_SSE3,gmx_mul_pr(ewcorr_SSE3,brsq_SSE3)));
+
+#ifdef CALC_ENERGIES
+ vc_sub_SSE0 = gmx_mul_pr(gmx_pmecorrV_pr(brsq_SSE0),beta_SSE);
+ vc_sub_SSE1 = gmx_mul_pr(gmx_pmecorrV_pr(brsq_SSE1),beta_SSE);
+ vc_sub_SSE2 = gmx_mul_pr(gmx_pmecorrV_pr(brsq_SSE2),beta_SSE);
+ vc_sub_SSE3 = gmx_mul_pr(gmx_pmecorrV_pr(brsq_SSE3),beta_SSE);
+#endif
+
+#endif /* CALC_COUL_EWALD */
+
#ifdef CALC_COUL_TAB
/* Electrostatic interactions */
r_SSE0 = gmx_mul_pr(rsq_SSE0,rinv_SSE0);
vc_sub_SSE1 = gmx_add_pr(ctabv_SSE1,gmx_mul_pr(gmx_mul_pr(mhalfsp_SSE,frac_SSE1),gmx_add_pr(ctab0_SSE1,fsub_SSE1)));
vc_sub_SSE2 = gmx_add_pr(ctabv_SSE2,gmx_mul_pr(gmx_mul_pr(mhalfsp_SSE,frac_SSE2),gmx_add_pr(ctab0_SSE2,fsub_SSE2)));
vc_sub_SSE3 = gmx_add_pr(ctabv_SSE3,gmx_mul_pr(gmx_mul_pr(mhalfsp_SSE,frac_SSE3),gmx_add_pr(ctab0_SSE3,fsub_SSE3)));
+#endif
+#endif /* CALC_COUL_TAB */
+#if defined CALC_ENERGIES && (defined CALC_COUL_EWALD || defined CALC_COUL_TAB)
#ifndef NO_SHIFT_EWALD
/* Add Ewald potential shift to vc_sub for convenience */
#ifdef CHECK_EXCLS
vcoul_SSE3 = gmx_mul_pr(qq_SSE3,gmx_sub_pr(rinv_ex_SSE3,vc_sub_SSE3));
#endif
-#endif
#ifdef CALC_ENERGIES
/* Mask energy for cut-off and diagonal */
#define NBK_FUNC_NAME(b,s,e) NBK_FUNC_NAME_C(b,s,tab_twin,e)
#endif
#endif
+#ifdef CALC_COUL_EWALD
+#ifndef VDW_CUTOFF_CHECK
+#define NBK_FUNC_NAME(b,s,e) NBK_FUNC_NAME_C(b,s,ewald,e)
+#else
+#define NBK_FUNC_NAME(b,s,e) NBK_FUNC_NAME_C(b,s,ewald_twin,e)
+#endif
+#endif
#ifdef GMX_MM128_HERE
#define NBK_FUNC_NAME_S128_OR_S256(b,e) NBK_FUNC_NAME(b,x86_simd128,e)
#ifdef CALC_ENERGIES
gmx_mm_pr hrc_3_SSE,moh_rc_SSE;
#endif
+
#ifdef CALC_COUL_TAB
/* Coulomb table variables */
gmx_mm_pr invtsp_SSE;
#endif
#ifdef CALC_ENERGIES
gmx_mm_pr mhalfsp_SSE;
- gmx_mm_pr sh_ewald_SSE;
#endif
#endif
+#ifdef CALC_COUL_EWALD
+ gmx_mm_pr beta2_SSE,beta_SSE;
+#endif
+
+#if defined CALC_ENERGIES && (defined CALC_COUL_EWALD || defined CALC_COUL_TAB)
+ gmx_mm_pr sh_ewald_SSE;
+#endif
+
#ifdef LJ_COMB_LB
const real *ljc;
invtsp_SSE = gmx_set1_pr(ic->tabq_scale);
#ifdef CALC_ENERGIES
mhalfsp_SSE = gmx_set1_pr(-0.5/ic->tabq_scale);
-
- sh_ewald_SSE = gmx_set1_pr(ic->sh_ewald);
#endif
#ifdef TAB_FDV0
tab_coul_F = ic->tabq_coul_F;
tab_coul_V = ic->tabq_coul_V;
#endif
+#endif /* CALC_COUL_TAB */
+
+#ifdef CALC_COUL_EWALD
+ beta2_SSE = gmx_set1_pr(ic->ewaldcoeff*ic->ewaldcoeff);
+ beta_SSE = gmx_set1_pr(ic->ewaldcoeff);
+#endif
+
+#if (defined CALC_COUL_TAB || defined CALC_COUL_EWALD) && defined CALC_ENERGIES
+ sh_ewald_SSE = gmx_set1_pr(ic->sh_ewald);
#endif
q = nbat->q;
#else
Vc_sub_self = 0.5*tab_coul_V[0];
#endif
+#endif
+#ifdef CALC_COUL_EWALD
+ /* 0.5*beta*2/sqrt(pi) */
+ Vc_sub_self = 0.5*ic->ewaldcoeff*1.128379167095513;
#endif
for(ia=0; ia<UNROLLI; ia++)
t_nrnb *nrnb,
gmx_wallcycle_t wcycle)
{
- int nnbl, kernel_type, sh_e;
+ int nnbl, kernel_type, enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
char *env;
nonbonded_verlet_group_t *nbvg;
case nbk4xN_X86_SIMD128:
nbnxn_kernel_x86_simd128(&nbvg->nbl_lists,
nbvg->nbat, ic,
+ nbvg->ewald_excl,
fr->shift_vec,
flags,
clearF,
case nbk4xN_X86_SIMD256:
nbnxn_kernel_x86_simd256(&nbvg->nbl_lists,
nbvg->nbat, ic,
+ nbvg->ewald_excl,
fr->shift_vec,
flags,
clearF,
wallcycle_sub_stop(wcycle, ewcsNONBONDED);
}
- /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
- sh_e = ((flags & GMX_FORCE_ENERGY) ? 1 : 0);
- inc_nrnb(nrnb,
- ((EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) ?
- eNR_NBNXN_LJ_RF : eNR_NBNXN_LJ_TAB) + sh_e,
+ if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
+ {
+ enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
+ }
+ else if (nbvg->ewald_excl == ewaldexclTable)
+ {
+ enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
+ }
+ else
+ {
+ enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
+ }
+ enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
+ if (flags & GMX_FORCE_ENERGY)
+ {
+ /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
+ enr_nbnxn_kernel_ljc += 1;
+ enr_nbnxn_kernel_lj += 1;
+ }
+
+ inc_nrnb(nrnb,enr_nbnxn_kernel_ljc,
nbvg->nbl_lists.natpair_ljq);
- inc_nrnb(nrnb,eNR_NBNXN_LJ+sh_e,nbvg->nbl_lists.natpair_lj);
- inc_nrnb(nrnb,
- ((EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) ?
- eNR_NBNXN_RF : eNR_NBNXN_TAB)+sh_e,
+ inc_nrnb(nrnb,enr_nbnxn_kernel_lj,
+ nbvg->nbl_lists.natpair_lj);
+ inc_nrnb(nrnb,enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
nbvg->nbl_lists.natpair_q);
}