+#if defined CALC_ENERGIES && defined CHECK_EXCLS
+ /* The potential shift should be removed for excluded pairs */
+ VLJ_S0 = gmx_simd_blendzero_r(VLJ_S0, interact_S0);
+ VLJ_S1 = gmx_simd_blendzero_r(VLJ_S1, interact_S1);
+#ifndef HALF_LJ
+ VLJ_S2 = gmx_simd_blendzero_r(VLJ_S2, interact_S2);
+ VLJ_S3 = gmx_simd_blendzero_r(VLJ_S3, interact_S3);
+#endif
+#endif
+
+#ifdef LJ_EWALD_GEOM
+ {
+ gmx_simd_real_t c6s_j_S;
+ gmx_simd_real_t c6grid_S0, rinvsix_nm_S0, cr2_S0, expmcr2_S0, poly_S0;
+ gmx_simd_real_t c6grid_S1, rinvsix_nm_S1, cr2_S1, expmcr2_S1, poly_S1;
+#ifndef HALF_LJ
+ gmx_simd_real_t c6grid_S2, rinvsix_nm_S2, cr2_S2, expmcr2_S2, poly_S2;
+ gmx_simd_real_t c6grid_S3, rinvsix_nm_S3, cr2_S3, expmcr2_S3, poly_S3;
+#endif
+#ifdef CALC_ENERGIES
+ gmx_simd_real_t sh_mask_S0;
+ gmx_simd_real_t sh_mask_S1;
+#ifndef HALF_LJ
+ gmx_simd_real_t sh_mask_S2;
+ gmx_simd_real_t sh_mask_S3;
+#endif
+#endif
+
+ /* Determine C6 for the grid using the geometric combination rule */
+ c6s_j_S = gmx_simd_load_r(ljc+aj2+0);
+ c6grid_S0 = gmx_simd_mul_r(c6s_S0, c6s_j_S);
+ c6grid_S1 = gmx_simd_mul_r(c6s_S1, c6s_j_S);
+#ifndef HALF_LJ
+ c6grid_S2 = gmx_simd_mul_r(c6s_S2, c6s_j_S);
+ c6grid_S3 = gmx_simd_mul_r(c6s_S3, c6s_j_S);
+#endif
+
+#ifdef CHECK_EXCLS
+ /* Recalculate rinvsix without exclusion mask (compiler might optimize) */
+ rinvsix_nm_S0 = gmx_simd_mul_r(rinvsq_S0, gmx_simd_mul_r(rinvsq_S0, rinvsq_S0));
+ rinvsix_nm_S1 = gmx_simd_mul_r(rinvsq_S1, gmx_simd_mul_r(rinvsq_S1, rinvsq_S1));
+#ifndef HALF_LJ
+ rinvsix_nm_S2 = gmx_simd_mul_r(rinvsq_S2, gmx_simd_mul_r(rinvsq_S2, rinvsq_S2));
+ rinvsix_nm_S3 = gmx_simd_mul_r(rinvsq_S3, gmx_simd_mul_r(rinvsq_S3, rinvsq_S3));
+#endif
+#else
+ /* We didn't use a mask, so we can copy */
+ rinvsix_nm_S0 = rinvsix_S0;
+ rinvsix_nm_S1 = rinvsix_S1;
+#ifndef HALF_LJ
+ rinvsix_nm_S2 = rinvsix_S2;
+ rinvsix_nm_S3 = rinvsix_S3;
+#endif
+#endif
+
+ cr2_S0 = gmx_simd_mul_r(lje_c2_S, rsq_S0);
+ cr2_S1 = gmx_simd_mul_r(lje_c2_S, rsq_S1);
+#ifndef HALF_LJ
+ cr2_S2 = gmx_simd_mul_r(lje_c2_S, rsq_S2);
+ cr2_S3 = gmx_simd_mul_r(lje_c2_S, rsq_S3);
+#endif
+ expmcr2_S0 = gmx_simd_exp_r(gmx_simd_mul_r(mone_S, cr2_S0));
+ expmcr2_S1 = gmx_simd_exp_r(gmx_simd_mul_r(mone_S, cr2_S1));
+#ifndef HALF_LJ
+ expmcr2_S2 = gmx_simd_exp_r(gmx_simd_mul_r(mone_S, cr2_S2));
+ expmcr2_S3 = gmx_simd_exp_r(gmx_simd_mul_r(mone_S, cr2_S3));
+#endif
+
+ /* 1 + cr2 + 1/2*cr2^2 */
+ poly_S0 = gmx_simd_fmadd_r(gmx_simd_fmadd_r(half_S, cr2_S0, one_S), cr2_S0, one_S);
+ poly_S1 = gmx_simd_fmadd_r(gmx_simd_fmadd_r(half_S, cr2_S1, one_S), cr2_S1, one_S);
+#ifndef HALF_LJ
+ poly_S2 = gmx_simd_fmadd_r(gmx_simd_fmadd_r(half_S, cr2_S2, one_S), cr2_S2, one_S);
+ poly_S3 = gmx_simd_fmadd_r(gmx_simd_fmadd_r(half_S, cr2_S3, one_S), cr2_S3, one_S);
+#endif
+
+ /* We calculate LJ F*r = (6*C6)*(r^-6 - F_mesh/6), we use:
+ * r^-6*cexp*(1 + cr2 + cr2^2/2 + cr2^3/6) = cexp*(r^-6*poly + c^6/6)
+ */
+ frLJ_S0 = gmx_simd_fmadd_r(c6grid_S0, gmx_simd_fnmadd_r(expmcr2_S0, gmx_simd_fmadd_r(rinvsix_nm_S0, poly_S0, lje_c6_6_S), rinvsix_nm_S0), frLJ_S0);
+ frLJ_S1 = gmx_simd_fmadd_r(c6grid_S1, gmx_simd_fnmadd_r(expmcr2_S1, gmx_simd_fmadd_r(rinvsix_nm_S1, poly_S1, lje_c6_6_S), rinvsix_nm_S1), frLJ_S1);
+#ifndef HALF_LJ
+ frLJ_S2 = gmx_simd_fmadd_r(c6grid_S2, gmx_simd_fnmadd_r(expmcr2_S2, gmx_simd_fmadd_r(rinvsix_nm_S2, poly_S2, lje_c6_6_S), rinvsix_nm_S2), frLJ_S2);
+ frLJ_S3 = gmx_simd_fmadd_r(c6grid_S3, gmx_simd_fnmadd_r(expmcr2_S3, gmx_simd_fmadd_r(rinvsix_nm_S3, poly_S3, lje_c6_6_S), rinvsix_nm_S3), frLJ_S3);
+#endif
+
+#ifdef CALC_ENERGIES
+#ifdef CHECK_EXCLS
+ sh_mask_S0 = gmx_simd_blendzero_r(lje_vc_S, interact_S0);
+ sh_mask_S1 = gmx_simd_blendzero_r(lje_vc_S, interact_S1);
+#ifndef HALF_LJ
+ sh_mask_S2 = gmx_simd_blendzero_r(lje_vc_S, interact_S2);
+ sh_mask_S3 = gmx_simd_blendzero_r(lje_vc_S, interact_S3);
+#endif
+#else
+ sh_mask_S0 = lje_vc_S;
+ sh_mask_S1 = lje_vc_S;
+#ifndef HALF_LJ
+ sh_mask_S2 = lje_vc_S;
+ sh_mask_S3 = lje_vc_S;
+#endif
+#endif
+
+ VLJ_S0 = gmx_simd_fmadd_r(gmx_simd_mul_r(sixth_S, c6grid_S0), gmx_simd_fmadd_r(rinvsix_nm_S0, gmx_simd_fnmadd_r(expmcr2_S0, poly_S0, one_S), sh_mask_S0), VLJ_S0);
+ VLJ_S1 = gmx_simd_fmadd_r(gmx_simd_mul_r(sixth_S, c6grid_S1), gmx_simd_fmadd_r(rinvsix_nm_S1, gmx_simd_fnmadd_r(expmcr2_S1, poly_S1, one_S), sh_mask_S1), VLJ_S1);
+#ifndef HALF_LJ
+ VLJ_S2 = gmx_simd_fmadd_r(gmx_simd_mul_r(sixth_S, c6grid_S2), gmx_simd_fmadd_r(rinvsix_nm_S2, gmx_simd_fnmadd_r(expmcr2_S2, poly_S2, one_S), sh_mask_S2), VLJ_S2);
+ VLJ_S3 = gmx_simd_fmadd_r(gmx_simd_mul_r(sixth_S, c6grid_S3), gmx_simd_fmadd_r(rinvsix_nm_S3, gmx_simd_fnmadd_r(expmcr2_S3, poly_S3, one_S), sh_mask_S3), VLJ_S3);
+#endif
+#endif /* CALC_ENERGIES */
+ }
+#endif /* LJ_EWALD_GEOM */
+