Merge release-5-0 into master
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / simd_4xn / nbnxn_kernel_simd_4xn.c
index 5694d56ff7764a11f7c130fbb038990b3242b6b7..3531299efd5feeda4bf72de911292bee47805745 100644 (file)
  * kernel type 4xn.
  */
 
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
 
-#include "typedefs.h"
+#include "config.h"
 
-#ifdef GMX_NBNXN_SIMD_4XN
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_simd.h"
 
-#ifdef GMX_NBNXN_HALF_WIDTH_SIMD
-#define GMX_USE_HALF_WIDTH_SIMD_HERE
-#endif
+#ifdef GMX_NBNXN_SIMD_4XN
 
-#include "gromacs/simd/macros.h"
 #include "gromacs/simd/vector_operations.h"
+
 #if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
 #error "unsupported SIMD width"
 #endif
 
 #define GMX_SIMD_J_UNROLL_SIZE 1
 #include "nbnxn_kernel_simd_4xn.h"
-#include "../nbnxn_kernel_common.h"
-#include "gmx_omp_nthreads.h"
-#include "types/force_flags.h"
-#include "gmx_fatal.h"
+
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/types/force_flags.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+#include "gromacs/utility/fatalerror.h"
 
 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
+ *
+ * The selected value is used as the major (first) index into the kernel
+ * function pointer tables p_nbk_noener, p_nbk_ener and p_nbk_energrp,
+ * so the order of the enumerators has to stay in sync with the order
+ * of the rows in those tables.
+ * coulktNR is not a kernel kind: it is the number of kinds and is used
+ * as the size of the first dimension of the tables.
  */
 enum {
-    coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
+    coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
+};
+
+/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
+ *
+ * The selected value is used as the minor (second) index into the kernel
+ * function pointer tables p_nbk_noener, p_nbk_ener and p_nbk_energrp,
+ * so the order of the enumerators has to stay in sync with the order
+ * of the entries within each row of those tables.
+ * vdwktNR is not a kernel kind: it is the number of kinds and is used
+ * as the size of the second dimension of the tables.
+ */
+enum {
+    vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
 };
 
 /* Declare and define the kernel function pointer lookup tables.
  * The minor index of the array goes over both the LJ combination rules,
- * which is only supported by plain cut-off, and the LJ switch functions.
+ * which are only supported by plain cut-off, and the LJ switch/PME functions.
  */
-static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
+static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
 {
     {
         nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
@@ -80,6 +85,7 @@ static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
         nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
         nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
     },
     {
         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
@@ -87,6 +93,7 @@ static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
         nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
         nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
     },
     {
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
@@ -94,6 +101,7 @@ static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
     },
     {
         nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
@@ -101,6 +109,7 @@ static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
         nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
         nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
     },
     {
         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
@@ -108,10 +117,11 @@ static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
     },
 };
 
-static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
+static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
 {
     {
         nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
@@ -119,6 +129,7 @@ static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
         nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
         nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
     },
     {
         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
@@ -126,6 +137,7 @@ static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
         nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
         nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
     },
     {
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
@@ -133,6 +145,7 @@ static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
     },
     {
         nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
@@ -140,6 +153,7 @@ static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
         nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
         nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
     },
     {
         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
@@ -147,10 +161,11 @@ static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
     },
 };
 
-static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
+static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
 {
     {
         nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
@@ -158,6 +173,7 @@ static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
         nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
         nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
     },
     {
         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
@@ -165,6 +181,7 @@ static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
         nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
         nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
     },
     {
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
@@ -172,6 +189,7 @@ static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
     },
     {
         nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
@@ -179,6 +197,7 @@ static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
         nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
         nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
     },
     {
         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
@@ -186,6 +205,7 @@ static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
     },
 };
 
@@ -232,7 +252,7 @@ reduce_group_energies(int ng, int ng_2log,
 
 #else /* GMX_NBNXN_SIMD_4XN */
 
-#include "gmx_fatal.h"
+#include "gromacs/utility/fatalerror.h"
 
 #endif /* GMX_NBNXN_SIMD_4XN */
 
@@ -251,15 +271,16 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
 {
     int                nnbl;
     nbnxn_pairlist_t **nbl;
-    int                coult, ljtreatment = 0;
+    int                coulkt, vdwkt = 0;
     int                nb;
+    int                nthreads gmx_unused;
 
     nnbl = nbl_list->nnbl;
     nbl  = nbl_list->nbl;
 
     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
     {
-        coult = coultRF;
+        coulkt = coulktRF;
     }
     else
     {
@@ -267,44 +288,65 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
         {
             if (ic->rcoulomb == ic->rvdw)
             {
-                coult = coultTAB;
+                coulkt = coulktTAB;
             }
             else
             {
-                coult = coultTAB_TWIN;
+                coulkt = coulktTAB_TWIN;
             }
         }
         else
         {
             if (ic->rcoulomb == ic->rvdw)
             {
-                coult = coultEWALD;
+                coulkt = coulktEWALD;
             }
             else
             {
-                coult = coultEWALD_TWIN;
+                coulkt = coulktEWALD_TWIN;
             }
         }
     }
 
-    switch (ic->vdw_modifier)
+    if (ic->vdwtype == evdwCUT)
+    {
+        switch (ic->vdw_modifier)
+        {
+            case eintmodNONE:
+            case eintmodPOTSHIFT:
+                switch (nbat->comb_rule)
+                {
+                    case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
+                    case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
+                    case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
+                    default:       gmx_incons("Unknown combination rule");
+                }
+                break;
+            case eintmodFORCESWITCH:
+                vdwkt = vdwktLJFORCESWITCH;
+                break;
+            case eintmodPOTSWITCH:
+                vdwkt = vdwktLJPOTSWITCH;
+                break;
+            default:
+                gmx_incons("Unsupported VdW interaction modifier");
+        }
+    }
+    else if (ic->vdwtype == evdwPME)
+    {
+        if (ic->ljpme_comb_rule == eljpmeLB)
+        {
+            gmx_incons("The nbnxn SIMD kernels don't suport LJ-PME with LB");
+        }
+        vdwkt = vdwktLJEWALDCOMBGEOM;
+    }
+    else
     {
-        case eintmodNONE:
-        case eintmodPOTSHIFT:
-            ljtreatment = nbat->comb_rule;
-            break;
-        /* Switch functions follow after cut-off combination rule kernels */
-        case eintmodFORCESWITCH:
-            ljtreatment = ljcrNR;
-            break;
-        case eintmodPOTSWITCH:
-            ljtreatment = ljcrNR + 1;
-            break;
-        default:
-            gmx_incons("Unsupported VdW interaction modifier");
+        gmx_incons("Unsupported VdW interaction type");
     }
 
-#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+    nthreads = gmx_omp_nthreads_get(emntNonbonded);
+#pragma omp parallel for schedule(static) num_threads(nthreads)
     for (nb = 0; nb < nnbl; nb++)
     {
         nbnxn_atomdata_output_t *out;
@@ -334,11 +376,11 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
         if (!(force_flags & GMX_FORCE_ENERGY))
         {
             /* Don't calculate energies */
-            p_nbk_noener[coult][ljtreatment](nbl[nb], nbat,
-                                             ic,
-                                             shift_vec,
-                                             out->f,
-                                             fshift_p);
+            p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
+                                        ic,
+                                        shift_vec,
+                                        out->f,
+                                        fshift_p);
         }
         else if (out->nV == 1)
         {
@@ -346,13 +388,13 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
             out->Vvdw[0] = 0;
             out->Vc[0]   = 0;
 
-            p_nbk_ener[coult][ljtreatment](nbl[nb], nbat,
-                                           ic,
-                                           shift_vec,
-                                           out->f,
-                                           fshift_p,
-                                           out->Vvdw,
-                                           out->Vc);
+            p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
+                                      ic,
+                                      shift_vec,
+                                      out->f,
+                                      fshift_p,
+                                      out->Vvdw,
+                                      out->Vc);
         }
         else
         {
@@ -368,13 +410,13 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
                 out->VSc[i] = 0;
             }
 
-            p_nbk_energrp[coult][ljtreatment](nbl[nb], nbat,
-                                              ic,
-                                              shift_vec,
-                                              out->f,
-                                              fshift_p,
-                                              out->VSvdw,
-                                              out->VSc);
+            p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
+                                         ic,
+                                         shift_vec,
+                                         out->f,
+                                         fshift_p,
+                                         out->VSvdw,
+                                         out->VSc);
 
             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
                                   out->VSvdw, out->VSc,