Merge release-5-0 into master

[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / nbnxn_kernel_file_generator / nbnxn_kernel_simd_template.c.pre
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre

index 8b72ac72c34c94f8f3d626c779f35caebe989b8f..5e410cdecc4525f96fa7ab74c4a50c848b203861 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre
@@ -1,33 +1,79 @@
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#include "gmxpre.h"
+
+#include "config.h"
  
-#include "typedefs.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_simd.h"
  
  #ifdef {0}
  
-{1}
-#include "gmx_simd_macros.h"
-#include "gmx_simd_vec.h"
+{1}#include "gromacs/simd/vector_operations.h"
+
  {2}
  #define GMX_SIMD_J_UNROLL_SIZE {3}
  #include "{4}"
-#include "../nbnxn_kernel_common.h"
-#include "gmx_omp_nthreads.h"
-#include "types/force_flags.h"
+
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/types/force_flags.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+#include "gromacs/utility/fatalerror.h"
  
  /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
   */
  enum {{
-    coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
+    coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
+}};
+
+/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
+ */
+enum {{
+    vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
  }};
  
-/* Declare and define the kernel function pointer lookup tables. */
-static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which are only supported by plain cut-off, and the LJ switch/PME functions.
+ */
+static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
  {7}
-static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
+static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
  {8}
-static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
+static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
  {9}
  
  static void
@@ -35,7 +81,7 @@ reduce_group_energies(int ng, int ng_2log,
                        const real *VSvdw, const real *VSc,
                        real *Vvdw, real *Vc)
  {{
-    const int unrollj      = GMX_SIMD_WIDTH_HERE/GMX_SIMD_J_UNROLL_SIZE;
+    const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
      const int unrollj_half = unrollj/2;
      int       ng_p2, i, j, j0, j1, c, s;
  
@@ -72,7 +118,7 @@ reduce_group_energies(int ng, int ng_2log,
  
  #else /* {0} */
  
-#include "gmx_fatal.h"
+#include "gromacs/utility/fatalerror.h"
  
  #endif /* {0} */
  
@@ -91,15 +137,16 @@ void
  {{
      int                nnbl;
      nbnxn_pairlist_t **nbl;
-    int                coult;
+    int                coulkt, vdwkt = 0;
      int                nb;
+    int                nthreads gmx_unused;
  
      nnbl = nbl_list->nnbl;
      nbl  = nbl_list->nbl;
  
      if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
      {{
-        coult = coultRF;
+        coulkt = coulktRF;
      }}
      else
      {{
@@ -107,27 +154,65 @@ void
          {{
              if (ic->rcoulomb == ic->rvdw)
              {{
-                coult = coultTAB;
+                coulkt = coulktTAB;
              }}
              else
              {{
-                coult = coultTAB_TWIN;
+                coulkt = coulktTAB_TWIN;
              }}
          }}
          else
          {{
              if (ic->rcoulomb == ic->rvdw)
              {{
-                coult = coultEWALD;
+                coulkt = coulktEWALD;
              }}
              else
              {{
-                coult = coultEWALD_TWIN;
+                coulkt = coulktEWALD_TWIN;
              }}
          }}
      }}
  
-#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+    if (ic->vdwtype == evdwCUT)
+    {{
+        switch (ic->vdw_modifier)
+        {{
+            case eintmodNONE:
+            case eintmodPOTSHIFT:
+                switch (nbat->comb_rule)
+                {{
+                    case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
+                    case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
+                    case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
+                    default:       gmx_incons("Unknown combination rule");
+                }}
+                break;
+            case eintmodFORCESWITCH:
+                vdwkt = vdwktLJFORCESWITCH;
+                break;
+            case eintmodPOTSWITCH:
+                vdwkt = vdwktLJPOTSWITCH;
+                break;
+            default:
+                gmx_incons("Unsupported VdW interaction modifier");
+        }}
+    }}
+    else if (ic->vdwtype == evdwPME)
+    {{
+        if (ic->ljpme_comb_rule == eljpmeLB)
+        {{
+            gmx_incons("The nbnxn SIMD kernels don't suport LJ-PME with LB");
+        }}
+        vdwkt = vdwktLJEWALDCOMBGEOM;
+    }}
+    else
+    {{
+        gmx_incons("Unsupported VdW interaction type");
+    }}
+
+    nthreads = gmx_omp_nthreads_get(emntNonbonded);
+#pragma omp parallel for schedule(static) num_threads(nthreads)
      for (nb = 0; nb < nnbl; nb++)
      {{
          nbnxn_atomdata_output_t *out;
@@ -157,11 +242,11 @@ void
          if (!(force_flags & GMX_FORCE_ENERGY))
          {{
              /* Don't calculate energies */
-            p_nbk_noener[coult][nbat->comb_rule](nbl[nb], nbat,
-                                                 ic,
-                                                 shift_vec,
-                                                 out->f,
-                                                 fshift_p);
+            p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
+                                        ic,
+                                        shift_vec,
+                                        out->f,
+                                        fshift_p);
          }}
          else if (out->nV == 1)
          {{
@@ -169,13 +254,13 @@ void
              out->Vvdw[0] = 0;
              out->Vc[0]   = 0;
  
-            p_nbk_ener[coult][nbat->comb_rule](nbl[nb], nbat,
-                                               ic,
-                                               shift_vec,
-                                               out->f,
-                                               fshift_p,
-                                               out->Vvdw,
-                                               out->Vc);
+            p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
+                                      ic,
+                                      shift_vec,
+                                      out->f,
+                                      fshift_p,
+                                      out->Vvdw,
+                                      out->Vc);
          }}
          else
          {{
@@ -191,13 +276,13 @@ void
                  out->VSc[i] = 0;
              }}
  
-            p_nbk_energrp[coult][nbat->comb_rule](nbl[nb], nbat,
-                                                  ic,
-                                                  shift_vec,
-                                                  out->f,
-                                                  fshift_p,
-                                                  out->VSvdw,
-                                                  out->VSc);
+            p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
+                                         ic,
+                                         shift_vec,
+                                         out->f,
+                                         fshift_p,
+                                         out->VSvdw,
+                                         out->VSc);
  
              reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
                                    out->VSvdw, out->VSc,