Unify nbnxn kernel dispatchers
authorBerk Hess <hess@kth.se>
Thu, 20 Apr 2017 19:04:11 +0000 (21:04 +0200)
committerBerk Hess <hess@kth.se>
Tue, 4 Jul 2017 14:02:56 +0000 (16:02 +0200)
Reduced code duplication by merging the three nbnxn kernel dispatch
functions for C reference, simd_4xn and simd_2xnn into a single
dispatcher. This also removes the implementation details from
sim_util.cpp.

More reorganization could be done, but is not included here to
minimize the size of this change.

The original goal of this change was to conditionally compile all
files in the simd_4xn and simd_2xnn directory, but currently we
do not have the conditions available at configure time.

Change-Id: I41f7ae96d267a12cf6024866b59bb0cda7e1cd2f

16 files changed:
docs/doxygen/suppressions.txt
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.cpp [new file with mode: 0644]
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.h [new file with mode: 0644]
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/make_verlet_simd_kernel_files.py
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.cpp.pre [deleted file]
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.h.pre
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.cpp
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp [deleted file]
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp [deleted file]
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h
src/gromacs/mdlib/sim_util.cpp

index 244613c42af473afaade866da452a7fa32576f3c..7ca9ce61fd371de8f21c70a09ffaf56dbd1b228a 100644 (file)
@@ -40,10 +40,8 @@ src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: includes "config.h" unneces
 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h: warning: should include "config.h"
 src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp: warning: includes "config.h" unnecessarily
 src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.cpp: warning: includes "config.h" unnecessarily
-src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp: warning: includes "config.h" unnecessarily
 src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "config.h"
 src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "nbnxn_simd.h"
-src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp: warning: includes "config.h" unnecessarily
 src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h: warning: should include "nbnxn_simd.h"
 
 # Temporary while we change the SIMD implementation
index 422cd8d5a8597f03c1da7ab35230c5e4c4014270..4edc4c2495684c1c87166ddafba07127f522b53d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * the research papers on the package. Check out http://www.gromacs.org.
  */
 
+/*! \internal \file
+ *
+ * \brief
+ * Declares the nbnxn pair interaction kernel function types and kind counts, also declares utility functions used in nbnxn_kernel.cpp.
+ *
+ * \author Berk Hess <hess@kth.se>
+ */
+
 #ifndef _nbnxn_kernel_common_h
 #define _nbnxn_kernel_common_h
 
 #include "gromacs/math/vectypes.h"
+/* nbnxn_atomdata_t and nbnxn_pairlist_t could be forward declared, but that requires modifications in all SIMD kernel files */
+#include "gromacs/mdlib/nbnxn_atomdata.h"
 #include "gromacs/mdlib/nbnxn_pairlist.h"
 #include "gromacs/utility/real.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
+struct interaction_const_t;
 
-/* Clear the force buffer f. Either the whole buffer or only the parts
- * used by the current thread when nbat->bUseBufferFlags is set.
+/*! \brief Pair-interaction kernel type that also calculates energies.
+ */
+typedef void (nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
+                             const nbnxn_atomdata_t     *nbat,
+                             const interaction_const_t  *ic,
+                             rvec                       *shift_vec,
+                             real                       *f,
+                             real                       *fshift,
+                             real                       *Vvdw,
+                             real                       *Vc);
+
+/*! \brief Pointer to \p nbk_func_ener.
+ */
+typedef nbk_func_ener *p_nbk_func_ener;
+
+/*! \brief Pair-interaction kernel type that does not calculates energies.
+ */
+typedef void (nbk_func_noener)(const nbnxn_pairlist_t     *nbl,
+                               const nbnxn_atomdata_t     *nbat,
+                               const interaction_const_t  *ic,
+                               rvec                       *shift_vec,
+                               real                       *f,
+                               real                       *fshift);
+
+/*! \brief Pointer to \p nbk_func_noener.
+ */
+typedef nbk_func_noener *p_nbk_func_noener;
+
+/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
+ */
+enum {
+    coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
+};
+
+/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
+ *
+ * The \p LJCUT_COMB refers to the LJ combination rule for the short range.
+ * The \p EWALDCOMB refers to the combination rule for the grid part.
+ * \p vdwktNR is the number of VdW treatments for the SIMD kernels.
+ * \p vdwktNR_ref is the number of VdW treatments for the C reference kernels.
+ * These two numbers differ, because currently only the reference kernels
+ * support LB combination rules for the LJ-Ewald grid part.
+ */
+enum {
+    vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktLJEWALDCOMBLB, vdwktNR = vdwktLJEWALDCOMBLB, vdwktNR_ref
+};
+
+/*! \brief Clears the force buffer.
+ *
+ * Either the whole buffer is cleared or only the parts used
+ * by the current thread when nbat->bUseBufferFlags is set.
  * In the latter case output_index is the task/thread list/buffer index.
  */
 void
 clear_f(const nbnxn_atomdata_t *nbat, int output_index, real *f);
 
-/* Clear the shift forces */
+/*! \brief Clears the shift forces.
+ */
 void
 clear_fshift(real *fshift);
 
-/* Reduce the collected energy terms over the pair-lists/threads */
+/*! \brief Reduces the collected energy terms over the pair-lists/threads.
+ */
 void
 reduce_energies_over_lists(const nbnxn_atomdata_t     *nbat,
                            int                         nlist,
                            real                       *Vvdw,
                            real                       *Vc);
 
-#if 0
-{
-#endif
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.cpp b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.cpp
new file mode 100644 (file)
index 0000000..4cdd857
--- /dev/null
@@ -0,0 +1,409 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#include "gmxpre.h"
+
+#include "nbnxn_kernel_cpu.h"
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/mdlib/force_flags.h"
+#include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_simd.h"
+#include "gromacs/mdtypes/interaction_const.h"
+#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/simd/simd.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/real.h"
+
+#include "nbnxn_kernel_common.h"
+#define INCLUDE_KERNELFUNCTION_TABLES
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
+#ifdef GMX_NBNXN_SIMD_2XNN
+#include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
+#endif
+#ifdef GMX_NBNXN_SIMD_4XN
+#include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
+#endif
+#undef INCLUDE_FUNCTION_TABLES
+
+/*! \brief Clears the energy group output buffers
+ *
+ * \param[in,out] out  nbnxn kernel output struct
+ */
+static void clearGroupEnergies(nbnxn_atomdata_output_t *out)
+{
+    for (int i = 0; i < out->nV; i++)
+    {
+        out->Vvdw[i] = 0;
+        out->Vc[i]   = 0;
+    }
+
+    for (int i = 0; i < out->nVS; i++)
+    {
+        out->VSvdw[i] = 0;
+    }
+    for (int i = 0; i < out->nVS; i++)
+    {
+        out->VSc[i] = 0;
+    }
+}
+
+/*! \brief Reduce the group-pair energy buffers produced by a SIMD kernel
+ * to single terms in the output buffers.
+ *
+ * The SIMD kernels produce a large number of energy buffer in SIMD registers
+ * to avoid scattered reads and writes.
+ *
+ * \tparam        unrollj         The unroll size for j-particles in the SIMD kernel
+ * \param[in]     numGroups       The number of energy groups
+ * \param[in]     numGroups_2log  Log2 of numGroups, rounded up
+ * \param[in]     vVdwSimd        SIMD Van der Waals energy buffers
+ * \param[in]     vCoulombSimd    SIMD Coulomb energy buffers
+ * \param[in,out] vVdw            Van der Waals energy output buffer
+ * \param[in,out] vCoulomb        Coulomb energy output buffer
+ */
+template <int unrollj> static void
+reduceGroupEnergySimdBuffers(int numGroups, int numGroups_2log,
+                             const real *vVdwSimd, const real *vCoulombSimd,
+                             real * gmx_restrict vVdw,
+                             real * gmx_restrict vCoulomb)
+{
+    // cppcheck-suppress duplicateExpression
+    const int unrollj_half     = unrollj/2;
+    /* Energies are stored in SIMD registers with size 2^numGroups_2log */
+    const int numGroupsStorage = (1 << numGroups_2log);
+
+    /* The size of the SIMD energy group buffer array is:
+     * numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
+     */
+    for (int i = 0; i < numGroups; i++)
+    {
+        for (int j1 = 0; j1 < numGroups; j1++)
+        {
+            for (int j0 = 0; j0 < numGroups; j0++)
+            {
+                int c = ((i*numGroups + j1)*numGroupsStorage + j0)*unrollj_half*unrollj;
+                for (int s = 0; s < unrollj_half; s++)
+                {
+                    vVdw    [i*numGroups + j0] += vVdwSimd    [c + 0];
+                    vVdw    [i*numGroups + j1] += vVdwSimd    [c + 1];
+                    vCoulomb[i*numGroups + j0] += vCoulombSimd[c + 0];
+                    vCoulomb[i*numGroups + j1] += vCoulombSimd[c + 1];
+                    c                          += unrollj + 2;
+                }
+            }
+        }
+    }
+}
+
+void
+nbnxn_kernel_cpu(nonbonded_verlet_group_t  *nbvg,
+                 const interaction_const_t *ic,
+                 rvec                      *shiftVectors,
+                 int                        forceFlags,
+                 int                        clearF,
+                 real                      *fshift,
+                 real                      *vCoulomb,
+                 real                      *vVdw)
+{
+    const nbnxn_atomdata_t  *nbat = nbvg->nbat;
+
+    int                      coulkt;
+    if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
+    {
+        coulkt = coulktRF;
+    }
+    else
+    {
+        if (nbvg->ewald_excl == ewaldexclTable)
+        {
+            if (ic->rcoulomb == ic->rvdw)
+            {
+                coulkt = coulktTAB;
+            }
+            else
+            {
+                coulkt = coulktTAB_TWIN;
+            }
+        }
+        else
+        {
+            if (ic->rcoulomb == ic->rvdw)
+            {
+                coulkt = coulktEWALD;
+            }
+            else
+            {
+                coulkt = coulktEWALD_TWIN;
+            }
+        }
+    }
+
+    int vdwkt = 0;
+    if (ic->vdwtype == evdwCUT)
+    {
+        switch (ic->vdw_modifier)
+        {
+            case eintmodNONE:
+            case eintmodPOTSHIFT:
+                switch (nbat->comb_rule)
+                {
+                    case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
+                    case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
+                    case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
+                    default:
+                        GMX_RELEASE_ASSERT(false, "Unknown combination rule");
+                }
+                break;
+            case eintmodFORCESWITCH:
+                vdwkt = vdwktLJFORCESWITCH;
+                break;
+            case eintmodPOTSWITCH:
+                vdwkt = vdwktLJPOTSWITCH;
+                break;
+            default:
+                GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction modifier");
+        }
+    }
+    else if (ic->vdwtype == evdwPME)
+    {
+        if (ic->ljpme_comb_rule == eljpmeGEOM)
+        {
+            vdwkt = vdwktLJEWALDCOMBGEOM;
+        }
+        else
+        {
+            vdwkt = vdwktLJEWALDCOMBLB;
+            /* At setup we (should have) selected the C reference kernel */
+            GMX_RELEASE_ASSERT(nbvg->kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
+        }
+    }
+    else
+    {
+        GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction type");
+    }
+
+    int                nnbl = nbvg->nbl_lists.nnbl;
+    nbnxn_pairlist_t **nbl  = nbvg->nbl_lists.nbl;
+
+    // cppcheck-suppress unreadVariable
+    int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
+#pragma omp parallel for schedule(static) num_threads(nthreads)
+    for (int nb = 0; nb < nnbl; nb++)
+    {
+        // Presently, the kernels do not call C++ code that can throw,
+        // so no need for a try/catch pair in this OpenMP region.
+        nbnxn_atomdata_output_t *out = &nbat->out[nb];
+
+        if (clearF == enbvClearFYes)
+        {
+            clear_f(nbat, nb, out->f);
+        }
+
+        real *fshift_p;
+        if ((forceFlags & GMX_FORCE_VIRIAL) && nnbl == 1)
+        {
+            fshift_p = fshift;
+        }
+        else
+        {
+            fshift_p = out->fshift;
+
+            if (clearF == enbvClearFYes)
+            {
+                clear_fshift(fshift_p);
+            }
+        }
+
+        if (!(forceFlags & GMX_FORCE_ENERGY))
+        {
+            /* Don't calculate energies */
+            switch (nbvg->kernel_type)
+            {
+                case nbnxnk4x4_PlainC:
+                    nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
+                                                           ic,
+                                                           shiftVectors,
+                                                           out->f,
+                                                           fshift_p);
+                    break;
+#ifdef GMX_NBNXN_SIMD_2XNN
+                case nbnxnk4xN_SIMD_2xNN:
+                    nbnxn_kernel_noener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
+                                                                 ic,
+                                                                 shiftVectors,
+                                                                 out->f,
+                                                                 fshift_p);
+                    break;
+#endif
+#ifdef GMX_NBNXN_SIMD_4XN
+                case nbnxnk4xN_SIMD_4xN:
+                    nbnxn_kernel_noener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
+                                                                ic,
+                                                                shiftVectors,
+                                                                out->f,
+                                                                fshift_p);
+                    break;
+#endif
+                default:
+                    GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+            }
+        }
+        else if (out->nV == 1)
+        {
+            /* A single energy group (pair) */
+            out->Vvdw[0] = 0;
+            out->Vc[0]   = 0;
+
+            switch (nbvg->kernel_type)
+            {
+                case nbnxnk4x4_PlainC:
+                    nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
+                                                         ic,
+                                                         shiftVectors,
+                                                         out->f,
+                                                         fshift_p,
+                                                         out->Vvdw,
+                                                         out->Vc);
+                    break;
+#ifdef GMX_NBNXN_SIMD_2XNN
+                case nbnxnk4xN_SIMD_2xNN:
+                    nbnxn_kernel_ener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
+                                                               ic,
+                                                               shiftVectors,
+                                                               out->f,
+                                                               fshift_p,
+                                                               out->Vvdw,
+                                                               out->Vc);
+                    break;
+#endif
+#ifdef GMX_NBNXN_SIMD_4XN
+                case nbnxnk4xN_SIMD_4xN:
+                    nbnxn_kernel_ener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
+                                                              ic,
+                                                              shiftVectors,
+                                                              out->f,
+                                                              fshift_p,
+                                                              out->Vvdw,
+                                                              out->Vc);
+                    break;
+#endif
+                default:
+                    GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+            }
+        }
+        else
+        {
+            /* Calculate energy group contributions */
+            clearGroupEnergies(out);
+
+            int unrollj = 0;
+
+            switch (nbvg->kernel_type)
+            {
+                case nbnxnk4x4_PlainC:
+                    unrollj = NBNXN_CPU_CLUSTER_I_SIZE;
+                    nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
+                                                            ic,
+                                                            shiftVectors,
+                                                            out->f,
+                                                            fshift_p,
+                                                            out->Vvdw,
+                                                            out->Vc);
+                    break;
+#ifdef GMX_NBNXN_SIMD_2XNN
+                case nbnxnk4xN_SIMD_2xNN:
+                    unrollj = GMX_SIMD_REAL_WIDTH/2;
+                    nbnxn_kernel_energrp_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
+                                                                  ic,
+                                                                  shiftVectors,
+                                                                  out->f,
+                                                                  fshift_p,
+                                                                  out->VSvdw,
+                                                                  out->VSc);
+                    break;
+#endif
+#ifdef GMX_NBNXN_SIMD_4XN
+                case nbnxnk4xN_SIMD_4xN:
+                    unrollj = GMX_SIMD_REAL_WIDTH;
+                    nbnxn_kernel_energrp_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
+                                                                 ic,
+                                                                 shiftVectors,
+                                                                 out->f,
+                                                                 fshift_p,
+                                                                 out->VSvdw,
+                                                                 out->VSc);
+                    break;
+#endif
+                default:
+                    GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+            }
+
+            if (nbvg->kernel_type != nbnxnk4x4_PlainC)
+            {
+                switch (unrollj)
+                {
+                    case 2:
+                        reduceGroupEnergySimdBuffers<2>(nbat->nenergrp,
+                                                        nbat->neg_2log,
+                                                        out->VSvdw, out->VSc,
+                                                        out->Vvdw, out->Vc);
+                        break;
+                    case 4:
+                        reduceGroupEnergySimdBuffers<4>(nbat->nenergrp,
+                                                        nbat->neg_2log,
+                                                        out->VSvdw, out->VSc,
+                                                        out->Vvdw, out->Vc);
+                        break;
+                    case 8:
+                        reduceGroupEnergySimdBuffers<8>(nbat->nenergrp,
+                                                        nbat->neg_2log,
+                                                        out->VSvdw, out->VSc,
+                                                        out->Vvdw, out->Vc);
+                        break;
+                    default:
+                        GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
+                }
+            }
+        }
+    }
+
+    if (forceFlags & GMX_FORCE_ENERGY)
+    {
+        reduce_energies_over_lists(nbat, nnbl, vVdw, vCoulomb);
+    }
+}
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.h
new file mode 100644 (file)
index 0000000..43bcbb7
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ *
+ * \brief
+ * Declares the nbnxn pair interaction kernel dispatcher.
+ *
+ * \author Berk Hess <hess@kth.se>
+ */
+
+#ifndef _nbnxn_kernel_cpu_h
+#define _nbnxn_kernel_cpu_h
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/real.h"
+
+struct interaction_const_t;
+struct nonbonded_verlet_group_t;
+
+/*! \brief Dispatches the non-bonded N versus M atom cluster CPU kernels.
+ *
+ * OpenMP parallelization is performed within this function.
+ * Energy reduction, but not force and shift force reduction, is performed
+ * within this function.
+ *
+ * \param[in,out] nbvg          The group (local/non-local) to compute interaction for
+ * \param[in]     ic            Non-bonded interaction constants
+ * \param[in]     shiftVectors  The PBC shift vectors
+ * \param[in]     forceFlags    Flags that tell what to compute
+ * \param[in]     clearF        Enum that tells if to clear the force output buffer
+ * \param[out]    fshift        Shift force output buffer
+ * \param[out]    vCoulomb      Output buffer for Coulomb energies
+ * \param[out]    vVdw          Output buffer for Van der Waals energies
+ */
+void
+nbnxn_kernel_cpu(nonbonded_verlet_group_t  *nbvg,
+                 const interaction_const_t *ic,
+                 rvec                      *shiftVectors,
+                 int                        forceFlags,
+                 int                        clearF,
+                 real                      *fshift,
+                 real                      *vCoulomb,
+                 real                      *vVdw);
+
+#endif
index 0b541e752d20174f60e5fcac29b69464d81d1038..333018a3febddec36c24e4a6164b4123071780e1 100755 (executable)
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
+# Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
 # are:
 #
 #   A single header file that declares all the kernel functions for
-#   this nbnxn kernel structure type, including the function that does
-#   the dispatch via the function pointer table.
-#
-#   A single C kernel dispatcher file that defines the function that
-#   decides at run time which kernel to call.
+#   this nbnxn kernel structure type and a function pointer table.
 #
 #   Many C kernel files, each defining a single kernel function. These
 #   functions can take a noticeable time to compile, and should tend
@@ -151,13 +147,11 @@ VerletKernelTypeDict = {
     },
 }
 
-KernelDispatcherTemplate = read_kernel_template("nbnxn_kernel_simd_template.cpp.pre")
 KernelsHeaderTemplate = read_kernel_template("nbnxn_kernel_simd_template.h.pre")
 
-# For each Verlet kernel type, write three kinds of files:
-#   a header file defining the functions for all the kernels,
-#   a code file containing the kernel function lookup table and
-#     the kernel dispatcher function
+# For each Verlet kernel type, write two kinds of files:
+#   a header file defining the functions for all the kernels and
+#     the kernel function lookup table
 #   for each kernel, a file defining the single C function for that kernel
 for type in VerletKernelTypeDict:
     DirName = "../simd_{0}".format(type)
@@ -210,26 +204,11 @@ for type in VerletKernelTypeDict:
     with open('{0}/{1}'.format(DirName,KernelsHeaderFileName),'w') as fp:
         fp.write(FileHeader.format(type))
         fp.write(KernelsHeaderTemplate
-                 .format(KernelsName,
-                         " " * (len(KernelsName) + 1),
-                         KernelDeclarations))
-
-    # Write the file defining the kernel dispatcher
-    # function for this type
-    with open('{0}/{1}'.format(DirName,"{0}.cpp".format(KernelsName,type)),'w') as fp:
-        fp.write(FileHeader.format(type))
-        fp.write(KernelDispatcherTemplate
-                 .format(VerletKernelTypeDict[type]['Define'],
-                         VerletKernelTypeDict[type]['WidthSetup'],
-                         VerletKernelTypeDict[type]['WidthCheck'],
-                         VerletKernelTypeDict[type]['UnrollSize'],
-                         KernelsHeaderFileName,
-                         KernelsName,
-                         ' ' * (len(KernelsName)+1),
+                 .format(KernelDeclarations,
+                         type,
                          KernelFunctionLookupTable['F'],
                          KernelFunctionLookupTable['VF'],
-                         KernelFunctionLookupTable['VgrpF'],
-                     )
+                         KernelFunctionLookupTable['VgrpF'])
              )
 
 sys.exit()
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.cpp.pre b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.cpp.pre
deleted file mode 100644 (file)
index 7c412dc..0000000
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_simd.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/mdtypes/md_enums.h"
-
-#ifdef {0}
-
-{1}#include "gromacs/simd/vector_operations.h"
-
-{2}
-#define GMX_SIMD_J_UNROLL_SIZE {3}
-#include "{4}"
-
-#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
-#include "gromacs/simd/simd.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/real.h"
-
-/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
- */
-enum {{
-    coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
-}};
-
-/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
- */
-enum {{
-    vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
-}};
-
-/* Declare and define the kernel function pointer lookup tables.
- * The minor index of the array goes over both the LJ combination rules,
- * which is only supported by plain cut-off, and the LJ switch/PME functions.
- */
-static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
-{7}
-static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
-{8}
-static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
-{9}
-
-static void
-reduce_group_energies(int ng, int ng_2log,
-                      const real *VSvdw, const real *VSc,
-                      real *Vvdw, real *Vc)
-{{
-    const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
-    const int unrollj_half = unrollj/2;
-    int       ng_p2, i, j, j0, j1, c, s;
-
-    ng_p2 = (1<<ng_2log);
-
-    /* The size of the x86 SIMD energy group buffer array is:
-     * ng*ng*ng_p2*unrollj_half*simd_width
-     */
-    for (i = 0; i < ng; i++)
-    {{
-        for (j = 0; j < ng; j++)
-        {{
-            Vvdw[i*ng+j] = 0;
-            Vc[i*ng+j]   = 0;
-        }}
-
-        for (j1 = 0; j1 < ng; j1++)
-        {{
-            for (j0 = 0; j0 < ng; j0++)
-            {{
-                c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
-                for (s = 0; s < unrollj_half; s++)
-                {{
-                    Vvdw[i*ng+j0] += VSvdw[c+0];
-                    Vvdw[i*ng+j1] += VSvdw[c+1];
-                    Vc  [i*ng+j0] += VSc  [c+0];
-                    Vc  [i*ng+j1] += VSc  [c+1];
-                    c             += unrollj + 2;
-                }}
-            }}
-        }}
-    }}
-}}
-
-#else /* {0} */
-
-#include "gromacs/utility/fatalerror.h"
-
-#endif /* {0} */
-
-void
-{5}(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
-{6}const nbnxn_atomdata_t    gmx_unused *nbat,
-{6}const interaction_const_t gmx_unused *ic,
-{6}int                       gmx_unused  ewald_excl,
-{6}rvec                      gmx_unused *shift_vec,
-{6}int                       gmx_unused  force_flags,
-{6}int                       gmx_unused  clearF,
-{6}real                      gmx_unused *fshift,
-{6}real                      gmx_unused *Vc,
-{6}real                      gmx_unused *Vvdw)
-#ifdef {0}
-{{
-    int                nnbl;
-    nbnxn_pairlist_t **nbl;
-    int                coulkt, vdwkt = 0;
-    int                nb, nthreads;
-
-    nnbl = nbl_list->nnbl;
-    nbl  = nbl_list->nbl;
-
-    if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
-    {{
-        coulkt = coulktRF;
-    }}
-    else
-    {{
-        if (ewald_excl == ewaldexclTable)
-        {{
-            if (ic->rcoulomb == ic->rvdw)
-            {{
-                coulkt = coulktTAB;
-            }}
-            else
-            {{
-                coulkt = coulktTAB_TWIN;
-            }}
-        }}
-        else
-        {{
-            if (ic->rcoulomb == ic->rvdw)
-            {{
-                coulkt = coulktEWALD;
-            }}
-            else
-            {{
-                coulkt = coulktEWALD_TWIN;
-            }}
-        }}
-    }}
-
-    if (ic->vdwtype == evdwCUT)
-    {{
-        switch (ic->vdw_modifier)
-        {{
-            case eintmodNONE:
-            case eintmodPOTSHIFT:
-                switch (nbat->comb_rule)
-                {{
-                    case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
-                    case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
-                    case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
-                    default:       gmx_incons("Unknown combination rule");
-                }}
-                break;
-            case eintmodFORCESWITCH:
-                vdwkt = vdwktLJFORCESWITCH;
-                break;
-            case eintmodPOTSWITCH:
-                vdwkt = vdwktLJPOTSWITCH;
-                break;
-            default:
-                gmx_incons("Unsupported VdW interaction modifier");
-        }}
-    }}
-    else if (ic->vdwtype == evdwPME)
-    {{
-        if (ic->ljpme_comb_rule == eljpmeLB)
-        {{
-            gmx_incons("The nbnxn SIMD kernels don't support LJ-PME with LB");
-        }}
-        vdwkt = vdwktLJEWALDCOMBGEOM;
-    }}
-    else
-    {{
-        gmx_incons("Unsupported VdW interaction type");
-    }}
-    // cppcheck-suppress unreadVariable
-    nthreads = gmx_omp_nthreads_get(emntNonbonded);
-#pragma omp parallel for schedule(static) num_threads(nthreads)
-    for (nb = 0; nb < nnbl; nb++)
-    {{
-        // Presently, the kernels do not call C++ code that can throw, so
-        // no need for a try/catch pair in this OpenMP region.
-        nbnxn_atomdata_output_t *out;
-        real                    *fshift_p;
-
-        out = &nbat->out[nb];
-
-        if (clearF == enbvClearFYes)
-        {{
-            clear_f(nbat, nb, out->f);
-        }}
-
-        if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
-        {{
-            fshift_p = fshift;
-        }}
-        else
-        {{
-            fshift_p = out->fshift;
-
-            if (clearF == enbvClearFYes)
-            {{
-                clear_fshift(fshift_p);
-            }}
-        }}
-
-        if (!(force_flags & GMX_FORCE_ENERGY))
-        {{
-            /* Don't calculate energies */
-            p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
-                                        ic,
-                                        shift_vec,
-                                        out->f,
-                                        fshift_p);
-        }}
-        else if (out->nV == 1)
-        {{
-            /* No energy groups */
-            out->Vvdw[0] = 0;
-            out->Vc[0]   = 0;
-
-            p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
-                                      ic,
-                                      shift_vec,
-                                      out->f,
-                                      fshift_p,
-                                      out->Vvdw,
-                                      out->Vc);
-        }}
-        else
-        {{
-            /* Calculate energy group contributions */
-            int i;
-
-            for (i = 0; i < out->nVS; i++)
-            {{
-                out->VSvdw[i] = 0;
-            }}
-            for (i = 0; i < out->nVS; i++)
-            {{
-                out->VSc[i] = 0;
-            }}
-
-            p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
-                                         ic,
-                                         shift_vec,
-                                         out->f,
-                                         fshift_p,
-                                         out->VSvdw,
-                                         out->VSc);
-
-            reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
-                                  out->VSvdw, out->VSc,
-                                  out->Vvdw, out->Vc);
-        }}
-    }}
-
-    if (force_flags & GMX_FORCE_ENERGY)
-    {{
-        reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
-    }}
-}}
-#else
-{{
-    gmx_incons("{5} called when such kernels "
-               " are not enabled.");
-}}
-#endif
-#undef GMX_SIMD_J_UNROLL_SIZE
index 40ac05818008886bb4a949abe1726e185ef12745..eab6de081bc4c21361ab4b9c5ac32c35c6acb5f6 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nbnxn_pairlist.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/utility/real.h"
 
-/*! \brief Run-time dispatcher for nbnxn kernel functions. */
-void
-{0}(nbnxn_pairlist_set_t       *nbl_list,
-{1}const nbnxn_atomdata_t     *nbat,
-{1}const interaction_const_t  *ic,
-{1}int                         ewald_excl,
-{1}rvec                       *shift_vec,
-{1}int                         force_flags,
-{1}int                         clearF,
-{1}real                       *fshift,
-{1}real                       *Vc,
-{1}real                       *Vvdw);
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
 
-/* Need an #include guard so that sim_util.c can include all
- * such files. */
-#ifndef _nbnxn_kernel_simd_include_h
-#define _nbnxn_kernel_simd_include_h
-/*! \brief Typedefs for declaring kernel functions. */
-typedef void (nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
-                             const nbnxn_atomdata_t     *nbat,
-                             const interaction_const_t  *ic,
-                             rvec                       *shift_vec,
-                             real                       *f,
-                             real                       *fshift,
-                             real                       *Vvdw,
-                             real                       *Vc);
-typedef nbk_func_ener *p_nbk_func_ener;
+/* Declare all the different kernel functions.
+ */
+{0}
 
-typedef void (nbk_func_noener)(const nbnxn_pairlist_t     *nbl,
-                               const nbnxn_atomdata_t     *nbat,
-                               const interaction_const_t  *ic,
-                               rvec                       *shift_vec,
-                               real                       *f,
-                               real                       *fshift);
-typedef nbk_func_noener *p_nbk_func_noener;
-#endif
+#ifdef INCLUDE_KERNELFUNCTION_TABLES
 
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
+ */
+p_nbk_func_noener nbnxn_kernel_noener_simd_{1}[coulktNR][vdwktNR] =
 {2}
+p_nbk_func_ener nbnxn_kernel_ener_simd_{1}[coulktNR][vdwktNR] =
+{3}
+p_nbk_func_ener nbnxn_kernel_energrp_simd_{1}[coulktNR][vdwktNR] =
+{4}
+
+#endif /* INCLUDE_KERNELFUNCTION_TABLES */
index 818982c539baa6eef69a8ab1617a2e02165f81f3..309d9a33e421e947944cf81a9e6697bc31d26853 100644 (file)
 #include "gromacs/mdlib/force.h"
 #include "gromacs/mdlib/nb_verlet.h"
 #include "gromacs/mdlib/nbnxn_consts.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/utility/fatalerror.h"
 
+#include "nbnxn_kernel_common.h"
+
 static const int c_numClPerSupercl = c_nbnxnGpuNumClusterPerSupercluster;
 static const int c_clSize          = c_nbnxnGpuClusterSize;
 
index fe8d744083284b7f28c432e5c4bc832fe2c97b60..cc13cbc67abbaff0cf8f153922d4da96267ffb41 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
 #include "gromacs/mdlib/nb_verlet.h"
 #include "gromacs/mdlib/nbnxn_consts.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/smalloc.h"
 
-/*! \brief Typedefs for declaring lookup tables of kernel functions.
- */
-
-typedef void (*p_nbk_func_noener)(const nbnxn_pairlist_t     *nbl,
-                                  const nbnxn_atomdata_t     *nbat,
-                                  const interaction_const_t  *ic,
-                                  rvec                       *shift_vec,
-                                  real                       *f,
-                                  real                       *fshift);
-
-typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
-                                const nbnxn_atomdata_t     *nbat,
-                                const interaction_const_t  *ic,
-                                rvec                       *shift_vec,
-                                real                       *f,
-                                real                       *fshift,
-                                real                       *Vvdw,
-                                real                       *Vc);
 
 /* Analytical reaction-field kernels */
 #define CALC_COUL_RF
@@ -143,191 +124,3 @@ typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
 #undef LJ_EWALD
 #undef VDW_CUTOFF_CHECK
 #undef CALC_COUL_TAB
-
-
-enum {
-    coultRF, coultTAB, coultTAB_TWIN, coultNR
-};
-
-enum {
-    vdwtCUT, vdwtFSWITCH, vdwtPSWITCH, vdwtEWALDGEOM, vdwtEWALDLB, vdwtNR
-};
-
-p_nbk_func_noener p_nbk_c_noener[coultNR][vdwtNR] =
-{
-    { nbnxn_kernel_ElecRF_VdwLJ_F_ref,           nbnxn_kernel_ElecRF_VdwLJFsw_F_ref,           nbnxn_kernel_ElecRF_VdwLJPsw_F_ref,           nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_ref,           nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_ref           },
-    { nbnxn_kernel_ElecQSTab_VdwLJ_F_ref,        nbnxn_kernel_ElecQSTab_VdwLJFsw_F_ref,        nbnxn_kernel_ElecQSTab_VdwLJPsw_F_ref,        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_ref,        nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_F_ref        },
-    { nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_F_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_F_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_F_ref }
-};
-
-p_nbk_func_ener p_nbk_c_ener[coultNR][vdwtNR] =
-{
-    { nbnxn_kernel_ElecRF_VdwLJ_VF_ref,           nbnxn_kernel_ElecRF_VdwLJFsw_VF_ref,           nbnxn_kernel_ElecRF_VdwLJPsw_VF_ref,           nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_ref,           nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_ref            },
-    { nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref,        nbnxn_kernel_ElecQSTab_VdwLJFsw_VF_ref,        nbnxn_kernel_ElecQSTab_VdwLJPsw_VF_ref,        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_ref,        nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VF_ref         },
-    { nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VF_ref  }
-};
-
-p_nbk_func_ener p_nbk_c_energrp[coultNR][vdwtNR] =
-{
-    { nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref,           nbnxn_kernel_ElecRF_VdwLJFsw_VgrpF_ref,           nbnxn_kernel_ElecRF_VdwLJPsw_VgrpF_ref,           nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_ref,           nbnxn_kernel_ElecRF_VdwLJEwCombLB_VgrpF_ref           },
-    { nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref,        nbnxn_kernel_ElecQSTab_VdwLJFsw_VgrpF_ref,        nbnxn_kernel_ElecQSTab_VdwLJPsw_VgrpF_ref,        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_ref,        nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VgrpF_ref        },
-    { nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VgrpF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VgrpF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VgrpF_ref }
-};
-
-void
-nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list,
-                 const nbnxn_atomdata_t     *nbat,
-                 const interaction_const_t  *ic,
-                 rvec                       *shift_vec,
-                 int                         force_flags,
-                 int                         clearF,
-                 real                       *fshift,
-                 real                       *Vc,
-                 real                       *Vvdw)
-{
-    int                nnbl;
-    nbnxn_pairlist_t **nbl;
-    int                coult;
-    int                vdwt;
-    int                nb;
-    int                nthreads gmx_unused;
-
-    nnbl = nbl_list->nnbl;
-    nbl  = nbl_list->nbl;
-
-    if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
-    {
-        coult = coultRF;
-    }
-    else
-    {
-        if (ic->rcoulomb == ic->rvdw)
-        {
-            coult = coultTAB;
-        }
-        else
-        {
-            coult = coultTAB_TWIN;
-        }
-    }
-
-    if (ic->vdwtype == evdwCUT)
-    {
-        switch (ic->vdw_modifier)
-        {
-            case eintmodPOTSHIFT:
-            case eintmodNONE:
-                vdwt = vdwtCUT;
-                break;
-            case eintmodFORCESWITCH:
-                vdwt = vdwtFSWITCH;
-                break;
-            case eintmodPOTSWITCH:
-                vdwt = vdwtPSWITCH;
-                break;
-            default:
-                gmx_incons("Unsupported VdW modifier");
-                break;
-        }
-    }
-    else if (ic->vdwtype == evdwPME)
-    {
-        if (ic->ljpme_comb_rule == ljcrGEOM)
-        {
-            assert(nbat->comb_rule == ljcrGEOM);
-            vdwt = vdwtEWALDGEOM;
-        }
-        else
-        {
-            assert(nbat->comb_rule == ljcrLB);
-            vdwt = vdwtEWALDLB;
-        }
-    }
-    else
-    {
-        gmx_incons("Unsupported vdwtype in nbnxn reference kernel");
-    }
-
-    // cppcheck-suppress unreadVariable
-    nthreads = gmx_omp_nthreads_get(emntNonbonded);
-#pragma omp parallel for schedule(static) num_threads(nthreads)
-    for (nb = 0; nb < nnbl; nb++)
-    {
-        // Presently, the kernels do not call C++ code that can throw, so
-        // no need for a try/catch pair in this OpenMP region.
-        nbnxn_atomdata_output_t *out;
-        real                    *fshift_p;
-
-        out = &nbat->out[nb];
-
-        if (clearF == enbvClearFYes)
-        {
-            clear_f(nbat, nb, out->f);
-        }
-
-        if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
-        {
-            fshift_p = fshift;
-        }
-        else
-        {
-            fshift_p = out->fshift;
-
-            if (clearF == enbvClearFYes)
-            {
-                clear_fshift(fshift_p);
-            }
-        }
-
-        if (!(force_flags & GMX_FORCE_ENERGY))
-        {
-            /* Don't calculate energies */
-            p_nbk_c_noener[coult][vdwt](nbl[nb], nbat,
-                                        ic,
-                                        shift_vec,
-                                        out->f,
-                                        fshift_p);
-        }
-        else if (out->nV == 1)
-        {
-            /* No energy groups */
-            out->Vvdw[0] = 0;
-            out->Vc[0]   = 0;
-
-            p_nbk_c_ener[coult][vdwt](nbl[nb], nbat,
-                                      ic,
-                                      shift_vec,
-                                      out->f,
-                                      fshift_p,
-                                      out->Vvdw,
-                                      out->Vc);
-        }
-        else
-        {
-            /* Calculate energy group contributions */
-            int i;
-
-            for (i = 0; i < out->nV; i++)
-            {
-                out->Vvdw[i] = 0;
-            }
-            for (i = 0; i < out->nV; i++)
-            {
-                out->Vc[i] = 0;
-            }
-
-            p_nbk_c_energrp[coult][vdwt](nbl[nb], nbat,
-                                         ic,
-                                         shift_vec,
-                                         out->f,
-                                         fshift_p,
-                                         out->Vvdw,
-                                         out->Vc);
-        }
-    }
-
-    if (force_flags & GMX_FORCE_ENERGY)
-    {
-        reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
-    }
-}
index 184f0a424c8a3ff35cf1ca8a48302ed3637994c2..39d53e74c50dcd4b2d23a71f557f65a3230a1377 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * the research papers on the package. Check out http://www.gromacs.org.
  */
 
-#ifndef _nbnxn_kernel_ref_h
-#define _nbnxn_kernel_ref_h
+#include "nbnxn_kernel_common.h"
 
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nbnxn_pairlist.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/utility/real.h"
+/* Declare all the different kernel functions.
+ */
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJ_F_ref;
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJFsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJPsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_ref;
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJ_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJFsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJPsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_F_ref;
+
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJ_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJFsw_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJPsw_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJFsw_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJPsw_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VF_ref;
+
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJFsw_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJPsw_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecRF_VdwLJEwCombLB_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJFsw_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJPsw_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_ref;
+nbk_func_ener   nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VgrpF_ref;
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+#ifdef INCLUDE_KERNELFUNCTION_TABLES
+
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
+ * For the C reference kernels, unlike the SIMD kernels, there is not much
+ * advantage in using combination rules, so we (re-)use the same kernel.
+ */
+p_nbk_func_noener nbnxn_kernel_noener_ref[coulktNR][vdwktNR_ref] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJ_F_ref,
+        nbnxn_kernel_ElecRF_VdwLJ_F_ref,
+        nbnxn_kernel_ElecRF_VdwLJ_F_ref,
+        nbnxn_kernel_ElecRF_VdwLJFsw_F_ref,
+        nbnxn_kernel_ElecRF_VdwLJPsw_F_ref,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_ref,
+        nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJFsw_F_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJPsw_F_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_F_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_F_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_F_ref
+    }
+};
 
-/* Wrapper call for the non-bonded n vs n reference kernels */
-void
-nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list,
-                 const nbnxn_atomdata_t     *nbat,
-                 const interaction_const_t  *ic,
-                 rvec                       *shift_vec,
-                 int                         force_flags,
-                 int                         clearF,
-                 real                       *fshift,
-                 real                       *Vc,
-                 real                       *Vvdw);
+p_nbk_func_ener nbnxn_kernel_ener_ref[coulktNR][vdwktNR_ref] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecRF_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecRF_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecRF_VdwLJFsw_VF_ref,
+        nbnxn_kernel_ElecRF_VdwLJPsw_VF_ref,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_ref,
+        nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJFsw_VF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJPsw_VF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VF_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VF_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VF_ref
+    }
+};
 
-#ifdef __cplusplus
-}
-#endif
+p_nbk_func_ener nbnxn_kernel_energrp_ref[coulktNR][vdwktNR_ref] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecRF_VdwLJFsw_VgrpF_ref,
+        nbnxn_kernel_ElecRF_VdwLJPsw_VgrpF_ref,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_ref,
+        nbnxn_kernel_ElecRF_VdwLJEwCombLB_VgrpF_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJFsw_VgrpF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJPsw_VgrpF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_ref,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VgrpF_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VgrpF_ref
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_ref,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VgrpF_ref
+    }
+};
 
-#endif
+#endif /* INCLUDE_KERNELFUNCTION_TABLES */
index 9e956ec2237c12b4df090f33d540f1d3ceb5e805..c68cb3a00c7d292864ec5371936b650819138284 100644 (file)
@@ -79,7 +79,7 @@
 #error "No VdW type defined"
 #endif
 
-static void
+void
 #ifndef CALC_ENERGIES
 NBK_FUNC_NAME(_F)
 #else
@@ -95,11 +95,8 @@ NBK_FUNC_NAME(_VgrpF)
  const nbnxn_atomdata_t     *nbat,
  const interaction_const_t  *ic,
  rvec                       *shift_vec,
- real                       *f
-#ifdef CALC_SHIFTFORCES
- ,
- real                       *fshift
-#endif
+ real                       *f,
+ real gmx_unused            *fshift
 #ifdef CALC_ENERGIES
  ,
  real                       *Vvdw,
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp
deleted file mode 100644 (file)
index 0f97061..0000000
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the Verlet kernel generator for
- * kernel type 2xnn.
- */
-
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_simd.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/mdtypes/md_enums.h"
-
-#ifdef GMX_NBNXN_SIMD_2XNN
-
-/* Include the full-width SIMD macros */
-#include "gromacs/simd/vector_operations.h"
-
-#if !(GMX_SIMD_REAL_WIDTH == 8 || GMX_SIMD_REAL_WIDTH == 16)
-#error "unsupported SIMD width"
-#endif
-
-#define GMX_SIMD_J_UNROLL_SIZE 2
-#include "nbnxn_kernel_simd_2xnn.h"
-
-#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
-#include "gromacs/simd/simd.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/real.h"
-
-/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
- */
-enum {
-    coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
-};
-
-/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
- */
-enum {
-    vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
-};
-
-/* Declare and define the kernel function pointer lookup tables.
- * The minor index of the array goes over both the LJ combination rules,
- * which is only supported by plain cut-off, and the LJ switch/PME functions.
- */
-static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
-{
-    {
-        nbnxn_kernel_ElecRF_VdwLJCombGeom_F_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJCombLB_F_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJ_F_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJFSw_F_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJPSw_F_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJ_F_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJFSw_F_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJPSw_F_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecEw_VdwLJCombGeom_F_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJCombLB_F_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJ_F_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJFSw_F_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJPSw_F_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_2xnn,
-    },
-};
-
-static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
-{
-    {
-        nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJCombLB_VF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJ_VF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJFSw_VF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJPSw_VF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJ_VF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJCombLB_VF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJ_VF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJFSw_VF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJPSw_VF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_2xnn,
-    },
-};
-
-static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
-{
-    {
-        nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJ_VgrpF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_2xnn,
-    },
-    {
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_2xnn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
-    },
-};
-
-
-static void
-reduce_group_energies(int ng, int ng_2log,
-                      const real *VSvdw, const real *VSc,
-                      real *Vvdw, real *Vc)
-{
-    const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
-    const int unrollj_half = unrollj/2;
-    int       ng_p2, i, j, j0, j1, c, s;
-
-    ng_p2 = (1<<ng_2log);
-
-    /* The size of the x86 SIMD energy group buffer array is:
-     * ng*ng*ng_p2*unrollj_half*simd_width
-     */
-    for (i = 0; i < ng; i++)
-    {
-        for (j = 0; j < ng; j++)
-        {
-            Vvdw[i*ng+j] = 0;
-            Vc[i*ng+j]   = 0;
-        }
-
-        for (j1 = 0; j1 < ng; j1++)
-        {
-            for (j0 = 0; j0 < ng; j0++)
-            {
-                c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
-                for (s = 0; s < unrollj_half; s++)
-                {
-                    Vvdw[i*ng+j0] += VSvdw[c+0];
-                    Vvdw[i*ng+j1] += VSvdw[c+1];
-                    Vc  [i*ng+j0] += VSc  [c+0];
-                    Vc  [i*ng+j1] += VSc  [c+1];
-                    c             += unrollj + 2;
-                }
-            }
-        }
-    }
-}
-
-#else /* GMX_NBNXN_SIMD_2XNN */
-
-#include "gromacs/utility/fatalerror.h"
-
-#endif /* GMX_NBNXN_SIMD_2XNN */
-
-void
-nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
-                       const nbnxn_atomdata_t    gmx_unused *nbat,
-                       const interaction_const_t gmx_unused *ic,
-                       int                       gmx_unused  ewald_excl,
-                       rvec                      gmx_unused *shift_vec,
-                       int                       gmx_unused  force_flags,
-                       int                       gmx_unused  clearF,
-                       real                      gmx_unused *fshift,
-                       real                      gmx_unused *Vc,
-                       real                      gmx_unused *Vvdw)
-#ifdef GMX_NBNXN_SIMD_2XNN
-{
-    int                nnbl;
-    nbnxn_pairlist_t **nbl;
-    int                coulkt, vdwkt = 0;
-    int                nb, nthreads gmx_unused;
-
-    nnbl = nbl_list->nnbl;
-    nbl  = nbl_list->nbl;
-
-    if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
-    {
-        coulkt = coulktRF;
-    }
-    else
-    {
-        if (ewald_excl == ewaldexclTable)
-        {
-            if (ic->rcoulomb == ic->rvdw)
-            {
-                coulkt = coulktTAB;
-            }
-            else
-            {
-                coulkt = coulktTAB_TWIN;
-            }
-        }
-        else
-        {
-            if (ic->rcoulomb == ic->rvdw)
-            {
-                coulkt = coulktEWALD;
-            }
-            else
-            {
-                coulkt = coulktEWALD_TWIN;
-            }
-        }
-    }
-
-    if (ic->vdwtype == evdwCUT)
-    {
-        switch (ic->vdw_modifier)
-        {
-            case eintmodNONE:
-            case eintmodPOTSHIFT:
-                switch (nbat->comb_rule)
-                {
-                    case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
-                    case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
-                    case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
-                    default:       gmx_incons("Unknown combination rule");
-                }
-                break;
-            case eintmodFORCESWITCH:
-                vdwkt = vdwktLJFORCESWITCH;
-                break;
-            case eintmodPOTSWITCH:
-                vdwkt = vdwktLJPOTSWITCH;
-                break;
-            default:
-                gmx_incons("Unsupported VdW interaction modifier");
-        }
-    }
-    else if (ic->vdwtype == evdwPME)
-    {
-        if (ic->ljpme_comb_rule == eljpmeLB)
-        {
-            gmx_incons("The nbnxn SIMD kernels don't support LJ-PME with LB");
-        }
-        vdwkt = vdwktLJEWALDCOMBGEOM;
-    }
-    else
-    {
-        gmx_incons("Unsupported VdW interaction type");
-    }
-    // cppcheck-suppress unreadVariable
-    nthreads = gmx_omp_nthreads_get(emntNonbonded);
-#pragma omp parallel for schedule(static) num_threads(nthreads)
-    for (nb = 0; nb < nnbl; nb++)
-    {
-        // Presently, the kernels do not call C++ code that can throw, so
-        // no need for a try/catch pair in this OpenMP region.
-        nbnxn_atomdata_output_t *out;
-        real                    *fshift_p;
-
-        out = &nbat->out[nb];
-
-        if (clearF == enbvClearFYes)
-        {
-            clear_f(nbat, nb, out->f);
-        }
-
-        if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
-        {
-            fshift_p = fshift;
-        }
-        else
-        {
-            fshift_p = out->fshift;
-
-            if (clearF == enbvClearFYes)
-            {
-                clear_fshift(fshift_p);
-            }
-        }
-
-        if (!(force_flags & GMX_FORCE_ENERGY))
-        {
-            /* Don't calculate energies */
-            p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
-                                        ic,
-                                        shift_vec,
-                                        out->f,
-                                        fshift_p);
-        }
-        else if (out->nV == 1)
-        {
-            /* No energy groups */
-            out->Vvdw[0] = 0;
-            out->Vc[0]   = 0;
-
-            p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
-                                      ic,
-                                      shift_vec,
-                                      out->f,
-                                      fshift_p,
-                                      out->Vvdw,
-                                      out->Vc);
-        }
-        else
-        {
-            /* Calculate energy group contributions */
-            int i;
-
-            for (i = 0; i < out->nVS; i++)
-            {
-                out->VSvdw[i] = 0;
-            }
-            for (i = 0; i < out->nVS; i++)
-            {
-                out->VSc[i] = 0;
-            }
-
-            p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
-                                         ic,
-                                         shift_vec,
-                                         out->f,
-                                         fshift_p,
-                                         out->VSvdw,
-                                         out->VSc);
-
-            reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
-                                  out->VSvdw, out->VSc,
-                                  out->Vvdw, out->Vc);
-        }
-    }
-
-    if (force_flags & GMX_FORCE_ENERGY)
-    {
-        reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
-    }
-}
-#else
-{
-    gmx_incons("nbnxn_kernel_simd_2xnn called when such kernels "
-               " are not enabled.");
-}
-#endif
-#undef GMX_SIMD_J_UNROLL_SIZE
index 2052aef63e379222663c5fc918240bb5a2c9b31e..79a8d37de909c24c5b1787bb450e73c7ce1d33da 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * kernel type 2xnn.
  */
 
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nbnxn_pairlist.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/utility/real.h"
 
-/*! \brief Run-time dispatcher for nbnxn kernel functions. */
-void
-nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t       *nbl_list,
-                       const nbnxn_atomdata_t     *nbat,
-                       const interaction_const_t  *ic,
-                       int                         ewald_excl,
-                       rvec                       *shift_vec,
-                       int                         force_flags,
-                       int                         clearF,
-                       real                       *fshift,
-                       real                       *Vc,
-                       real                       *Vvdw);
-
-/* Need an #include guard so that sim_util.c can include all
- * such files. */
-#ifndef _nbnxn_kernel_simd_include_h
-#define _nbnxn_kernel_simd_include_h
-/*! \brief Typedefs for declaring kernel functions. */
-typedef void (nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
-                             const nbnxn_atomdata_t     *nbat,
-                             const interaction_const_t  *ic,
-                             rvec                       *shift_vec,
-                             real                       *f,
-                             real                       *fshift,
-                             real                       *Vvdw,
-                             real                       *Vc);
-typedef nbk_func_ener *p_nbk_func_ener;
-
-typedef void (nbk_func_noener)(const nbnxn_pairlist_t     *nbl,
-                               const nbnxn_atomdata_t     *nbat,
-                               const interaction_const_t  *ic,
-                               rvec                       *shift_vec,
-                               real                       *f,
-                               real                       *fshift);
-typedef nbk_func_noener *p_nbk_func_noener;
-#endif
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
 
+/* Declare all the different kernel functions.
+ */
 nbk_func_ener         nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn;
 nbk_func_ener         nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn;
 nbk_func_ener         nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn;
@@ -171,3 +134,146 @@ nbk_func_noener       nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_2xnn;
 nbk_func_noener       nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn;
 nbk_func_noener       nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn;
 nbk_func_noener       nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_2xnn;
+
+
+
+#ifdef INCLUDE_KERNELFUNCTION_TABLES
+
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
+ */
+p_nbk_func_noener nbnxn_kernel_noener_simd_2xnn[coulktNR][vdwktNR] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJCombGeom_F_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJCombLB_F_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJ_F_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJFSw_F_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJPSw_F_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJ_F_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJFSw_F_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJPSw_F_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecEw_VdwLJCombGeom_F_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJCombLB_F_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJ_F_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJFSw_F_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJPSw_F_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_2xnn,
+    },
+};
+
+p_nbk_func_ener nbnxn_kernel_ener_simd_2xnn[coulktNR][vdwktNR] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJCombLB_VF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJ_VF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJFSw_VF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJPSw_VF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJ_VF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJCombLB_VF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJ_VF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJFSw_VF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJPSw_VF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_2xnn,
+    },
+};
+
+p_nbk_func_ener nbnxn_kernel_energrp_simd_2xnn[coulktNR][vdwktNR] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJ_VgrpF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_2xnn,
+    },
+    {
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_2xnn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
+    },
+};
+
+
+#endif /* INCLUDE_KERNELFUNCTION_TABLES */
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp
deleted file mode 100644 (file)
index 8f062df..0000000
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the Verlet kernel generator for
- * kernel type 4xn.
- */
-
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_simd.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/mdtypes/md_enums.h"
-
-#ifdef GMX_NBNXN_SIMD_4XN
-
-#include "gromacs/simd/vector_operations.h"
-
-#if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
-#error "unsupported SIMD width"
-#endif
-
-#define GMX_SIMD_J_UNROLL_SIZE 1
-#include "nbnxn_kernel_simd_4xn.h"
-
-#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
-#include "gromacs/simd/simd.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/real.h"
-
-/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
- */
-enum {
-    coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
-};
-
-/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
- */
-enum {
-    vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
-};
-
-/* Declare and define the kernel function pointer lookup tables.
- * The minor index of the array goes over both the LJ combination rules,
- * which is only supported by plain cut-off, and the LJ switch/PME functions.
- */
-static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
-{
-    {
-        nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
-        nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn,
-        nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
-        nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
-        nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
-        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
-    },
-    {
-        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
-    },
-    {
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
-    },
-    {
-        nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
-        nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn,
-        nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
-        nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
-        nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
-        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
-    },
-    {
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
-    },
-};
-
-static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
-{
-    {
-        nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
-    },
-    {
-        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
-    },
-    {
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
-    },
-    {
-        nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
-    },
-    {
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
-    },
-};
-
-static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
-{
-    {
-        nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
-        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
-    },
-    {
-        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
-    },
-    {
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
-        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
-    },
-    {
-        nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
-        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
-    },
-    {
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
-        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
-    },
-};
-
-
-static void
-reduce_group_energies(int ng, int ng_2log,
-                      const real *VSvdw, const real *VSc,
-                      real *Vvdw, real *Vc)
-{
-    const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
-    const int unrollj_half = unrollj/2;
-    int       ng_p2, i, j, j0, j1, c, s;
-
-    ng_p2 = (1<<ng_2log);
-
-    /* The size of the x86 SIMD energy group buffer array is:
-     * ng*ng*ng_p2*unrollj_half*simd_width
-     */
-    for (i = 0; i < ng; i++)
-    {
-        for (j = 0; j < ng; j++)
-        {
-            Vvdw[i*ng+j] = 0;
-            Vc[i*ng+j]   = 0;
-        }
-
-        for (j1 = 0; j1 < ng; j1++)
-        {
-            for (j0 = 0; j0 < ng; j0++)
-            {
-                c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
-                for (s = 0; s < unrollj_half; s++)
-                {
-                    Vvdw[i*ng+j0] += VSvdw[c+0];
-                    Vvdw[i*ng+j1] += VSvdw[c+1];
-                    Vc  [i*ng+j0] += VSc  [c+0];
-                    Vc  [i*ng+j1] += VSc  [c+1];
-                    c             += unrollj + 2;
-                }
-            }
-        }
-    }
-}
-
-#else /* GMX_NBNXN_SIMD_4XN */
-
-#include "gromacs/utility/fatalerror.h"
-
-#endif /* GMX_NBNXN_SIMD_4XN */
-
-void
-nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
-                      const nbnxn_atomdata_t    gmx_unused *nbat,
-                      const interaction_const_t gmx_unused *ic,
-                      int                       gmx_unused  ewald_excl,
-                      rvec                      gmx_unused *shift_vec,
-                      int                       gmx_unused  force_flags,
-                      int                       gmx_unused  clearF,
-                      real                      gmx_unused *fshift,
-                      real                      gmx_unused *Vc,
-                      real                      gmx_unused *Vvdw)
-#ifdef GMX_NBNXN_SIMD_4XN
-{
-    int                nnbl;
-    nbnxn_pairlist_t **nbl;
-    int                coulkt, vdwkt = 0;
-    int                nb, nthreads gmx_unused;
-
-    nnbl = nbl_list->nnbl;
-    nbl  = nbl_list->nbl;
-
-    if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
-    {
-        coulkt = coulktRF;
-    }
-    else
-    {
-        if (ewald_excl == ewaldexclTable)
-        {
-            if (ic->rcoulomb == ic->rvdw)
-            {
-                coulkt = coulktTAB;
-            }
-            else
-            {
-                coulkt = coulktTAB_TWIN;
-            }
-        }
-        else
-        {
-            if (ic->rcoulomb == ic->rvdw)
-            {
-                coulkt = coulktEWALD;
-            }
-            else
-            {
-                coulkt = coulktEWALD_TWIN;
-            }
-        }
-    }
-
-    if (ic->vdwtype == evdwCUT)
-    {
-        switch (ic->vdw_modifier)
-        {
-            case eintmodNONE:
-            case eintmodPOTSHIFT:
-                switch (nbat->comb_rule)
-                {
-                    case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
-                    case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
-                    case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
-                    default:       gmx_incons("Unknown combination rule");
-                }
-                break;
-            case eintmodFORCESWITCH:
-                vdwkt = vdwktLJFORCESWITCH;
-                break;
-            case eintmodPOTSWITCH:
-                vdwkt = vdwktLJPOTSWITCH;
-                break;
-            default:
-                gmx_incons("Unsupported VdW interaction modifier");
-        }
-    }
-    else if (ic->vdwtype == evdwPME)
-    {
-        if (ic->ljpme_comb_rule == eljpmeLB)
-        {
-            gmx_incons("The nbnxn SIMD kernels don't support LJ-PME with LB");
-        }
-        vdwkt = vdwktLJEWALDCOMBGEOM;
-    }
-    else
-    {
-        gmx_incons("Unsupported VdW interaction type");
-    }
-    // cppcheck-suppress unreadVariable
-    nthreads = gmx_omp_nthreads_get(emntNonbonded);
-#pragma omp parallel for schedule(static) num_threads(nthreads)
-    for (nb = 0; nb < nnbl; nb++)
-    {
-        // Presently, the kernels do not call C++ code that can throw, so
-        // no need for a try/catch pair in this OpenMP region.
-        nbnxn_atomdata_output_t *out;
-        real                    *fshift_p;
-
-        out = &nbat->out[nb];
-
-        if (clearF == enbvClearFYes)
-        {
-            clear_f(nbat, nb, out->f);
-        }
-
-        if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
-        {
-            fshift_p = fshift;
-        }
-        else
-        {
-            fshift_p = out->fshift;
-
-            if (clearF == enbvClearFYes)
-            {
-                clear_fshift(fshift_p);
-            }
-        }
-
-        if (!(force_flags & GMX_FORCE_ENERGY))
-        {
-            /* Don't calculate energies */
-            p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
-                                        ic,
-                                        shift_vec,
-                                        out->f,
-                                        fshift_p);
-        }
-        else if (out->nV == 1)
-        {
-            /* No energy groups */
-            out->Vvdw[0] = 0;
-            out->Vc[0]   = 0;
-
-            p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
-                                      ic,
-                                      shift_vec,
-                                      out->f,
-                                      fshift_p,
-                                      out->Vvdw,
-                                      out->Vc);
-        }
-        else
-        {
-            /* Calculate energy group contributions */
-            int i;
-
-            for (i = 0; i < out->nVS; i++)
-            {
-                out->VSvdw[i] = 0;
-            }
-            for (i = 0; i < out->nVS; i++)
-            {
-                out->VSc[i] = 0;
-            }
-
-            p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
-                                         ic,
-                                         shift_vec,
-                                         out->f,
-                                         fshift_p,
-                                         out->VSvdw,
-                                         out->VSc);
-
-            reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
-                                  out->VSvdw, out->VSc,
-                                  out->Vvdw, out->Vc);
-        }
-    }
-
-    if (force_flags & GMX_FORCE_ENERGY)
-    {
-        reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
-    }
-}
-#else
-{
-    gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
-               " are not enabled.");
-}
-#endif
-#undef GMX_SIMD_J_UNROLL_SIZE
index 29f689e67e31f2e5eadbe5b262dd365f2374cbe0..38999fb88df9c52dcdca9eb05d83c74fe5a6dffa 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * kernel type 4xn.
  */
 
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nbnxn_pairlist.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/utility/real.h"
 
-/*! \brief Run-time dispatcher for nbnxn kernel functions. */
-void
-nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t       *nbl_list,
-                      const nbnxn_atomdata_t     *nbat,
-                      const interaction_const_t  *ic,
-                      int                         ewald_excl,
-                      rvec                       *shift_vec,
-                      int                         force_flags,
-                      int                         clearF,
-                      real                       *fshift,
-                      real                       *Vc,
-                      real                       *Vvdw);
-
-/* Need an #include guard so that sim_util.c can include all
- * such files. */
-#ifndef _nbnxn_kernel_simd_include_h
-#define _nbnxn_kernel_simd_include_h
-/*! \brief Typedefs for declaring kernel functions. */
-typedef void (nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
-                             const nbnxn_atomdata_t     *nbat,
-                             const interaction_const_t  *ic,
-                             rvec                       *shift_vec,
-                             real                       *f,
-                             real                       *fshift,
-                             real                       *Vvdw,
-                             real                       *Vc);
-typedef nbk_func_ener *p_nbk_func_ener;
-
-typedef void (nbk_func_noener)(const nbnxn_pairlist_t     *nbl,
-                               const nbnxn_atomdata_t     *nbat,
-                               const interaction_const_t  *ic,
-                               rvec                       *shift_vec,
-                               real                       *f,
-                               real                       *fshift);
-typedef nbk_func_noener *p_nbk_func_noener;
-#endif
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
 
+/* Declare all the different kernel functions.
+ */
 nbk_func_ener         nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn;
 nbk_func_ener         nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn;
 nbk_func_ener         nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn;
@@ -171,3 +134,146 @@ nbk_func_noener       nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn;
 nbk_func_noener       nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn;
 nbk_func_noener       nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn;
 nbk_func_noener       nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn;
+
+
+
+#ifdef INCLUDE_KERNELFUNCTION_TABLES
+
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
+ */
+p_nbk_func_noener nbnxn_kernel_noener_simd_4xn[coulktNR][vdwktNR] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
+        nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn,
+        nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
+        nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
+        nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
+    },
+    {
+        nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
+        nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn,
+        nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
+        nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
+        nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
+    },
+    {
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
+    },
+};
+
+p_nbk_func_ener nbnxn_kernel_ener_simd_4xn[coulktNR][vdwktNR] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
+    },
+    {
+        nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
+    },
+    {
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
+    },
+};
+
+p_nbk_func_ener nbnxn_kernel_energrp_simd_4xn[coulktNR][vdwktNR] =
+{
+    {
+        nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
+    },
+    {
+        nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
+    },
+    {
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
+    },
+    {
+        nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
+    },
+    {
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
+        nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
+    },
+};
+
+
+#endif /* INCLUDE_KERNELFUNCTION_TABLES */
index 1d9dba9c590f5e58f0dc6c130362bc5ccffea3b2..5fa86ab40e1a3a3e8c61759a02ddf0e603152bc2 100644 (file)
@@ -82,9 +82,6 @@
 #include "gromacs/mdlib/qmmm.h"
 #include "gromacs/mdlib/update.h"
 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
-#include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
-#include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
 #include "gromacs/mdtypes/commrec.h"
 #include "gromacs/mdtypes/iforceprovider.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/utility/sysinfo.h"
 
 #include "nbnxn_gpu.h"
+#include "nbnxn_kernels/nbnxn_kernel_cpu.h"
 
 void print_time(FILE                     *out,
                 gmx_walltime_accounting_t walltime_accounting,
@@ -441,8 +439,10 @@ static void do_nb_verlet(t_forcerec *fr,
     switch (nbvg->kernel_type)
     {
         case nbnxnk4x4_PlainC:
-            nbnxn_kernel_ref(&nbvg->nbl_lists,
-                             nbvg->nbat, ic,
+        case nbnxnk4xN_SIMD_4xN:
+        case nbnxnk4xN_SIMD_2xNN:
+            nbnxn_kernel_cpu(nbvg,
+                             ic,
                              fr->shift_vec,
                              flags,
                              clearF,
@@ -453,33 +453,6 @@ static void do_nb_verlet(t_forcerec *fr,
                              enerd->grpp.ener[egLJSR]);
             break;
 
-        case nbnxnk4xN_SIMD_4xN:
-            nbnxn_kernel_simd_4xn(&nbvg->nbl_lists,
-                                  nbvg->nbat, ic,
-                                  nbvg->ewald_excl,
-                                  fr->shift_vec,
-                                  flags,
-                                  clearF,
-                                  fr->fshift[0],
-                                  enerd->grpp.ener[egCOULSR],
-                                  fr->bBHAM ?
-                                  enerd->grpp.ener[egBHAMSR] :
-                                  enerd->grpp.ener[egLJSR]);
-            break;
-        case nbnxnk4xN_SIMD_2xNN:
-            nbnxn_kernel_simd_2xnn(&nbvg->nbl_lists,
-                                   nbvg->nbat, ic,
-                                   nbvg->ewald_excl,
-                                   fr->shift_vec,
-                                   flags,
-                                   clearF,
-                                   fr->fshift[0],
-                                   enerd->grpp.ener[egCOULSR],
-                                   fr->bBHAM ?
-                                   enerd->grpp.ener[egBHAMSR] :
-                                   enerd->grpp.ener[egLJSR]);
-            break;
-
         case nbnxnk8x8x8_GPU:
             nbnxn_gpu_launch_kernel(fr->nbv->gpu_nbv, nbvg->nbat, flags, ilocality);
             break;