src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h: warning: should include "config.h"
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp: warning: includes "config.h" unnecessarily
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.cpp: warning: includes "config.h" unnecessarily
-src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp: warning: includes "config.h" unnecessarily
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "config.h"
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "nbnxn_simd.h"
-src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp: warning: includes "config.h" unnecessarily
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h: warning: should include "nbnxn_simd.h"
# Temporary while we change the SIMD implementation
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ *
+ * \brief
+ * Declares the nbnxn pair interaction kernel function types and kind counts, also declares utility functions used in nbnxn_kernel.cpp.
+ *
+ * \author Berk Hess <hess@kth.se>
+ */
+
#ifndef _nbnxn_kernel_common_h
#define _nbnxn_kernel_common_h
#include "gromacs/math/vectypes.h"
+/* nbnxn_atomdata_t and nbnxn_pairlist_t could be forward declared, but that requires modifications in all SIMD kernel files */
+#include "gromacs/mdlib/nbnxn_atomdata.h"
#include "gromacs/mdlib/nbnxn_pairlist.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
+struct interaction_const_t;
-/* Clear the force buffer f. Either the whole buffer or only the parts
- * used by the current thread when nbat->bUseBufferFlags is set.
+/*! \brief Pair-interaction kernel type that also calculates energies.
+ */
+typedef void (nbk_func_ener)(const nbnxn_pairlist_t *nbl,
+ const nbnxn_atomdata_t *nbat,
+ const interaction_const_t *ic,
+ rvec *shift_vec,
+ real *f,
+ real *fshift,
+ real *Vvdw,
+ real *Vc);
+
+/*! \brief Pointer to \p nbk_func_ener.
+ */
+typedef nbk_func_ener *p_nbk_func_ener;
+
+/*! \brief Pair-interaction kernel type that does not calculates energies.
+ */
+typedef void (nbk_func_noener)(const nbnxn_pairlist_t *nbl,
+ const nbnxn_atomdata_t *nbat,
+ const interaction_const_t *ic,
+ rvec *shift_vec,
+ real *f,
+ real *fshift);
+
+/*! \brief Pointer to \p nbk_func_noener.
+ */
+typedef nbk_func_noener *p_nbk_func_noener;
+
+/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
+ */
+enum {
+ coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
+};
+
+/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
+ *
+ * The \p LJCUT_COMB refers to the LJ combination rule for the short range.
+ * The \p EWALDCOMB refers to the combination rule for the grid part.
+ * \p vdwktNR is the number of VdW treatments for the SIMD kernels.
+ * \p vdwktNR_ref is the number of VdW treatments for the C reference kernels.
+ * These two numbers differ, because currently only the reference kernels
+ * support LB combination rules for the LJ-Ewald grid part.
+ */
+enum {
+ vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktLJEWALDCOMBLB, vdwktNR = vdwktLJEWALDCOMBLB, vdwktNR_ref
+};
+
+/*! \brief Clears the force buffer.
+ *
+ * Either the whole buffer is cleared or only the parts used
+ * by the current thread when nbat->bUseBufferFlags is set.
* In the latter case output_index is the task/thread list/buffer index.
*/
void
clear_f(const nbnxn_atomdata_t *nbat, int output_index, real *f);
-/* Clear the shift forces */
+/*! \brief Clears the shift forces.
+ */
void
clear_fshift(real *fshift);
-/* Reduce the collected energy terms over the pair-lists/threads */
+/*! \brief Reduces the collected energy terms over the pair-lists/threads.
+ */
void
reduce_energies_over_lists(const nbnxn_atomdata_t *nbat,
int nlist,
real *Vvdw,
real *Vc);
-#if 0
-{
-#endif
-#ifdef __cplusplus
-}
-#endif
-
#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#include "gmxpre.h"
+
+#include "nbnxn_kernel_cpu.h"
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/mdlib/force_flags.h"
+#include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_simd.h"
+#include "gromacs/mdtypes/interaction_const.h"
+#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/simd/simd.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/real.h"
+
+#include "nbnxn_kernel_common.h"
+#define INCLUDE_KERNELFUNCTION_TABLES
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
+#ifdef GMX_NBNXN_SIMD_2XNN
+#include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
+#endif
+#ifdef GMX_NBNXN_SIMD_4XN
+#include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
+#endif
+#undef INCLUDE_FUNCTION_TABLES
+
+/*! \brief Clears the energy group output buffers
+ *
+ * \param[in,out] out nbnxn kernel output struct
+ */
+static void clearGroupEnergies(nbnxn_atomdata_output_t *out)
+{
+ for (int i = 0; i < out->nV; i++)
+ {
+ out->Vvdw[i] = 0;
+ out->Vc[i] = 0;
+ }
+
+ for (int i = 0; i < out->nVS; i++)
+ {
+ out->VSvdw[i] = 0;
+ }
+ for (int i = 0; i < out->nVS; i++)
+ {
+ out->VSc[i] = 0;
+ }
+}
+
+/*! \brief Reduce the group-pair energy buffers produced by a SIMD kernel
+ * to single terms in the output buffers.
+ *
+ * The SIMD kernels produce a large number of energy buffer in SIMD registers
+ * to avoid scattered reads and writes.
+ *
+ * \tparam unrollj The unroll size for j-particles in the SIMD kernel
+ * \param[in] numGroups The number of energy groups
+ * \param[in] numGroups_2log Log2 of numGroups, rounded up
+ * \param[in] vVdwSimd SIMD Van der Waals energy buffers
+ * \param[in] vCoulombSimd SIMD Coulomb energy buffers
+ * \param[in,out] vVdw Van der Waals energy output buffer
+ * \param[in,out] vCoulomb Coulomb energy output buffer
+ */
+template <int unrollj> static void
+reduceGroupEnergySimdBuffers(int numGroups, int numGroups_2log,
+ const real *vVdwSimd, const real *vCoulombSimd,
+ real * gmx_restrict vVdw,
+ real * gmx_restrict vCoulomb)
+{
+ // cppcheck-suppress duplicateExpression
+ const int unrollj_half = unrollj/2;
+ /* Energies are stored in SIMD registers with size 2^numGroups_2log */
+ const int numGroupsStorage = (1 << numGroups_2log);
+
+ /* The size of the SIMD energy group buffer array is:
+ * numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
+ */
+ for (int i = 0; i < numGroups; i++)
+ {
+ for (int j1 = 0; j1 < numGroups; j1++)
+ {
+ for (int j0 = 0; j0 < numGroups; j0++)
+ {
+ int c = ((i*numGroups + j1)*numGroupsStorage + j0)*unrollj_half*unrollj;
+ for (int s = 0; s < unrollj_half; s++)
+ {
+ vVdw [i*numGroups + j0] += vVdwSimd [c + 0];
+ vVdw [i*numGroups + j1] += vVdwSimd [c + 1];
+ vCoulomb[i*numGroups + j0] += vCoulombSimd[c + 0];
+ vCoulomb[i*numGroups + j1] += vCoulombSimd[c + 1];
+ c += unrollj + 2;
+ }
+ }
+ }
+ }
+}
+
+void
+nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg,
+ const interaction_const_t *ic,
+ rvec *shiftVectors,
+ int forceFlags,
+ int clearF,
+ real *fshift,
+ real *vCoulomb,
+ real *vVdw)
+{
+ const nbnxn_atomdata_t *nbat = nbvg->nbat;
+
+ int coulkt;
+ if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
+ {
+ coulkt = coulktRF;
+ }
+ else
+ {
+ if (nbvg->ewald_excl == ewaldexclTable)
+ {
+ if (ic->rcoulomb == ic->rvdw)
+ {
+ coulkt = coulktTAB;
+ }
+ else
+ {
+ coulkt = coulktTAB_TWIN;
+ }
+ }
+ else
+ {
+ if (ic->rcoulomb == ic->rvdw)
+ {
+ coulkt = coulktEWALD;
+ }
+ else
+ {
+ coulkt = coulktEWALD_TWIN;
+ }
+ }
+ }
+
+ int vdwkt = 0;
+ if (ic->vdwtype == evdwCUT)
+ {
+ switch (ic->vdw_modifier)
+ {
+ case eintmodNONE:
+ case eintmodPOTSHIFT:
+ switch (nbat->comb_rule)
+ {
+ case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
+ case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
+ case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
+ default:
+ GMX_RELEASE_ASSERT(false, "Unknown combination rule");
+ }
+ break;
+ case eintmodFORCESWITCH:
+ vdwkt = vdwktLJFORCESWITCH;
+ break;
+ case eintmodPOTSWITCH:
+ vdwkt = vdwktLJPOTSWITCH;
+ break;
+ default:
+ GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction modifier");
+ }
+ }
+ else if (ic->vdwtype == evdwPME)
+ {
+ if (ic->ljpme_comb_rule == eljpmeGEOM)
+ {
+ vdwkt = vdwktLJEWALDCOMBGEOM;
+ }
+ else
+ {
+ vdwkt = vdwktLJEWALDCOMBLB;
+ /* At setup we (should have) selected the C reference kernel */
+ GMX_RELEASE_ASSERT(nbvg->kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
+ }
+ }
+ else
+ {
+ GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction type");
+ }
+
+ int nnbl = nbvg->nbl_lists.nnbl;
+ nbnxn_pairlist_t **nbl = nbvg->nbl_lists.nbl;
+
+ // cppcheck-suppress unreadVariable
+ int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
+#pragma omp parallel for schedule(static) num_threads(nthreads)
+ for (int nb = 0; nb < nnbl; nb++)
+ {
+ // Presently, the kernels do not call C++ code that can throw,
+ // so no need for a try/catch pair in this OpenMP region.
+ nbnxn_atomdata_output_t *out = &nbat->out[nb];
+
+ if (clearF == enbvClearFYes)
+ {
+ clear_f(nbat, nb, out->f);
+ }
+
+ real *fshift_p;
+ if ((forceFlags & GMX_FORCE_VIRIAL) && nnbl == 1)
+ {
+ fshift_p = fshift;
+ }
+ else
+ {
+ fshift_p = out->fshift;
+
+ if (clearF == enbvClearFYes)
+ {
+ clear_fshift(fshift_p);
+ }
+ }
+
+ if (!(forceFlags & GMX_FORCE_ENERGY))
+ {
+ /* Don't calculate energies */
+ switch (nbvg->kernel_type)
+ {
+ case nbnxnk4x4_PlainC:
+ nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p);
+ break;
+#ifdef GMX_NBNXN_SIMD_2XNN
+ case nbnxnk4xN_SIMD_2xNN:
+ nbnxn_kernel_noener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p);
+ break;
+#endif
+#ifdef GMX_NBNXN_SIMD_4XN
+ case nbnxnk4xN_SIMD_4xN:
+ nbnxn_kernel_noener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p);
+ break;
+#endif
+ default:
+ GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+ }
+ }
+ else if (out->nV == 1)
+ {
+ /* A single energy group (pair) */
+ out->Vvdw[0] = 0;
+ out->Vc[0] = 0;
+
+ switch (nbvg->kernel_type)
+ {
+ case nbnxnk4x4_PlainC:
+ nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p,
+ out->Vvdw,
+ out->Vc);
+ break;
+#ifdef GMX_NBNXN_SIMD_2XNN
+ case nbnxnk4xN_SIMD_2xNN:
+ nbnxn_kernel_ener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p,
+ out->Vvdw,
+ out->Vc);
+ break;
+#endif
+#ifdef GMX_NBNXN_SIMD_4XN
+ case nbnxnk4xN_SIMD_4xN:
+ nbnxn_kernel_ener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p,
+ out->Vvdw,
+ out->Vc);
+ break;
+#endif
+ default:
+ GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+ }
+ }
+ else
+ {
+ /* Calculate energy group contributions */
+ clearGroupEnergies(out);
+
+ int unrollj = 0;
+
+ switch (nbvg->kernel_type)
+ {
+ case nbnxnk4x4_PlainC:
+ unrollj = NBNXN_CPU_CLUSTER_I_SIZE;
+ nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p,
+ out->Vvdw,
+ out->Vc);
+ break;
+#ifdef GMX_NBNXN_SIMD_2XNN
+ case nbnxnk4xN_SIMD_2xNN:
+ unrollj = GMX_SIMD_REAL_WIDTH/2;
+ nbnxn_kernel_energrp_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p,
+ out->VSvdw,
+ out->VSc);
+ break;
+#endif
+#ifdef GMX_NBNXN_SIMD_4XN
+ case nbnxnk4xN_SIMD_4xN:
+ unrollj = GMX_SIMD_REAL_WIDTH;
+ nbnxn_kernel_energrp_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
+ ic,
+ shiftVectors,
+ out->f,
+ fshift_p,
+ out->VSvdw,
+ out->VSc);
+ break;
+#endif
+ default:
+ GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+ }
+
+ if (nbvg->kernel_type != nbnxnk4x4_PlainC)
+ {
+ switch (unrollj)
+ {
+ case 2:
+ reduceGroupEnergySimdBuffers<2>(nbat->nenergrp,
+ nbat->neg_2log,
+ out->VSvdw, out->VSc,
+ out->Vvdw, out->Vc);
+ break;
+ case 4:
+ reduceGroupEnergySimdBuffers<4>(nbat->nenergrp,
+ nbat->neg_2log,
+ out->VSvdw, out->VSc,
+ out->Vvdw, out->Vc);
+ break;
+ case 8:
+ reduceGroupEnergySimdBuffers<8>(nbat->nenergrp,
+ nbat->neg_2log,
+ out->VSvdw, out->VSc,
+ out->Vvdw, out->Vc);
+ break;
+ default:
+ GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
+ }
+ }
+ }
+ }
+
+ if (forceFlags & GMX_FORCE_ENERGY)
+ {
+ reduce_energies_over_lists(nbat, nnbl, vVdw, vCoulomb);
+ }
+}
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ *
+ * \brief
+ * Declares the nbnxn pair interaction kernel dispatcher.
+ *
+ * \author Berk Hess <hess@kth.se>
+ */
+
+#ifndef _nbnxn_kernel_cpu_h
+#define _nbnxn_kernel_cpu_h
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/real.h"
+
+struct interaction_const_t;
+struct nonbonded_verlet_group_t;
+
+/*! \brief Dispatches the non-bonded N versus M atom cluster CPU kernels.
+ *
+ * OpenMP parallelization is performed within this function.
+ * Energy reduction, but not force and shift force reduction, is performed
+ * within this function.
+ *
+ * \param[in,out] nbvg The group (local/non-local) to compute interaction for
+ * \param[in] ic Non-bonded interaction constants
+ * \param[in] shiftVectors The PBC shift vectors
+ * \param[in] forceFlags Flags that tell what to compute
+ * \param[in] clearF Enum that tells if to clear the force output buffer
+ * \param[out] fshift Shift force output buffer
+ * \param[out] vCoulomb Output buffer for Coulomb energies
+ * \param[out] vVdw Output buffer for Van der Waals energies
+ */
+void
+nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg,
+ const interaction_const_t *ic,
+ rvec *shiftVectors,
+ int forceFlags,
+ int clearF,
+ real *fshift,
+ real *vCoulomb,
+ real *vVdw);
+
+#endif
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
+# Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
# are:
#
# A single header file that declares all the kernel functions for
-# this nbnxn kernel structure type, including the function that does
-# the dispatch via the function pointer table.
-#
-# A single C kernel dispatcher file that defines the function that
-# decides at run time which kernel to call.
+# this nbnxn kernel structure type and a function pointer table.
#
# Many C kernel files, each defining a single kernel function. These
# functions can take a noticeable time to compile, and should tend
},
}
-KernelDispatcherTemplate = read_kernel_template("nbnxn_kernel_simd_template.cpp.pre")
KernelsHeaderTemplate = read_kernel_template("nbnxn_kernel_simd_template.h.pre")
-# For each Verlet kernel type, write three kinds of files:
-# a header file defining the functions for all the kernels,
-# a code file containing the kernel function lookup table and
-# the kernel dispatcher function
+# For each Verlet kernel type, write two kinds of files:
+# a header file defining the functions for all the kernels and
+# the kernel function lookup table
# for each kernel, a file defining the single C function for that kernel
for type in VerletKernelTypeDict:
DirName = "../simd_{0}".format(type)
with open('{0}/{1}'.format(DirName,KernelsHeaderFileName),'w') as fp:
fp.write(FileHeader.format(type))
fp.write(KernelsHeaderTemplate
- .format(KernelsName,
- " " * (len(KernelsName) + 1),
- KernelDeclarations))
-
- # Write the file defining the kernel dispatcher
- # function for this type
- with open('{0}/{1}'.format(DirName,"{0}.cpp".format(KernelsName,type)),'w') as fp:
- fp.write(FileHeader.format(type))
- fp.write(KernelDispatcherTemplate
- .format(VerletKernelTypeDict[type]['Define'],
- VerletKernelTypeDict[type]['WidthSetup'],
- VerletKernelTypeDict[type]['WidthCheck'],
- VerletKernelTypeDict[type]['UnrollSize'],
- KernelsHeaderFileName,
- KernelsName,
- ' ' * (len(KernelsName)+1),
+ .format(KernelDeclarations,
+ type,
KernelFunctionLookupTable['F'],
KernelFunctionLookupTable['VF'],
- KernelFunctionLookupTable['VgrpF'],
- )
+ KernelFunctionLookupTable['VgrpF'])
)
sys.exit()
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_simd.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/mdtypes/md_enums.h"
-
-#ifdef {0}
-
-{1}#include "gromacs/simd/vector_operations.h"
-
-{2}
-#define GMX_SIMD_J_UNROLL_SIZE {3}
-#include "{4}"
-
-#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
-#include "gromacs/simd/simd.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/real.h"
-
-/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
- */
-enum {{
- coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
-}};
-
-/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
- */
-enum {{
- vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
-}};
-
-/* Declare and define the kernel function pointer lookup tables.
- * The minor index of the array goes over both the LJ combination rules,
- * which is only supported by plain cut-off, and the LJ switch/PME functions.
- */
-static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
-{7}
-static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
-{8}
-static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
-{9}
-
-static void
-reduce_group_energies(int ng, int ng_2log,
- const real *VSvdw, const real *VSc,
- real *Vvdw, real *Vc)
-{{
- const int unrollj = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
- const int unrollj_half = unrollj/2;
- int ng_p2, i, j, j0, j1, c, s;
-
- ng_p2 = (1<<ng_2log);
-
- /* The size of the x86 SIMD energy group buffer array is:
- * ng*ng*ng_p2*unrollj_half*simd_width
- */
- for (i = 0; i < ng; i++)
- {{
- for (j = 0; j < ng; j++)
- {{
- Vvdw[i*ng+j] = 0;
- Vc[i*ng+j] = 0;
- }}
-
- for (j1 = 0; j1 < ng; j1++)
- {{
- for (j0 = 0; j0 < ng; j0++)
- {{
- c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
- for (s = 0; s < unrollj_half; s++)
- {{
- Vvdw[i*ng+j0] += VSvdw[c+0];
- Vvdw[i*ng+j1] += VSvdw[c+1];
- Vc [i*ng+j0] += VSc [c+0];
- Vc [i*ng+j1] += VSc [c+1];
- c += unrollj + 2;
- }}
- }}
- }}
- }}
-}}
-
-#else /* {0} */
-
-#include "gromacs/utility/fatalerror.h"
-
-#endif /* {0} */
-
-void
-{5}(nbnxn_pairlist_set_t gmx_unused *nbl_list,
-{6}const nbnxn_atomdata_t gmx_unused *nbat,
-{6}const interaction_const_t gmx_unused *ic,
-{6}int gmx_unused ewald_excl,
-{6}rvec gmx_unused *shift_vec,
-{6}int gmx_unused force_flags,
-{6}int gmx_unused clearF,
-{6}real gmx_unused *fshift,
-{6}real gmx_unused *Vc,
-{6}real gmx_unused *Vvdw)
-#ifdef {0}
-{{
- int nnbl;
- nbnxn_pairlist_t **nbl;
- int coulkt, vdwkt = 0;
- int nb, nthreads;
-
- nnbl = nbl_list->nnbl;
- nbl = nbl_list->nbl;
-
- if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
- {{
- coulkt = coulktRF;
- }}
- else
- {{
- if (ewald_excl == ewaldexclTable)
- {{
- if (ic->rcoulomb == ic->rvdw)
- {{
- coulkt = coulktTAB;
- }}
- else
- {{
- coulkt = coulktTAB_TWIN;
- }}
- }}
- else
- {{
- if (ic->rcoulomb == ic->rvdw)
- {{
- coulkt = coulktEWALD;
- }}
- else
- {{
- coulkt = coulktEWALD_TWIN;
- }}
- }}
- }}
-
- if (ic->vdwtype == evdwCUT)
- {{
- switch (ic->vdw_modifier)
- {{
- case eintmodNONE:
- case eintmodPOTSHIFT:
- switch (nbat->comb_rule)
- {{
- case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
- case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
- case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
- default: gmx_incons("Unknown combination rule");
- }}
- break;
- case eintmodFORCESWITCH:
- vdwkt = vdwktLJFORCESWITCH;
- break;
- case eintmodPOTSWITCH:
- vdwkt = vdwktLJPOTSWITCH;
- break;
- default:
- gmx_incons("Unsupported VdW interaction modifier");
- }}
- }}
- else if (ic->vdwtype == evdwPME)
- {{
- if (ic->ljpme_comb_rule == eljpmeLB)
- {{
- gmx_incons("The nbnxn SIMD kernels don't support LJ-PME with LB");
- }}
- vdwkt = vdwktLJEWALDCOMBGEOM;
- }}
- else
- {{
- gmx_incons("Unsupported VdW interaction type");
- }}
- // cppcheck-suppress unreadVariable
- nthreads = gmx_omp_nthreads_get(emntNonbonded);
-#pragma omp parallel for schedule(static) num_threads(nthreads)
- for (nb = 0; nb < nnbl; nb++)
- {{
- // Presently, the kernels do not call C++ code that can throw, so
- // no need for a try/catch pair in this OpenMP region.
- nbnxn_atomdata_output_t *out;
- real *fshift_p;
-
- out = &nbat->out[nb];
-
- if (clearF == enbvClearFYes)
- {{
- clear_f(nbat, nb, out->f);
- }}
-
- if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
- {{
- fshift_p = fshift;
- }}
- else
- {{
- fshift_p = out->fshift;
-
- if (clearF == enbvClearFYes)
- {{
- clear_fshift(fshift_p);
- }}
- }}
-
- if (!(force_flags & GMX_FORCE_ENERGY))
- {{
- /* Don't calculate energies */
- p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p);
- }}
- else if (out->nV == 1)
- {{
- /* No energy groups */
- out->Vvdw[0] = 0;
- out->Vc[0] = 0;
-
- p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->Vvdw,
- out->Vc);
- }}
- else
- {{
- /* Calculate energy group contributions */
- int i;
-
- for (i = 0; i < out->nVS; i++)
- {{
- out->VSvdw[i] = 0;
- }}
- for (i = 0; i < out->nVS; i++)
- {{
- out->VSc[i] = 0;
- }}
-
- p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->VSvdw,
- out->VSc);
-
- reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
- }}
- }}
-
- if (force_flags & GMX_FORCE_ENERGY)
- {{
- reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
- }}
-}}
-#else
-{{
- gmx_incons("{5} called when such kernels "
- " are not enabled.");
-}}
-#endif
-#undef GMX_SIMD_J_UNROLL_SIZE
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nbnxn_pairlist.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/utility/real.h"
-/*! \brief Run-time dispatcher for nbnxn kernel functions. */
-void
-{0}(nbnxn_pairlist_set_t *nbl_list,
-{1}const nbnxn_atomdata_t *nbat,
-{1}const interaction_const_t *ic,
-{1}int ewald_excl,
-{1}rvec *shift_vec,
-{1}int force_flags,
-{1}int clearF,
-{1}real *fshift,
-{1}real *Vc,
-{1}real *Vvdw);
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
-/* Need an #include guard so that sim_util.c can include all
- * such files. */
-#ifndef _nbnxn_kernel_simd_include_h
-#define _nbnxn_kernel_simd_include_h
-/*! \brief Typedefs for declaring kernel functions. */
-typedef void (nbk_func_ener)(const nbnxn_pairlist_t *nbl,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- real *f,
- real *fshift,
- real *Vvdw,
- real *Vc);
-typedef nbk_func_ener *p_nbk_func_ener;
+/* Declare all the different kernel functions.
+ */
+{0}
-typedef void (nbk_func_noener)(const nbnxn_pairlist_t *nbl,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- real *f,
- real *fshift);
-typedef nbk_func_noener *p_nbk_func_noener;
-#endif
+#ifdef INCLUDE_KERNELFUNCTION_TABLES
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
+ */
+p_nbk_func_noener nbnxn_kernel_noener_simd_{1}[coulktNR][vdwktNR] =
{2}
+p_nbk_func_ener nbnxn_kernel_ener_simd_{1}[coulktNR][vdwktNR] =
+{3}
+p_nbk_func_ener nbnxn_kernel_energrp_simd_{1}[coulktNR][vdwktNR] =
+{4}
+
+#endif /* INCLUDE_KERNELFUNCTION_TABLES */
#include "gromacs/mdlib/force.h"
#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_consts.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/utility/fatalerror.h"
+#include "nbnxn_kernel_common.h"
+
static const int c_numClPerSupercl = c_nbnxnGpuNumClusterPerSupercluster;
static const int c_clSize = c_nbnxnGpuClusterSize;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_consts.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
-/*! \brief Typedefs for declaring lookup tables of kernel functions.
- */
-
-typedef void (*p_nbk_func_noener)(const nbnxn_pairlist_t *nbl,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- real *f,
- real *fshift);
-
-typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t *nbl,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- real *f,
- real *fshift,
- real *Vvdw,
- real *Vc);
/* Analytical reaction-field kernels */
#define CALC_COUL_RF
#undef LJ_EWALD
#undef VDW_CUTOFF_CHECK
#undef CALC_COUL_TAB
-
-
-enum {
- coultRF, coultTAB, coultTAB_TWIN, coultNR
-};
-
-enum {
- vdwtCUT, vdwtFSWITCH, vdwtPSWITCH, vdwtEWALDGEOM, vdwtEWALDLB, vdwtNR
-};
-
-p_nbk_func_noener p_nbk_c_noener[coultNR][vdwtNR] =
-{
- { nbnxn_kernel_ElecRF_VdwLJ_F_ref, nbnxn_kernel_ElecRF_VdwLJFsw_F_ref, nbnxn_kernel_ElecRF_VdwLJPsw_F_ref, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_ref, nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_ref },
- { nbnxn_kernel_ElecQSTab_VdwLJ_F_ref, nbnxn_kernel_ElecQSTab_VdwLJFsw_F_ref, nbnxn_kernel_ElecQSTab_VdwLJPsw_F_ref, nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_ref, nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_F_ref },
- { nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_F_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_F_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_F_ref }
-};
-
-p_nbk_func_ener p_nbk_c_ener[coultNR][vdwtNR] =
-{
- { nbnxn_kernel_ElecRF_VdwLJ_VF_ref, nbnxn_kernel_ElecRF_VdwLJFsw_VF_ref, nbnxn_kernel_ElecRF_VdwLJPsw_VF_ref, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_ref, nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_ref },
- { nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref, nbnxn_kernel_ElecQSTab_VdwLJFsw_VF_ref, nbnxn_kernel_ElecQSTab_VdwLJPsw_VF_ref, nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_ref, nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VF_ref },
- { nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VF_ref }
-};
-
-p_nbk_func_ener p_nbk_c_energrp[coultNR][vdwtNR] =
-{
- { nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref, nbnxn_kernel_ElecRF_VdwLJFsw_VgrpF_ref, nbnxn_kernel_ElecRF_VdwLJPsw_VgrpF_ref, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_ref, nbnxn_kernel_ElecRF_VdwLJEwCombLB_VgrpF_ref },
- { nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref, nbnxn_kernel_ElecQSTab_VdwLJFsw_VgrpF_ref, nbnxn_kernel_ElecQSTab_VdwLJPsw_VgrpF_ref, nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_ref, nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VgrpF_ref },
- { nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VgrpF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VgrpF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_ref, nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VgrpF_ref }
-};
-
-void
-nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- int force_flags,
- int clearF,
- real *fshift,
- real *Vc,
- real *Vvdw)
-{
- int nnbl;
- nbnxn_pairlist_t **nbl;
- int coult;
- int vdwt;
- int nb;
- int nthreads gmx_unused;
-
- nnbl = nbl_list->nnbl;
- nbl = nbl_list->nbl;
-
- if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
- {
- coult = coultRF;
- }
- else
- {
- if (ic->rcoulomb == ic->rvdw)
- {
- coult = coultTAB;
- }
- else
- {
- coult = coultTAB_TWIN;
- }
- }
-
- if (ic->vdwtype == evdwCUT)
- {
- switch (ic->vdw_modifier)
- {
- case eintmodPOTSHIFT:
- case eintmodNONE:
- vdwt = vdwtCUT;
- break;
- case eintmodFORCESWITCH:
- vdwt = vdwtFSWITCH;
- break;
- case eintmodPOTSWITCH:
- vdwt = vdwtPSWITCH;
- break;
- default:
- gmx_incons("Unsupported VdW modifier");
- break;
- }
- }
- else if (ic->vdwtype == evdwPME)
- {
- if (ic->ljpme_comb_rule == ljcrGEOM)
- {
- assert(nbat->comb_rule == ljcrGEOM);
- vdwt = vdwtEWALDGEOM;
- }
- else
- {
- assert(nbat->comb_rule == ljcrLB);
- vdwt = vdwtEWALDLB;
- }
- }
- else
- {
- gmx_incons("Unsupported vdwtype in nbnxn reference kernel");
- }
-
- // cppcheck-suppress unreadVariable
- nthreads = gmx_omp_nthreads_get(emntNonbonded);
-#pragma omp parallel for schedule(static) num_threads(nthreads)
- for (nb = 0; nb < nnbl; nb++)
- {
- // Presently, the kernels do not call C++ code that can throw, so
- // no need for a try/catch pair in this OpenMP region.
- nbnxn_atomdata_output_t *out;
- real *fshift_p;
-
- out = &nbat->out[nb];
-
- if (clearF == enbvClearFYes)
- {
- clear_f(nbat, nb, out->f);
- }
-
- if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
- {
- fshift_p = fshift;
- }
- else
- {
- fshift_p = out->fshift;
-
- if (clearF == enbvClearFYes)
- {
- clear_fshift(fshift_p);
- }
- }
-
- if (!(force_flags & GMX_FORCE_ENERGY))
- {
- /* Don't calculate energies */
- p_nbk_c_noener[coult][vdwt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p);
- }
- else if (out->nV == 1)
- {
- /* No energy groups */
- out->Vvdw[0] = 0;
- out->Vc[0] = 0;
-
- p_nbk_c_ener[coult][vdwt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->Vvdw,
- out->Vc);
- }
- else
- {
- /* Calculate energy group contributions */
- int i;
-
- for (i = 0; i < out->nV; i++)
- {
- out->Vvdw[i] = 0;
- }
- for (i = 0; i < out->nV; i++)
- {
- out->Vc[i] = 0;
- }
-
- p_nbk_c_energrp[coult][vdwt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->Vvdw,
- out->Vc);
- }
- }
-
- if (force_flags & GMX_FORCE_ENERGY)
- {
- reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
- }
-}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifndef _nbnxn_kernel_ref_h
-#define _nbnxn_kernel_ref_h
+#include "nbnxn_kernel_common.h"
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nbnxn_pairlist.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/utility/real.h"
+/* Declare all the different kernel functions.
+ */
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJ_F_ref;
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJFsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJPsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_ref;
+nbk_func_noener nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJ_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJFsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJPsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_ref;
+nbk_func_noener nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_F_ref;
+
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJ_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJFsw_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJPsw_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJFsw_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJPsw_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VF_ref;
+
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJFsw_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJPsw_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecRF_VdwLJEwCombLB_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJFsw_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJPsw_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_ref;
+nbk_func_ener nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VgrpF_ref;
-#ifdef __cplusplus
-extern "C" {
-#endif
+#ifdef INCLUDE_KERNELFUNCTION_TABLES
+
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
+ * For the C reference kernels, unlike the SIMD kernels, there is not much
+ * advantage in using combination rules, so we (re-)use the same kernel.
+ */
+p_nbk_func_noener nbnxn_kernel_noener_ref[coulktNR][vdwktNR_ref] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJ_F_ref,
+ nbnxn_kernel_ElecRF_VdwLJ_F_ref,
+ nbnxn_kernel_ElecRF_VdwLJ_F_ref,
+ nbnxn_kernel_ElecRF_VdwLJFsw_F_ref,
+ nbnxn_kernel_ElecRF_VdwLJPsw_F_ref,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_ref,
+ nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJFsw_F_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJPsw_F_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_F_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_F_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_F_ref
+ }
+};
-/* Wrapper call for the non-bonded n vs n reference kernels */
-void
-nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- int force_flags,
- int clearF,
- real *fshift,
- real *Vc,
- real *Vvdw);
+p_nbk_func_ener nbnxn_kernel_ener_ref[coulktNR][vdwktNR_ref] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecRF_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecRF_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecRF_VdwLJFsw_VF_ref,
+ nbnxn_kernel_ElecRF_VdwLJPsw_VF_ref,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_ref,
+ nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJFsw_VF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJPsw_VF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VF_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VF_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VF_ref
+ }
+};
-#ifdef __cplusplus
-}
-#endif
+p_nbk_func_ener nbnxn_kernel_energrp_ref[coulktNR][vdwktNR_ref] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecRF_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecRF_VdwLJFsw_VgrpF_ref,
+ nbnxn_kernel_ElecRF_VdwLJPsw_VgrpF_ref,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_ref,
+ nbnxn_kernel_ElecRF_VdwLJEwCombLB_VgrpF_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJFsw_VgrpF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJPsw_VgrpF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_ref,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombLB_VgrpF_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VgrpF_ref
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFsw_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPsw_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_ref,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombLB_VgrpF_ref
+ }
+};
-#endif
+#endif /* INCLUDE_KERNELFUNCTION_TABLES */
#error "No VdW type defined"
#endif
-static void
+void
#ifndef CALC_ENERGIES
NBK_FUNC_NAME(_F)
#else
const nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
rvec *shift_vec,
- real *f
-#ifdef CALC_SHIFTFORCES
- ,
- real *fshift
-#endif
+ real *f,
+ real gmx_unused *fshift
#ifdef CALC_ENERGIES
,
real *Vvdw,
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the Verlet kernel generator for
- * kernel type 2xnn.
- */
-
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_simd.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/mdtypes/md_enums.h"
-
-#ifdef GMX_NBNXN_SIMD_2XNN
-
-/* Include the full-width SIMD macros */
-#include "gromacs/simd/vector_operations.h"
-
-#if !(GMX_SIMD_REAL_WIDTH == 8 || GMX_SIMD_REAL_WIDTH == 16)
-#error "unsupported SIMD width"
-#endif
-
-#define GMX_SIMD_J_UNROLL_SIZE 2
-#include "nbnxn_kernel_simd_2xnn.h"
-
-#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
-#include "gromacs/simd/simd.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/real.h"
-
-/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
- */
-enum {
- coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
-};
-
-/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
- */
-enum {
- vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
-};
-
-/* Declare and define the kernel function pointer lookup tables.
- * The minor index of the array goes over both the LJ combination rules,
- * which is only supported by plain cut-off, and the LJ switch/PME functions.
- */
-static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
-{
- {
- nbnxn_kernel_ElecRF_VdwLJCombGeom_F_2xnn,
- nbnxn_kernel_ElecRF_VdwLJCombLB_F_2xnn,
- nbnxn_kernel_ElecRF_VdwLJ_F_2xnn,
- nbnxn_kernel_ElecRF_VdwLJFSw_F_2xnn,
- nbnxn_kernel_ElecRF_VdwLJPSw_F_2xnn,
- nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_2xnn,
- },
- {
- nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJ_F_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJFSw_F_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJPSw_F_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_2xnn,
- },
- {
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_2xnn,
- },
- {
- nbnxn_kernel_ElecEw_VdwLJCombGeom_F_2xnn,
- nbnxn_kernel_ElecEw_VdwLJCombLB_F_2xnn,
- nbnxn_kernel_ElecEw_VdwLJ_F_2xnn,
- nbnxn_kernel_ElecEw_VdwLJFSw_F_2xnn,
- nbnxn_kernel_ElecEw_VdwLJPSw_F_2xnn,
- nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_2xnn,
- },
- {
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_2xnn,
- },
-};
-
-static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
-{
- {
- nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJCombLB_VF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJ_VF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJFSw_VF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJPSw_VF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_2xnn,
- },
- {
- nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJ_VF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_2xnn,
- },
- {
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_2xnn,
- },
- {
- nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJCombLB_VF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJ_VF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJFSw_VF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJPSw_VF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_2xnn,
- },
- {
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_2xnn,
- },
-};
-
-static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
-{
- {
- nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_2xnn,
- nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_2xnn,
- },
- {
- nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_2xnn,
- },
- {
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_2xnn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
- },
- {
- nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJ_VgrpF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_2xnn,
- nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_2xnn,
- },
- {
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_2xnn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
- },
-};
-
-
-static void
-reduce_group_energies(int ng, int ng_2log,
- const real *VSvdw, const real *VSc,
- real *Vvdw, real *Vc)
-{
- const int unrollj = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
- const int unrollj_half = unrollj/2;
- int ng_p2, i, j, j0, j1, c, s;
-
- ng_p2 = (1<<ng_2log);
-
- /* The size of the x86 SIMD energy group buffer array is:
- * ng*ng*ng_p2*unrollj_half*simd_width
- */
- for (i = 0; i < ng; i++)
- {
- for (j = 0; j < ng; j++)
- {
- Vvdw[i*ng+j] = 0;
- Vc[i*ng+j] = 0;
- }
-
- for (j1 = 0; j1 < ng; j1++)
- {
- for (j0 = 0; j0 < ng; j0++)
- {
- c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
- for (s = 0; s < unrollj_half; s++)
- {
- Vvdw[i*ng+j0] += VSvdw[c+0];
- Vvdw[i*ng+j1] += VSvdw[c+1];
- Vc [i*ng+j0] += VSc [c+0];
- Vc [i*ng+j1] += VSc [c+1];
- c += unrollj + 2;
- }
- }
- }
- }
-}
-
-#else /* GMX_NBNXN_SIMD_2XNN */
-
-#include "gromacs/utility/fatalerror.h"
-
-#endif /* GMX_NBNXN_SIMD_2XNN */
-
-void
-nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t gmx_unused *nbl_list,
- const nbnxn_atomdata_t gmx_unused *nbat,
- const interaction_const_t gmx_unused *ic,
- int gmx_unused ewald_excl,
- rvec gmx_unused *shift_vec,
- int gmx_unused force_flags,
- int gmx_unused clearF,
- real gmx_unused *fshift,
- real gmx_unused *Vc,
- real gmx_unused *Vvdw)
-#ifdef GMX_NBNXN_SIMD_2XNN
-{
- int nnbl;
- nbnxn_pairlist_t **nbl;
- int coulkt, vdwkt = 0;
- int nb, nthreads gmx_unused;
-
- nnbl = nbl_list->nnbl;
- nbl = nbl_list->nbl;
-
- if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
- {
- coulkt = coulktRF;
- }
- else
- {
- if (ewald_excl == ewaldexclTable)
- {
- if (ic->rcoulomb == ic->rvdw)
- {
- coulkt = coulktTAB;
- }
- else
- {
- coulkt = coulktTAB_TWIN;
- }
- }
- else
- {
- if (ic->rcoulomb == ic->rvdw)
- {
- coulkt = coulktEWALD;
- }
- else
- {
- coulkt = coulktEWALD_TWIN;
- }
- }
- }
-
- if (ic->vdwtype == evdwCUT)
- {
- switch (ic->vdw_modifier)
- {
- case eintmodNONE:
- case eintmodPOTSHIFT:
- switch (nbat->comb_rule)
- {
- case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
- case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
- case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
- default: gmx_incons("Unknown combination rule");
- }
- break;
- case eintmodFORCESWITCH:
- vdwkt = vdwktLJFORCESWITCH;
- break;
- case eintmodPOTSWITCH:
- vdwkt = vdwktLJPOTSWITCH;
- break;
- default:
- gmx_incons("Unsupported VdW interaction modifier");
- }
- }
- else if (ic->vdwtype == evdwPME)
- {
- if (ic->ljpme_comb_rule == eljpmeLB)
- {
- gmx_incons("The nbnxn SIMD kernels don't support LJ-PME with LB");
- }
- vdwkt = vdwktLJEWALDCOMBGEOM;
- }
- else
- {
- gmx_incons("Unsupported VdW interaction type");
- }
- // cppcheck-suppress unreadVariable
- nthreads = gmx_omp_nthreads_get(emntNonbonded);
-#pragma omp parallel for schedule(static) num_threads(nthreads)
- for (nb = 0; nb < nnbl; nb++)
- {
- // Presently, the kernels do not call C++ code that can throw, so
- // no need for a try/catch pair in this OpenMP region.
- nbnxn_atomdata_output_t *out;
- real *fshift_p;
-
- out = &nbat->out[nb];
-
- if (clearF == enbvClearFYes)
- {
- clear_f(nbat, nb, out->f);
- }
-
- if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
- {
- fshift_p = fshift;
- }
- else
- {
- fshift_p = out->fshift;
-
- if (clearF == enbvClearFYes)
- {
- clear_fshift(fshift_p);
- }
- }
-
- if (!(force_flags & GMX_FORCE_ENERGY))
- {
- /* Don't calculate energies */
- p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p);
- }
- else if (out->nV == 1)
- {
- /* No energy groups */
- out->Vvdw[0] = 0;
- out->Vc[0] = 0;
-
- p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->Vvdw,
- out->Vc);
- }
- else
- {
- /* Calculate energy group contributions */
- int i;
-
- for (i = 0; i < out->nVS; i++)
- {
- out->VSvdw[i] = 0;
- }
- for (i = 0; i < out->nVS; i++)
- {
- out->VSc[i] = 0;
- }
-
- p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->VSvdw,
- out->VSc);
-
- reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
- }
- }
-
- if (force_flags & GMX_FORCE_ENERGY)
- {
- reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
- }
-}
-#else
-{
- gmx_incons("nbnxn_kernel_simd_2xnn called when such kernels "
- " are not enabled.");
-}
-#endif
-#undef GMX_SIMD_J_UNROLL_SIZE
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* kernel type 2xnn.
*/
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nbnxn_pairlist.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/utility/real.h"
-/*! \brief Run-time dispatcher for nbnxn kernel functions. */
-void
-nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t *nbl_list,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- int ewald_excl,
- rvec *shift_vec,
- int force_flags,
- int clearF,
- real *fshift,
- real *Vc,
- real *Vvdw);
-
-/* Need an #include guard so that sim_util.c can include all
- * such files. */
-#ifndef _nbnxn_kernel_simd_include_h
-#define _nbnxn_kernel_simd_include_h
-/*! \brief Typedefs for declaring kernel functions. */
-typedef void (nbk_func_ener)(const nbnxn_pairlist_t *nbl,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- real *f,
- real *fshift,
- real *Vvdw,
- real *Vc);
-typedef nbk_func_ener *p_nbk_func_ener;
-
-typedef void (nbk_func_noener)(const nbnxn_pairlist_t *nbl,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- real *f,
- real *fshift);
-typedef nbk_func_noener *p_nbk_func_noener;
-#endif
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+/* Declare all the different kernel functions.
+ */
nbk_func_ener nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn;
nbk_func_ener nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn;
nbk_func_ener nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn;
nbk_func_noener nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn;
nbk_func_noener nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn;
nbk_func_noener nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_2xnn;
+
+
+
+#ifdef INCLUDE_KERNELFUNCTION_TABLES
+
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
+ */
+p_nbk_func_noener nbnxn_kernel_noener_simd_2xnn[coulktNR][vdwktNR] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJCombGeom_F_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJCombLB_F_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJ_F_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJFSw_F_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJPSw_F_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJ_F_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJFSw_F_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJPSw_F_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecEw_VdwLJCombGeom_F_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJCombLB_F_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJ_F_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJFSw_F_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJPSw_F_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_2xnn,
+ },
+};
+
+p_nbk_func_ener nbnxn_kernel_ener_simd_2xnn[coulktNR][vdwktNR] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJCombLB_VF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJ_VF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJFSw_VF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJPSw_VF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJ_VF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJCombLB_VF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJ_VF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJFSw_VF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJPSw_VF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_2xnn,
+ },
+};
+
+p_nbk_func_ener nbnxn_kernel_energrp_simd_2xnn[coulktNR][vdwktNR] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJ_VgrpF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_2xnn,
+ },
+ {
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_2xnn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
+ },
+};
+
+
+#endif /* INCLUDE_KERNELFUNCTION_TABLES */
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the Verlet kernel generator for
- * kernel type 4xn.
- */
-
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_simd.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/mdtypes/md_enums.h"
-
-#ifdef GMX_NBNXN_SIMD_4XN
-
-#include "gromacs/simd/vector_operations.h"
-
-#if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
-#error "unsupported SIMD width"
-#endif
-
-#define GMX_SIMD_J_UNROLL_SIZE 1
-#include "nbnxn_kernel_simd_4xn.h"
-
-#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
-#include "gromacs/simd/simd.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/real.h"
-
-/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
- */
-enum {
- coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
-};
-
-/*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
- */
-enum {
- vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
-};
-
-/* Declare and define the kernel function pointer lookup tables.
- * The minor index of the array goes over both the LJ combination rules,
- * which is only supported by plain cut-off, and the LJ switch/PME functions.
- */
-static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
-{
- {
- nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
- nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn,
- nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
- nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
- nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
- nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
- },
- {
- nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
- },
- {
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
- },
- {
- nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
- nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn,
- nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
- nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
- nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
- nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
- },
- {
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
- },
-};
-
-static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
-{
- {
- nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
- nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn,
- nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
- nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
- nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
- nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
- },
- {
- nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
- },
- {
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
- },
- {
- nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
- nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn,
- nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
- nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
- nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
- nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
- },
- {
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
- },
-};
-
-static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
-{
- {
- nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
- nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn,
- nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
- nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
- nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
- nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
- },
- {
- nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
- nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
- },
- {
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
- nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
- },
- {
- nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
- nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn,
- nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
- nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
- nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
- nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
- },
- {
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
- nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
- },
-};
-
-
-static void
-reduce_group_energies(int ng, int ng_2log,
- const real *VSvdw, const real *VSc,
- real *Vvdw, real *Vc)
-{
- const int unrollj = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
- const int unrollj_half = unrollj/2;
- int ng_p2, i, j, j0, j1, c, s;
-
- ng_p2 = (1<<ng_2log);
-
- /* The size of the x86 SIMD energy group buffer array is:
- * ng*ng*ng_p2*unrollj_half*simd_width
- */
- for (i = 0; i < ng; i++)
- {
- for (j = 0; j < ng; j++)
- {
- Vvdw[i*ng+j] = 0;
- Vc[i*ng+j] = 0;
- }
-
- for (j1 = 0; j1 < ng; j1++)
- {
- for (j0 = 0; j0 < ng; j0++)
- {
- c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
- for (s = 0; s < unrollj_half; s++)
- {
- Vvdw[i*ng+j0] += VSvdw[c+0];
- Vvdw[i*ng+j1] += VSvdw[c+1];
- Vc [i*ng+j0] += VSc [c+0];
- Vc [i*ng+j1] += VSc [c+1];
- c += unrollj + 2;
- }
- }
- }
- }
-}
-
-#else /* GMX_NBNXN_SIMD_4XN */
-
-#include "gromacs/utility/fatalerror.h"
-
-#endif /* GMX_NBNXN_SIMD_4XN */
-
-void
-nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t gmx_unused *nbl_list,
- const nbnxn_atomdata_t gmx_unused *nbat,
- const interaction_const_t gmx_unused *ic,
- int gmx_unused ewald_excl,
- rvec gmx_unused *shift_vec,
- int gmx_unused force_flags,
- int gmx_unused clearF,
- real gmx_unused *fshift,
- real gmx_unused *Vc,
- real gmx_unused *Vvdw)
-#ifdef GMX_NBNXN_SIMD_4XN
-{
- int nnbl;
- nbnxn_pairlist_t **nbl;
- int coulkt, vdwkt = 0;
- int nb, nthreads gmx_unused;
-
- nnbl = nbl_list->nnbl;
- nbl = nbl_list->nbl;
-
- if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
- {
- coulkt = coulktRF;
- }
- else
- {
- if (ewald_excl == ewaldexclTable)
- {
- if (ic->rcoulomb == ic->rvdw)
- {
- coulkt = coulktTAB;
- }
- else
- {
- coulkt = coulktTAB_TWIN;
- }
- }
- else
- {
- if (ic->rcoulomb == ic->rvdw)
- {
- coulkt = coulktEWALD;
- }
- else
- {
- coulkt = coulktEWALD_TWIN;
- }
- }
- }
-
- if (ic->vdwtype == evdwCUT)
- {
- switch (ic->vdw_modifier)
- {
- case eintmodNONE:
- case eintmodPOTSHIFT:
- switch (nbat->comb_rule)
- {
- case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
- case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
- case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
- default: gmx_incons("Unknown combination rule");
- }
- break;
- case eintmodFORCESWITCH:
- vdwkt = vdwktLJFORCESWITCH;
- break;
- case eintmodPOTSWITCH:
- vdwkt = vdwktLJPOTSWITCH;
- break;
- default:
- gmx_incons("Unsupported VdW interaction modifier");
- }
- }
- else if (ic->vdwtype == evdwPME)
- {
- if (ic->ljpme_comb_rule == eljpmeLB)
- {
- gmx_incons("The nbnxn SIMD kernels don't support LJ-PME with LB");
- }
- vdwkt = vdwktLJEWALDCOMBGEOM;
- }
- else
- {
- gmx_incons("Unsupported VdW interaction type");
- }
- // cppcheck-suppress unreadVariable
- nthreads = gmx_omp_nthreads_get(emntNonbonded);
-#pragma omp parallel for schedule(static) num_threads(nthreads)
- for (nb = 0; nb < nnbl; nb++)
- {
- // Presently, the kernels do not call C++ code that can throw, so
- // no need for a try/catch pair in this OpenMP region.
- nbnxn_atomdata_output_t *out;
- real *fshift_p;
-
- out = &nbat->out[nb];
-
- if (clearF == enbvClearFYes)
- {
- clear_f(nbat, nb, out->f);
- }
-
- if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
- {
- fshift_p = fshift;
- }
- else
- {
- fshift_p = out->fshift;
-
- if (clearF == enbvClearFYes)
- {
- clear_fshift(fshift_p);
- }
- }
-
- if (!(force_flags & GMX_FORCE_ENERGY))
- {
- /* Don't calculate energies */
- p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p);
- }
- else if (out->nV == 1)
- {
- /* No energy groups */
- out->Vvdw[0] = 0;
- out->Vc[0] = 0;
-
- p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->Vvdw,
- out->Vc);
- }
- else
- {
- /* Calculate energy group contributions */
- int i;
-
- for (i = 0; i < out->nVS; i++)
- {
- out->VSvdw[i] = 0;
- }
- for (i = 0; i < out->nVS; i++)
- {
- out->VSc[i] = 0;
- }
-
- p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
- ic,
- shift_vec,
- out->f,
- fshift_p,
- out->VSvdw,
- out->VSc);
-
- reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
- }
- }
-
- if (force_flags & GMX_FORCE_ENERGY)
- {
- reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
- }
-}
-#else
-{
- gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
- " are not enabled.");
-}
-#endif
-#undef GMX_SIMD_J_UNROLL_SIZE
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* kernel type 4xn.
*/
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/nbnxn_pairlist.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/utility/real.h"
-/*! \brief Run-time dispatcher for nbnxn kernel functions. */
-void
-nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t *nbl_list,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- int ewald_excl,
- rvec *shift_vec,
- int force_flags,
- int clearF,
- real *fshift,
- real *Vc,
- real *Vvdw);
-
-/* Need an #include guard so that sim_util.c can include all
- * such files. */
-#ifndef _nbnxn_kernel_simd_include_h
-#define _nbnxn_kernel_simd_include_h
-/*! \brief Typedefs for declaring kernel functions. */
-typedef void (nbk_func_ener)(const nbnxn_pairlist_t *nbl,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- real *f,
- real *fshift,
- real *Vvdw,
- real *Vc);
-typedef nbk_func_ener *p_nbk_func_ener;
-
-typedef void (nbk_func_noener)(const nbnxn_pairlist_t *nbl,
- const nbnxn_atomdata_t *nbat,
- const interaction_const_t *ic,
- rvec *shift_vec,
- real *f,
- real *fshift);
-typedef nbk_func_noener *p_nbk_func_noener;
-#endif
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+/* Declare all the different kernel functions.
+ */
nbk_func_ener nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn;
nbk_func_ener nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn;
nbk_func_ener nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn;
nbk_func_noener nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn;
nbk_func_noener nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn;
nbk_func_noener nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn;
+
+
+
+#ifdef INCLUDE_KERNELFUNCTION_TABLES
+
+/* Declare and define the kernel function pointer lookup tables.
+ * The minor index of the array goes over both the LJ combination rules,
+ * which is only supported by plain cut-off, and the LJ switch/PME functions.
+ */
+p_nbk_func_noener nbnxn_kernel_noener_simd_4xn[coulktNR][vdwktNR] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
+ nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn,
+ nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
+ nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
+ nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
+ },
+ {
+ nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
+ nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn,
+ nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
+ nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
+ nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
+ },
+ {
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
+ },
+};
+
+p_nbk_func_ener nbnxn_kernel_ener_simd_4xn[coulktNR][vdwktNR] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
+ },
+ {
+ nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
+ },
+ {
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
+ },
+};
+
+p_nbk_func_ener nbnxn_kernel_energrp_simd_4xn[coulktNR][vdwktNR] =
+{
+ {
+ nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
+ },
+ {
+ nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
+ },
+ {
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
+ },
+ {
+ nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
+ },
+ {
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
+ nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
+ },
+};
+
+
+#endif /* INCLUDE_KERNELFUNCTION_TABLES */
#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdlib/update.h"
#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
-#include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
-#include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/iforceprovider.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/utility/sysinfo.h"
#include "nbnxn_gpu.h"
+#include "nbnxn_kernels/nbnxn_kernel_cpu.h"
void print_time(FILE *out,
gmx_walltime_accounting_t walltime_accounting,
switch (nbvg->kernel_type)
{
case nbnxnk4x4_PlainC:
- nbnxn_kernel_ref(&nbvg->nbl_lists,
- nbvg->nbat, ic,
+ case nbnxnk4xN_SIMD_4xN:
+ case nbnxnk4xN_SIMD_2xNN:
+ nbnxn_kernel_cpu(nbvg,
+ ic,
fr->shift_vec,
flags,
clearF,
enerd->grpp.ener[egLJSR]);
break;
- case nbnxnk4xN_SIMD_4xN:
- nbnxn_kernel_simd_4xn(&nbvg->nbl_lists,
- nbvg->nbat, ic,
- nbvg->ewald_excl,
- fr->shift_vec,
- flags,
- clearF,
- fr->fshift[0],
- enerd->grpp.ener[egCOULSR],
- fr->bBHAM ?
- enerd->grpp.ener[egBHAMSR] :
- enerd->grpp.ener[egLJSR]);
- break;
- case nbnxnk4xN_SIMD_2xNN:
- nbnxn_kernel_simd_2xnn(&nbvg->nbl_lists,
- nbvg->nbat, ic,
- nbvg->ewald_excl,
- fr->shift_vec,
- flags,
- clearF,
- fr->fshift[0],
- enerd->grpp.ener[egCOULSR],
- fr->bBHAM ?
- enerd->grpp.ener[egBHAMSR] :
- enerd->grpp.ener[egLJSR]);
- break;
-
case nbnxnk8x8x8_GPU:
nbnxn_gpu_launch_kernel(fr->nbv->gpu_nbv, nbvg->nbat, flags, ilocality);
break;