#define INCLUDE_KERNELFUNCTION_TABLES
#include "kernels_reference/kernel_ref.h"
#ifdef GMX_NBNXN_SIMD_2XNN
-#include "kernels_simd_2xmm/kernels.h"
+# include "kernels_simd_2xmm/kernels.h"
#endif
#ifdef GMX_NBNXN_SIMD_4XN
-#include "kernels_simd_4xm/kernels.h"
+# include "kernels_simd_4xm/kernels.h"
#endif
#undef INCLUDE_FUNCTION_TABLES
*
* \param[in,out] out nbnxn kernel output struct
*/
-static void clearGroupEnergies(nbnxn_atomdata_output_t *out)
+static void clearGroupEnergies(nbnxn_atomdata_output_t* out)
{
std::fill(out->Vvdw.begin(), out->Vvdw.end(), 0.0_real);
std::fill(out->Vc.begin(), out->Vc.end(), 0.0_real);
* \param[in] numGroups_2log Log2 of numGroups, rounded up
* \param[in,out] out Struct with energy buffers
*/
-template <int unrollj> static void
-reduceGroupEnergySimdBuffers(int numGroups,
- int numGroups_2log,
- nbnxn_atomdata_output_t *out)
+template<int unrollj>
+static void reduceGroupEnergySimdBuffers(int numGroups, int numGroups_2log, nbnxn_atomdata_output_t* out)
{
- const int unrollj_half = unrollj/2;
+ const int unrollj_half = unrollj / 2;
/* Energies are stored in SIMD registers with size 2^numGroups_2log */
- const int numGroupsStorage = (1 << numGroups_2log);
+ const int numGroupsStorage = (1 << numGroups_2log);
- const real * gmx_restrict vVdwSimd = out->VSvdw.data();
- const real * gmx_restrict vCoulombSimd = out->VSc.data();
- real * gmx_restrict vVdw = out->Vvdw.data();
- real * gmx_restrict vCoulomb = out->Vc.data();
+ const real* gmx_restrict vVdwSimd = out->VSvdw.data();
+ const real* gmx_restrict vCoulombSimd = out->VSc.data();
+ real* gmx_restrict vVdw = out->Vvdw.data();
+ real* gmx_restrict vCoulomb = out->Vc.data();
/* The size of the SIMD energy group buffer array is:
* numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
{
for (int j0 = 0; j0 < numGroups; j0++)
{
- int c = ((i*numGroups + j1)*numGroupsStorage + j0)*unrollj_half*unrollj;
+ int c = ((i * numGroups + j1) * numGroupsStorage + j0) * unrollj_half * unrollj;
for (int s = 0; s < unrollj_half; s++)
{
- vVdw [i*numGroups + j0] += vVdwSimd [c + 0];
- vVdw [i*numGroups + j1] += vVdwSimd [c + 1];
- vCoulomb[i*numGroups + j0] += vCoulombSimd[c + 0];
- vCoulomb[i*numGroups + j1] += vCoulombSimd[c + 1];
- c += unrollj + 2;
+ vVdw[i * numGroups + j0] += vVdwSimd[c + 0];
+ vVdw[i * numGroups + j1] += vVdwSimd[c + 1];
+ vCoulomb[i * numGroups + j0] += vCoulombSimd[c + 0];
+ vCoulomb[i * numGroups + j1] += vCoulombSimd[c + 1];
+ c += unrollj + 2;
}
}
}
* \param[out] vVdw Output buffer for Van der Waals energies
* \param[in] wcycle Pointer to cycle counting data structure.
*/
-static void
-nbnxn_kernel_cpu(const PairlistSet &pairlistSet,
- const Nbnxm::KernelSetup &kernelSetup,
- nbnxn_atomdata_t *nbat,
- const interaction_const_t &ic,
- rvec *shiftVectors,
- const gmx::StepWorkload &stepWork,
- int clearF,
- real *vCoulomb,
- real *vVdw,
- gmx_wallcycle *wcycle)
+static void nbnxn_kernel_cpu(const PairlistSet& pairlistSet,
+ const Nbnxm::KernelSetup& kernelSetup,
+ nbnxn_atomdata_t* nbat,
+ const interaction_const_t& ic,
+ rvec* shiftVectors,
+ const gmx::StepWorkload& stepWork,
+ int clearF,
+ real* vCoulomb,
+ real* vVdw,
+ gmx_wallcycle* wcycle)
{
- int coulkt;
+ int coulkt;
if (EEL_RF(ic.eeltype) || ic.eeltype == eelCUT)
{
coulkt = coulktRF;
}
}
- const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+ const nbnxn_atomdata_t::Params& nbatParams = nbat->params();
int vdwkt = 0;
if (ic.vdwtype == evdwCUT)
switch (nbatParams.comb_rule)
{
case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
- case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
+ case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
- default:
- GMX_RELEASE_ASSERT(false, "Unknown combination rule");
+ default: GMX_RELEASE_ASSERT(false, "Unknown combination rule");
}
break;
- case eintmodFORCESWITCH:
- vdwkt = vdwktLJFORCESWITCH;
- break;
- case eintmodPOTSWITCH:
- vdwkt = vdwktLJPOTSWITCH;
- break;
- default:
- GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction modifier");
+ case eintmodFORCESWITCH: vdwkt = vdwktLJFORCESWITCH; break;
+ case eintmodPOTSWITCH: vdwkt = vdwktLJPOTSWITCH; break;
+ default: GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction modifier");
}
}
else if (ic.vdwtype == evdwPME)
{
vdwkt = vdwktLJEWALDCOMBLB;
/* At setup we (should have) selected the C reference kernel */
- GMX_RELEASE_ASSERT(kernelSetup.kernelType == Nbnxm::KernelType::Cpu4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
+ GMX_RELEASE_ASSERT(kernelSetup.kernelType == Nbnxm::KernelType::Cpu4x4_PlainC,
+ "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB "
+ "combination rules");
}
}
else
gmx::ArrayRef<const NbnxnPairlistCpu> pairlists = pairlistSet.cpuLists();
- int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
+ int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
wallcycle_sub_start(wcycle, ewcsNONBONDED_CLEAR);
#pragma omp parallel for schedule(static) num_threads(nthreads)
for (gmx::index nb = 0; nb < pairlists.ssize(); nb++)
{
// Presently, the kernels do not call C++ code that can throw,
// so no need for a try/catch pair in this OpenMP region.
- nbnxn_atomdata_output_t *out = &nbat->out[nb];
+ nbnxn_atomdata_output_t* out = &nbat->out[nb];
if (clearF == enbvClearFYes)
{
}
// TODO: Change to reference
- const NbnxnPairlistCpu *pairlist = &pairlists[nb];
+ const NbnxnPairlistCpu* pairlist = &pairlists[nb];
if (!stepWork.computeEnergy)
{
switch (kernelSetup.kernelType)
{
case Nbnxm::KernelType::Cpu4x4_PlainC:
- nbnxn_kernel_noener_ref[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ nbnxn_kernel_noener_ref[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#ifdef GMX_NBNXN_SIMD_2XNN
case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
- nbnxm_kernel_noener_simd_2xmm[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ nbnxm_kernel_noener_simd_2xmm[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
- nbnxm_kernel_noener_simd_4xm[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ nbnxm_kernel_noener_simd_4xm[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#endif
- default:
- GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+ default: GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
}
}
else if (out->Vvdw.size() == 1)
switch (kernelSetup.kernelType)
{
case Nbnxm::KernelType::Cpu4x4_PlainC:
- nbnxn_kernel_ener_ref[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ nbnxn_kernel_ener_ref[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#ifdef GMX_NBNXN_SIMD_2XNN
case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
- nbnxm_kernel_ener_simd_2xmm[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ nbnxm_kernel_ener_simd_2xmm[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
- nbnxm_kernel_ener_simd_4xm[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ nbnxm_kernel_ener_simd_4xm[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#endif
- default:
- GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+ default: GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
}
}
else
{
case Nbnxm::KernelType::Cpu4x4_PlainC:
unrollj = c_nbnxnCpuIClusterSize;
- nbnxn_kernel_energrp_ref[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ nbnxn_kernel_energrp_ref[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#ifdef GMX_NBNXN_SIMD_2XNN
case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
- unrollj = GMX_SIMD_REAL_WIDTH/2;
- nbnxm_kernel_energrp_simd_2xmm[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ unrollj = GMX_SIMD_REAL_WIDTH / 2;
+ nbnxm_kernel_energrp_simd_2xmm[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
unrollj = GMX_SIMD_REAL_WIDTH;
- nbnxm_kernel_energrp_simd_4xm[coulkt][vdwkt](pairlist, nbat,
- &ic,
- shiftVectors,
- out);
+ nbnxm_kernel_energrp_simd_4xm[coulkt][vdwkt](pairlist, nbat, &ic, shiftVectors, out);
break;
#endif
- default:
- GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
+ default: GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
}
if (kernelSetup.kernelType != Nbnxm::KernelType::Cpu4x4_PlainC)
switch (unrollj)
{
case 2:
- reduceGroupEnergySimdBuffers<2>(nbatParams.nenergrp,
- nbatParams.neg_2log,
- out);
+ reduceGroupEnergySimdBuffers<2>(nbatParams.nenergrp, nbatParams.neg_2log, out);
break;
case 4:
- reduceGroupEnergySimdBuffers<4>(nbatParams.nenergrp,
- nbatParams.neg_2log,
- out);
+ reduceGroupEnergySimdBuffers<4>(nbatParams.nenergrp, nbatParams.neg_2log, out);
break;
case 8:
- reduceGroupEnergySimdBuffers<8>(nbatParams.nenergrp,
- nbatParams.neg_2log,
- out);
+ reduceGroupEnergySimdBuffers<8>(nbatParams.nenergrp, nbatParams.neg_2log, out);
break;
- default:
- GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
+ default: GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
}
}
}
}
}
-static void accountFlops(t_nrnb *nrnb,
- const PairlistSet &pairlistSet,
- const nonbonded_verlet_t &nbv,
- const interaction_const_t &ic,
- const gmx::StepWorkload &stepWork)
+static void accountFlops(t_nrnb* nrnb,
+ const PairlistSet& pairlistSet,
+ const nonbonded_verlet_t& nbv,
+ const interaction_const_t& ic,
+ const gmx::StepWorkload& stepWork)
{
const bool usingGpuKernels = nbv.useGpu();
- int enr_nbnxn_kernel_ljc;
+ int enr_nbnxn_kernel_ljc;
if (EEL_RF(ic.eeltype) || ic.eeltype == eelCUT)
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
}
- else if ((!usingGpuKernels && nbv.kernelSetup().ewaldExclusionType == Nbnxm::EwaldExclusionType::Analytical) ||
- (usingGpuKernels && Nbnxm::gpu_is_kernel_ewald_analytical(nbv.gpu_nbv)))
+ else if ((!usingGpuKernels && nbv.kernelSetup().ewaldExclusionType == Nbnxm::EwaldExclusionType::Analytical)
+ || (usingGpuKernels && Nbnxm::gpu_is_kernel_ewald_analytical(nbv.gpu_nbv)))
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
}
{
/* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
enr_nbnxn_kernel_ljc += 1;
- enr_nbnxn_kernel_lj += 1;
+ enr_nbnxn_kernel_lj += 1;
}
- inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,
- pairlistSet.natpair_ljq_);
- inc_nrnb(nrnb, enr_nbnxn_kernel_lj,
- pairlistSet.natpair_lj_);
+ inc_nrnb(nrnb, enr_nbnxn_kernel_ljc, pairlistSet.natpair_ljq_);
+ inc_nrnb(nrnb, enr_nbnxn_kernel_lj, pairlistSet.natpair_lj_);
/* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
- inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
- pairlistSet.natpair_q_);
+ inc_nrnb(nrnb, enr_nbnxn_kernel_ljc - eNR_NBNXN_LJ_RF + eNR_NBNXN_RF, pairlistSet.natpair_q_);
if (ic.vdw_modifier == eintmodFORCESWITCH)
{
}
}
-void
-nonbonded_verlet_t::dispatchNonbondedKernel(gmx::InteractionLocality iLocality,
- const interaction_const_t &ic,
- const gmx::StepWorkload &stepWork,
- int clearF,
- const t_forcerec &fr,
- gmx_enerdata_t *enerd,
- t_nrnb *nrnb)
+void nonbonded_verlet_t::dispatchNonbondedKernel(gmx::InteractionLocality iLocality,
+ const interaction_const_t& ic,
+ const gmx::StepWorkload& stepWork,
+ int clearF,
+ const t_forcerec& fr,
+ gmx_enerdata_t* enerd,
+ t_nrnb* nrnb)
{
- const PairlistSet &pairlistSet = pairlistSets().pairlistSet(iLocality);
+ const PairlistSet& pairlistSet = pairlistSets().pairlistSet(iLocality);
switch (kernelSetup().kernelType)
{
case Nbnxm::KernelType::Cpu4x4_PlainC:
case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
- nbnxn_kernel_cpu(pairlistSet,
- kernelSetup(),
- nbat.get(),
- ic,
- fr.shift_vec,
- stepWork,
- clearF,
- enerd->grpp.ener[egCOULSR].data(),
- fr.bBHAM ?
- enerd->grpp.ener[egBHAMSR].data() :
- enerd->grpp.ener[egLJSR].data(),
+ nbnxn_kernel_cpu(pairlistSet, kernelSetup(), nbat.get(), ic, fr.shift_vec, stepWork,
+ clearF, enerd->grpp.ener[egCOULSR].data(),
+ fr.bBHAM ? enerd->grpp.ener[egBHAMSR].data() : enerd->grpp.ener[egLJSR].data(),
wcycle_);
break;
break;
case Nbnxm::KernelType::Cpu8x8x8_PlainC:
- nbnxn_kernel_gpu_ref(pairlistSet.gpuList(),
- nbat.get(), &ic,
- fr.shift_vec,
- stepWork,
- clearF,
- nbat->out[0].f,
- nbat->out[0].fshift.data(),
- enerd->grpp.ener[egCOULSR].data(),
- fr.bBHAM ?
- enerd->grpp.ener[egBHAMSR].data() :
- enerd->grpp.ener[egLJSR].data());
+ nbnxn_kernel_gpu_ref(
+ pairlistSet.gpuList(), nbat.get(), &ic, fr.shift_vec, stepWork, clearF,
+ nbat->out[0].f, nbat->out[0].fshift.data(), enerd->grpp.ener[egCOULSR].data(),
+ fr.bBHAM ? enerd->grpp.ener[egBHAMSR].data() : enerd->grpp.ener[egLJSR].data());
break;
- default:
- GMX_RELEASE_ASSERT(false, "Invalid nonbonded kernel type passed!");
-
+ default: GMX_RELEASE_ASSERT(false, "Invalid nonbonded kernel type passed!");
}
accountFlops(nrnb, pairlistSet, *this, ic, stepWork);
}
-void
-nonbonded_verlet_t::dispatchFreeEnergyKernel(gmx::InteractionLocality iLocality,
- const t_forcerec *fr,
- rvec x[],
- gmx::ForceWithShiftForces *forceWithShiftForces,
- const t_mdatoms &mdatoms,
- t_lambda *fepvals,
- real *lambda,
- gmx_enerdata_t *enerd,
- const gmx::StepWorkload &stepWork,
- t_nrnb *nrnb)
+void nonbonded_verlet_t::dispatchFreeEnergyKernel(gmx::InteractionLocality iLocality,
+ const t_forcerec* fr,
+ rvec x[],
+ gmx::ForceWithShiftForces* forceWithShiftForces,
+ const t_mdatoms& mdatoms,
+ t_lambda* fepvals,
+ real* lambda,
+ gmx_enerdata_t* enerd,
+ const gmx::StepWorkload& stepWork,
+ t_nrnb* nrnb)
{
const auto nbl_fep = pairlistSets().pairlistSet(iLocality).fepLists();
nb_kernel_data_t kernel_data;
real dvdl_nb[efptNR] = { 0 };
- kernel_data.flags = donb_flags;
- kernel_data.lambda = lambda;
- kernel_data.dvdl = dvdl_nb;
+ kernel_data.flags = donb_flags;
+ kernel_data.lambda = lambda;
+ kernel_data.dvdl = dvdl_nb;
kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR].data();
kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR].data();
- GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(), "Number of lists should be same as number of NB threads");
+ GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(),
+ "Number of lists should be same as number of NB threads");
wallcycle_sub_start(wcycle_, ewcsNONBONDED_FEP);
#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
{
try
{
- gmx_nb_free_energy_kernel(nbl_fep[th].get(),
- x, forceWithShiftForces,
- fr, &mdatoms, &kernel_data, nrnb);
+ gmx_nb_free_energy_kernel(nbl_fep[th].get(), x, forceWithShiftForces, fr, &mdatoms,
+ &kernel_data, nrnb);
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
if (fepvals->sc_alpha != 0)
{
- enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
+ enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
}
else
{
- enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
+ enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
}
if (fepvals->n_lambda > 0 && stepWork.computeDhdl && fepvals->sc_alpha != 0)
{
real lam_i[efptNR];
- kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA;
+ kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE))
+ | GMX_NONBONDED_DO_FOREIGNLAMBDA;
kernel_data.lambda = lam_i;
kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR].data();
kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR].data();
{
for (int j = 0; j < efptNR; j++)
{
- lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
}
reset_foreign_enerdata(enerd);
#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
{
try
{
- gmx_nb_free_energy_kernel(nbl_fep[th].get(),
- x, forceWithShiftForces,
- fr, &mdatoms, &kernel_data, nrnb);
+ gmx_nb_free_energy_kernel(nbl_fep[th].get(), x, forceWithShiftForces, fr,
+ &mdatoms, &kernel_data, nrnb);
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);