#include "gromacs/awh/awh.h"
#include "gromacs/domdec/dlbtiming.h"
+#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/domdec/partition.h"
#include "gromacs/essentialdynamics/edsam.h"
#include "gromacs/gmxlib/nonbonded/nonbonded.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/imd/imd.h"
-#include "gromacs/listed-forces/bonded.h"
-#include "gromacs/listed-forces/disre.h"
-#include "gromacs/listed-forces/gpubonded.h"
-#include "gromacs/listed-forces/manage-threading.h"
-#include "gromacs/listed-forces/orires.h"
+#include "gromacs/listed_forces/bonded.h"
+#include "gromacs/listed_forces/disre.h"
+#include "gromacs/listed_forces/gpubonded.h"
+#include "gromacs/listed_forces/manage_threading.h"
+#include "gromacs/listed_forces/orires.h"
#include "gromacs/math/arrayrefwithpadding.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/units.h"
#include "gromacs/mdlib/forcerec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/mdrun.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_atomdata.h"
-#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
-#include "gromacs/mdlib/nbnxn_grid.h"
-#include "gromacs/mdlib/nbnxn_search.h"
#include "gromacs/mdlib/ppforceworkload.h"
#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdlib/update.h"
-#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/enerdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/iforceprovider.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/state.h"
+#include "gromacs/nbnxm/atomdata.h"
+#include "gromacs/nbnxm/gpu_data_mgmt.h"
+#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/logger.h"
-#include "gromacs/utility/pleasecite.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/strconvert.h"
#include "gromacs/utility/sysinfo.h"
-#include "nbnxn_gpu.h"
-#include "nbnxn_kernels/nbnxn_kernel_cpu.h"
-#include "nbnxn_kernels/nbnxn_kernel_prune.h"
-
// TODO: this environment variable allows us to verify before release
// that on less common architectures the total cost of polling is not larger than
// a blocking wait (so polling does not introduce overhead when the static
}
}
/*! \brief Dispatches the non-bonded pruning and force kernels for one interaction locality.
 *
 * The kernel flavour is no longer selected in this function: the work is
 * handed to NbnxnDispatchPruneKernel() and NbnxnDispatchKernel().
 * NbnxnDispatchKernel() also receives \p nrnb.
 * NOTE(review): presumably the flop accounting that used to be done here with
 * inc_nrnb() now lives inside NbnxnDispatchKernel() - confirm.
 *
 * The pruning call is timed under the ewcsNONBONDED_PRUNING sub-counter; the
 * condition that enables pruning is outside the visible part of the body.
 * The ewcsNONBONDED sub-counter is stopped here only when bUsingGpuKernels
 * is false.
 *
 * \param[in]     fr         Force record; fr->shift_vec is passed to the prune
 *                           kernel and \p fr itself to the force kernel
 * \param[in]     ic         Interaction constants, passed on dereferenced
 * \param[in,out] enerd      Energy data, passed to NbnxnDispatchKernel()
 * \param[in]     flags      Force flags; the GMX_FORCE_NONBONDED bit is tested
 *                           first (the body of that branch is not visible
 *                           here - presumably an early return, TODO confirm)
 * \param[in]     ilocality  Interaction locality to compute
 * \param[in]     clearF     Passed on to NbnxnDispatchKernel()
 * \param[in]     step       MD step (its use is outside the visible lines)
 * \param[in,out] nrnb       Flop counters, passed to NbnxnDispatchKernel()
 * \param[in]     wcycle     Wall-cycle counters
 */
-static void do_nb_verlet(const t_forcerec *fr,
- const interaction_const_t *ic,
- gmx_enerdata_t *enerd,
- int flags, int ilocality,
- int clearF,
- int64_t step,
- t_nrnb *nrnb,
- gmx_wallcycle_t wcycle)
+static void do_nb_verlet(t_forcerec *fr,
+ const interaction_const_t *ic,
+ gmx_enerdata_t *enerd,
+ const int flags,
+ const Nbnxm::InteractionLocality ilocality,
+ const int clearF,
+ const int64_t step,
+ t_nrnb *nrnb,
+ gmx_wallcycle_t wcycle)
{
if (!(flags & GMX_FORCE_NONBONDED))
{
* the current coordinates of the atoms.
*/
wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
- nbnxn_kernel_cpu_prune(nbvg, nbv->nbat, fr->shift_vec, nbv->listParams->rlistInner);
+ NbnxnDispatchPruneKernel(nbv, ilocality, fr->shift_vec);
wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
}
wallcycle_sub_start(wcycle, ewcsNONBONDED);
}
- switch (nbvg->kernel_type)
- {
- case nbnxnk4x4_PlainC:
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
- nbnxn_kernel_cpu(nbvg,
- nbv->nbat,
- ic,
- fr->shift_vec,
- flags,
- clearF,
- fr->fshift[0],
- enerd->grpp.ener[egCOULSR],
- fr->bBHAM ?
- enerd->grpp.ener[egBHAMSR] :
- enerd->grpp.ener[egLJSR]);
- break;
-
- case nbnxnk8x8x8_GPU:
- nbnxn_gpu_launch_kernel(nbv->gpu_nbv, flags, ilocality);
- break;
+ NbnxnDispatchKernel(nbv, ilocality, *ic, flags, clearF, fr, enerd, nrnb);
- case nbnxnk8x8x8_PlainC:
- nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
- nbv->nbat, ic,
- fr->shift_vec,
- flags,
- clearF,
- nbv->nbat->out[0].f,
- fr->fshift[0],
- enerd->grpp.ener[egCOULSR],
- fr->bBHAM ?
- enerd->grpp.ener[egBHAMSR] :
- enerd->grpp.ener[egLJSR]);
- break;
-
- default:
- GMX_RELEASE_ASSERT(false, "Invalid nonbonded kernel type passed!");
-
- }
if (!bUsingGpuKernels)
{
wallcycle_sub_stop(wcycle, ewcsNONBONDED);
}
-
- int enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
- if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
- {
- enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
- }
- else if ((!bUsingGpuKernels && nbvg->ewald_excl == ewaldexclAnalytical) ||
- (bUsingGpuKernels && nbnxn_gpu_is_kernel_ewald_analytical(nbv->gpu_nbv)))
- {
- enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
- }
- else
- {
- enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
- }
- enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
- if (flags & GMX_FORCE_ENERGY)
- {
- /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
- enr_nbnxn_kernel_ljc += 1;
- enr_nbnxn_kernel_lj += 1;
- }
-
- inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,
- nbvg->nbl_lists.natpair_ljq);
- inc_nrnb(nrnb, enr_nbnxn_kernel_lj,
- nbvg->nbl_lists.natpair_lj);
- /* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
- inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
- nbvg->nbl_lists.natpair_q);
-
- if (ic->vdw_modifier == eintmodFORCESWITCH)
- {
- /* We add up the switch cost separately */
- inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
- nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
- }
- if (ic->vdw_modifier == eintmodPOTSWITCH)
- {
- /* We add up the switch cost separately */
- inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
- nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
- }
- if (ic->vdwtype == evdwPME)
- {
- /* We add up the LJ Ewald cost separately */
- inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
- nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
- }
}
static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
{
GpuTaskCompletion completionType = (isPmeGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check;
wallcycle_start_nocount(wcycle, ewcWAIT_GPU_NB_L);
- isNbGpuDone = nbnxn_gpu_try_finish_task(nbv->gpu_nbv,
- flags, eatLocal,
- haveOtherWork,
- enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
- fshift, completionType);
+ isNbGpuDone = Nbnxm::gpu_try_finish_task(nbv->gpu_nbv,
+ flags,
+ Nbnxm::AtomLocality::Local,
+ haveOtherWork,
+ enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
+ fshift, completionType);
wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
// To get the call count right, when the task finished we
// issue a start/stop.
wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
- nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), eatLocal,
+ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::Local,
nbv->nbat, as_rvec_array(force->unpaddedArrayRef().data()), wcycle);
}
}
*/
int numRollingParts = nbv->listParams->numRollingParts;
GMX_ASSERT(numRollingParts == nbv->listParams->nstlistPrune/2, "Since we alternate local/non-local at even/odd steps, we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
- int stepWithCurrentList = step - nbv->grp[eintLocal].nbl_lists.outerListCreationStep;
+ int stepWithCurrentList = step - nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.outerListCreationStep;
bool stepIsEven = ((stepWithCurrentList & 1) == 0);
if (stepWithCurrentList > 0 &&
stepWithCurrentList < inputrec->nstlist - 1 &&
(stepIsEven || DOMAINDECOMP(cr)))
{
- nbnxn_gpu_launch_kernel_pruneonly(nbv->gpu_nbv,
- stepIsEven ? eintLocal : eintNonlocal,
- numRollingParts);
+ Nbnxm::gpu_launch_kernel_pruneonly(nbv->gpu_nbv,
+ stepIsEven ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal,
+ numRollingParts);
}
}
nonbonded_verlet_t *nbv = fr->nbv;
bStateChanged = ((flags & GMX_FORCE_STATECHANGED) != 0);
- bNS = ((flags & GMX_FORCE_NS) != 0) && (!fr->bAllvsAll);
+ bNS = ((flags & GMX_FORCE_NS) != 0);
bFillGrid = (bNS && bStateChanged);
bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
bDoForces = ((flags & GMX_FORCE_FORCES) != 0);
nullptr, 0, mdatoms->homenr, -1,
fr->cginfo, x.unpaddedArrayRef(),
0, nullptr,
- nbv->grp[eintLocal].kernel_type,
+ nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
nbv->nbat);
wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
}
wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
nbnxn_put_on_grid_nonlocal(nbv->nbs.get(), domdec_zones(cr->dd),
fr->cginfo, x.unpaddedArrayRef(),
- nbv->grp[eintNonlocal].kernel_type,
+ nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
nbv->nbat);
wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
}
if (bNS)
{
- nbnxn_gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat);
+ Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat);
}
- nbnxn_gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat);
+ Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
// higher-level object than the nb module.
fr->gpuBonded->updateInteractionListsAndDeviceBuffers(nbnxn_get_gridindices(fr->nbv->nbs.get()),
top->idef,
- nbnxn_gpu_get_xq(nbv->gpu_nbv),
- nbnxn_gpu_get_f(nbv->gpu_nbv),
- nbnxn_gpu_get_fshift(nbv->gpu_nbv));
+ Nbnxm::gpu_get_xq(nbv->gpu_nbv),
+ Nbnxm::gpu_get_f(nbv->gpu_nbv),
+ Nbnxm::gpu_get_fshift(nbv->gpu_nbv));
ppForceWorkload->haveGpuBondedWork = fr->gpuBonded->haveInteractions();
}
/* do local pair search */
if (bNS)
{
+ nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists;
+
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
&top->excls,
nbv->listParams->rlistOuter,
nbv->min_ci_balanced,
- &nbv->grp[eintLocal].nbl_lists,
- eintLocal,
- nbv->grp[eintLocal].kernel_type,
+ &pairlistSet,
+ Nbnxm::InteractionLocality::Local,
+ nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
nrnb);
- nbv->grp[eintLocal].nbl_lists.outerListCreationStep = step;
+ pairlistSet.outerListCreationStep = step;
if (nbv->listParams->useDynamicPruning && !bUseGPU)
{
- nbnxnPrepareListForDynamicPruning(&nbv->grp[eintLocal].nbl_lists);
+ nbnxnPrepareListForDynamicPruning(&pairlistSet);
}
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
if (bUseGPU)
{
/* initialize local pair-list on the GPU */
- nbnxn_gpu_init_pairlist(nbv->gpu_nbv,
- nbv->grp[eintLocal].nbl_lists.nbl[0],
- eintLocal);
+ Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
+ pairlistSet.nblGpu[0],
+ Nbnxm::InteractionLocality::Local);
}
wallcycle_stop(wcycle, ewcNS);
}
else
{
- nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs.get(), eatLocal, FALSE, as_rvec_array(x.unpaddedArrayRef().data()),
+ nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs.get(), Nbnxm::AtomLocality::Local,
+ FALSE, as_rvec_array(x.unpaddedArrayRef().data()),
nbv->nbat, wcycle);
}
wallcycle_start(wcycle, ewcLAUNCH_GPU);
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- nbnxn_gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat, eatLocal, ppForceWorkload->haveGpuBondedWork);
+ Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
// bonded work not split into separate local and non-local, so with DD
/* launch local nonbonded work on GPU */
wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, flags, Nbnxm::InteractionLocality::Local, enbvClearFNo,
step, nrnb, wcycle);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
do non-local pair search */
if (DOMAINDECOMP(cr))
{
+ nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists;
+
if (bNS)
{
wallcycle_start_nocount(wcycle, ewcNS);
&top->excls,
nbv->listParams->rlistOuter,
nbv->min_ci_balanced,
- &nbv->grp[eintNonlocal].nbl_lists,
- eintNonlocal,
- nbv->grp[eintNonlocal].kernel_type,
+ &pairlistSet,
+ Nbnxm::InteractionLocality::NonLocal,
+ nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
nrnb);
- nbv->grp[eintNonlocal].nbl_lists.outerListCreationStep = step;
+ pairlistSet.outerListCreationStep = step;
if (nbv->listParams->useDynamicPruning && !bUseGPU)
{
- nbnxnPrepareListForDynamicPruning(&nbv->grp[eintNonlocal].nbl_lists);
+ nbnxnPrepareListForDynamicPruning(&pairlistSet);
}
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
- if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_GPU)
+ if (nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type == nbnxnk8x8x8_GPU)
{
/* initialize non-local pair-list on the GPU */
- nbnxn_gpu_init_pairlist(nbv->gpu_nbv,
- nbv->grp[eintNonlocal].nbl_lists.nbl[0],
- eintNonlocal);
+ Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
+ pairlistSet.nblGpu[0],
+ Nbnxm::InteractionLocality::NonLocal);
}
wallcycle_stop(wcycle, ewcNS);
}
{
dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle);
- nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs.get(), eatNonlocal, FALSE, as_rvec_array(x.unpaddedArrayRef().data()),
+ nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
+ FALSE, as_rvec_array(x.unpaddedArrayRef().data()),
nbv->nbat, wcycle);
}
/* launch non-local nonbonded tasks on GPU */
wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- nbnxn_gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat, eatNonlocal, ppForceWorkload->haveGpuBondedWork);
+ Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat, Nbnxm::AtomLocality::NonLocal, ppForceWorkload->haveGpuBondedWork);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (ppForceWorkload->haveGpuBondedWork)
}
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, flags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
step, nrnb, wcycle);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (DOMAINDECOMP(cr))
{
- nbnxn_gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat,
- flags, eatNonlocal, ppForceWorkload->haveGpuBondedWork);
+ Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat,
+ flags, Nbnxm::AtomLocality::NonLocal, ppForceWorkload->haveGpuBondedWork);
}
- nbnxn_gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat,
- flags, eatLocal, ppForceWorkload->haveGpuBondedWork);
+ Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat,
+ flags, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (ppForceWorkload->haveGpuBondedWork && (flags & GMX_FORCE_ENERGY))
if (!bUseOrEmulGPU)
{
- do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
+ do_nb_verlet(fr, ic, enerd, flags, Nbnxm::InteractionLocality::Local, enbvClearFYes,
step, nrnb, wcycle);
}
/* Calculate the local and non-local free energy interactions here.
* Happens here on the CPU both with and without GPU.
*/
- if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0)
+ if (fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.nbl_fep[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists,
+ do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists,
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
}
if (DOMAINDECOMP(cr) &&
- fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0)
+ fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nbl_fep[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists,
+ do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists,
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
if (!bUseOrEmulGPU)
{
- int aloc;
-
if (DOMAINDECOMP(cr))
{
- do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, flags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
step, nrnb, wcycle);
}
- if (!bUseOrEmulGPU)
- {
- aloc = eintLocal;
- }
- else
- {
- aloc = eintNonlocal;
- }
+ const Nbnxm::InteractionLocality iloc =
+ (!bUseOrEmulGPU ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal);
/* Add all the non-bonded force to the normal force array.
* This can be split into a local and a non-local part when overlapping
*/
wallcycle_stop(wcycle, ewcFORCE);
- nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), eatAll, nbv->nbat, f, wcycle);
+ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::All, nbv->nbat, f, wcycle);
wallcycle_start_nocount(wcycle, ewcFORCE);
/* if there are multiple fshift output buffers reduce them */
if ((flags & GMX_FORCE_VIRIAL) &&
- nbv->grp[aloc].nbl_lists.nnbl > 1)
+ nbv->grp[iloc].nbl_lists.nnbl > 1)
{
/* This is not in a subcounter because it takes a
negligible and constant-sized amount of time */
if (bUseGPU)
{
wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL);
- nbnxn_gpu_wait_finish_task(nbv->gpu_nbv,
- flags, eatNonlocal,
- ppForceWorkload->haveGpuBondedWork,
- enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
- fr->fshift);
+ Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
+ flags, Nbnxm::AtomLocality::NonLocal,
+ ppForceWorkload->haveGpuBondedWork,
+ enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
+ fr->fshift);
cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL);
}
else
{
wallcycle_start_nocount(wcycle, ewcFORCE);
- do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
+ do_nb_verlet(fr, ic, enerd, flags, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes,
step, nrnb, wcycle);
wallcycle_stop(wcycle, ewcFORCE);
}
/* skip the reduction if there was no non-local work to do */
- if (nbv->grp[eintNonlocal].nbl_lists.nbl[0]->nsci > 0)
+ if (!nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nblGpu[0]->sci.empty())
{
- nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), eatNonlocal,
+ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
nbv->nbat, f, wcycle);
}
}
const float gpuWaitApiOverheadMargin = 2e6f; /* cycles */
wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
- nbnxn_gpu_wait_finish_task(nbv->gpu_nbv,
- flags, eatLocal, ppForceWorkload->haveGpuBondedWork,
- enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
- fr->fshift);
+ Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
+ flags, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork,
+ enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
+ fr->fshift);
float cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
if (ddCloseBalanceRegion == DdCloseBalanceRegionAfterForceComputation::yes)
// NOTE: emulation kernel is not included in the balancing region,
// but emulation mode does not target performance anyway
wallcycle_start_nocount(wcycle, ewcFORCE);
- do_nb_verlet(fr, ic, enerd, flags, eintLocal,
+ do_nb_verlet(fr, ic, enerd, flags, Nbnxm::InteractionLocality::Local,
DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
step, nrnb, wcycle);
wallcycle_stop(wcycle, ewcFORCE);
/* now clear the GPU outputs while we finish the step on the CPU */
wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- nbnxn_gpu_clear_outputs(nbv->gpu_nbv, flags);
+ Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, flags);
/* Is dynamic pair-list pruning activated? */
if (nbv->listParams->useDynamicPruning)
* on the non-alternating path. */
if (bUseOrEmulGPU && !alternateGpuWait)
{
- nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), eatLocal,
+ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::Local,
nbv->nbat, f, wcycle);
}
if (DOMAINDECOMP(cr))
}
bStateChanged = ((flags & GMX_FORCE_STATECHANGED) != 0);
- bNS = ((flags & GMX_FORCE_NS) != 0) && (!fr->bAllvsAll);
+ bNS = ((flags & GMX_FORCE_NS) != 0);
/* Should we perform the long-range nonbonded evaluation inside the neighborsearching? */
bFillGrid = (bNS && bStateChanged);
bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
if (printReport)
{
- auto nbnxn_gpu_timings = use_GPU(nbv) ? nbnxn_gpu_get_timings(nbv->gpu_nbv) : nullptr;
+ auto nbnxn_gpu_timings = use_GPU(nbv) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
gmx_wallclock_gpu_pme_t pme_gpu_timings = {};
if (pme_gpu_task_enabled(pme))
{
}
}
/*! \brief Sets the initial fep state and the per-component lambda values from the input record.
 *
 * Returns immediately when ir.efep == efepNO and ir.bSimTemp is false.
 * Otherwise each of the efptNR lambda components is taken from
 * fepvals->init_lambda when that is >= 0, and from
 * fepvals->all_lambda[i][fepvals->init_fep_state] otherwise.
 *
 * \param[in]  fplog      Log file; the visible tail of the function prints to
 *                        it (the surrounding guard is not visible here)
 * \param[in]  ir         Input record; see the review note in the body about
 *                        ir.opts.ref_t being written
 * \param[in]  isMaster   When true, \p fep_state and \p lambda are written;
 *                        when false they are left untouched
 * \param[out] fep_state  Set to fepvals->init_fep_state (only when \p isMaster)
 * \param[out] lambda     Receives the lambda components (only when \p isMaster)
 * \param[out] lam0       Optional; when not nullptr it receives the same
 *                        lambda components regardless of \p isMaster
 */
-extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, gmx::ArrayRef<real> lambda, double *lam0)
+void initialize_lambdas(FILE *fplog,
+ const t_inputrec &ir,
+ bool isMaster,
+ int *fep_state,
+ gmx::ArrayRef<real> lambda,
+ double *lam0)
{
- /* this function works, but could probably use a logic rewrite to keep all the different
- types of efep straight. */
+ /* TODO: Clean up initialization of fep_state and lambda in
+ t_state. This function works, but could probably use a logic
+ rewrite to keep all the different types of efep straight. */
- if ((ir->efep == efepNO) && (!ir->bSimTemp))
+ if ((ir.efep == efepNO) && (!ir.bSimTemp))
{
return;
}
- t_lambda *fep = ir->fepvals;
- *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
- if checkpoint is set -- a kludge is in for now
- to prevent this.*/
+ const t_lambda *fep = ir.fepvals;
+ if (isMaster)
+ {
+ *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
+ if checkpoint is set -- a kludge is in for now
+ to prevent this.*/
+ }
/* The lambda value is computed once per component and then stored to
 * whichever of the outputs apply; it is indexed with fep->init_fep_state
 * rather than *fep_state because *fep_state is only written when isMaster
 * is true. */
for (int i = 0; i < efptNR; i++)
{
+ double thisLambda;
/* overwrite lambda state with init_lambda for now for backwards compatibility */
- if (fep->init_lambda >= 0) /* if it's -1, it was never initializd */
+ if (fep->init_lambda >= 0) /* if it's -1, it was never initialized */
{
- lambda[i] = fep->init_lambda;
- if (lam0)
- {
- lam0[i] = lambda[i];
- }
+ thisLambda = fep->init_lambda;
}
else
{
- lambda[i] = fep->all_lambda[i][*fep_state];
- if (lam0)
- {
- lam0[i] = lambda[i];
- }
+ thisLambda = fep->all_lambda[i][fep->init_fep_state];
+ }
+ if (isMaster)
+ {
+ lambda[i] = thisLambda;
+ }
+ if (lam0 != nullptr)
+ {
+ lam0[i] = thisLambda;
}
}
- if (ir->bSimTemp)
+ if (ir.bSimTemp)
{
/* need to rescale control temperatures to match current state */
/* NOTE(review): ir is taken by const reference, yet ir.opts.ref_t[i] is
 * assigned below. Presumably ref_t is a pointer member, so the const does
 * not reach the pointed-to values - confirm that mutating the input
 * record here is intended. */
- for (int i = 0; i < ir->opts.ngtc; i++)
+ for (int i = 0; i < ir.opts.ngtc; i++)
{
- if (ir->opts.ref_t[i] > 0)
+ if (ir.opts.ref_t[i] > 0)
{
- ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
+ ir.opts.ref_t[i] = ir.simtempvals->temperatures[fep->init_fep_state];
}
}
}
fprintf(fplog, "]\n");
}
}
-
-
-void init_md(FILE *fplog,
- const t_commrec *cr, gmx::IMDOutputProvider *outputProvider,
- t_inputrec *ir, const gmx_output_env_t *oenv,
- const MdrunOptions &mdrunOptions,
- double *t, double *t0,
- t_state *globalState, double *lam0,
- t_nrnb *nrnb, gmx_mtop_t *mtop,
- gmx_update_t **upd,
- gmx::BoxDeformation *deform,
- int nfile, const t_filenm fnm[],
- gmx_mdoutf_t *outf, t_mdebin **mdebin,
- tensor force_vir, tensor shake_vir,
- tensor total_vir, tensor pres, rvec mu_tot,
- gmx_bool *bSimAnn, t_vcm **vcm,
- gmx_wallcycle_t wcycle)
-{
- int i;
-
- /* Initial values */
- *t = *t0 = ir->init_t;
-
- *bSimAnn = FALSE;
- for (i = 0; i < ir->opts.ngtc; i++)
- {
- /* set bSimAnn if any group is being annealed */
- if (ir->opts.annealing[i] != eannNO)
- {
- *bSimAnn = TRUE;
- }
- }
-
- /* Initialize lambda variables */
- /* TODO: Clean up initialization of fep_state and lambda in t_state.
- * We currently need to call initialize_lambdas on non-master ranks
- * to initialize lam0.
- */
- if (MASTER(cr))
- {
- initialize_lambdas(fplog, ir, &globalState->fep_state, globalState->lambda, lam0);
- }
- else
- {
- int tmpFepState;
- std::array<real, efptNR> tmpLambda;
- initialize_lambdas(fplog, ir, &tmpFepState, tmpLambda, lam0);
- }
-
- // TODO upd is never NULL in practice, but the analysers don't know that
- if (upd)
- {
- *upd = init_update(ir, deform);
- }
- if (*bSimAnn)
- {
- update_annealing_target_temp(ir, ir->init_t, upd ? *upd : nullptr);
- }
-
- if (vcm != nullptr)
- {
- *vcm = init_vcm(fplog, &mtop->groups, ir);
- }
-
- if (EI_DYNAMICS(ir->eI) && !mdrunOptions.continuationOptions.appendFiles)
- {
- if (ir->etc == etcBERENDSEN)
- {
- please_cite(fplog, "Berendsen84a");
- }
- if (ir->etc == etcVRESCALE)
- {
- please_cite(fplog, "Bussi2007a");
- }
- if (ir->eI == eiSD1)
- {
- please_cite(fplog, "Goga2012");
- }
- }
- init_nrnb(nrnb);
-
- if (nfile != -1)
- {
- *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, mtop, oenv, wcycle);
-
- *mdebin = init_mdebin(mdrunOptions.continuationOptions.appendFiles ? nullptr : mdoutf_get_fp_ene(*outf),
- mtop, ir, mdoutf_get_fp_dhdl(*outf));
- }
-
- /* Initiate variables */
- clear_mat(force_vir);
- clear_mat(shake_vir);
- clear_rvec(mu_tot);
- clear_mat(total_vir);
- clear_mat(pres);
-}
-
-void init_rerun(FILE *fplog,
- const t_commrec *cr, gmx::IMDOutputProvider *outputProvider,
- t_inputrec *ir, const gmx_output_env_t *oenv,
- const MdrunOptions &mdrunOptions,
- t_state *globalState, double *lam0,
- t_nrnb *nrnb, gmx_mtop_t *mtop,
- int nfile, const t_filenm fnm[],
- gmx_mdoutf_t *outf, t_mdebin **mdebin,
- gmx_wallcycle_t wcycle)
-{
- /* Initialize lambda variables */
- /* TODO: Clean up initialization of fep_state and lambda in t_state.
- * We currently need to call initialize_lambdas on non-master ranks
- * to initialize lam0.
- */
- if (MASTER(cr))
- {
- initialize_lambdas(fplog, ir, &globalState->fep_state, globalState->lambda, lam0);
- }
- else
- {
- int tmpFepState;
- std::array<real, efptNR> tmpLambda;
- initialize_lambdas(fplog, ir, &tmpFepState, tmpLambda, lam0);
- }
-
- init_nrnb(nrnb);
-
- if (nfile != -1)
- {
- *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, mtop, oenv, wcycle);
- *mdebin = init_mdebin(mdrunOptions.continuationOptions.appendFiles ? nullptr : mdoutf_get_fp_ene(*outf),
- mtop, ir, mdoutf_get_fp_dhdl(*outf), true);
- }
-}