X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?p=alexxy%2Fgromacs.git;a=blobdiff_plain;f=src%2Fgromacs%2Fmdlib%2Fsim_util.cpp;fp=src%2Fgromacs%2Fmdlib%2Fsim_util.cpp;h=14818a0c4b8edb80ae4ba6f59736d0da9139694a;hp=89da40d77cded04ba52a717538d170598a75bc2d;hb=8e4184a2387fb1b36361b07c9c3df40076d0250d;hpb=4ab77f8b8162e37a89f7bdbd89709528326d7c26 diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 89da40d77c..14818a0c4b 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -981,14 +981,14 @@ static StepWorkload setupStepWorkload(const int legacyFlags, && !(simulationWork.computeNonbondedAtMtsLevel1 && !computeSlowForces); flags.computeDhdl = ((legacyFlags & GMX_FORCE_DHDL) != 0); - if (simulationWork.useGpuBufferOps) + if (simulationWork.useGpuXBufferOps || simulationWork.useGpuFBufferOps) { GMX_ASSERT(simulationWork.useGpuNonbonded, "Can only offload buffer ops if nonbonded computation is also offloaded"); } - flags.useGpuXBufferOps = simulationWork.useGpuBufferOps && !flags.doNeighborSearch; + flags.useGpuXBufferOps = simulationWork.useGpuXBufferOps && !flags.doNeighborSearch; // on virial steps the CPU reduction path is taken - flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !flags.computeVirial; + flags.useGpuFBufferOps = simulationWork.useGpuFBufferOps && !flags.computeVirial; const bool rankHasGpuPmeTask = simulationWork.useGpuPme && !simulationWork.haveSeparatePmeRank; flags.useGpuPmeFReduction = flags.computeSlowForces && flags.useGpuFBufferOps && (rankHasGpuPmeTask || simulationWork.useGpuPmePpCommunication); @@ -1339,7 +1339,7 @@ void do_force(FILE* fplog, const bool reinitGpuPmePpComms = simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch); - auto* localXReadyOnDevice = (stepWork.haveGpuPmeOnThisRank || simulationWork.useGpuBufferOps) + auto* localXReadyOnDevice = (stepWork.haveGpuPmeOnThisRank || simulationWork.useGpuXBufferOps) ? stateGpu->getCoordinatesReadyOnDeviceEvent( AtomLocality::Local, simulationWork, stepWork) : nullptr; @@ -1364,7 +1364,8 @@ void do_force(FILE* fplog, haveCopiedXFromGpu = true; } - if (stepWork.doNeighborSearch && ((stepWork.haveGpuPmeOnThisRank || simulationWork.useGpuBufferOps))) + if (stepWork.doNeighborSearch + && (stepWork.haveGpuPmeOnThisRank || simulationWork.useGpuXBufferOps || simulationWork.useGpuFBufferOps)) { // TODO refactor this to do_md, after partitioning. stateGpu->reinit(mdatoms->homenr, @@ -1530,12 +1531,12 @@ void do_force(FILE* fplog, wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchLocal); wallcycle_stop(wcycle, WallCycleCounter::NS); - if (simulationWork.useGpuBufferOps) + if (simulationWork.useGpuXBufferOps) { nbv->atomdata_init_copy_x_to_nbat_x_gpu(); } - if (simulationWork.useGpuBufferOps) + if (simulationWork.useGpuFBufferOps) { setupLocalGpuForceReduction(runScheduleWork, fr->nbv.get(),