X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?p=alexxy%2Fgromacs.git;a=blobdiff_plain;f=src%2Fgromacs%2Fmdlib%2Fsim_util.cpp;fp=src%2Fgromacs%2Fmdlib%2Fsim_util.cpp;h=89da40d77cded04ba52a717538d170598a75bc2d;hp=5fad2c1d3f4ba3440734bc9a44dd752d69dbb042;hb=977d0e553d447c3a373c4dee62225922364fae27;hpb=0da8ba6fea75b268a36e22ff9150e31133f88552 diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 5fad2c1d3f..89da40d77c 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -986,7 +986,7 @@ static StepWorkload setupStepWorkload(const int legacyFlags, GMX_ASSERT(simulationWork.useGpuNonbonded, "Can only offload buffer ops if nonbonded computation is also offloaded"); } - flags.useGpuXBufferOps = simulationWork.useGpuBufferOps; + flags.useGpuXBufferOps = simulationWork.useGpuBufferOps && !flags.doNeighborSearch; // on virial steps the CPU reduction path is taken flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !flags.computeVirial; const bool rankHasGpuPmeTask = simulationWork.useGpuPme && !simulationWork.haveSeparatePmeRank; @@ -1364,23 +1364,24 @@ void do_force(FILE* fplog, haveCopiedXFromGpu = true; } + if (stepWork.doNeighborSearch && ((stepWork.haveGpuPmeOnThisRank || simulationWork.useGpuBufferOps))) + { + // TODO refactor this to do_md, after partitioning. + stateGpu->reinit(mdatoms->homenr, + getLocalAtomCount(cr->dd, *mdatoms, simulationWork.havePpDomainDecomposition)); + if (stepWork.haveGpuPmeOnThisRank) + { + // TODO: This should be moved into PME setup function ( pme_gpu_prepare_computation(...) ) + pme_gpu_set_device_x(fr->pmedata, stateGpu->getCoordinates()); + } + } + // Coordinates on the device are needed if PME or BufferOps are offloaded. // The local coordinates can be copied right away. // NOTE: Consider moving this copy to right after they are updated and constrained, // if the later is not offloaded. if (stepWork.haveGpuPmeOnThisRank || stepWork.useGpuXBufferOps) { - if (stepWork.doNeighborSearch) - { - // TODO refactor this to do_md, after partitioning. - stateGpu->reinit(mdatoms->homenr, - getLocalAtomCount(cr->dd, *mdatoms, simulationWork.havePpDomainDecomposition)); - if (stepWork.haveGpuPmeOnThisRank) - { - // TODO: This should be moved into PME setup function ( pme_gpu_prepare_computation(...) ) - pme_gpu_set_device_x(fr->pmedata, stateGpu->getCoordinates()); - } - } // We need to copy coordinates when: // 1. Update is not offloaded // 2. The buffers were reinitialized on search step @@ -1529,7 +1530,7 @@ void do_force(FILE* fplog, wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchLocal); wallcycle_stop(wcycle, WallCycleCounter::NS); - if (stepWork.useGpuXBufferOps) + if (simulationWork.useGpuBufferOps) { nbv->atomdata_init_copy_x_to_nbat_x_gpu(); } @@ -1580,7 +1581,7 @@ void do_force(FILE* fplog, wallcycle_start(wcycle, WallCycleCounter::LaunchGpu); wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded); Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get()); - if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps) + if (!stepWork.useGpuXBufferOps) { Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::Local); } @@ -1687,7 +1688,7 @@ void do_force(FILE* fplog, if (simulationWork.useGpuNonbonded) { - if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps) + if (!stepWork.useGpuXBufferOps) { wallcycle_start(wcycle, WallCycleCounter::LaunchGpu); wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);