Merge "Merge branch 'release-2019' into master"
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
index fef7f4850a6af7824b983b2e0cbd804e4e6dbdc6..1002a58373489853039304a4ebd25adc0474d582 100644 (file)
@@ -63,6 +63,7 @@
 #include "gromacs/essentialdynamics/edsam.h"
 #include "gromacs/ewald/pme.h"
 #include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp_comm_gpu.h"
 #include "gromacs/fileio/trxio.h"
 #include "gromacs/gmxlib/network.h"
 #include "gromacs/gmxlib/nrnb.h"
@@ -331,7 +332,7 @@ void gmx::LegacySimulator::do_md()
 //       2. The proper GPU syncronization is introduced, so that the H2D and D2H data copies can be performed in the separate
 //          stream owned by the StatePropagatorDataGpu
     const auto &simulationWork     = runScheduleWork->simulationWork;
-    const bool  useGpuForPme       = simulationWork.usePmeGpu;
+    const bool  useGpuForPme       = simulationWork.useGpuPme;
     const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
     // Temporary solution to make sure that the buffer ops are offloaded when update is offloaded
     const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
@@ -749,7 +750,7 @@ void gmx::LegacySimulator::do_md()
             // TODO: Move to after all booleans are defined.
             if (useGpuForUpdate && !bFirstStep)
             {
-                stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+                stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
                 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
             }
             /* PME grid + cut-off optimization with GPUs or PME nodes */
@@ -825,15 +826,12 @@ void gmx::LegacySimulator::do_md()
                 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
             }
 
-            // Copy coordinate from the GPU when needed:
-            // - On search steps to keep copy on host (device buffers are reinitialized).
-            // - There are CPU bonded forces that need current coordinates
-            // - When needed for the output.
-            if (bNS ||
-                (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork) ||
-                do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed))
+            // Copy coordinates from the GPU when needed at the search step.
+            // NOTE: The cases when coordinates are needed on the CPU for force evaluation are handled in sim_utils.
+            // NOTE: If the coordinates are to be written into an output file, they are also copied separately before the output.
+            if (bNS)
             {
-                stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+                stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
                 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
             }
         }
@@ -1164,6 +1162,14 @@ void gmx::LegacySimulator::do_md()
             }
         }
 
+        // Copy coordinates from the GPU for the output if the update is offloaded and
+        // coordinates have not already been copied for i) search or ii) CPU force tasks.
+        if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork &&
+            (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)))
+        {
+            stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
+            stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+        }
         /* Now we have the energies and forces corresponding to the
          * coordinates at time t. We must output all of this before
          * the update.
@@ -1284,7 +1290,7 @@ void gmx::LegacySimulator::do_md()
 
                 // Copy data to the GPU after buffers might have being reinitialized
                 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
-                stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+                stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
             }
 
             stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::All);
@@ -1308,8 +1314,6 @@ void gmx::LegacySimulator::do_md()
             {
                 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
                 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
-                stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
-                stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
             }
         }
         else
@@ -1426,6 +1430,12 @@ void gmx::LegacySimulator::do_md()
 
             if (bGStat || needEkinAtNextStep || doInterSimSignal)
             {
+                // Copy coordinates when needed to stop the CM motion.
+                if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
+                {
+                    stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
+                    stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+                }
                 // Since we're already communicating at this step, we
                 // can propagate intra-simulation signals. Note that
                 // check_nstglobalcomm has the responsibility for
@@ -1462,7 +1472,7 @@ void gmx::LegacySimulator::do_md()
                     // TODO: The special case of removing CM motion should be dealt more gracefully
                     if (useGpuForUpdate)
                     {
-                        stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+                        stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
                         stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
                     }
                 }
@@ -1714,6 +1724,12 @@ void gmx::LegacySimulator::do_md()
 
     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
 
+    if (fr->pmePpCommGpu)
+    {
+        // Destroy the object since it is no longer required. (This needs to be done while the GPU context still exists.)
+        fr->pmePpCommGpu.reset();
+    }
+
     global_stat_destroy(gstat);
 
 }