Reorganize search-time code in do_force_cutsVERLET

author Szilárd Páll <pall.szilard@gmail.com>
Fri, 8 Mar 2019 14:18:52 +0000 (15:18 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Wed, 13 Mar 2019 21:25:47 +0000 (22:25 +0100)
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp

index cd6cb1ebc4bdaebaf6c96e35e7fc5b6afb5b8500..7b068f8bafc15a8563b8001590ecb14c73e9e2ed 100644 (file)
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -982,45 +982,36 @@ static void do_force_cutsVERLET(FILE *fplog,
          nbnxn_atomdata_set(nbv->nbat.get(), nbv->nbs.get(), mdatoms, fr->cginfo);
  
          wallcycle_stop(wcycle, ewcNS);
-    }
  
-    /* initialize the GPU atom data and copy shift vector */
-    if (bUseGPU)
-    {
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-
-        if (bNS)
+        /* initialize the GPU nbnxm atom data and bonded data structures */
+        if (bUseGPU)
          {
-            Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get());
-        }
+            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
  
-        Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
+            wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+            Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get());
+            wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
  
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+            if (fr->gpuBonded)
+            {
+                /* Now we put all atoms on the grid, we can assign bonded
+                 * interactions to the GPU, where the grid order is
+                 * needed. Also the xq, f and fshift device buffers have
+                 * been reallocated if needed, so the bonded code can
+                 * learn about them. */
+                // TODO the xq, f, and fshift buffers are now shared
+                // resources, so they should be maintained by a
+                // higher-level object than the nb module.
+                fr->gpuBonded->updateInteractionListsAndDeviceBuffers(nbnxn_get_gridindices(fr->nbv->nbs.get()),
+                                                                      top->idef,
+                                                                      Nbnxm::gpu_get_xq(nbv->gpu_nbv),
+                                                                      Nbnxm::gpu_get_f(nbv->gpu_nbv),
+                                                                      Nbnxm::gpu_get_fshift(nbv->gpu_nbv));
+            }
  
-        if (bNS && fr->gpuBonded)
-        {
-            /* Now we put all atoms on the grid, we can assign bonded
-             * interactions to the GPU, where the grid order is
-             * needed. Also the xq, f and fshift device buffers have
-             * been reallocated if needed, so the bonded code can
-             * learn about them. */
-            // TODO the xq, f, and fshift buffers are now shared
-            // resources, so they should be maintained by a
-            // higher-level object than the nb module.
-            fr->gpuBonded->updateInteractionListsAndDeviceBuffers(nbnxn_get_gridindices(fr->nbv->nbs.get()),
-                                                                  top->idef,
-                                                                  Nbnxm::gpu_get_xq(nbv->gpu_nbv),
-                                                                  Nbnxm::gpu_get_f(nbv->gpu_nbv),
-                                                                  Nbnxm::gpu_get_fshift(nbv->gpu_nbv));
+            wallcycle_stop(wcycle, ewcLAUNCH_GPU);
          }
  
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
-    }
-
-    if (bNS)
-    {
          // Need to run after the GPU-offload bonded interaction lists
          // are set up to be able to determine whether there is bonded work.
          setupForceWorkload(ppForceWorkload,
@@ -1035,6 +1026,7 @@ static void do_force_cutsVERLET(FILE *fplog,
      /* do local pair search */
      if (bNS)
      {
+        // TODO: fuse this branch with the above bNS block
          wallcycle_start_nocount(wcycle, ewcNS);
          wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
          /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
@@ -1057,6 +1049,7 @@ static void do_force_cutsVERLET(FILE *fplog,
          wallcycle_start(wcycle, ewcLAUNCH_GPU);
  
          wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
          Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
                                    Nbnxm::AtomLocality::Local,
                                    ppForceWorkload->haveGpuBondedWork);
@@ -1094,6 +1087,7 @@ static void do_force_cutsVERLET(FILE *fplog,
      {
          if (bNS)
          {
+            // TODO: fuse this branch with the above large bNS block
              wallcycle_start_nocount(wcycle, ewcNS);
              wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
              /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
author	Szilárd Páll <pall.szilard@gmail.com>
	Fri, 8 Mar 2019 14:18:52 +0000 (15:18 +0100)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 13 Mar 2019 21:25:47 +0000 (22:25 +0100)