Merge branch 'release-2018'

[alexxy/gromacs.git] / src / programs / mdrun / runner.cpp
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp

index 9c6f463ae595f159d0ca5175fc26529796972cca..67824e3258e4705a9d4c2312dab72175a0124469 100644 (file)
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -140,6 +140,17 @@ tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
  namespace gmx
  {
  
+/*! \brief Barrier for safe simultaneous thread access to mdrunner data
+ *
+ * Used to ensure that the master thread does not modify mdrunner during copy
+ * on the spawned threads. This is a no-op unless GMX_THREAD_MPI is non-zero. */
+static void threadMpiMdrunnerAccessBarrier()
+{
+#if GMX_THREAD_MPI
+    MPI_Barrier(MPI_COMM_WORLD);
+#endif
+}
+
  void Mdrunner::reinitializeOnSpawnedThread()
  {
      // TODO This duplication is formally necessary if any thread might
@@ -150,13 +161,14 @@ void Mdrunner::reinitializeOnSpawnedThread()
      // Mdrunner.
      fnm = dup_tfn(nfile, fnm);
  
-    cr  = reinitialize_commrec_for_this_thread(cr);
+    threadMpiMdrunnerAccessBarrier();
  
-    if (!MASTER(cr))
-    {
-        // Only the master rank writes to the log files
-        fplog = nullptr;
-    }
+    cr  = reinitialize_commrec_for_this_thread(cr, ms);
+
+    GMX_RELEASE_ASSERT(!MASTER(cr), "reinitializeOnSpawnedThread should only be called on spawned threads");
+
+    // Only the master rank writes to the log file
+    fplog = nullptr;
  }
  
  /*! \brief The callback used for running on spawned threads.
@@ -165,14 +177,14 @@ void Mdrunner::reinitializeOnSpawnedThread()
   * argument permitted to the thread-launch API call, copies it to make
   * a new runner for this thread, reinitializes necessary data, and
   * proceeds to the simulation. */
-static void mdrunner_start_fn(void *arg)
+static void mdrunner_start_fn(const void *arg)
  {
      try
      {
          auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
          /* copy the arg list to make sure that it's thread-local. This
-           doesn't copy pointed-to items, of course, but those are all
-           const. */
+           doesn't copy pointed-to items, of course; fnm, cr and fplog
+           are reset in the call below, all others should be const. */
          gmx::Mdrunner mdrunner = *masterMdrunner;
          mdrunner.reinitializeOnSpawnedThread();
          mdrunner.mdrunner();
@@ -187,7 +199,7 @@ static void mdrunner_start_fn(void *arg)
   * (including the main thread) for thread-parallel runs. This in turn
   * calls mdrunner() for each thread. All options are the same as for
   * mdrunner(). */
-t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch)
+t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
  {
  
      /* first check whether we even need to start tMPI */
@@ -196,28 +208,21 @@ t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch)
          return cr;
      }
  
-    gmx::Mdrunner spawnedMdrunner = *this;
-    // TODO This duplication is formally necessary if any thread might
-    // modify any memory in fnm or the pointers it contains. If the
-    // contents are ever provably const, then we can remove this
-    // allocation (and memory leak).
-    // TODO This should probably become part of a copy constructor for
-    // Mdrunner.
-    spawnedMdrunner.fnm = dup_tfn(this->nfile, fnm);
-
  #if GMX_THREAD_MPI
      /* now spawn new threads that start mdrunner_start_fn(), while
         the main thread returns, we set thread affinity later */
      if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
-                     mdrunner_start_fn, static_cast<void*>(&spawnedMdrunner)) != TMPI_SUCCESS)
+                     mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
      {
          GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
      }
+
+    threadMpiMdrunnerAccessBarrier();
  #else
      GMX_UNUSED_VALUE(mdrunner_start_fn);
  #endif
  
-    return reinitialize_commrec_for_this_thread(cr);
+    return reinitialize_commrec_for_this_thread(cr, ms);
  }
  
  }      // namespace
@@ -444,9 +449,9 @@ int Mdrunner::mdrunner()
  
      /* CAUTION: threads may be started later on in this function, so
         cr doesn't reflect the final parallel state right now */
-    gmx::MDModules mdModules;
-    t_inputrec     inputrecInstance;
-    t_inputrec    *inputrec = &inputrecInstance;
+    std::unique_ptr<gmx::MDModules> mdModules(new gmx::MDModules);
+    t_inputrec                      inputrecInstance;
+    t_inputrec                     *inputrec = &inputrecInstance;
      snew(mtop, 1);
  
      if (mdrunOptions.continuationOptions.appendFiles)
@@ -496,9 +501,9 @@ int Mdrunner::mdrunner()
      gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
      gmx::MDLogger    mdlog(logOwner.logger());
  
-    hwinfo = gmx_detect_hardware(mdlog, cr);
+    hwinfo = gmx_detect_hardware(mdlog);
  
-    gmx_print_detected_hardware(fplog, cr, mdlog, hwinfo);
+    gmx_print_detected_hardware(fplog, cr, ms, mdlog, hwinfo);
  
      std::vector<int> gpuIdsToUse;
      auto             compatibleGpus = getCompatibleGpus(hwinfo->gpu_info);
@@ -682,7 +687,7 @@ int Mdrunner::mdrunner()
      GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
  
      // TODO: Error handling
-    mdModules.assignOptionsToModules(*inputrec->params, nullptr);
+    mdModules->assignOptionsToModules(*inputrec->params, nullptr);
  
      if (fplog != nullptr)
      {
@@ -790,9 +795,9 @@ int Mdrunner::mdrunner()
      snew(fcd, 1);
  
      /* This needs to be called before read_checkpoint to extend the state */
-    init_disres(fplog, mtop, inputrec, cr, fcd, globalState.get(), replExParams.exchangeInterval > 0);
+    init_disres(fplog, mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
  
-    init_orires(fplog, mtop, inputrec, cr, globalState.get(), &(fcd->orires));
+    init_orires(fplog, mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
  
      if (inputrecDeform(inputrec))
      {
@@ -913,19 +918,13 @@ int Mdrunner::mdrunner()
  
      /* Initialize per-physical-node MPI process/thread ID and counters. */
      gmx_init_intranode_counters(cr);
-    if (cr->ms && cr->ms->nsim > 1 && !opt2bSet("-multidir", nfile, fnm))
-    {
-        GMX_LOG(mdlog.info).asParagraph().
-            appendText("The -multi flag is deprecated, and may be removed in a future version. Please "
-                       "update your workflows to use -multidir instead.");
-    }
  #if GMX_MPI
-    if (MULTISIM(cr))
+    if (isMultiSim(ms))
      {
          GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
                  "This is simulation %d out of %d running as a composite GROMACS\n"
                  "multi-simulation job. Setup for this simulation:\n",
-                cr->ms->sim, cr->ms->nsim);
+                ms->sim, ms->nsim);
      }
      GMX_LOG(mdlog.warning).appendTextFormatted(
              "Using %d MPI %s\n",
@@ -943,7 +942,7 @@ int Mdrunner::mdrunner()
      check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme);
  
      /* Check and update the number of OpenMP threads requested */
-    checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, pmeRunMode, *mtop);
+    checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, pmeRunMode, *mtop);
  
      gmx_omp_nthreads_init(mdlog, cr,
                            hwinfo->nthreads_hw_avail,
@@ -1023,7 +1022,7 @@ int Mdrunner::mdrunner()
      {
          // Produce the task assignment for this rank.
          gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
-                                              mdlog, cr, gpuTasksOnThisRank);
+                                              mdlog, cr, ms, gpuTasksOnThisRank);
      }
      GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
  
@@ -1040,11 +1039,11 @@ int Mdrunner::mdrunner()
      {
          MPI_Barrier(cr->mpi_comm_mysim);
      }
-    if (MULTISIM(cr))
+    if (isMultiSim(ms))
      {
          if (MASTER(cr))
          {
-            MPI_Barrier(cr->ms->mpi_comm_masters);
+            MPI_Barrier(ms->mpi_comm_masters);
          }
          /* We need another barrier to prevent non-master ranks from continuing
           * when an error occurred in a different simulation.
@@ -1107,7 +1106,7 @@ int Mdrunner::mdrunner()
  
      checkHardwareOversubscription(numThreadsOnThisRank,
                                    *hwinfo->hardwareTopology,
-                                  cr, mdlog);
+                                  cr, ms, mdlog);
  
      if (hw_opt.thread_affinity != threadaffOFF)
      {
@@ -1118,9 +1117,14 @@ int Mdrunner::mdrunner()
          gmx_check_thread_affinity_set(mdlog, cr,
                                        &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
  
+        int numThreadsOnThisNode, intraNodeThreadOffset;
+        analyzeThreadsOnThisNode(cr, ms, nullptr, numThreadsOnThisRank, &numThreadsOnThisNode,
+                                 &intraNodeThreadOffset);
+
          /* Set the CPU affinity */
          gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
-                                numThreadsOnThisRank, nullptr);
+                                numThreadsOnThisRank, numThreadsOnThisNode,
+                                intraNodeThreadOffset, nullptr);
      }
  
      if (mdrunOptions.timingOptions.resetStep > -1)
@@ -1159,7 +1163,7 @@ int Mdrunner::mdrunner()
      {
          /* Initiate forcerecord */
          fr                 = mk_forcerec();
-        fr->forceProviders = mdModules.initForceProviders();
+        fr->forceProviders = mdModules->initForceProviders();
          init_forcerec(fplog, mdlog, fr, fcd,
                        inputrec, mtop, cr, box,
                        opt2fn("-table", nfile, fnm),
@@ -1328,11 +1332,11 @@ int Mdrunner::mdrunner()
          }
  
          /* Now do whatever the user wants us to do (how flexible...) */
-        my_integrator(inputrec->eI) (fplog, cr, mdlog, nfile, fnm,
+        my_integrator(inputrec->eI) (fplog, cr, ms, mdlog, nfile, fnm,
                                       oenv,
                                       mdrunOptions,
                                       vsite, constr,
-                                     mdModules.outputProvider(),
+                                     mdModules->outputProvider(),
                                       inputrec, mtop,
                                       fcd,
                                       globalState.get(),
@@ -1370,7 +1374,7 @@ int Mdrunner::mdrunner()
                 inputrec, nrnb, wcycle, walltime_accounting,
                 fr ? fr->nbv : nullptr,
                 pmedata,
-               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
+               EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
  
      // Free PME data
      if (pmedata)
@@ -1385,9 +1389,10 @@ int Mdrunner::mdrunner()
      // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
      mdAtoms.reset(nullptr);
      globalState.reset(nullptr);
+    mdModules.reset(nullptr);   // destruct force providers here as they might also use the GPU
  
      /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
-    free_gpu_resources(fr, cr);
+    free_gpu_resources(fr, cr, ms);
      free_gpu(nonbondedDeviceInfo);
      free_gpu(pmeDeviceInfo);
  
@@ -1406,6 +1411,7 @@ int Mdrunner::mdrunner()
      if (MASTER(cr) && continuationOptions.appendFiles)
      {
          gmx_log_close(fplog);
+        fplog = nullptr;
      }
  
      rc = (int)gmx_get_stop_condition();
@@ -1416,6 +1422,7 @@ int Mdrunner::mdrunner()
         wait for that. */
      if (PAR(cr) && MASTER(cr))
      {
+        done_commrec(cr);
          tMPI_Finalize();
      }
  #endif