namespace gmx
{
/*! \brief Barrier for safe simultaneous thread access to mdrunner data
 *
 * Used to ensure that the master thread does not modify mdrunner during
 * the copy made on the spawned threads.
 *
 * With a build that does not use thread-MPI this is a no-op, because no
 * in-process sharing of the runner occurs. */
static void threadMpiMdrunnerAccessBarrier()
{
#if GMX_THREAD_MPI
    // With thread-MPI, MPI_COMM_WORLD covers the ranks of this process,
    // so this synchronizes the master thread with the spawned threads.
    MPI_Barrier(MPI_COMM_WORLD);
#endif
}
+
//! Reinitialize the per-thread state of a freshly spawned thread-MPI rank.
void Mdrunner::reinitializeOnSpawnedThread()
{
    // TODO This duplication is formally necessary if any thread might
    // modify any memory in fnm or the pointers it contains. If the
    // contents are ever provably const, then we can remove this
    // allocation (and memory leak).
    // TODO This should probably become part of a copy constructor for
    // Mdrunner.
    fnm = dup_tfn(nfile, fnm);

    // Wait for the master thread, so that the mdrunner copy made for this
    // thread is complete before we start changing per-thread state.
    threadMpiMdrunnerAccessBarrier();

    cr = reinitialize_commrec_for_this_thread(cr, ms);

    GMX_RELEASE_ASSERT(!MASTER(cr), "reinitializeOnSpawnedThread should only be called on spawned threads");

    // Only the master rank writes to the log file
    fplog = nullptr;
}
/*! \brief The callback used for running on spawned threads.
* argument permitted to the thread-launch API call, copies it to make
* a new runner for this thread, reinitializes necessary data, and
* proceeds to the simulation. */
-static void mdrunner_start_fn(void *arg)
+static void mdrunner_start_fn(const void *arg)
{
try
{
auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
/* copy the arg list to make sure that it's thread-local. This
- doesn't copy pointed-to items, of course, but those are all
- const. */
+ doesn't copy pointed-to items, of course; fnm, cr and fplog
+ are reset in the call below, all others should be const. */
gmx::Mdrunner mdrunner = *masterMdrunner;
mdrunner.reinitializeOnSpawnedThread();
mdrunner.mdrunner();
* (including the main thread) for thread-parallel runs. This in turn
* calls mdrunner() for each thread. All options are the same as for
* mdrunner(). */
-t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch)
+t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
{
/* first check whether we even need to start tMPI */
return cr;
}
- gmx::Mdrunner spawnedMdrunner = *this;
- // TODO This duplication is formally necessary if any thread might
- // modify any memory in fnm or the pointers it contains. If the
- // contents are ever provably const, then we can remove this
- // allocation (and memory leak).
- // TODO This should probably become part of a copy constructor for
- // Mdrunner.
- spawnedMdrunner.fnm = dup_tfn(this->nfile, fnm);
-
#if GMX_THREAD_MPI
/* now spawn new threads that start mdrunner_start_fn(), while
the main thread returns, we set thread affinity later */
if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
- mdrunner_start_fn, static_cast<void*>(&spawnedMdrunner)) != TMPI_SUCCESS)
+ mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
{
GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
}
+
+ threadMpiMdrunnerAccessBarrier();
#else
GMX_UNUSED_VALUE(mdrunner_start_fn);
#endif
- return reinitialize_commrec_for_this_thread(cr);
+ return reinitialize_commrec_for_this_thread(cr, ms);
}
} // namespace
/* CAUTION: threads may be started later on in this function, so
cr doesn't reflect the final parallel state right now */
- gmx::MDModules mdModules;
- t_inputrec inputrecInstance;
- t_inputrec *inputrec = &inputrecInstance;
+ std::unique_ptr<gmx::MDModules> mdModules(new gmx::MDModules);
+ t_inputrec inputrecInstance;
+ t_inputrec *inputrec = &inputrecInstance;
snew(mtop, 1);
if (mdrunOptions.continuationOptions.appendFiles)
gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
gmx::MDLogger mdlog(logOwner.logger());
- hwinfo = gmx_detect_hardware(mdlog, cr);
+ hwinfo = gmx_detect_hardware(mdlog);
- gmx_print_detected_hardware(fplog, cr, mdlog, hwinfo);
+ gmx_print_detected_hardware(fplog, cr, ms, mdlog, hwinfo);
std::vector<int> gpuIdsToUse;
auto compatibleGpus = getCompatibleGpus(hwinfo->gpu_info);
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
// TODO: Error handling
- mdModules.assignOptionsToModules(*inputrec->params, nullptr);
+ mdModules->assignOptionsToModules(*inputrec->params, nullptr);
if (fplog != nullptr)
{
snew(fcd, 1);
/* This needs to be called before read_checkpoint to extend the state */
- init_disres(fplog, mtop, inputrec, cr, fcd, globalState.get(), replExParams.exchangeInterval > 0);
+ init_disres(fplog, mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
- init_orires(fplog, mtop, inputrec, cr, globalState.get(), &(fcd->orires));
+ init_orires(fplog, mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
if (inputrecDeform(inputrec))
{
/* Initialize per-physical-node MPI process/thread ID and counters. */
gmx_init_intranode_counters(cr);
- if (cr->ms && cr->ms->nsim > 1 && !opt2bSet("-multidir", nfile, fnm))
- {
- GMX_LOG(mdlog.info).asParagraph().
- appendText("The -multi flag is deprecated, and may be removed in a future version. Please "
- "update your workflows to use -multidir instead.");
- }
#if GMX_MPI
- if (MULTISIM(cr))
+ if (isMultiSim(ms))
{
GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
"This is simulation %d out of %d running as a composite GROMACS\n"
"multi-simulation job. Setup for this simulation:\n",
- cr->ms->sim, cr->ms->nsim);
+ ms->sim, ms->nsim);
}
GMX_LOG(mdlog.warning).appendTextFormatted(
"Using %d MPI %s\n",
check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme);
/* Check and update the number of OpenMP threads requested */
- checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, pmeRunMode, *mtop);
+ checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, pmeRunMode, *mtop);
gmx_omp_nthreads_init(mdlog, cr,
hwinfo->nthreads_hw_avail,
{
// Produce the task assignment for this rank.
gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
- mdlog, cr, gpuTasksOnThisRank);
+ mdlog, cr, ms, gpuTasksOnThisRank);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
{
MPI_Barrier(cr->mpi_comm_mysim);
}
- if (MULTISIM(cr))
+ if (isMultiSim(ms))
{
if (MASTER(cr))
{
- MPI_Barrier(cr->ms->mpi_comm_masters);
+ MPI_Barrier(ms->mpi_comm_masters);
}
/* We need another barrier to prevent non-master ranks from continuing
 * when an error occurred in a different simulation.
checkHardwareOversubscription(numThreadsOnThisRank,
*hwinfo->hardwareTopology,
- cr, mdlog);
+ cr, ms, mdlog);
if (hw_opt.thread_affinity != threadaffOFF)
{
gmx_check_thread_affinity_set(mdlog, cr,
&hw_opt, hwinfo->nthreads_hw_avail, TRUE);
+ int numThreadsOnThisNode, intraNodeThreadOffset;
+ analyzeThreadsOnThisNode(cr, ms, nullptr, numThreadsOnThisRank, &numThreadsOnThisNode,
+ &intraNodeThreadOffset);
+
/* Set the CPU affinity */
gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
- numThreadsOnThisRank, nullptr);
+ numThreadsOnThisRank, numThreadsOnThisNode,
+ intraNodeThreadOffset, nullptr);
}
if (mdrunOptions.timingOptions.resetStep > -1)
{
/* Initiate forcerecord */
fr = mk_forcerec();
- fr->forceProviders = mdModules.initForceProviders();
+ fr->forceProviders = mdModules->initForceProviders();
init_forcerec(fplog, mdlog, fr, fcd,
inputrec, mtop, cr, box,
opt2fn("-table", nfile, fnm),
}
/* Now do whatever the user wants us to do (how flexible...) */
- my_integrator(inputrec->eI) (fplog, cr, mdlog, nfile, fnm,
+ my_integrator(inputrec->eI) (fplog, cr, ms, mdlog, nfile, fnm,
oenv,
mdrunOptions,
vsite, constr,
- mdModules.outputProvider(),
+ mdModules->outputProvider(),
inputrec, mtop,
fcd,
globalState.get(),
inputrec, nrnb, wcycle, walltime_accounting,
fr ? fr->nbv : nullptr,
pmedata,
- EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
+ EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
// Free PME data
if (pmedata)
// As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
mdAtoms.reset(nullptr);
globalState.reset(nullptr);
+ mdModules.reset(nullptr); // destruct force providers here as they might also use the GPU
/* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
- free_gpu_resources(fr, cr);
+ free_gpu_resources(fr, cr, ms);
free_gpu(nonbondedDeviceInfo);
free_gpu(pmeDeviceInfo);
if (MASTER(cr) && continuationOptions.appendFiles)
{
gmx_log_close(fplog);
+ fplog = nullptr;
}
rc = (int)gmx_get_stop_condition();
wait for that. */
if (PAR(cr) && MASTER(cr))
{
+ done_commrec(cr);
tMPI_Finalize();
}
#endif