{
DevelopmentFeatureFlags devFlags;
- devFlags.enableGpuBufferOps =
- GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
+ devFlags.enableGpuBufferOps = (GMX_GPU_CUDA || GMX_GPU_SYCL) && useGpuForNonbonded
+ && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
devFlags.enableGpuHaloExchange = GMX_MPI && GMX_GPU_CUDA && getenv("GMX_GPU_DD_COMMS") != nullptr;
devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE;
devFlags.enableGpuPmePPComm = GMX_MPI && GMX_GPU_CUDA && getenv("GMX_GPU_PME_PP_COMMS") != nullptr;
hw_opt.nthreads_tmpi);
useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(useGpuForNonbonded,
pmeTarget,
+ pmeFftTarget,
numAvailableDevices,
userGpuTaskAssignment,
*hwinfo_,
// master and spawned threads joins at the end of this block.
}
- GMX_RELEASE_ASSERT(ms || simulationCommunicator != MPI_COMM_NULL,
+ GMX_RELEASE_ASSERT(!GMX_MPI || ms || simulationCommunicator != MPI_COMM_NULL,
"Must have valid communicator unless running a multi-simulation");
CommrecHandle crHandle = init_commrec(simulationCommunicator);
t_commrec* cr = crHandle.get();
GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
partialDeserializedTpr.reset(nullptr);
- GMX_RELEASE_ASSERT(
- !inputrec->useConstantAcceleration,
- "Linear acceleration has been removed in GROMACS 2022, and was broken for many years "
- "before that. Use GROMACS 4.5 or earlier if you need this feature.");
-
- // Now we decide whether to use the domain decomposition machinery.
- // Note that this does not necessarily imply actually using multiple domains.
// Now the number of ranks is known to all ranks, and each knows
// the inputrec read by the master rank. The ranks can now all run
// the task-deciding functions and will agree on the result
gpusWereDetected);
useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded,
pmeTarget,
+ pmeFftTarget,
userGpuTaskAssignment,
*hwinfo_,
*inputrec,
int deviceId = -1;
DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId);
- // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?)
- bool useTiming = true;
-
- if (GMX_GPU_CUDA)
- {
- /* WARNING: CUDA timings are incorrect with multiple streams.
- * This is the main reason why they are disabled by default.
- */
- // TODO: Consider turning on by default when we can detect nr of streams.
- useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
- }
- else if (GMX_GPU_OPENCL)
- {
- useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
- }
-
// TODO Currently this is always built, yet DD partition code
// checks if it is built before using it. Probably it should
// become an MDModule that is made only when another module
{
dd_setup_dlb_resource_sharing(cr, deviceId);
}
- deviceStreamManager = std::make_unique<DeviceStreamManager>(
- *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming);
+ const bool useGpuTiming = decideGpuTimingsUsage();
+ deviceStreamManager = std::make_unique<DeviceStreamManager>(
+ *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useGpuTiming);
}
// If the user chose a task assignment, give them some hints
ms,
&nrnb,
wcycle.get(),
- fr->bMolPBC);
+ fr->bMolPBC,
+ &observablesReducerBuilder);
/* Energy terms and groups */
gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),