Unify handling of GMX_ENABLE_GPU_TIMING and GMX_DISABLE_GPU_TIMING
[alexxy/gromacs.git] / src / gromacs / nbnxm / nbnxm_gpu_data_mgmt.cpp
index e1df9d0a418e479f66b0e36d994d4bce46261083..2263593cb842d5e43f302c83f4584a371ad908f7 100644 (file)
@@ -107,7 +107,7 @@ static inline void init_ewald_coulomb_force_table(const EwaldCorrectionTables& t
 {
     if (nbp->coulomb_tab)
     {
-        destroyParamLookupTable(&nbp->coulomb_tab, nbp->coulomb_tab_texobj);
+        destroyParamLookupTable(&nbp->coulomb_tab, &nbp->coulomb_tab_texobj);
     }
 
     nbp->coulomb_tab_scale = tables.scale;
@@ -249,14 +249,14 @@ static inline void initAtomdataFirst(NBAtomDataGpu*       atomdata,
                                      const DeviceStream&  localStream)
 {
     atomdata->numTypes = numTypes;
-    allocateDeviceBuffer(&atomdata->shiftVec, SHIFTS, deviceContext);
+    allocateDeviceBuffer(&atomdata->shiftVec, gmx::c_numShiftVectors, deviceContext);
     atomdata->shiftVecUploaded = false;
 
-    allocateDeviceBuffer(&atomdata->fShift, SHIFTS, deviceContext);
+    allocateDeviceBuffer(&atomdata->fShift, gmx::c_numShiftVectors, deviceContext);
     allocateDeviceBuffer(&atomdata->eLJ, 1, deviceContext);
     allocateDeviceBuffer(&atomdata->eElec, 1, deviceContext);
 
-    clearDeviceBufferAsync(&atomdata->fShift, 0, SHIFTS, localStream);
+    clearDeviceBufferAsync(&atomdata->fShift, 0, gmx::c_numShiftVectors, localStream);
     clearDeviceBufferAsync(&atomdata->eElec, 0, 1, localStream);
     clearDeviceBufferAsync(&atomdata->eLJ, 0, 1, localStream);
 
@@ -431,18 +431,7 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
     nb->timers = new Nbnxm::GpuTimers();
     snew(nb->timings, 1);
 
-    /* WARNING: CUDA timings are incorrect with multiple streams.
-     * This is the main reason why they are disabled by default.
-     * Can be enabled by setting GMX_ENABLE_GPU_TIMING environment variable.
-     * TODO: Consider turning on by default when we can detect nr of streams.
-     *
-     * OpenCL timing is enabled by default and can be disabled by
-     * GMX_DISABLE_GPU_TIMING environment variable.
-     *
-     * Timing is disabled in SYCL.
-     */
-    nb->bDoTime = (GMX_GPU_CUDA && (getenv("GMX_ENABLE_GPU_TIMING") != nullptr))
-                  || (GMX_GPU_OPENCL && (getenv("GMX_DISABLE_GPU_TIMING") == nullptr));
+    nb->bDoTime = decideGpuTimingsUsage();
 
     if (nb->bDoTime)
     {
@@ -452,7 +441,7 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
     /* init nbst */
     pmalloc(reinterpret_cast<void**>(&nb->nbst.eLJ), sizeof(*nb->nbst.eLJ));
     pmalloc(reinterpret_cast<void**>(&nb->nbst.eElec), sizeof(*nb->nbst.eElec));
-    pmalloc(reinterpret_cast<void**>(&nb->nbst.fShift), SHIFTS * sizeof(*nb->nbst.fShift));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.fShift), gmx::c_numShiftVectors * sizeof(*nb->nbst.fShift));
 
     init_plist(nb->plist[InteractionLocality::Local]);
 
@@ -518,7 +507,7 @@ void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
         copyToDeviceBuffer(&adat->shiftVec,
                            gmx::asGenericFloat3Pointer(nbatom->shift_vec),
                            0,
-                           SHIFTS,
+                           gmx::c_numShiftVectors,
                            localStream,
                            GpuApiCallBehavior::Async,
                            nullptr);
@@ -718,7 +707,7 @@ void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
     // Clear shift force array and energies if the outputs were used in the current step
     if (computeVirial)
     {
-        clearDeviceBufferAsync(&adat->fShift, 0, SHIFTS, localStream);
+        clearDeviceBufferAsync(&adat->fShift, 0, gmx::c_numShiftVectors, localStream);
         clearDeviceBufferAsync(&adat->eLJ, 0, 1, localStream);
         clearDeviceBufferAsync(&adat->eElec, 0, 1, localStream);
     }
@@ -746,7 +735,9 @@ bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
             || (nb->nbparam->elecType == ElecType::EwaldAnaTwin));
 }
 
-void setupGpuShortRangeWork(NbnxmGpu* nb, const gmx::GpuBonded* gpuBonded, const gmx::InteractionLocality iLocality)
+void setupGpuShortRangeWork(NbnxmGpu*                      nb,
+                            const gmx::ListedForcesGpu*    listedForcesGpu,
+                            const gmx::InteractionLocality iLocality)
 {
     GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
 
@@ -754,7 +745,7 @@ void setupGpuShortRangeWork(NbnxmGpu* nb, const gmx::GpuBonded* gpuBonded, const
     // interaction locality contains entries or if there is any
     // bonded work (as this is not split into local/nonlocal).
     nb->haveWork[iLocality] = ((nb->plist[iLocality]->nsci != 0)
-                               || (gpuBonded != nullptr && gpuBonded->haveInteractions()));
+                               || (listedForcesGpu != nullptr && listedForcesGpu->haveInteractions()));
 }
 
 bool haveGpuShortRangeWork(const NbnxmGpu* nb, const gmx::InteractionLocality interactionLocality)
@@ -859,7 +850,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
             copyFromDeviceBuffer(nb->nbst.fShift,
                                  &adat->fShift,
                                  0,
-                                 SHIFTS,
+                                 gmx::c_numShiftVectors,
                                  deviceStream,
                                  GpuApiCallBehavior::Async,
                                  bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
@@ -1132,17 +1123,17 @@ void gpu_free(NbnxmGpu* nb)
     /* Free nbparam */
     if (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin)
     {
-        destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj);
+        destroyParamLookupTable(&nbparam->coulomb_tab, &nbparam->coulomb_tab_texobj);
     }
 
     if (!useLjCombRule(nb->nbparam->vdwType))
     {
-        destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj);
+        destroyParamLookupTable(&nbparam->nbfp, &nbparam->nbfp_texobj);
     }
 
     if (nbparam->vdwType == VdwType::EwaldGeom || nbparam->vdwType == VdwType::EwaldLB)
     {
-        destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj);
+        destroyParamLookupTable(&nbparam->nbfp_comb, &nbparam->nbfp_comb_texobj);
     }
 
     /* Free plist */
@@ -1182,4 +1173,11 @@ void gpu_free(NbnxmGpu* nb)
     }
 }
 
+DeviceBuffer<gmx::RVec> gpu_get_f(NbnxmGpu* nb)
+{
+    GMX_ASSERT(nb != nullptr, "nb pointer must be valid");
+
+    return nb->atdat->f;
+}
+
 } // namespace Nbnxm