Remove logging from hardware detection

author Mark Abraham <mark.j.abraham@gmail.com>

Wed, 4 Nov 2020 14:16:50 +0000 (14:16 +0000)

committer Joe Jordan <ejjordan12@gmail.com>

Wed, 4 Nov 2020 14:16:50 +0000 (14:16 +0000)
author Mark Abraham <mark.j.abraham@gmail.com>
Wed, 4 Nov 2020 14:16:50 +0000 (14:16 +0000)
committer Joe Jordan <ejjordan12@gmail.com>
Wed, 4 Nov 2020 14:16:50 +0000 (14:16 +0000)
diff --git a/api/gmxapi/cpp/context.cpp b/api/gmxapi/cpp/context.cpp

index 1b5cb813d14bbf0812796d074280f99e0b66ac99..6d11120eae6c55e520374f3ba4a21636f41f7db0 100644 (file)
--- a/api/gmxapi/cpp/context.cpp
+++ b/api/gmxapi/cpp/context.cpp
@@ -55,6 +55,8 @@
  #include "gromacs/commandline/filenm.h"
  #include "gromacs/commandline/pargs.h"
  #include "gromacs/gmxlib/network.h"
+#include "gromacs/hardware/detecthardware.h"
+#include "gromacs/hardware/hw_info.h"
  #include "gromacs/mdlib/stophandler.h"
  #include "gromacs/mdrunutility/logging.h"
  #include "gromacs/mdrunutility/multisim.h"
@@ -65,7 +67,7 @@
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxmpi.h"
  #include "gromacs/utility/init.h"
-#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
  
  #include "gmxapi/mpi/resourceassignment.h"
  #include "gmxapi/exceptions.h"
@@ -229,7 +231,9 @@ Context createContext()
  }
  
  ContextImpl::ContextImpl(MpiContextManager&& mpi) noexcept(std::is_nothrow_constructible_v<gmx::LegacyMdrunOptions>) :
-    mpi_(std::move(mpi))
+    mpi_(std::move(mpi)),
+    hardwareInformation_(gmx_detect_hardware(
+            gmx::PhysicalNodeCommunicator(mpi_.communicator(), gmx_physicalnode_id_hash())))
  {
      // Confirm our understanding of the MpiContextManager invariant.
      GMX_ASSERT(mpi_.communicator() == MPI_COMM_NULL ? !GMX_LIB_MPI : GMX_LIB_MPI,
@@ -333,6 +337,9 @@ std::shared_ptr<Session> ContextImpl::launch(const Workflow& work)
          // lifetime. The gmx wrapper binary uses the same infrastructure,
          // but the lifetime is now trivially that of the invocation of the
          // wrapper binary.
+        //
+        // For now, this should match the communicator used for hardware
+        // detection. There's no way to assert this is true.
          auto communicator = mpi_.communicator();
          // Confirm the precondition for simulationContext().
          GMX_ASSERT(communicator == MPI_COMM_NULL ? !GMX_LIB_MPI : GMX_LIB_MPI,
@@ -350,6 +357,7 @@ std::shared_ptr<Session> ContextImpl::launch(const Workflow& work)
  
          auto builder = MdrunnerBuilder(std::move(mdModules),
                                         compat::not_null<SimulationContext*>(&simulationContext));
+        builder.addHardwareDetectionResult(hardwareInformation_.get());
          builder.addSimulationMethod(options.mdrunOptions, options.pforce, startingBehavior);
          builder.addDomainDecomposition(options.domdecOptions);
          // \todo pass by value
diff --git a/api/gmxapi/cpp/context_impl.h b/api/gmxapi/cpp/context_impl.h

index 4f9296d379b59c52ffa2318e5ca110c782c6bb36..8091303a56b7e7cb5ce4a2b5025f10df42c480bf 100644 (file)
--- a/api/gmxapi/cpp/context_impl.h
+++ b/api/gmxapi/cpp/context_impl.h
@@ -51,6 +51,8 @@
  #include "gmxapi/context.h"
  #include "gmxapi/session.h"
  
+struct gmx_hw_info_t;
+
  namespace gmxapi
  {
  
@@ -277,6 +279,12 @@ public:
       */
      const MpiContextManager mpi_;
  
+    /*! \brief Owning handle to the results of the hardware detection.
+     *
+     * The hardware is detected across the whole environment described
+     * by \c mpi_ */
+    std::unique_ptr<gmx_hw_info_t> hardwareInformation_;
+
  private:
      /*!
       * \brief Basic constructor.
diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp

index 3bcdac4835f2d52b5db9855e7aaab9109b213f40..884b98a4a2079cf063a215863cd8f91c7871cb5d 100644 (file)
--- a/src/gromacs/hardware/detecthardware.cpp
+++ b/src/gromacs/hardware/detecthardware.cpp
@@ -94,26 +94,38 @@ namespace gmx
  #    define _SC_NPROCESSORS_CONF _SC_NPROC_CONF
  #endif
  
-/*! \brief Information about the hardware of all nodes (common to all threads in this process).
+/*! \brief The result of device detection
   *
- * This information is constructed only when required, but thereafter
- * its lifetime is that of the whole process, potentially across
- * multiple successive simulation parts. It's wise to ensure that only
- * one thread can create the information, but thereafter they can all
- * read it without e.g. needing a std::shared_ptr to ensure its
- * lifetime exceeds that of the thread. */
-static std::unique_ptr<gmx_hw_info_t> g_hardwareInfo;
-//! A mutex to protect the hwinfo structure
-static Mutex g_hardwareInfoMutex;
-
-//! Detect GPUs, if that makes sense to attempt.
-static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
-                            const PhysicalNodeCommunicator&  physicalNodeComm,
-                            compat::not_null<gmx_hw_info_t*> hardwareInfo)
+ * Note that non-functional device detection still produces
+ * a detection result, ie. of no devices. This might not be
+ * what the user wanted, so it makes sense to log later when
+ * that is possible. */
+struct DeviceDetectionResult
  {
+    //! The device information detected
+    std::vector<std::unique_ptr<DeviceInformation>> deviceInfoList_;
+    //! Container of possible warnings to issue when that is possible
+    std::vector<std::string> deviceDetectionWarnings_;
+};
+
+/*! \brief Detect GPUs when that makes sense to attempt.
+ *
+ * \param[in]  physicalNodeComm  The communicator across this physical node
+ * \return The result of the detection, perhaps including diagnostic messages
+ *         to issue later.
+ *
+ * \todo Coordinating the efficient detection of devices across
+ * multiple ranks per node should be separated from the lower-level
+ * hardware detection. See
+ * https://gitlab.com/gromacs/gromacs/-/issues/3650.
+ */
+static DeviceDetectionResult detectAllDeviceInformation(const PhysicalNodeCommunicator& physicalNodeComm)
+{
+    DeviceDetectionResult deviceDetectionResult;
+
      if (!isDeviceDetectionEnabled())
      {
-        return;
+        return deviceDetectionResult;
      }
  
      std::string errorMessage;
@@ -122,15 +134,22 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
  #if GMX_LIB_MPI
      isMasterRankOfPhysicalNode = (physicalNodeComm.rank_ == 0);
  #else
-    // We choose to run the detection only once with thread-MPI and
-    // use a mutex to enforce it.
+    // Without an MPI library, this process is trivially the only one
+    // on the physical node. This code runs before e.g. thread-MPI
+    // ranks are spawned, so detection is race-free by construction.
+    // Read-only access is enforced with providing those ranks with a
+    // handle to a const object, so usage is also free of races.
      GMX_UNUSED_VALUE(physicalNodeComm);
      isMasterRankOfPhysicalNode = true;
  #endif
  
-    /* The OpenCL support requires us to run detection on all ranks.
+    /* The SYCL and OpenCL support requires us to run detection on all
+     * ranks.
+     *
       * With CUDA we don't need to, and prefer to detect on one rank
-     * and send the information to the other ranks over MPI. */
+     * and send the information to the other ranks over MPI. This
+     * avoids creating a start-up bottleneck with each MPI rank on a
+     * node making the same GPU API calls. */
      constexpr bool allRanksMustDetectGpus = (GMX_GPU_OPENCL != 0 || GMX_GPU_SYCL != 0);
      bool           gpusCanBeDetected      = false;
      if (isMasterRankOfPhysicalNode || allRanksMustDetectGpus)
@@ -139,19 +158,14 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
          gpusCanBeDetected = isDeviceDetectionFunctional(&errorMessage);
          if (!gpusCanBeDetected)
          {
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "NOTE: Detection of GPUs failed. The API reported:\n"
-                            "      %s\n"
-                            "      GROMACS cannot run tasks on a GPU.",
-                            errorMessage.c_str());
+            deviceDetectionResult.deviceDetectionWarnings_.emplace_back(
+                    "Detection of GPUs failed. The API reported:\n" + errorMessage);
          }
      }
  
      if (gpusCanBeDetected)
      {
-        hardwareInfo->deviceInfoList = findDevices();
+        deviceDetectionResult.deviceInfoList_ = findDevices();
          // No need to tell the user anything at this point, they get a
          // hardware report later.
      }
@@ -166,7 +180,7 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
          if (isMasterRankOfPhysicalNode)
          {
              gmx::InMemorySerializer writer;
-            serializeDeviceInformations(hardwareInfo->deviceInfoList, &writer);
+            serializeDeviceInformations(deviceDetectionResult.deviceInfoList_, &writer);
              buffer       = writer.finishAndGetBuffer();
              sizeOfBuffer = buffer.size();
          }
@@ -180,11 +194,12 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
              if (!isMasterRankOfPhysicalNode)
              {
                  gmx::InMemoryDeserializer reader(buffer, false);
-                hardwareInfo->deviceInfoList = deserializeDeviceInformations(&reader);
+                deviceDetectionResult.deviceInfoList_ = deserializeDeviceInformations(&reader);
              }
          }
      }
  #endif
+    return deviceDetectionResult;
  }
  
  //! Reduce the locally collected \p hardwareInfo over MPI ranks
@@ -384,13 +399,8 @@ static void hardwareTopologyPrepareDetection()
  #endif
  }
  
-/*! \brief Sanity check hardware topology and print some notes to log
- *
- *  \param mdlog            Logger.
- *  \param hardwareTopology Reference to hardwareTopology object.
- */
-static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
-                                                 const gmx::HardwareTopology gmx_unused& hardwareTopology)
+void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
+                                          const gmx::HardwareTopology gmx_unused& hardwareTopology)
  {
  #if defined HAVE_SYSCONF && defined(_SC_NPROCESSORS_CONF)
      if (hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount)
@@ -430,23 +440,14 @@ static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused&
                              "performance.");
          }
      }
+#else
+    GMX_UNUSED_VALUE(mdlog);
+    GMX_UNUSED_VALUE(hardwareTopology);
  #endif
  }
  
-gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNodeCommunicator& physicalNodeComm)
+std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm)
  {
-    // By construction, only one thread ever runs hardware detection,
-    // but we may as well prevent issues arising if that would change.
-    // Taking the lock early ensures that exactly one thread can
-    // attempt to construct g_hardwareInfo.
-    lock_guard<Mutex> lock(g_hardwareInfoMutex);
-
-    // If we already have the information, just return a handle to it.
-    if (g_hardwareInfo != nullptr)
-    {
-        return g_hardwareInfo.get();
-    }
-
      // Make the new hardwareInfo in a temporary.
      hardwareTopologyPrepareDetection();
  
@@ -456,24 +457,29 @@ gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNod
              std::make_unique<CpuInfo>(CpuInfo::detect()),
              std::make_unique<HardwareTopology>(HardwareTopology::detect()));
  
-    // If we detected the topology on this system, double-check that it makes sense
-    if (hardwareInfo->hardwareTopology->isThisSystem())
-    {
-        hardwareTopologyDoubleCheckDetection(mdlog, *hardwareInfo->hardwareTopology);
-    }
-
      // TODO: Get rid of this altogether.
      hardwareInfo->nthreads_hw_avail = hardwareInfo->hardwareTopology->machine().logicalProcessorCount;
  
      // Detect GPUs
-    gmx_detect_gpus(mdlog, physicalNodeComm, compat::make_not_null(hardwareInfo));
+    // Open a nested scope so no temporary variables can
+    // be mis-used later.
+    {
+        DeviceDetectionResult deviceDetectionResult = detectAllDeviceInformation(physicalNodeComm);
+        hardwareInfo->deviceInfoList.swap(deviceDetectionResult.deviceInfoList_);
+        std::swap(hardwareInfo->hardwareDetectionWarnings_, deviceDetectionResult.deviceDetectionWarnings_);
+    }
+
      gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo));
  
-    // Now that the temporary is fully constructed, swap it to become
-    // the real thing.
-    g_hardwareInfo.swap(hardwareInfo);
+    return hardwareInfo;
+}
  
-    return g_hardwareInfo.get();
+void logHardwareDetectionWarnings(const gmx::MDLogger& mdlog, const gmx_hw_info_t& hardwareInformation)
+{
+    for (const std::string& warningString : hardwareInformation.hardwareDetectionWarnings_)
+    {
+        GMX_LOG(mdlog.warning).asParagraph().appendText(warningString);
+    }
  }
  
  } // namespace gmx
diff --git a/src/gromacs/hardware/detecthardware.h b/src/gromacs/hardware/detecthardware.h

index efacba0028a37402a646fe69cbf9fe2b587b3c3d..84286c9bd9798356fe645fd96c7233864484ddef 100644 (file)
--- a/src/gromacs/hardware/detecthardware.h
+++ b/src/gromacs/hardware/detecthardware.h
@@ -36,30 +36,45 @@
  #ifndef GMX_HARDWARE_DETECTHARDWARE_H
  #define GMX_HARDWARE_DETECTHARDWARE_H
  
+#include <memory>
+
  struct gmx_hw_info_t;
  
  namespace gmx
  {
+class HardwareTopology;
  class MDLogger;
  class PhysicalNodeCommunicator;
  
-/*! \brief Run detection, consistency checks, and make consistent
+/*! \brief Run detection and make correct and consistent
   * hardware information available on all ranks.
   *
- * This routine constructs the global hwinfo structure and returns a pointer to
- * it. It will run a preamble before executing cpu and hardware checks, and
- * then run consistency checks afterwards. The results will also be made
- * available on all nodes.
- *
   * May do communication on MPI_COMM_WORLD when compiled with real MPI.
   *
- * All processes in a physical node need to coordinate calling this
- * routine. With thread-MPI only the first call leads to detection
- * work, and any subsequent call receives the same handle. With real
- * MPI, communication is needed to coordinate the results. In all
- * cases, any thread within a process may use the returned handle. */
-gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger&            mdlog,
-                                   const PhysicalNodeCommunicator& physicalNodeComm);
+ * This routine is designed to be called once on each process.  In a
+ * thread-MPI configuration, it may only be called before the threads
+ * are spawned. With real MPI, communication is needed to coordinate
+ * the results. In all cases, any thread within a process may use the
+ * returned handle.
+ *
+ * \todo Replace the use of MPI_COMM_WORLD e.g. by using a libraryCommWorld
+ * argument. See https://gitlab.com/gromacs/gromacs/-/issues/3650
+ */
+std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm);
+
+/*! \brief Sanity check hardware topology and print some notes to log
+ *
+ *  \param mdlog            Logger.
+ *  \param hardwareTopology Reference to hardwareTopology object.
+ */
+void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger&         mdlog,
+                                          const gmx::HardwareTopology& hardwareTopology);
+
+/*! \brief Issue warnings to mdlog that were decided during detection
+ *
+ * \param[in] mdlog                Logger
+ * \param[in] hardwareInformation  The hardwareInformation */
+void logHardwareDetectionWarnings(const gmx::MDLogger& mdlog, const gmx_hw_info_t& hardwareInformation);
  
  } // namespace gmx
  
diff --git a/src/gromacs/hardware/hw_info.h b/src/gromacs/hardware/hw_info.h

index b7b796f8b6da8ee93d13841d03fdbe92a3dfa6ac..4d51d955adef28c88cd822041b06d4bd1e1183e1 100644 (file)
--- a/src/gromacs/hardware/hw_info.h
+++ b/src/gromacs/hardware/hw_info.h
@@ -92,6 +92,9 @@ struct gmx_hw_info_t
  
      gmx_bool bIdenticalGPUs; /* TRUE if all ranks have the same type(s) and order of GPUs */
      bool     haveAmdZen1Cpu; /* TRUE when at least one CPU in any of the nodes is AMD Zen of the first generation */
+
+    //! Container of warning strings to log later when that is possible.
+    std::vector<std::string> hardwareDetectionWarnings_;
  };
  
  
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index d43fe4fd8af94cd492cf34f342f4efeac3024e40..e78faae8d511116296804c5e9a3276e829970363 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -77,6 +77,7 @@
  #include "gromacs/hardware/cpuinfo.h"
  #include "gromacs/hardware/detecthardware.h"
  #include "gromacs/hardware/device_management.h"
+#include "gromacs/hardware/hardwaretopology.h"
  #include "gromacs/hardware/printhardware.h"
  #include "gromacs/imd/imd.h"
  #include "gromacs/listed_forces/disre.h"
@@ -335,6 +336,7 @@ Mdrunner Mdrunner::cloneOnSpawnedThread() const
      newRunner.hw_opt    = hw_opt;
      newRunner.filenames = filenames;
  
+    newRunner.hwinfo_         = hwinfo_;
      newRunner.oenv            = oenv;
      newRunner.mdrunOptions    = mdrunOptions;
      newRunner.domdecOptions   = domdecOptions;
@@ -724,7 +726,6 @@ int Mdrunner::mdrunner()
      gmx_wallcycle_t           wcycle;
      gmx_walltime_accounting_t walltime_accounting = nullptr;
      MembedHolder              membedHolder(filenames.size(), filenames.data());
-    gmx_hw_info_t*            hwinfo = nullptr;
  
      /* CAUTION: threads may be started later on in this function, so
         cr doesn't reflect the final parallel state right now */
@@ -763,20 +764,9 @@ int Mdrunner::mdrunner()
      gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
      gmx::MDLogger    mdlog(logOwner.logger());
  
-    // TODO The thread-MPI master rank makes a working
-    // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
-    // after the threads have been launched. This works because no use
-    // is made of that communicator until after the execution paths
-    // have rejoined. But it is likely that we can improve the way
-    // this is expressed, e.g. by expressly running detection only the
-    // master rank for thread-MPI, rather than relying on the mutex
-    // and reference count.
-    PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
-    hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
-
-    gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
+    gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo_);
  
-    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
+    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo_->deviceInfoList, hw_opt.gpuIdsAvailable);
      const int        numDevicesToUse = gmx::ssize(gpuIdsToUse);
  
      // Print citation requests after all software/hardware printing
@@ -825,7 +815,7 @@ int Mdrunner::mdrunner()
                      gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
                      hw_opt.nthreads_tmpi);
              useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
-                    useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
+                    useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo_,
                      *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
@@ -836,15 +826,13 @@ int Mdrunner::mdrunner()
           * prevent any possible subsequent checks from working
           * correctly. */
          hw_opt.nthreads_tmpi =
-                get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
+                get_nthreads_mpi(hwinfo_, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
                                   inputrec.get(), &mtop, mdlog, membedHolder.doMembed());
  
          // Now start the threads for thread MPI.
          spawnThreads(hw_opt.nthreads_tmpi);
          // The spawned threads enter mdrunner() and execution of
          // master and spawned threads joins at the end of this block.
-        physicalNodeComm =
-                PhysicalNodeCommunicator(libraryWorldCommunicator, gmx_physicalnode_id_hash());
      }
  
      GMX_RELEASE_ASSERT(ms || simulationCommunicator != MPI_COMM_NULL,
@@ -853,6 +841,14 @@ int Mdrunner::mdrunner()
      t_commrec*    cr       = crHandle.get();
      GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
  
+    PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
+
+    // If we detected the topology on this system, double-check that it makes sense
+    if (hwinfo_->hardwareTopology->isThisSystem())
+    {
+        hardwareTopologyDoubleCheckDetection(mdlog, *hwinfo_->hardwareTopology);
+    }
+
      if (PAR(cr))
      {
          /* now broadcast everything to the non-master nodes/threads: */
@@ -883,7 +879,7 @@ int Mdrunner::mdrunner()
      bool useGpuForPme       = false;
      bool useGpuForBonded    = false;
      bool useGpuForUpdate    = false;
-    bool gpusWereDetected   = hwinfo->ngpu_compatible_tot > 0;
+    bool gpusWereDetected   = hwinfo_->ngpu_compatible_tot > 0;
      try
      {
          // It's possible that there are different numbers of GPUs on
@@ -895,7 +891,7 @@ int Mdrunner::mdrunner()
                  nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
                  gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
          useGpuForPme = decideWhetherToUseGpusForPme(
-                useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec,
+                useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo_, *inputrec,
                  cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected);
          useGpuForBonded = decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
                                                            bondedTarget, *inputrec, mtop,
@@ -1164,7 +1160,7 @@ int Mdrunner::mdrunner()
       */
      prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box,
                            useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
-                          *hwinfo->cpuInfo);
+                          *hwinfo_->cpuInfo);
  
      // This builder is necessary while we have multi-part construction
      // of DD. Before DD is constructed, we use the existence of
@@ -1194,7 +1190,7 @@ int Mdrunner::mdrunner()
  
      // Produce the task assignment for this rank - done after DD is constructed
      GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build(
-            gpuIdsToUse, userGpuTaskAssignment, *hwinfo, simulationCommunicator, physicalNodeComm,
+            gpuIdsToUse, userGpuTaskAssignment, *hwinfo_, simulationCommunicator, physicalNodeComm,
              nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
              useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
              // TODO cr->duty & DUTY_PME should imply that a PME
@@ -1331,12 +1327,12 @@ int Mdrunner::mdrunner()
      // that existing affinity setting was from OpenMP or something
      // else, so we run this code both before and after we initialize
      // the OpenMP support.
-    gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+    gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, FALSE);
      /* Check and update the number of OpenMP threads requested */
-    checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
+    checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo_, cr, ms, physicalNodeComm.size_,
                                              pmeRunMode, mtop, *inputrec);
  
-    gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, physicalNodeComm.size_,
+    gmx_omp_nthreads_init(mdlog, cr, hwinfo_->nthreads_hw_avail, physicalNodeComm.size_,
                            hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP));
  
      // Enable FP exception detection, but not in
@@ -1357,7 +1353,7 @@ int Mdrunner::mdrunner()
      }
  
      /* Now that we know the setup is consistent, check for efficiency */
-    check_resource_division_efficiency(hwinfo, gpuTaskAssignments.thisRankHasAnyGpuTask(),
+    check_resource_division_efficiency(hwinfo_, gpuTaskAssignments.thisRankHasAnyGpuTask(),
                                         mdrunOptions.ntompOptionIsSet, cr, mdlog);
  
      /* getting number of PP/PME threads on this MPI / tMPI rank.
@@ -1366,7 +1362,7 @@ int Mdrunner::mdrunner()
       */
      const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded)
                                                                    : gmx_omp_nthreads_get(emntPME);
-    checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo->hardwareTopology,
+    checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo_->hardwareTopology,
                                    physicalNodeComm, mdlog);
  
      // Enable Peer access between GPUs where available
@@ -1385,14 +1381,14 @@ int Mdrunner::mdrunner()
           * - which indicates that probably the OpenMP library has changed it
           * since we first checked).
           */
-        gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
+        gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, TRUE);
  
          int numThreadsOnThisNode, intraNodeThreadOffset;
          analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
                                   &intraNodeThreadOffset);
  
          /* Set the CPU affinity */
-        gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, numThreadsOnThisRank,
+        gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo_->hardwareTopology, numThreadsOnThisRank,
                                  numThreadsOnThisNode, intraNodeThreadOffset, nullptr);
      }
  
@@ -1461,7 +1457,7 @@ int Mdrunner::mdrunner()
                      deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
          }
  
-        fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo,
+        fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo_,
                                          runScheduleWork.simulationWork.useGpuNonbonded,
                                          deviceStreamManager.get(), &mtop, box, wcycle);
          // TODO: Move the logic below to a GPU bonded builder
@@ -1797,7 +1793,7 @@ int Mdrunner::mdrunner()
      sfree(disresdata);
      sfree(oriresdata);
  
-    if (!hwinfo->deviceInfoList.empty())
+    if (!hwinfo_->deviceInfoList.empty())
      {
          /* stop the GPU profiler (only CUDA) */
          stopGpuProfiler();
@@ -1899,6 +1895,8 @@ public:
                                                  real                forceWarningThreshold,
                                                  StartingBehavior    startingBehavior);
  
+    void addHardwareDetectionResult(const gmx_hw_info_t* hwinfo);
+
      void addDomdec(const DomdecOptions& options);
  
      void addInput(SimulationInputHandle inputHolder);
@@ -1964,6 +1962,9 @@ private:
      //! The modules that comprise the functionality of mdrun.
      std::unique_ptr<MDModules> mdModules_;
  
+    //! Detected hardware.
+    const gmx_hw_info_t* hwinfo_ = nullptr;
+
      //! \brief Parallelism information.
      gmx_hw_opt_t hardwareOptions_;
  
@@ -2064,6 +2065,16 @@ Mdrunner Mdrunner::BuilderImplementation::build()
      // nullptr is a valid value for the multisim handle
      newRunner.ms = multiSimulation_;
  
+    if (hwinfo_)
+    {
+        newRunner.hwinfo_ = hwinfo_;
+    }
+    else
+    {
+        GMX_THROW(gmx::APIError(
+                "MdrunnerBuilder::addHardwareDetectionResult() is required before build()"));
+    }
+
      if (inputHolder_)
      {
          newRunner.inputHolder_ = std::move(inputHolder_);
@@ -2142,6 +2153,11 @@ Mdrunner Mdrunner::BuilderImplementation::build()
      return newRunner;
  }
  
+void Mdrunner::BuilderImplementation::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo)
+{
+    hwinfo_ = hwinfo;
+}
+
  void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
  {
      nbpu_opt_ = nbpu_opt;
@@ -2201,6 +2217,12 @@ MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
  
  MdrunnerBuilder::~MdrunnerBuilder() = default;
  
+MdrunnerBuilder& MdrunnerBuilder::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo)
+{
+    impl_->addHardwareDetectionResult(hwinfo);
+    return *this;
+}
+
  MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions&    options,
                                                        real                   forceWarningThreshold,
                                                        const StartingBehavior startingBehavior)
diff --git a/src/gromacs/mdrun/runner.h b/src/gromacs/mdrun/runner.h

index 2dd38531bb86ce11037c759501ff5be5dab30a5d..703e632b34cf4bf18e1f35d9f6d8f85e0709945d 100644 (file)
--- a/src/gromacs/mdrun/runner.h
+++ b/src/gromacs/mdrun/runner.h
@@ -306,6 +306,9 @@ private:
      //! The modules that comprise mdrun.
      std::unique_ptr<MDModules> mdModules_;
  
+    //! Non-owning handle to the results of the hardware detection.
+    const gmx_hw_info_t* hwinfo_ = nullptr;
+
      /*!
       * \brief Holds simulation input specification provided by client, if any.
       *
@@ -402,6 +405,17 @@ public:
       */
      Mdrunner build();
  
+    /*!
+     * \brief Supply the result of hardware detection to the gmx::Mdrunner
+     *
+     * \param hwinfo  Non-owning not-null handle to result of hardware detection.
+     *
+     * \todo It would be better to express this as either a not-null const pointer or
+     * a const reference, but neither of those is consistent with incremental
+     * building of an object. This motivates future work to be able to make a deep copy
+     * of the detection result. See https://gitlab.com/gromacs/gromacs/-/issues/3650 */
+    MdrunnerBuilder& addHardwareDetectionResult(const gmx_hw_info_t* hwinfo);
+
      /*!
       * \brief Set up non-bonded short-range force calculations.
       *
diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt

index 6a336db5d324494316472f78d2916af6c65112d1..d41ca70478fda8a79dd18a52e64f920a321137d6 100644 (file)
--- a/src/programs/CMakeLists.txt
+++ b/src/programs/CMakeLists.txt
@@ -76,6 +76,7 @@ else()
      target_include_directories(view_objlib SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
      add_library(gmx_objlib OBJECT ${GMX_MAIN_SOURCES})
      target_include_directories(gmx_objlib SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
+    target_include_directories(gmx_objlib SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
      add_executable(gmx
          $<TARGET_OBJECTS:gmx_objlib>
          $<TARGET_OBJECTS:mdrun_objlib>
diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp

index f85fe269318ba8729cf720ad880d7eae035ff187..77a25f1fdfab7963a8594359b643f35731d6af8d 100644 (file)
--- a/src/programs/mdrun/mdrun.cpp
+++ b/src/programs/mdrun/mdrun.cpp
@@ -59,6 +59,7 @@
  #include "gromacs/commandline/pargs.h"
  #include "gromacs/domdec/options.h"
  #include "gromacs/fileio/gmxfio.h"
+#include "gromacs/hardware/detecthardware.h"
  #include "gromacs/mdrun/legacymdrunoptions.h"
  #include "gromacs/mdrun/runner.h"
  #include "gromacs/mdrun/simulationcontext.h"
@@ -66,15 +67,25 @@
  #include "gromacs/mdrunutility/logging.h"
  #include "gromacs/mdrunutility/multisim.h"
  #include "gromacs/utility/arrayref.h"
-#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/basenetwork.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
  
  #include "mdrun_main.h"
  
  namespace gmx
  {
  
-//! Implements C-style main function for mdrun
  int gmx_mdrun(int argc, char* argv[])
+{
+    // Set up the communicator, where possible (see docs for
+    // SimulationContext).
+    MPI_Comm                 communicator = GMX_LIB_MPI ? MPI_COMM_WORLD : MPI_COMM_NULL;
+    PhysicalNodeCommunicator physicalNodeCommunicator(communicator, gmx_physicalnode_id_hash());
+    std::unique_ptr<gmx_hw_info_t> hwinfo = gmx_detect_hardware(physicalNodeCommunicator);
+    return gmx_mdrun(communicator, *hwinfo, argc, argv);
+}
+
+int gmx_mdrun(MPI_Comm communicator, const gmx_hw_info_t& hwinfo, int argc, char* argv[])
  {
      auto mdModules = std::make_unique<MDModules>();
  
@@ -215,9 +226,6 @@ int gmx_mdrun(int argc, char* argv[])
      ArrayRef<const std::string> multiSimDirectoryNames =
              opt2fnsIfOptionSet("-multidir", ssize(options.filenames), options.filenames.data());
  
-    // Set up the communicator, where possible (see docs for
-    // SimulationContext).
-    MPI_Comm communicator = GMX_LIB_MPI ? MPI_COMM_WORLD : MPI_COMM_NULL;
      // The SimulationContext is necessary with gmxapi so that
      // resources owned by the client code can have suitable
      // lifetime. The gmx wrapper binary uses the same infrastructure,
@@ -245,6 +253,7 @@ int gmx_mdrun(int argc, char* argv[])
       */
      auto builder = MdrunnerBuilder(std::move(mdModules),
                                     compat::not_null<SimulationContext*>(&simulationContext));
+    builder.addHardwareDetectionResult(&hwinfo);
      builder.addSimulationMethod(options.mdrunOptions, options.pforce, startingBehavior);
      builder.addDomainDecomposition(options.domdecOptions);
      // \todo pass by value
diff --git a/src/programs/mdrun/mdrun_main.h b/src/programs/mdrun/mdrun_main.h

index 8a6e7ecae92bb9cb0c402fce594e417df7a7a8fa..5985428de35615246d283163b12e13d23c142655 100644 (file)
--- a/src/programs/mdrun/mdrun_main.h
+++ b/src/programs/mdrun/mdrun_main.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2013,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -32,14 +32,52 @@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
+/*! \internal \file
+ *
+ * \brief This file declares C-style entrypoints for mdrun
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ *
+ * \ingroup module_mdrun
+ */
  #ifndef GMX_PROGRAMS_MDRUN_MDRUN_H
  #define GMX_PROGRAMS_MDRUN_MDRUN_H
  
+#include "gromacs/utility/gmxmpi.h"
+
+struct gmx_hw_info_t;
+
  namespace gmx
  {
  
+/*! \brief Implements C-style main function for mdrun
+ *
+ * This implementation detects hardware itself, as suits
+ * the gmx wrapper binary.
+ *
+ * \param[in]  argc          Number of C-style command-line arguments
+ * \param[in]  argv          C-style command-line argument strings
+ */
  int gmx_mdrun(int argc, char* argv[]);
  
+/*! \brief Implements C-style main function for mdrun
+ *
+ * This implementation facilitates reuse of infrastructure. This
+ * includes the information about the hardware detected across the
+ * given \c communicator. That suits e.g. efficient implementation of
+ * test fixtures.
+ *
+ * \param[in]  communicator  The communicator to use for the simulation
+ * \param[in]  hwinfo        Describes the hardware detected on the physical nodes of the communicator
+ * \param[in]  argc          Number of C-style command-line arguments
+ * \param[in]  argv          C-style command-line argument strings
+ *
+ * \todo Progress on https://gitlab.com/gromacs/gromacs/-/issues/3774
+ * will remove the need of test binaries to call gmx_mdrun in a way
+ * that is different from the command-line and gmxapi.
+ */
+int gmx_mdrun(MPI_Comm communicator, const gmx_hw_info_t& hwinfo, int argc, char* argv[]);
+
  } // namespace gmx
  
  #endif
diff --git a/src/programs/mdrun/tests/moduletest.cpp b/src/programs/mdrun/tests/moduletest.cpp

index a551798aacfe34a07f2f577420d255890c45929f..9cb67be37eb6557dd2f11eba60dfe178ae68b291 100644 (file)
--- a/src/programs/mdrun/tests/moduletest.cpp
+++ b/src/programs/mdrun/tests/moduletest.cpp
@@ -48,15 +48,19 @@
  
  #include <cstdio>
  
+#include <utility>
+
  #include "gromacs/gmxana/gmx_ana.h"
  #include "gromacs/gmxpreprocess/grompp.h"
  #include "gromacs/hardware/detecthardware.h"
+#include "gromacs/hardware/hw_info.h"
  #include "gromacs/options/basicoptions.h"
  #include "gromacs/options/ioptionscontainer.h"
  #include "gromacs/tools/convert_tpr.h"
  #include "gromacs/utility/basedefinitions.h"
  #include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
  #include "gromacs/utility/textwriter.h"
  #include "programs/mdrun/mdrun_main.h"
  
@@ -108,6 +112,14 @@ SimulationRunner::SimulationRunner(TestFileManager* fileManager) :
  {
  #if GMX_LIB_MPI
      GMX_RELEASE_ASSERT(gmx_mpi_initialized(), "MPI system not initialized for mdrun tests");
+
+    // It would be better to also detect this in a thread-MPI build,
+    // but there is no way to do that currently, and it is also not a
+    // problem for such a build. Any code based on such an invalid
+    // test fixture will be found in CI testing, however.
+    GMX_RELEASE_ASSERT(MdrunTestFixtureBase::communicator_ != MPI_COMM_NULL,
+                       "SimulationRunner may only be used from a test fixture that inherits from "
+                       "MdrunTestFixtureBase");
  #endif
  }
  
@@ -223,7 +235,7 @@ int SimulationRunner::callGrompp(const CommandLine& callerRef)
      // Make sure rank zero has written the .tpr file before other
      // ranks try to read it. Thread-MPI and serial do this just fine
      // on their own.
-    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(MdrunTestFixtureBase::communicator_);
  #endif
      return returnValue;
  }
@@ -304,7 +316,8 @@ int SimulationRunner::callMdrun(const CommandLine& callerRef)
      caller.addOption("-ntomp", g_numOpenMPThreads);
  #endif
  
-    return gmx_mdrun(caller.argc(), caller.argv());
+    return gmx_mdrun(MdrunTestFixtureBase::communicator_, *MdrunTestFixtureBase::hwinfo_,
+                     caller.argc(), caller.argv());
  }
  
  int SimulationRunner::callMdrun()
@@ -314,6 +327,26 @@ int SimulationRunner::callMdrun()
  
  // ====
  
+// static
+MPI_Comm MdrunTestFixtureBase::communicator_ = MPI_COMM_NULL;
+// static
+std::unique_ptr<gmx_hw_info_t> MdrunTestFixtureBase::hwinfo_;
+
+// static
+void MdrunTestFixtureBase::SetUpTestCase()
+{
+    communicator_ = MPI_COMM_WORLD;
+    auto newHwinfo =
+            gmx_detect_hardware(PhysicalNodeCommunicator{ communicator_, gmx_physicalnode_id_hash() });
+    std::swap(hwinfo_, newHwinfo);
+}
+
+// static
+void MdrunTestFixtureBase::TearDownTestCase()
+{
+    hwinfo_.reset(nullptr);
+}
+
  MdrunTestFixtureBase::MdrunTestFixtureBase()
  {
  #if GMX_LIB_MPI
@@ -331,7 +364,7 @@ MdrunTestFixture::~MdrunTestFixture()
  {
  #if GMX_LIB_MPI
      // fileManager_ should only clean up after all the ranks are done.
-    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(MdrunTestFixtureBase::communicator_);
  #endif
  }
  
diff --git a/src/programs/mdrun/tests/moduletest.h b/src/programs/mdrun/tests/moduletest.h

index bbf140cee51e981e0b3ce6aee5ac79e3a7235ae5..d29ebe15d30ef32b7efd9f430a36831ebafa5443 100644 (file)
--- a/src/programs/mdrun/tests/moduletest.h
+++ b/src/programs/mdrun/tests/moduletest.h
@@ -51,10 +51,13 @@
  #include <gtest/gtest.h>
  
  #include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/gmxmpi.h"
  
  #include "testutils/cmdlinetest.h"
  #include "testutils/testfilemanager.h"
  
+struct gmx_hw_info_t;
+
  namespace gmx
  {
  namespace test
@@ -79,9 +82,9 @@ enum class SimulationRunnerMdpSource
   * \brief Helper object for running grompp and mdrun in
   * integration tests of mdrun functionality
   *
- * Objects of this class are intended to be owned by
- * IntegrationTestFixture objects, and an IntegrationTestFixture
- * object might own more than one SimulationRunner.
+ * Objects of this class must be owned by objects descended from
+ * MdrunTestFixtureBase, which sets up necessary infrastructure for
+ * it. Such an object may own more than one SimulationRunner.
   *
   * The setup phase creates various temporary files for input and
   * output that are common for mdrun tests, using the file manager
@@ -174,6 +177,7 @@ public:
      std::string mdpInputContents_;
  
  private:
+    //! The file manager used to manage I/O
      TestFileManager& fileManager_;
  
      GMX_DISALLOW_COPY_AND_ASSIGN(SimulationRunner);
@@ -183,31 +187,31 @@ private:
   * \brief Declares test fixture base class for
   * integration tests of mdrun functionality
   *
- * Derived fixture classes (or individual test cases) that might have
- * specific requirements should assert that behaviour, rather than
- * hard-code the requirements. A test that (for example) can't run
- * with more than one thread should report that as a diagnostic, so the
- * person running the test (or designing the test harness) can get
- * feedback on what tests need what conditions without having to read
- * the code of lots of tests.
- *
- * Specifying the execution context (such as numbers of threads and
- * processors) is normally sensible to specify from the test harness
- * (i.e. when CMake/CTest/the user runs a test executable), because
- * only there is information about the hardware available. The default
- * values for such context provided in test fixtures for mdrun should
- * mirror the defaults for mdrun, but currently mdrun.c hard-codes
- * those in a gmx_hw_opt_t.
- *
- * Any method in this class may throw std::bad_alloc if out of memory.
+ * Heavyweight resources are set up here and shared
+ * across all tests in the test case fixture, e.g.
+ * the MPI communicator for the tests and the hardware
+ * detected that is available to it.
   *
   * \ingroup module_mdrun_integration_tests
   */
  class MdrunTestFixtureBase : public ::testing::Test
  {
  public:
+    //! Per-test-case setup for lengthy processes that need run only once.
+    static void SetUpTestCase();
+    //! Per-test-case tear down
+    static void TearDownTestCase();
+
      MdrunTestFixtureBase();
      ~MdrunTestFixtureBase() override;
+
+    //! Communicator over which the test fixture works
+    static MPI_Comm communicator_;
+    /*! \brief Hardware information object
+     *
+     * Detected within \c communicator_ and available to re-use
+     * over all tests in the test case of this text fixture. */
+    static std::unique_ptr<gmx_hw_info_t> hwinfo_;
  };
  
  /*! \internal
@@ -218,7 +222,7 @@ public:
   *
   * \ingroup module_mdrun_integration_tests
   */
-class MdrunTestFixture : public ::testing::Test
+class MdrunTestFixture : public MdrunTestFixtureBase
  {
  public:
      MdrunTestFixture();
diff --git a/src/programs/mdrun/tests/multisimtest.h b/src/programs/mdrun/tests/multisimtest.h

index 05990d6981af98f42603d882b2bbbbcc704d9fb1..81c975c4ea1a6a6bda4d76b1df9b6fc6c815b680 100644 (file)
--- a/src/programs/mdrun/tests/multisimtest.h
+++ b/src/programs/mdrun/tests/multisimtest.h
@@ -69,7 +69,7 @@ typedef std::unique_ptr<CommandLine> CommandLinePointer;
   *
   * \ingroup module_mdrun_integration_tests
   */
-class MultiSimTest : public ::testing::Test, public ::testing::WithParamInterface<const char*>
+class MultiSimTest : public MdrunTestFixtureBase, public ::testing::WithParamInterface<const char*>
  {
  public:
      MultiSimTest();
diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp

index 4aa1219ccade36859d14b72dc98c4831486c2e9e..a4e4aa37b532b3829d53afa1f89edc5929c0eda7 100644 (file)
--- a/src/programs/mdrun/tests/pmetest.cpp
+++ b/src/programs/mdrun/tests/pmetest.cpp
@@ -61,7 +61,6 @@
  #include "gromacs/trajectory/energyframe.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/loggerbuilder.h"
  #include "gromacs/utility/physicalnodecommunicator.h"
  #include "gromacs/utility/stringutil.h"
  
@@ -110,8 +109,8 @@ void PmeTest::runTest(const RunModesList& runModes)
          EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
      }
  
-    auto hardwareInfo_ = gmx_detect_hardware(
-            MDLogger{}, PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash()));
+    auto hardwareInfo_ =
+            gmx_detect_hardware(PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash()));
  
      for (const auto& mode : runModes)
      {
diff --git a/src/testutils/test_hardware_environment.cpp b/src/testutils/test_hardware_environment.cpp

index a17aa980075829760013337cffceddee4db08775..267c73cf27b927af5b3c6b0c520cc377784e8b65 100644 (file)
--- a/src/testutils/test_hardware_environment.cpp
+++ b/src/testutils/test_hardware_environment.cpp
@@ -54,7 +54,6 @@
  #include "gromacs/hardware/hw_info.h"
  #include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/loggerbuilder.h"
  #include "gromacs/utility/physicalnodecommunicator.h"
  
  namespace gmx
@@ -89,17 +88,14 @@ void callAddGlobalTestEnvironment()
      getTestHardwareEnvironment();
  }
  
-//! Simple hardware initialization
-static gmx_hw_info_t* hardwareInit()
+TestHardwareEnvironment::TestHardwareEnvironment() :
+    hardwareInfo_(gmx_detect_hardware(PhysicalNodeCommunicator{ MPI_COMM_WORLD, gmx_physicalnode_id_hash() }))
  {
-    PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
-    return gmx_detect_hardware(MDLogger{}, physicalNodeComm);
  }
  
  void TestHardwareEnvironment::SetUp()
  {
      testDeviceList_.clear();
-    hardwareInfo_ = hardwareInit();
      // Constructing contexts for all compatible GPUs - will be empty on non-GPU builds
      for (const DeviceInformation& compatibleDeviceInfo : getCompatibleDevices(hardwareInfo_->deviceInfoList))
      {
@@ -111,6 +107,11 @@ void TestHardwareEnvironment::SetUp()
  void TestHardwareEnvironment::TearDown()
  {
      testDeviceList_.clear();
+    /* In OneAPI 2021.1-beta9 and beta10, there is a bug that cause a
+     * segfault when a sycl::device is destructed too late. So, we
+     * explicitly destroy device handles here by resetting
+     * hardwareInfo_, which does no harm to anything else. */
+    hardwareInfo_.reset(nullptr);
  }
  
  } // namespace test
diff --git a/src/testutils/test_hardware_environment.h b/src/testutils/test_hardware_environment.h

index b012ae90d5c78749c34190ecf6058cfd77599837..dbab83183aab62687c6e875fa3e2bc8e32d1d5ab 100644 (file)
--- a/src/testutils/test_hardware_environment.h
+++ b/src/testutils/test_hardware_environment.h
@@ -46,6 +46,7 @@
   */
  
  #include <map>
+#include <memory>
  #include <vector>
  
  #include <gtest/gtest.h>
@@ -68,11 +69,12 @@ class TestHardwareEnvironment : public ::testing::Environment
  {
  private:
      //! General hardware info
-    gmx_hw_info_t* hardwareInfo_;
+    std::unique_ptr<gmx_hw_info_t> hardwareInfo_;
      //! Storage of hardware contexts
      std::vector<std::unique_ptr<TestDevice>> testDeviceList_;
  
  public:
+    TestHardwareEnvironment();
      //! This is called by GTest framework once to query the hardware
      void SetUp() override;
      //! This is called by GTest framework once release the hardware
@@ -82,9 +84,10 @@ public:
      {
          return testDeviceList_;
      }
+    //! Whether the available hardware has any compatible devices
      bool hasCompatibleDevices() const { return !testDeviceList_.empty(); }
      //! Get available hardware information.
-    const gmx_hw_info_t* hwinfo() const { return hardwareInfo_; }
+    const gmx_hw_info_t* hwinfo() const { return hardwareInfo_.get(); }
  };
  
  //! Get the test environment
author	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 4 Nov 2020 14:16:50 +0000 (14:16 +0000)
committer	Joe Jordan <ejjordan12@gmail.com>
	Wed, 4 Nov 2020 14:16:50 +0000 (14:16 +0000)
api/gmxapi/cpp/context.cpp		patch \| blob \| history
api/gmxapi/cpp/context_impl.h		patch \| blob \| history
src/gromacs/hardware/detecthardware.cpp		patch \| blob \| history
src/gromacs/hardware/detecthardware.h		patch \| blob \| history
src/gromacs/hardware/hw_info.h		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.h		patch \| blob \| history
src/programs/CMakeLists.txt		patch \| blob \| history
src/programs/mdrun/mdrun.cpp		patch \| blob \| history
src/programs/mdrun/mdrun_main.h		patch \| blob \| history
src/programs/mdrun/tests/moduletest.cpp		patch \| blob \| history
src/programs/mdrun/tests/moduletest.h		patch \| blob \| history
src/programs/mdrun/tests/multisimtest.h		patch \| blob \| history
src/programs/mdrun/tests/pmetest.cpp		patch \| blob \| history
src/testutils/test_hardware_environment.cpp		patch \| blob \| history
src/testutils/test_hardware_environment.h		patch \| blob \| history