Better concurrency support for analysis nbsearch.
authorTeemu Murtola <teemu.murtola@gmail.com>
Sun, 5 May 2013 09:55:31 +0000 (12:55 +0300)
committerGerrit Code Review <gerrit@gerrit.gromacs.org>
Thu, 27 Jun 2013 19:28:04 +0000 (21:28 +0200)
AnalysisNeighborhood::initSearch() now supports concurrent calls from
multiple threads such that each call creates an independent search.
A pool of search objects is kept allocated to avoid constant
reallocation of the data structures.

This allows using the neighborhood search routines transparently in
thread-parallel analysis tools after that is implemented in the
framework.  The changes in the template demonstrate how this simplifies
the tool code.

Part of #866.

Change-Id: If469284671ec66e249e138e59da7cc87a8f4ac65

share/template/template.cpp
share/template/template_doc.cpp
src/gromacs/selection/nbsearch.cpp
src/gromacs/selection/nbsearch.h
src/gromacs/selection/tests/nbsearch.cpp

index 3f5ddaf7c57432567fa25b0120e56c170cfce329..3b268350a7c1cd73a9bc03d2768c91db555e65e6 100644 (file)
@@ -52,9 +52,6 @@ class AnalysisTemplate : public TrajectoryAnalysisModule
         virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
                                   const TopologyInformation        &top);
 
-        virtual TrajectoryAnalysisModuleDataPointer startFrames(
-            const AnalysisDataParallelOptions &opt,
-            const SelectionCollection         &selections);
         virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
                                   TrajectoryAnalysisModuleData *pdata);
 
@@ -69,43 +66,12 @@ class AnalysisTemplate : public TrajectoryAnalysisModule
         Selection                        refsel_;
         SelectionList                    sel_;
 
+        AnalysisNeighborhood             nb_;
+
         AnalysisData                     data_;
         AnalysisDataAverageModulePointer avem_;
 };
 
-/*! \brief
- * Frame-local data needed in analysis.
- */
-class AnalysisTemplate::ModuleData : public TrajectoryAnalysisModuleData
-{
-    public:
-        /*! \brief
-         * Initializes frame-local data.
-         *
-         * \param[in] module     Analysis module to use for data objects.
-         * \param[in] opt        Data parallelization options.
-         * \param[in] selections Thread-local selection collection.
-         * \param[in] cutoff     Cutoff distance for the search
-         *   (<=0 stands for no cutoff).
-         */
-        ModuleData(TrajectoryAnalysisModule          *module,
-                   const AnalysisDataParallelOptions &opt,
-                   const SelectionCollection         &selections,
-                   double                             cutoff)
-            : TrajectoryAnalysisModuleData(module, opt, selections)
-        {
-            nb_.setCutoff(cutoff);
-        }
-
-        virtual void finish()
-        {
-            finishDataHandles();
-        }
-
-        //! Neighborhood search data for distance calculation.
-        AnalysisNeighborhood    nb_;
-};
-
 
 AnalysisTemplate::AnalysisTemplate()
     : TrajectoryAnalysisModule("template", "Template analysis tool"),
@@ -160,6 +126,8 @@ void
 AnalysisTemplate::initAnalysis(const TrajectoryAnalysisSettings &settings,
                                const TopologyInformation         & /*top*/)
 {
+    nb_.setCutoff(cutoff_);
+
     data_.setColumnCount(sel_.size());
 
     avem_.reset(new AnalysisDataAverageModule());
@@ -178,25 +146,15 @@ AnalysisTemplate::initAnalysis(const TrajectoryAnalysisSettings &settings,
 }
 
 
-TrajectoryAnalysisModuleDataPointer
-AnalysisTemplate::startFrames(const AnalysisDataParallelOptions &opt,
-                              const SelectionCollection         &selections)
-{
-    return TrajectoryAnalysisModuleDataPointer(
-            new ModuleData(this, opt, selections, cutoff_));
-}
-
-
 void
 AnalysisTemplate::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
                                TrajectoryAnalysisModuleData *pdata)
 {
     AnalysisDataHandle         dh     = pdata->dataHandle(data_);
-    AnalysisNeighborhood      &nb     = static_cast<ModuleData *>(pdata)->nb_;
     const Selection           &refsel = pdata->parallelSelection(refsel_);
 
     AnalysisNeighborhoodSearch nbsearch =
-        nb.initSearch(pbc, refsel.positions());
+        nb_.initSearch(pbc, refsel.positions());
     dh.startFrame(frnr, fr.time);
     for (size_t g = 0; g < sel_.size(); ++g)
     {
index b03295bec2f6611e2fc3913723f2893388dbdec2..51ab79c0aa7bca6a8385d471ef142be60b6627d2 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2011,2012, by the GROMACS development team, led by
+ * Copyright (c) 2011,2012,2013, by the GROMACS development team, led by
  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
  * others, as listed in the AUTHORS file in the top-level source
  * directory and at http://www.gromacs.org.
  * arise in more complex cases.
  * See documentation of gmx::TrajectoryAnalysisModule for a full description of
  * the available virtual methods and convenience functions.
- * The member variable \c options_ will be used to specify command-line options
- * that the tool accepts, and such a variable needs to be present to properly
- * implement gmx::TrajectoryAnalysisModule::initOptions().
- * The next block of member variables are used to contain values provided to
+ * The first block of member variables are used to contain values provided to
  * the different options.  They will vary depending on the needs of the
  * analysis tool.
+ * The AnalysisNeighborhood object provides neighborhood searching that is used
+ * in the analysis.
  * The final block of variables are used to process output data.
  * See initAnalysis() for details on how they are used.
  *
- * We also declare a helper class, AnalysisTemplate::ModuleData, that derives
- * from gmx::TrajectoryAnalysisModuleData and will contain any data that needs
- * to be frame-local in parallel analysis:
- * \until };
- * See documentation of gmx::TrajectoryAnalysisModuleData for more details of
- * how this data can be used.  Note that not all types of analysis will require
- * a custom type for this purpose.
- * Also, if you don't care about parallelization, you can just include these
- * variables in the module class itself, initialize them in
- * gmx::TrajectoryAnalysisModule::initAnalysis(), and do any postprocessing in
- * gmx::TrajectoryAnalysisModule::finishAnalysis()).
+ * For the template, we do not need any custom frame-local data.  If you think
+ * you need some for more complex analysis needs, see documentation of
+ * gmx::TrajectoryAnalysisModuleData for more details.
+ * If you don't care about parallelization, you don't need to conside this
+ * part.  You can simply declare all variables in the module class itself,
+ * initialize them in gmx::TrajectoryAnalysisModule::initAnalysis(), and do any
+ * postprocessing in gmx::TrajectoryAnalysisModule::finishAnalysis()).
  *
  *
  * \section template_ctor Construction
  * parallelism, they still provide convenient building blocks, e.g., for
  * histogramming and file output.
  *
- * For the template, we first create and register one gmx::AnalysisData object
+ * For the template, we first set the cutoff for the neighborhood search.
+ *
+ * Then, we create and register one gmx::AnalysisData object
  * that will contain, for each frame, one column for each input selection.
  * This will contain the main output from the tool: minimum distance between
  * the reference selection and that particular selection.
  * \section template_analysis Actual trajectory analysis
  *
  * There is one more initialization method that needs to be overridden to
- * support automatic parallelization.  If you do not need custom data (or
- * parallelization at all), you can skip this method and ignore the last
- * parameter to gmx::TrajectoryAnalysisModule::analyzeFrame() to make things
- * simpler.  In the template, we do include it to initialize our custom
- * ModuleData object:
- * \skip  TrajectoryAnalysisModuleDataPointer
- * \until }
+ * support automatic parallelization: gmx::TrajectoryAnalysisModule::startFrames().
+ * If you do not need custom frame-local data (or parallelization at all), you
+ * can skip this method and ignore the last parameter to
+ * gmx::TrajectoryAnalysisModule::analyzeFrame() to make things simpler.
+ * In the template, this method is not necessary.
  *
  * The main part of the analysis is (in most analysis codes) done in the
  * gmx::TrajectoryAnalysisModule::analyzeFrame() method, which is called once
  * selection engine.
  *
  * For the template, we first get data from our custom data structure for
- * shorthand access (note the cast on the second line to access our custom
- * data):
+ * shorthand access (if you use a custom data object, you need a \c static_cast
+ * here):
  * \skip  AnalysisDataHandle
  * \until parallelSelection
  *
index b5a9c35cbe26e14f242f5a23a7b5443c27b11ca2..781028026a1957387af727182198e61475e525cc 100644 (file)
 #include <math.h>
 
 #include <algorithm>
+#include <vector>
 
 #include "gromacs/legacyheaders/smalloc.h"
 #include "gromacs/legacyheaders/typedefs.h"
 #include "gromacs/legacyheaders/pbc.h"
 #include "gromacs/legacyheaders/vec.h"
+#include "gromacs/legacyheaders/thread_mpi/mutex.h"
 
 #include "gromacs/selection/nbsearch.h"
 #include "gromacs/selection/position.h"
@@ -811,8 +813,8 @@ namespace internal
 class AnalysisNeighborhoodPairSearchImpl
 {
     public:
-        AnalysisNeighborhoodPairSearchImpl()
-            : nb_(NULL), testIndex_(0)
+        explicit AnalysisNeighborhoodPairSearchImpl(gmx_ana_nbsearch_t *nb)
+            : nb_(nb), testIndex_(0)
         {
         }
 
@@ -830,24 +832,44 @@ class AnalysisNeighborhoodSearchImpl
         typedef AnalysisNeighborhoodPairSearch::ImplPointer
             PairSearchImplPointer;
 
-        AnalysisNeighborhoodSearchImpl()
-            : nb_(NULL), pairSearch_(new AnalysisNeighborhoodPairSearchImpl)
+        // TODO: This is not strictly exception-safe.
+        AnalysisNeighborhoodSearchImpl(real cutoff)
+            : nb_(gmx_ana_nbsearch_create(cutoff, 0)),
+              pairSearch_(new AnalysisNeighborhoodPairSearchImpl(nb_))
         {
+            bTryGrid_ = nb_->bTryGrid;
         }
         ~AnalysisNeighborhoodSearchImpl()
         {
             GMX_RELEASE_ASSERT(pairSearch_.unique(),
                                "Dangling AnalysisNeighborhoodPairSearch reference");
-            if (nb_ != NULL)
-            {
-                gmx_ana_nbsearch_free(nb_);
-            }
+            gmx_ana_nbsearch_free(nb_);
         }
 
+        void setMode(AnalysisNeighborhood::SearchMode mode);
+
         gmx_ana_nbsearch_t     *nb_;
+        bool                    bTryGrid_;
         PairSearchImplPointer   pairSearch_;
 };
 
+void AnalysisNeighborhoodSearchImpl::setMode(AnalysisNeighborhood::SearchMode mode)
+{
+    // TODO: Actually implement eSearchMode_Grid.
+    switch (mode)
+    {
+        case AnalysisNeighborhood::eSearchMode_Automatic:
+            nb_->bTryGrid   = bTryGrid_;
+            break;
+        case AnalysisNeighborhood::eSearchMode_Simple:
+            nb_->bTryGrid   = false;
+            break;
+        case AnalysisNeighborhood::eSearchMode_Grid:
+            nb_->bTryGrid   = bTryGrid_;
+            break;
+    }
+}
+
 }   // namespace internal
 
 /********************************************************************
@@ -858,21 +880,48 @@ class AnalysisNeighborhood::Impl
 {
     public:
         typedef AnalysisNeighborhoodSearch::ImplPointer SearchImplPointer;
+        typedef std::vector<SearchImplPointer> SearchList;
 
-        Impl()
-            : search_(new internal::AnalysisNeighborhoodSearchImpl), bTryGrid_(false)
+        Impl() : cutoff_(0), mode_(eSearchMode_Automatic)
         {
         }
         ~Impl()
         {
-            GMX_RELEASE_ASSERT(search_.unique(),
-                               "Dangling AnalysisNeighborhoodSearch reference");
+            SearchList::const_iterator i;
+            for (i = searchList_.begin(); i != searchList_.end(); ++i)
+            {
+                GMX_RELEASE_ASSERT(i->unique(),
+                                   "Dangling AnalysisNeighborhoodSearch reference");
+            }
         }
 
-        SearchImplPointer       search_;
-        bool                    bTryGrid_;
+        SearchImplPointer getSearch();
+
+        tMPI::mutex             createSearchMutex_;
+        SearchList              searchList_;
+        real                    cutoff_;
+        SearchMode              mode_;
 };
 
+AnalysisNeighborhood::Impl::SearchImplPointer
+AnalysisNeighborhood::Impl::getSearch()
+{
+    tMPI::lock_guard<tMPI::mutex> lock(createSearchMutex_);
+    // TODO: Consider whether this needs to/can be faster, e.g., by keeping a
+    // separate pool of unused search objects.
+    SearchList::const_iterator i;
+    for (i = searchList_.begin(); i != searchList_.end(); ++i)
+    {
+        if (i->unique())
+        {
+            return *i;
+        }
+    }
+    SearchImplPointer search(new internal::AnalysisNeighborhoodSearchImpl(cutoff_));
+    searchList_.push_back(search);
+    return search;
+}
+
 /********************************************************************
  * AnalysisNeighborhood
  */
@@ -888,63 +937,37 @@ AnalysisNeighborhood::~AnalysisNeighborhood()
 
 void AnalysisNeighborhood::setCutoff(real cutoff)
 {
-    GMX_RELEASE_ASSERT(impl_->search_->nb_ == NULL,
-                       "Multiple calls to setCutoff() not currently supported");
-    impl_->search_->nb_              = gmx_ana_nbsearch_create(cutoff, 0);
-    impl_->search_->pairSearch_->nb_ = impl_->search_->nb_;
-    impl_->bTryGrid_                 = impl_->search_->nb_->bTryGrid;
+    GMX_RELEASE_ASSERT(impl_->searchList_.empty(),
+                       "Changing the cutoff after initSearch() not currently supported");
+    impl_->cutoff_ = cutoff;
 }
 
 void AnalysisNeighborhood::setMode(SearchMode mode)
 {
-    GMX_RELEASE_ASSERT(impl_->search_->nb_ != NULL,
-                       "setParameters() not called");
-    // TODO: Actually implement eSearchMode_Grid.
-    switch (mode)
-    {
-        case eSearchMode_Automatic:
-            impl_->search_->nb_->bTryGrid = impl_->bTryGrid_;
-            break;
-        case eSearchMode_Simple:
-            impl_->search_->nb_->bTryGrid = false;
-            break;
-        case eSearchMode_Grid:
-            impl_->search_->nb_->bTryGrid = impl_->bTryGrid_;
-            break;
-    }
+    impl_->mode_ = mode;
 }
 
 AnalysisNeighborhood::SearchMode AnalysisNeighborhood::mode() const
 {
-    GMX_RELEASE_ASSERT(impl_->search_->nb_ != NULL,
-                       "setParameters() not called");
-    if (!impl_->search_->nb_->bTryGrid)
-    {
-        return eSearchMode_Simple;
-    }
-    return eSearchMode_Automatic;
+    return impl_->mode_;
 }
 
 AnalysisNeighborhoodSearch
 AnalysisNeighborhood::initSearch(const t_pbc *pbc, int n, const rvec x[])
 {
-    GMX_RELEASE_ASSERT(impl_->search_->nb_ != NULL,
-                       "setParameters() not called");
-    GMX_RELEASE_ASSERT(impl_->search_.unique(),
-                       "Multiple concurrent searches not currently supported");
-    gmx_ana_nbsearch_init(impl_->search_->nb_, const_cast<t_pbc *>(pbc), n, x);
-    return AnalysisNeighborhoodSearch(impl_->search_);
+    Impl::SearchImplPointer search(impl_->getSearch());
+    search->setMode(mode());
+    gmx_ana_nbsearch_init(search->nb_, const_cast<t_pbc *>(pbc), n, x);
+    return AnalysisNeighborhoodSearch(search);
 }
 
 AnalysisNeighborhoodSearch
 AnalysisNeighborhood::initSearch(const t_pbc *pbc, const gmx_ana_pos_t *p)
 {
-    GMX_RELEASE_ASSERT(impl_->search_->nb_ != NULL,
-                       "setParameters() not called");
-    GMX_RELEASE_ASSERT(impl_->search_.unique(),
-                       "Multiple concurrent searches not currently supported");
-    gmx_ana_nbsearch_pos_init(impl_->search_->nb_, const_cast<t_pbc *>(pbc), p);
-    return AnalysisNeighborhoodSearch(impl_->search_);
+    Impl::SearchImplPointer search(impl_->getSearch());
+    search->setMode(mode());
+    gmx_ana_nbsearch_pos_init(search->nb_, const_cast<t_pbc *>(pbc), p);
+    return AnalysisNeighborhoodSearch(search);
 }
 
 /********************************************************************
index b993b1b6e0bf574f1bc968d75f93901c5937b3d4..ca87c4ad2efc2c86c369aaf59ae14d77a4d26e49 100644 (file)
@@ -131,6 +131,12 @@ class AnalysisNeighborhoodPairSearch;
  * use methods in the returned AnalysisNeighborhoodSearch to find the reference
  * positions that are within the given cutoff from a provided position.
  *
+ * initSearch() is thread-safe and can be called from multiple threads.  Each
+ * call returns a different instance of the search object that can be used
+ * independently of the others.  However, the returned search objects can only
+ * be used within a single thread.  It is also possible to create multiple
+ * concurrent searches within a single thread.
+ *
  * \todo
  * Support for exclusions.
  * The 4.5/4.6 C API had very low-level support for exclusions, which was not
@@ -166,8 +172,10 @@ class AnalysisNeighborhood
          * \param[in]  cutoff Cutoff distance for the search
          *   (<=0 stands for no cutoff).
          *
-         * Currently, needs to be called exactly once, before calling any other
-         * method.
+         * Currently, can only be called before the first call to initSearch().
+         * If this method is not called, no cutoff is used in the searches.
+         *
+         * Does not throw.
          */
         void setCutoff(real cutoff);
         /*! \brief
@@ -194,6 +202,7 @@ class AnalysisNeighborhood
          * \param[in] x   \p n reference positions for the frame.
          * \returns   Search object that can be used to find positions from
          *      \p x within the given cutoff.
+         * \throws    std::bad_alloc if out of memory.
          */
         AnalysisNeighborhoodSearch
         initSearch(const t_pbc *pbc, int n, const rvec x[]);
@@ -204,6 +213,7 @@ class AnalysisNeighborhood
          * \param[in] p   Reference positions for the frame.
          * \returns   Search object that can be used to find positions from
          *      \p p within the given cutoff.
+         * \throws    std::bad_alloc if out of memory.
          */
         AnalysisNeighborhoodSearch
         initSearch(const t_pbc *pbc, const gmx_ana_pos_t *p);
@@ -333,9 +343,9 @@ class AnalysisNeighborhoodSearch
          * Clears this search.
          *
          * Equivalent to \c "*this = AnalysisNeighborhoodSearch();".
-         * Currently, this is required if the previous search variable is still
-         * in scope and you want to call AnalysisNeighborhood::initSearch()
-         * again.
+         * Currently, this is necessary to avoid unnecessary memory allocation
+         * if the previous search variable is still in scope when you want to
+         * call AnalysisNeighborhood::initSearch() again.
          */
         void reset();
 
index 58cb1aaca4c22cca8a2a025cabe16c64a6c8ffb6..dfecebd05c9e8b24e6eec8f61796fa80f2f9e12e 100644 (file)
@@ -296,6 +296,30 @@ void NeighborhoodSearchTest::testPairSearch(
  * Test data generation
  */
 
+class TrivialTestData
+{
+    public:
+        static const NeighborhoodSearchTestData &get()
+        {
+            static TrivialTestData singleton;
+            return singleton.data_;
+        }
+
+        TrivialTestData() : data_(12345, 1.0)
+        {
+            data_.box_[XX][XX] = 5.0;
+            data_.box_[YY][YY] = 5.0;
+            data_.box_[ZZ][ZZ] = 5.0;
+            data_.generateRandomRefPositions(10);
+            data_.generateRandomTestPositions(5);
+            set_pbc(&data_.pbc_, epbcXYZ, data_.box_);
+            data_.computeReferences(&data_.pbc_);
+        }
+
+    private:
+        NeighborhoodSearchTestData data_;
+};
+
 class RandomBoxFullPBCData
 {
     public:
@@ -443,4 +467,18 @@ TEST_F(NeighborhoodSearchTest, GridSearch2DPBC)
     testPairSearch(&search, data);
 }
 
+TEST_F(NeighborhoodSearchTest, HandlesConcurrentSearches)
+{
+    const NeighborhoodSearchTestData &data = TrivialTestData::get();
+
+    nb_.setCutoff(data.cutoff_);
+    gmx::AnalysisNeighborhoodSearch search1 =
+        nb_.initSearch(&data.pbc_, data.refPosCount_, data.refPos_);
+    gmx::AnalysisNeighborhoodSearch search2 =
+        nb_.initSearch(&data.pbc_, data.refPosCount_, data.refPos_);
+
+    testPairSearch(&search1, data);
+    testPairSearch(&search2, data);
+}
+
 } // namespace