src/gromacs/mdtypes/observablesreducer.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \libinternal \file
  36  * \brief
  37  * Declares gmx::ObservablesReducer and builder
  38  *
  39  * Periodically modules implementing MD simulations need to
  40  * communicate with all collaborating ranks to do things like compute
  41  * observables like total energies, signal conditions, and check
  42  * internal consistency. This communication synchronizes all
  43  * participating ranks, which limits scaling and performance, so it is
  44  * done rarely (typically once per step) and only when required.
  45  *
  46  * Modules may provide data of type double to be reduced across
  47  * all ranks via an MPI all-reduce with MPI_SUM. Double-precision
  48  * floating-point is chosen so that no meaningful precision is lost
  49  * e.g. in computing energies, while also permitting integral or
  50  * boolean messages to be passed as double-precision floating-point
  51  * values.
  52  *
  53  * Different modules typically need to communicate on different MD
  54  * steps, so in principle one might optimize by filling a
  55  * std::vector<double> with the values required on the current
  56  * step. However, that requires that each module produce and then copy
  57  * to the reduction buffer the data for this step. The typical amount
  58  * of data required even if all modules need to participate (i.e.
  59  * hundreds of doubles) is smaller than the message headers that are
  60  * used by the underlying network transport protocol. So optimizing
  61  * for minimum message size is not particularly effective because it
  62  * does not meaningfully reduce the total time taken to communicate.
  63  *
  64  * Instead, we always reduce a buffer of the size that would be needed
  65  * if all active modules required communication this step. Then no
  66  * module needs to copy data merely to achieve reduction. To achieve
  67  * this, each module needs a stable view of memory into which it can
  68  * store data for which reduction is desired. It also means that
  69  * modules not active in the current simulation do not contribute to
  70  * the workload at run time. Also, modules that are active but don't
  71  * need communication at any particular MD step can passively opt out
  72  * and that incurs no overhead.
  73  *
  74  * The functionality is separated into two main components, one that does
  75  * work during the simulation, and a builder that is used only during
  76  * setup time. This separates the responsibilities of
  77  * - allowing subscription and building the communication buffer, from
  78  * - orchestrating the minimum activity needed for this MD step.
  79  *
  80  * The interaction diagrams for those two workflows are depicted
  81  * below.
  82  *
  83 \msc
  84 wordwraparcs=true,
  85 hscale="2";
  86
  87 runner [label="runner"],
  88 builder [label="builder"],
  89 moduleA [label="moduleA"],
  90 moduleB [label="moduleB"],
  91 observablesReducer [label="observablesReducer"];
  92
  93 runner =>> builder [label="makes"];
  94
  95 runner =>> moduleA [label="makes"];
  96 runner =>> moduleA [label="passes builder to"];
  97 moduleA =>> builder [label="subscribes itself to"];
  98
  99 runner =>> moduleB [label="makes"];
 100 runner =>> moduleB [label="passes builder to"];
 101 moduleB =>> builder [label="subscribes itself to"];
 102
 103 runner =>> builder [label="calls build()"];
 104 builder =>> builder [label="makes communication\nbuffer"];
 105 builder =>> moduleA [label="notifies of\ncallback and view"];
 106 builder =>> moduleB [label="notifies of\ncallback and view"];
 107 builder =>> observablesReducer [label="makes"];
 108
 109 \endmsc
 110
 111 Once the \c observablesReducer is built, the builder may be
 112 destructed.
 113
 114 The \c observablesReducer and its modules operate entirely by
 115 passing callbacks.
 116
 117 \msc
 118 wordwraparcs=true,
 119 hscale="2";
 120
 121 runner [label="runner"],
 122 moduleA [label="moduleA"],
 123 moduleB [label="moduleB"],
 124 observablesReducer [label="observablesReducer"],
 125 compute_globals [label="compute_globals()"];
 126
 127 runner =>> moduleA [label="asks for work"];
 128 moduleA =>> moduleA [label="Produces values\nto reduce"];
 129 moduleA =>> observablesReducer [label="requires reduction from"];
 130
 131 runner =>> moduleB [label="asks for work"];
 132 moduleB =>> moduleB [label="Produces values\nto reduce"];
 133 moduleB =>> observablesReducer [label="requires reduction from"];
 134
 135 runner =>> runner [label="Does other things also"];
 136 runner =>> compute_globals [label="asks to do reduction"];
 137 compute_globals =>> compute_globals [label="prepares data to\nreduce in\nlegacy style"];
 138 compute_globals =>> observablesReducer [label="asks for\nbuffer view"];
 139 observablesReducer =>> compute_globals [label="provides\nbuffer view"];
 140 compute_globals =>> compute_globals [label="Does MPI_Allreduce"];
 141 compute_globals =>> observablesReducer [label="notifies after\nreduction"];
 142 observablesReducer =>> moduleA [label="notifies after reduction"];
 143 moduleA =>> moduleA [label="Uses reduced values"];
 144 moduleA =>> observablesReducer [label="returns"];
 145 observablesReducer =>> moduleB [label="notifies after reduction"];
 146 moduleB =>> moduleB [label="Uses reduced values"];
 147 moduleB =>> observablesReducer [label="returns"];
 148 observablesReducer =>> observablesReducer [label="zeroes reduction buffer"];
 149 observablesReducer =>> compute_globals [label="returns"];
 150
 151 \endmsc
 152  *
 153  * Three callbacks are produced and called per participating module:
 154  *
 155  * 1. One produced by the module and passed to the builder so that
 156  *    later the ObservablesReducer can call it to notify the module
 157  *    that reduction is complete.
 158  * 2. One produced by the builder and returned to the module so the
 159  *    latter can call it to require reduction when it wishes.
 160  * 3. One produced by the module and passed to the builder so the
 161  *    latter can call it to notify the former of the buffer view
 162  *    it should use in the first callback and receive a copy
 163  *    of the second callback.
 164  *
 165  * Modules often request that reduction occur "soon" i.e. this step or
 166  * next step, depending on whether reduction has already taken place this
 167  * MD step. However they are also able to request reduction to occur
 168  * "eventually" i.e. only whenever some other module requires it, so
 169  * the total number of reductions is minimized. Naturally, the
 170  * callback to such a module happens only after the eventual
 171  * reduction, which may happen on the same step or a later one. If a
 172  * module makes more than one "eventually" reduction request before
 173  * reduction takes place, the callback to that module will be called
 174  * multiple times when eventually reduction does take place. It is the
 175  * responsibility of the module to refrain from making those requests
 176  * if the multiple callbacks would be a problem (e.g. maintain an
 177  * internal record of whether a reduction request has been made).
 178  * Modules are not required to set any value for reduction unless they
 179  * are requesting reduction.
 180  *
 181  * An ObservablesReducer object is intended to replace the use of \c
 182  * compute_globals() by simulations, as
 183  * https://gitlab.com/gromacs/gromacs/-/issues/3887 progresses. When
 184  * no modules using the legacy style communication remain, it is
 185  * anticipated that this class will change to contain an MPI
 186  * communicator to use to implement the MPI_Allreduce internally.  At
 187  * that time, communicationBuffer() and reductionComplete() will
 188  * likely change into a doReduction() method, or similar. The flow of
 189  * the whole propagator loop will now be less clear inasmuch as the
 190  * responsibility for requesting reduction now lies with each module,
 191  * however this is probably still more clear than the large forest of
 192  * flags that resulted from all modules having to have their control
 193  * logic in the propagator loop.
 194  *
 195  * \inlibraryapi
 196  * \ingroup module_mdtypes
 197  */
 198 #ifndef GMX_MDTYPES_OBSERVABLESREDUCER_H
 199 #define GMX_MDTYPES_OBSERVABLESREDUCER_H
 200
 201 #include <cstdint>
 202
 203 #include <functional>
 204 #include <memory>
 205 #include <vector>
 206
 207 namespace gmx
 208 {
 209 template<typename>
 210 class ArrayRef; // forward declaration; ArrayRef<double> only appears in declarations in this header
 211
 212 class ObservablesReducer; // forward declaration; the class is declared in full later in this header
 213 using Step = std::int64_t; //!< 64-bit integer type of the MD step value (std::-qualified because <cstdint> only guarantees the names in namespace std)
 214
 215 /*! \brief Control whether reduction is required soon or only eventually (the argument of ObservablesReducerBuilder::CallbackToRequireReduction). */
 216 enum class ReductionRequirement : int
 217 {
 218     //! Reduce whenever the runner next checks with the ObservablesReducer.
 219     Soon,
 220     /*! \brief Reduce whenever the runner next checks with the
 221      * ObservablesReducer after some module requires reduction Soon. */
 222     Eventually
 223 };
 224
 225 /*! \brief Report whether the reduction has happened this step (the return value of ObservablesReducerBuilder::CallbackToRequireReduction). */
 226 enum class ObservablesReducerStatus : int
 227 {
 228     //! Reduction has not yet happened this step.
 229     ReadyToReduce,
 230     //! Reduction has happened this step.
 231     AlreadyReducedThisStep
 232 };
 233
 234 /*! \libinternal \brief
 235  * Builder for ObservablesReducer
 236  *
 237  * Receives subscriptions from MD modules. Caller should call \c
 238  * build() once all subscriptions have been received, and then not
 239  * attempt any further subscriptions or builds. At that time, the
 240  * builder may be destructed.
 241  *
 242  * This builder will
 243  * - receive all subscriptions from MD modules, then
 244  * - build the communication buffer used by the subscribers,
 245  * - build the \c ObservablesReducer object that manages the
 246  *   lifetime of that buffer, and
 247  * - notify the subscribers via callback of the view of that buffer
 248  *   that is theirs to use and a further callback to require
 249  *   reduction of that buffer.
 250  * See also the interaction diagram in the documentation
 251  * for the observablesreducer.h file.
 252  *
 253  * Note that the builder callbacks do not follow the approach of \c
 254  * MDModulesNotifier because that requires that the same value is
 255  * passed to all recipients. Here a distinct value goes to each
 256  * recipient, i.e. a different view of the communication buffer.
 257  *
 258  * In order to avoid circular build-time dependencies between the
 259  * ObservablesReducer (and its builder) and the modules that use it,
 260  * the latter can directly call methods on the former, supplying
 261  * anonymous callbacks to be used by the former to contact the
 262  * latter. CallbackAfterReduction and CallbackFromBuilder are of this
 263  * type.
 264  *
 265  * A callback type CallbackToRequireReduction is also used instead of
 266  * a direct method call on ObservablesReducer to require reduction.
 267  * This is implemented by calling a method on the Impl object of an
 268  * ObservablesReducer. This extends the interface of
 269  * ObservablesReducer in a way that is not directly visible. That
 270  * complexity provides two benefits:
 271  * - only registered subscribers can require reduction (which helps
 272  *   ensure correctness by construction)
 273  * - the ObservablesReducer::Impl has a stable address from the heap
 274  *   allocation needed for std::unique_ptr to use in forming the
 275  *   callback to request reduction.
 276  * Alternatives exist for the latter, but create requirements on the
 277  * stability of the address of ObservablesReducer, and/or extra
 278  * coordination to only pass that address to subscribers once it is
 279  * stable.
 280  *
 281  * It is the subscribers' responsibility to coordinate so that all
 282  * subscribers on all ranks agree on the need to communicate, e.g. by
 283  * orchestrating communication based on the current step number or a
 284  * previous message.
 285  *
 286  */
 287 class ObservablesReducerBuilder
 288 {
 289 public:
 290     //! Constructor
 291     ObservablesReducerBuilder();
 292     //! Destructor
 293     ~ObservablesReducerBuilder();
 294     //! Move constructor
 295     ObservablesReducerBuilder(ObservablesReducerBuilder&& other) noexcept;
 296     //! Move assignment operator
 297     ObservablesReducerBuilder& operator=(ObservablesReducerBuilder&& other) noexcept;
 298
 299     /*! \brief Convenience type for the callback that subscribers provide
 300      * so that they can be notified, with the step value, that reduction is complete. */
 301     using CallbackAfterReduction = std::function<void(Step)>;
 302     /*! \brief Convenience type for the callback subscribers
 303      * call to require reduction.
 304      *
 305      * When called, the status it returns can be used for checking the
 306      * internal expectations of the subscriber on whether reduction
 307      * has already occurred this step, or not. */
 308     using CallbackToRequireReduction = std::function<ObservablesReducerStatus(ReductionRequirement)>;
 309     /*! \brief Convenience type for the callback from the builder to
 310      * notify the subscribers of the callback they will own and later
 311      * use to require reduction and the view of the communication
 312      * buffer they will later use. */
 313     using CallbackFromBuilder = std::function<void(CallbackToRequireReduction&&, ArrayRef<double>)>;
 314
 315     /*! \brief Add a subscriber to the \c ObservablesReducer that will
 316      * later be built in \c build()
 317      *
 318      * Takes ownership of both callbacks supplied by the subscribing module, so that no time is
 319      * spent in the MD loop constructing std::function objects (constructing one requires 1-2 heap
 320      * allocations, depending on the size of the lambda capture). Must not be called after build().
 321      *
 322      * \param[in] sizeRequired           Size of the buffer view this subscriber needs (presumably a count of doubles; confirm against the implementation)
 323      * \param[in] callbackFromBuilder    Called by the builder in build() to hand over the buffer view and the CallbackToRequireReduction
 324      * \param[in] callbackAfterReduction Called to notify the subscriber that reduction is complete */
 325     void addSubscriber(int                      sizeRequired,
 326                        CallbackFromBuilder&&    callbackFromBuilder,
 327                        CallbackAfterReduction&& callbackAfterReduction);
 328
 329     /*! \brief Build a \c ObservablesReducer to which any subscribers
 330      * have been added
 331      *
 332      * Must be called only once. Notifies each subscriber (via the
 333      * CallbackFromBuilder that it supplied) of the view of the
 334      * reduction buffer that they will use and the
 335      * CallbackToRequireReduction that they will use. */
 336     ObservablesReducer build();
 337
 338 private:
 339     class Impl;
 340     //! Impl object
 341     std::unique_ptr<Impl> impl_;
 342 };
 343
 344 /*! \libinternal \brief
 345  * Manage reduction of observables for registered subscribers
 346  *
 347  * Modules can require that the \c ObservablesReducer object to which
 348  * they have subscribed do communication this step.  After reduction
 349  * is complete, notifications are made to the callbacks that modules
 350  * previously supplied to the ObservablesReducerBuilder. Then the
 351  * reduction buffer is zeroed. Thus the subscribers may not depend on
 352  * the values in their buffer view after the notification callback
 353  * returns, so they should do any necessary processing during that
 354  * callback.
 355  *
 356  * Modules are free to request reduction whenever they wish, and have
 357  * no obligations to do anything at any time. In particular, they
 358  * do not have to set values for their reduction buffer except when
 359  * they are requesting reduction.
 360  *
 361  * The \c ObservablesReducerBuilder object is responsible for
 362  * preparing a vector of doubles and notifying the subscribers of the
 363  * mutually disjoint views of the buffer that they should use for both
 364  * input and output of the reduction. The ObservablesReducer object
 365  * that it builds retains no record of the subscribers, because its
 366  * responsibility is solely to orchestrate the MPI communication and
 367  * callbacks.
 368  *
 369  * Subscribers automatically use the correct \c ObservablesReducer
 370  * object because the callback they received is bound to the correct
 371  * one. The only way a module can participate in an \c
 372  * ObservablesReducer is to have registered with its builder.
 373  *
 374  * The owner of an ObservablesReducer must maintain the lifetime of
 375  * the \c ObservablesReducer object until all subscribers no longer
 376  * need it. After the destruction of an \c ObservablesReducer, if
 377  * subscribers access their view of the communication buffer, the
 378  * behavior is undefined.
 379  *
 380  * \inlibraryapi
 381  * \ingroup module_mdtypes
 382  */
 383 class ObservablesReducer
 384 {
 385 private:
 386     class Impl;
 387     std::unique_ptr<Impl> impl_; //!< Impl object
 388
 389 public:
 390     //! Constructor only usable by ObservablesReducerBuilder
 391     explicit ObservablesReducer(std::unique_ptr<Impl> impl);
 392     //! Destructor
 393     ~ObservablesReducer();
 394     //! Move constructor
 395     ObservablesReducer(ObservablesReducer&& other) noexcept;
 396     //! Move assignment operator
 397     ObservablesReducer& operator=(ObservablesReducer&& other) noexcept;
 398
 399     /*! \brief Provide view of communication buffer for MPI reduction
 400      *
 401      * If no subscriber used ReductionRequirement::Soon since the last
 402      * call to reductionComplete(), then this method returns an empty
 403      * buffer. Otherwise it returns a view over the buffer potentially
 404      * filled by all subscribed modules.
 405      */
 406     ArrayRef<double> communicationBuffer();
 407     /*! \brief Called by the runner after MPI communication is complete
 408      *
 409      * Notifies all subscribers who required reduction since the last
 410      * call to reductionComplete() and passes the \c step value so
 411      * they can check internally that the simulation state is
 412      * consistent.
 413      *
 414      * After all notifications, zeroes the communication buffer. It is
 415      * the responsibility of the subscribers that required reduction
 416      * to react suitably to the data available during their
 417      * notification. This ensures that modules cannot get arbitrary
 418      * but realistic-looking values left behind from previous
 419      * communication stages. It also ensures that subsequent
 420      * communication stages will not be able to keep reducing values
 421      * until they overflow or underflow. This zeroing is most efficient
 422      * to do centrally in an object of this class.
 423      *
 424      * The choice of zero for the sentinel value is not perfect. In
 425      * principle, a value of zero is potentially significant to any
 426      * subscriber, so could be provided to a subscriber as the result
 427      * of an incorrect implementation of ObservablesReducer or
 428      * inconsistent use by subscribers. However by construction (which
 429      * is tested), the integration tests never produce a zero result
 430      * from a reduced value provided by a subscriber. So, if the
 431      * coverage there is high then there is good reason to expect that
 432      * when a zero value is used by a subscriber it is the result of
 433      * a reduction and thus significant, rather than an artefact of
 434      * the zeroing of the communication buffer after notifications are
 435      * complete.
 436      *
 437      * The choice of zero ensures that the MPI reduction will produce
 438      * a valid numerical result in all cases except when a module that
 439      * required reduction set buffer contents that produced a problematic output after reduction.
 440      *
 441      * \param[in] step  The step value that is passed on to each notified subscriber */
 442     void reductionComplete(Step step);
 443     /*! \brief Notify the ObservablesReducer that this MD step is complete
 444      *
 445      * Any runner using the ObservablesReducer must call this method
 446      * whenever a step completes, so that subscribed modules can use
 447      * that information to check whether reduction is happening on the
 448      * step that they expect.
 449      *
 450      * The ObservablesReducer keeps track of whether reduction has
 451      * already occurred this step, so that when modules request
 452      * reduction it can notify them of that status. This permits them
 453      * to check their own requirements, e.g. that
 454      * ReductionRequirement::Soon will operate this step or next
 455      * step. */
 456     void stepComplete();
 457     //! The builder needs to be able to make the Impl object
 458     friend class ObservablesReducerBuilder;
 459 };
 460
 461 } // namespace gmx
 462
 463 #endif