2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010-2018, The GROMACS development team.
5 * Copyright (c) 2019,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * Declares gmx::AnalysisData and gmx::AnalysisDataHandle.
40 * \author Teemu Murtola <teemu.murtola@gmail.com>
42 * \ingroup module_analysisdata
44 #ifndef GMX_ANALYSISDATA_ANALYSISDATA_H
45 #define GMX_ANALYSISDATA_ANALYSISDATA_H
47 #include "gromacs/analysisdata/abstractdata.h"
48 #include "gromacs/utility/real.h"
53 class AnalysisDataHandle;
54 class AnalysisDataParallelOptions;
57 * Parallelizable data container for raw data.
59 * This is the main class used to implement parallelizable data processing in
60 * analysis tools. It is used by first creating an object and setting its
61 * properties using setDataSetCount(), setColumnCount() and setMultipoint(),
62 * and attaching necessary modules using addModule() etc. Then one or more
63 * AnalysisDataHandle objects can be created using startData(). Each data
64 * handle can then be independently used to provide data frames (each frame
65 * must be provided by a single handle, but different frames can be freely
66 * mixed between the handles). The finishFrameSerial() method must be called
67 * in serial for each frame, after one of the handles has been used to provide
68 * the data for that frame. When all data has been provided, the handles
69 * are destroyed using finishData() (or AnalysisDataHandle::finishData()).
71 * When used through the trajectory analysis framework, calls to startData(),
72 * finishFrameSerial(), and finishData() are handled by the framework.
75 * Parallel implementation is not complete.
78 * Special note for MPI implementation: assuming that the initialization of
79 * data objects is identical in all processes, associating the data objects
80 * in different MPI processes should be possible without changes in the
82 * An alternative, more robust implementation could get a unique ID as a parameter
83 * to the constructor or a separate function, but would require all tools to
84 * provide it. With the current registration mechanism in
85 * TrajectoryAnalysisModule, this should be straightforward.
89 * \ingroup module_analysisdata
91 class AnalysisData : public AbstractAnalysisData
95 * Creates an empty analysis data object.
97 * \throws std::bad_alloc if out of memory.
100 ~AnalysisData() override;
103 * Sets the number of data sets.
105 * \param[in] dataSetCount Number of data sets (must be > 0).
106 * \throws std::bad_alloc if out of memory.
107 * \throws APIError if modules have been added that are not
108 * compatible with the new data set count.
110 * Must not be called after startData() has been called.
111 * If not called, a single data set is assumed.
112 * If called multiple times, the last call takes effect.
114 void setDataSetCount(int dataSetCount);
116 * Sets the number of columns in a data set.
118 * \param[in] dataSet Zero-based data set index.
119 * \param[in] columnCount Number of columns in the data (must be > 0).
120 * \throws APIError if modules have been added that are not
121 * compatible with the new column count.
123 * Must be called before startData() for each data set.
124 * Must not be called after startData() has been called.
125 * If called multiple times for a data set, the last call takes effect.
127 void setColumnCount(int dataSet, int columnCount);
129 * Sets whether the data contains multiple points per column per frame.
131 * \param[in] bMultipoint Whether the data will allow multiple points
132 * per column within a single frame.
133 * \throws APIError if modules have been added that are not
134 * compatible with the new setting.
136 * If this method is not called, the data is not multipoint.
138 * Must not be called after startData() has been called.
140 * \see isMultipoint()
142 void setMultipoint(bool bMultipoint);
144 int frameCount() const override;
147 * Creates a handle for adding data.
149 * \param[in] opt Options for setting how this handle will be
151 * \returns The created handle.
152 * \throws std::bad_alloc if out of memory.
153 * \throws APIError if any attached data module is not compatible.
154 * \throws unspecified Any exception thrown by attached data modules
155 * in IAnalysisDataModule::dataStarted().
157 * The caller should retain the returned handle (or a copy of it), and
158 * pass it to finishData() after successfully adding all data.
159 * The caller should discard the returned handle if an error occurs;
160 * memory allocated for the handle will be freed when the AnalysisData
161 * object is destroyed.
163 * The \p opt options should be the same for all calls to this method,
164 * and the number of calls should match the parallelization factor
167 AnalysisDataHandle startData(const AnalysisDataParallelOptions& opt);
169 * Performs in-order sequential processing for the next frame.
171 * \param[in] frameIndex Index of the frame that has been finished.
172 * \throws unspecified Any exception thrown by attached data modules
173 * in IAnalysisDataModule::frameFinishedSerial().
175 * This method should be called sequentially for each frame, after data
176 * for that frame has been produced. It is not necessary to call this
177 * method if there is no parallelism, i.e., if only a single data
178 * handle is created and the parallelization options provided at that
179 * time do not indicate parallelism.
181 void finishFrameSerial(int frameIndex);
183 * Destroys a handle after all data has been added.
185 * \param[in] handle Handle to destroy.
186 * \throws unspecified Any exception thrown by attached data modules
187 * in IAnalysisDataModule::dataFinished().
189 * \p handle must have been obtained from startData() of this object.
190 * The order of the calls with respect to the corresponding startData()
191 * calls is not important.
193 * The \p handle (and any copies) are invalid after the call.
195 void finishData(AnalysisDataHandle handle);
198 AnalysisDataFrameRef tryGetDataFrameInternal(int index) const override;
199 bool requestStorageInternal(int nframes) override;
203 std::unique_ptr<Impl> impl_;
205 friend class AnalysisDataHandle;
210 class AnalysisDataHandleImpl;
211 } // namespace internal
214 * Handle for inserting data into AnalysisData.
216 * This class provides an interface for adding data frames into an AnalysisData
217 * object. After a handle is obtained from AnalysisData::startData(), new
218 * frames can be added using startFrame(). Then values for that frame are set
219 * using provided methods (see below), and finishFrame() is called. After all
220 * frames have been added, finishData() (or AnalysisData::finishData()) must be
223 * For simple (non-multipoint) data, within a frame values can be set using
224 * selectDataSet(), setPoint() and setPoints(). Setting the same column in the
225 * same data set multiple times overrides previously set values.
226 * When the frame is finished, attached modules are notified.
228 * Multipoint data works otherwise similarly, but requires finishPointSet() to
229 * be called for each set of points for which the modules need to be notified.
230 * Each point set starts empty (after startFrame() or finishPointSet()), and
231 * values can be set using setPoint()/setPoints().
232 * A single point set can contain values only for a single data set, which must
233 * be selected with selectDataSet() before setting any values.
234 * finishPointSet() must also be called for the last point set just before
237 * This class works like a pointer type: copying and assignment are lightweight,
238 * and all copies work interchangeably, accessing the same internal handle.
239 * However, normally you should only keep one copy of a handle, i.e., treat
240 * this type as movable.
241 * Several handles created from the same AnalysisData object can exist
242 * concurrently, but must currently operate on separate frames.
245 * \ingroup module_analysisdata
247 class AnalysisDataHandle
251 * Constructs an invalid data handle.
253 * This constructor is provided for convenience in cases where it is
254 * easiest to declare an AnalysisDataHandle without immediately
255 * assigning a value to it. Any attempt to call methods without first
256 * assigning a value from AnalysisData::startData() to the handle
261 AnalysisDataHandle();
263 //! Returns whether this data handle is valid, i.e., has a non-null internal implementation.
264 bool isValid() const { return impl_ != nullptr; }
267 * Start data for a new frame.
269 * \param[in] index Zero-based index for the frame to start.
270 * \param[in] x x value for the frame.
271 * \param[in] dx Error in x for the frame if applicable.
273 * \throws unspecified Any exception thrown by attached data
274 * modules in IAnalysisDataModule::frameStarted().
276 * Each \p index value 0, 1, ..., N-1 (where N is the total number of
277 * frames) should be started exactly once by exactly one handle of an
278 * AnalysisData object. The frames may be started out of order, but
279 * currently the implementation places some limitations on how far
280 * the index can be in the future (as counted from the first frame that
283 void startFrame(int index, real x, real dx = 0.0);
285 * Selects a data set for subsequent setPoint()/setPoints() calls.
287 * \param[in] index Zero-based data set index.
289 * After startFrame(), the first data set is always selected.
290 * The set value is remembered until the end of the current frame, also
291 * across finishPointSet() calls.
295 void selectDataSet(int index);
297 * Set a value for a single column for the current frame.
299 * \param[in] column Zero-based column index.
300 * \param[in] value Value to set for the column.
301 * \param[in] bPresent Present flag to set for the column.
303 * If called multiple times for a column (within one point set for
304 * multipoint data), old values are overwritten.
308 void setPoint(int column, real value, bool bPresent = true);
310 * Set a value and its error estimate for a single column for the
313 * \param[in] column Zero-based column index.
314 * \param[in] value Value to set for the column.
315 * \param[in] error Error estimate to set for the column.
316 * \param[in] bPresent Present flag to set for the column.
318 * If called multiple times for a column (within one point set for
319 * multipoint data), old values are overwritten.
323 void setPoint(int column, real value, real error, bool bPresent = true);
325 * Set values for consecutive columns for the current frame.
327 * \param[in] firstColumn Zero-based column index.
328 * \param[in] count Number of columns to set.
329 * \param[in] values Value array of \p count items.
330 * \param[in] bPresent Present flag to set for the columns.
332 * Equivalent to calling setPoint(firstColumn + i, values[i], bPresent) for
337 void setPoints(int firstColumn, int count, const real* values, bool bPresent = true);
339 * Finish data for the current point set.
341 * \throws APIError if any attached data module is not compatible.
342 * \throws unspecified Any exception thrown by attached data
343 * modules in IAnalysisDataModule::pointsAdded().
345 * Must be called after each point set for multipoint data, including
346 * the last (i.e., no values must be set between the last call to this
347 * method and finishFrame()).
348 * Must not be called for non-multipoint data.
350 void finishPointSet();
352 * Finish data for the current frame.
354 * \throws APIError if any attached data module is not compatible.
355 * \throws unspecified Any exception thrown by attached data
356 * modules in frame notification methods.
359 //! Calls AnalysisData::finishData() for this handle.
364 * Creates a new data handle associated with \p impl.
366 * \param impl Data to associate the handle with.
368 * The constructor is private because data handles should only be
369 * constructed through AnalysisData::startData().
373 explicit AnalysisDataHandle(internal::AnalysisDataHandleImpl* impl);
376 * Pointer to the internal implementation class.
378 * The memory for this object is managed by the AnalysisData object,
379 * and AnalysisDataHandle simply provides a public interface for
380 * accessing the implementation.
382 internal::AnalysisDataHandleImpl* impl_;
385 * Needed to access the non-public implementation.
387 friend class AnalysisData;