a3988d03e3d7ef80536adc371bec1c0cc2b1e55b
[alexxy/gromacs.git] / src / gromacs / trajectoryanalysis / modules / distance.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*! \internal \file
36  * \brief
37  * Implements gmx::analysismodules::Distance.
38  *
39  * \author Teemu Murtola <teemu.murtola@gmail.com>
40  * \ingroup module_trajectoryanalysis
41  */
42 #include "gmxpre.h"
43
44 #include "distance.h"
45
46 #include <string>
47
48 #include "gromacs/analysisdata/analysisdata.h"
49 #include "gromacs/analysisdata/modules/average.h"
50 #include "gromacs/analysisdata/modules/histogram.h"
51 #include "gromacs/analysisdata/modules/plot.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/options/basicoptions.h"
54 #include "gromacs/options/filenameoption.h"
55 #include "gromacs/options/ioptionscontainer.h"
56 #include "gromacs/pbcutil/pbc.h"
57 #include "gromacs/selection/selection.h"
58 #include "gromacs/selection/selectionoption.h"
59 #include "gromacs/trajectory/trajectoryframe.h"
60 #include "gromacs/trajectoryanalysis/analysissettings.h"
61 #include "gromacs/utility/arrayref.h"
62 #include "gromacs/utility/exceptions.h"
63 #include "gromacs/utility/stringutil.h"
64
65 namespace gmx
66 {
67
68 namespace analysismodules
69 {
70
71 namespace
72 {
73
74 class Distance : public TrajectoryAnalysisModule
75 {
76     public:
77         Distance();
78
79         virtual void initOptions(IOptionsContainer          *options,
80                                  TrajectoryAnalysisSettings *settings);
81         virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
82                                   const TopologyInformation        &top);
83
84         virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
85                                   TrajectoryAnalysisModuleData *pdata);
86
87         virtual void finishAnalysis(int nframes);
88         virtual void writeOutput();
89
90     private:
91         SelectionList                            sel_;
92         std::string                              fnAverage_;
93         std::string                              fnAll_;
94         std::string                              fnXYZ_;
95         std::string                              fnHistogram_;
96         std::string                              fnAllStats_;
97         double                                   meanLength_;
98         double                                   lengthDev_;
99         double                                   binWidth_;
100
101         AnalysisData                             distances_;
102         AnalysisData                             xyz_;
103         AnalysisDataAverageModulePointer         summaryStatsModule_;
104         AnalysisDataAverageModulePointer         allStatsModule_;
105         AnalysisDataFrameAverageModulePointer    averageModule_;
106         AnalysisDataSimpleHistogramModulePointer histogramModule_;
107
108         // Copy and assign disallowed by base.
109 };
110
111 Distance::Distance()
112     : meanLength_(0.1), lengthDev_(1.0), binWidth_(0.001)
113 {
114     summaryStatsModule_.reset(new AnalysisDataAverageModule());
115     summaryStatsModule_->setAverageDataSets(true);
116     distances_.addModule(summaryStatsModule_);
117     allStatsModule_.reset(new AnalysisDataAverageModule());
118     distances_.addModule(allStatsModule_);
119     averageModule_.reset(new AnalysisDataFrameAverageModule());
120     distances_.addModule(averageModule_);
121     histogramModule_.reset(new AnalysisDataSimpleHistogramModule());
122     distances_.addModule(histogramModule_);
123
124     registerAnalysisDataset(&distances_, "dist");
125     registerAnalysisDataset(&xyz_, "xyz");
126     registerBasicDataset(summaryStatsModule_.get(), "stats");
127     registerBasicDataset(allStatsModule_.get(), "allstats");
128     registerBasicDataset(averageModule_.get(), "average");
129     registerBasicDataset(&histogramModule_->averager(), "histogram");
130 }
131
132
133 void
134 Distance::initOptions(IOptionsContainer *options, TrajectoryAnalysisSettings *settings)
135 {
136     static const char *const desc[] = {
137         "[THISMODULE] calculates distances between pairs of positions",
138         "as a function of time. Each selection specifies an independent set",
139         "of distances to calculate. Each selection should consist of pairs",
140         "of positions, and the distances are computed between positions 1-2,",
141         "3-4, etc.[PAR]",
142         "[TT]-oav[tt] writes the average distance as a function of time for",
143         "each selection.",
144         "[TT]-oall[tt] writes all the individual distances.",
145         "[TT]-oxyz[tt] does the same, but the x, y, and z components of the",
146         "distance are written instead of the norm.",
147         "[TT]-oh[tt] writes a histogram of the distances for each selection.",
148         "The location of the histogram is set with [TT]-len[tt] and",
149         "[TT]-tol[tt]. Bin width is set with [TT]-binw[tt].",
150         "[TT]-oallstat[tt] writes out the average and standard deviation for",
151         "each individual distance, calculated over the frames.[PAR]",
152         "Note that [THISMODULE] calculates distances between fixed pairs",
153         "(1-2, 3-4, etc.) within a single selection.  To calculate distances",
154         "between two selections, including minimum, maximum, and pairwise",
155         "distances, use [gmx-pairdist]."
156     };
157
158     settings->setHelpText(desc);
159
160     options->addOption(FileNameOption("oav").filetype(eftPlot).outputFile()
161                            .store(&fnAverage_).defaultBasename("distave")
162                            .description("Average distances as function of time"));
163     options->addOption(FileNameOption("oall").filetype(eftPlot).outputFile()
164                            .store(&fnAll_).defaultBasename("dist")
165                            .description("All distances as function of time"));
166     options->addOption(FileNameOption("oxyz").filetype(eftPlot).outputFile()
167                            .store(&fnXYZ_).defaultBasename("distxyz")
168                            .description("Distance components as function of time"));
169     options->addOption(FileNameOption("oh").filetype(eftPlot).outputFile()
170                            .store(&fnHistogram_).defaultBasename("disthist")
171                            .description("Histogram of the distances"));
172     options->addOption(FileNameOption("oallstat").filetype(eftPlot).outputFile()
173                            .store(&fnAllStats_).defaultBasename("diststat")
174                            .description("Statistics for individual distances"));
175     options->addOption(SelectionOption("select").storeVector(&sel_)
176                            .required().dynamicMask().multiValue()
177                            .description("Position pairs to calculate distances for"));
178     // TODO: Extend the histogramming implementation to allow automatic
179     // extension of the histograms to cover the data, removing the need for
180     // the first two options.
181     options->addOption(DoubleOption("len").store(&meanLength_)
182                            .description("Mean distance for histogramming"));
183     options->addOption(DoubleOption("tol").store(&lengthDev_)
184                            .description("Width of full distribution as fraction of [TT]-len[tt]"));
185     options->addOption(DoubleOption("binw").store(&binWidth_)
186                            .description("Bin width for histogramming"));
187 }
188
189
190 /*! \brief
191  * Checks that selections conform to the expectations of the tool.
192  */
193 void checkSelections(const SelectionList &sel)
194 {
195     for (size_t g = 0; g < sel.size(); ++g)
196     {
197         if (sel[g].posCount() % 2 != 0)
198         {
199             std::string message = formatString(
200                         "Selection '%s' does not evaluate into an even number of positions "
201                         "(there are %d positions)",
202                         sel[g].name(), sel[g].posCount());
203             GMX_THROW(InconsistentInputError(message));
204         }
205         if (sel[g].isDynamic())
206         {
207             for (int i = 0; i < sel[g].posCount(); i += 2)
208             {
209                 if (sel[g].position(i).selected() != sel[g].position(i+1).selected())
210                 {
211                     std::string message =
212                         formatString("Dynamic selection %d does not select "
213                                      "a consistent set of pairs over the frames",
214                                      static_cast<int>(g + 1));
215                     GMX_THROW(InconsistentInputError(message));
216                 }
217             }
218         }
219     }
220 }
221
222
223 void
224 Distance::initAnalysis(const TrajectoryAnalysisSettings &settings,
225                        const TopologyInformation         & /*top*/)
226 {
227     checkSelections(sel_);
228
229     distances_.setDataSetCount(sel_.size());
230     xyz_.setDataSetCount(sel_.size());
231     for (size_t i = 0; i < sel_.size(); ++i)
232     {
233         const int distCount = sel_[i].posCount() / 2;
234         distances_.setColumnCount(i, distCount);
235         xyz_.setColumnCount(i, distCount * 3);
236     }
237     const double histogramMin = (1.0 - lengthDev_) * meanLength_;
238     const double histogramMax = (1.0 + lengthDev_) * meanLength_;
239     histogramModule_->init(histogramFromRange(histogramMin, histogramMax)
240                                .binWidth(binWidth_).includeAll());
241
242     if (!fnAverage_.empty())
243     {
244         AnalysisDataPlotModulePointer plotm(
245                 new AnalysisDataPlotModule(settings.plotSettings()));
246         plotm->setFileName(fnAverage_);
247         plotm->setTitle("Average distance");
248         plotm->setXAxisIsTime();
249         plotm->setYLabel("Distance (nm)");
250         for (size_t g = 0; g < sel_.size(); ++g)
251         {
252             plotm->appendLegend(sel_[g].name());
253         }
254         averageModule_->addModule(plotm);
255     }
256
257     if (!fnAll_.empty())
258     {
259         AnalysisDataPlotModulePointer plotm(
260                 new AnalysisDataPlotModule(settings.plotSettings()));
261         plotm->setFileName(fnAll_);
262         plotm->setTitle("Distance");
263         plotm->setXAxisIsTime();
264         plotm->setYLabel("Distance (nm)");
265         // TODO: Add legends? (there can be a massive amount of columns)
266         distances_.addModule(plotm);
267     }
268
269     if (!fnXYZ_.empty())
270     {
271         AnalysisDataPlotModulePointer plotm(
272                 new AnalysisDataPlotModule(settings.plotSettings()));
273         plotm->setFileName(fnXYZ_);
274         plotm->setTitle("Distance");
275         plotm->setXAxisIsTime();
276         plotm->setYLabel("Distance (nm)");
277         // TODO: Add legends? (there can be a massive amount of columns)
278         xyz_.addModule(plotm);
279     }
280
281     if (!fnHistogram_.empty())
282     {
283         AnalysisDataPlotModulePointer plotm(
284                 new AnalysisDataPlotModule(settings.plotSettings()));
285         plotm->setFileName(fnHistogram_);
286         plotm->setTitle("Distance histogram");
287         plotm->setXLabel("Distance (nm)");
288         plotm->setYLabel("Probability");
289         for (size_t g = 0; g < sel_.size(); ++g)
290         {
291             plotm->appendLegend(sel_[g].name());
292         }
293         histogramModule_->averager().addModule(plotm);
294     }
295
296     if (!fnAllStats_.empty())
297     {
298         AnalysisDataPlotModulePointer plotm(
299                 new AnalysisDataPlotModule(settings.plotSettings()));
300         plotm->setFileName(fnAllStats_);
301         plotm->setErrorsAsSeparateColumn(true);
302         plotm->setTitle("Statistics for individual distances");
303         plotm->setXLabel("Distance index");
304         plotm->setYLabel("Average/standard deviation (nm)");
305         for (size_t g = 0; g < sel_.size(); ++g)
306         {
307             plotm->appendLegend(std::string(sel_[g].name()) + " avg");
308             plotm->appendLegend(std::string(sel_[g].name()) + " std.dev.");
309         }
310         // TODO: Consider whether this output format is the best possible.
311         allStatsModule_->addModule(plotm);
312     }
313 }
314
315
316 void
317 Distance::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
318                        TrajectoryAnalysisModuleData *pdata)
319 {
320     AnalysisDataHandle   distHandle = pdata->dataHandle(distances_);
321     AnalysisDataHandle   xyzHandle  = pdata->dataHandle(xyz_);
322     const SelectionList &sel        = pdata->parallelSelections(sel_);
323
324     checkSelections(sel);
325
326     distHandle.startFrame(frnr, fr.time);
327     xyzHandle.startFrame(frnr, fr.time);
328     for (size_t g = 0; g < sel.size(); ++g)
329     {
330         distHandle.selectDataSet(g);
331         xyzHandle.selectDataSet(g);
332         for (int i = 0, n = 0; i < sel[g].posCount(); i += 2, ++n)
333         {
334             const SelectionPosition &p1 = sel[g].position(i);
335             const SelectionPosition &p2 = sel[g].position(i+1);
336             rvec                     dx;
337             if (pbc != NULL)
338             {
339                 pbc_dx(pbc, p2.x(), p1.x(), dx);
340             }
341             else
342             {
343                 rvec_sub(p2.x(), p1.x(), dx);
344             }
345             real dist     = norm(dx);
346             bool bPresent = p1.selected() && p2.selected();
347             distHandle.setPoint(n, dist, bPresent);
348             xyzHandle.setPoints(n*3, 3, dx, bPresent);
349         }
350     }
351     distHandle.finishFrame();
352     xyzHandle.finishFrame();
353 }
354
355
356 void
357 Distance::finishAnalysis(int /*nframes*/)
358 {
359     AbstractAverageHistogram &averageHistogram = histogramModule_->averager();
360     averageHistogram.normalizeProbability();
361     averageHistogram.done();
362 }
363
364
365 void
366 Distance::writeOutput()
367 {
368     SelectionList::const_iterator sel;
369     int                           index;
370     for (sel = sel_.begin(), index = 0; sel != sel_.end(); ++sel, ++index)
371     {
372         printf("%s:\n", sel->name());
373         printf("  Number of samples:  %d\n",
374                summaryStatsModule_->sampleCount(index, 0));
375         printf("  Average distance:   %-8.5f nm\n",
376                summaryStatsModule_->average(index, 0));
377         printf("  Standard deviation: %-8.5f nm\n",
378                summaryStatsModule_->standardDeviation(index, 0));
379     }
380 }
381
382 }       // namespace
383
384 const char DistanceInfo::name[]             = "distance";
385 const char DistanceInfo::shortDescription[] =
386     "Calculate distances between pairs of positions";
387
388 TrajectoryAnalysisModulePointer DistanceInfo::create()
389 {
390     return TrajectoryAnalysisModulePointer(new Distance);
391 }
392
393 } // namespace analysismodules
394
395 } // namespace gmx