Add -maxtau option to gmx msd
authorKevin Boyd <kevin44boyd@gmail.com>
Tue, 26 Oct 2021 18:13:12 +0000 (18:13 +0000)
committerJoe Jordan <ejjordan12@gmail.com>
Tue, 26 Oct 2021 18:13:12 +0000 (18:13 +0000)
src/gromacs/trajectoryanalysis/modules/msd.cpp
src/gromacs/trajectoryanalysis/tests/msd.cpp
src/gromacs/trajectoryanalysis/tests/refdata/MsdModuleTest_oneDimensionalDiffusionWithMaxTau.xml [new file with mode: 0644]

index 0a0dd25616e296690a6b9e50689926f763388b46..c227bc6c7f2ce293a83b2d673db55272b72731d2 100644 (file)
@@ -431,6 +431,10 @@ private:
     double t0_ = 0;
     //! Inter-frame delta-t
     std::optional<double> dt_ = std::nullopt;
+    //! Inter-frame maximum time delta.
+    double maxTau_ = std::numeric_limits<double>::max();
+    //! Tracks the index of the first valid frame. Starts at 0, increases as old frames are deleted
+    size_t firstValidFrame_ = 0;
 
     //! First tau value to fit from for diffusion coefficient, defaults to 0.1 * max tau
     real beginFit_ = -1.0;
@@ -501,6 +505,14 @@ void Msd::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* se
         "Note that this diffusion coefficient and error estimate are only",
         "accurate when the MSD is completely linear between",
         "[TT]-beginfit[tt] and [TT]-endfit[tt].[PAR]",
+        "By default, [THISMODULE] compares all trajectory frames against every frame stored at",
+        "[TT]-trestart[TT] intervals, so the number of frames stored scales linearly with the",
+        "number of frames processed. This can lead to long analysis times and out-of-memory errors",
+        "for long/large trajectories, and often the data at higher time deltas lacks sufficient ",
+        "sampling, often manifesting as a wobbly line on the MSD plot after a straighter region at",
+        "lower time deltas. The [TT]-maxtau[TT] option can be used to cap the maximum time delta",
+        "for frame comparison, which may improve performance and can be used to avoid",
+        "out-of-memory issues.[PAR]"
     };
     settings->setHelpText(desc);
 
@@ -528,6 +540,10 @@ void Msd::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* se
                                .description("Time between restarting points in trajectory (ps)")
                                .defaultValue(10.0)
                                .store(&trestart_));
+    options->addOption(
+            DoubleOption("maxtau")
+                    .description("Maximum time delta between frames to calculate MSDs for (ps)")
+                    .store(&maxTau_));
     options->addOption(
             RealOption("beginfit").description("Time point at which to start fitting.").store(&beginFit_));
     options->addOption(RealOption("endfit").description("End time for fitting.").store(&endFit_));
@@ -645,8 +661,10 @@ void Msd::analyzeFrame(int gmx_unused                frameNumber,
 
     // Each frame will get a tau between it and frame 0, and all other frame combos should be
     // covered by this.
-    // \todo this will no longer hold exactly when maxtau is added
-    taus_.push_back(time - times_[0]);
+    if (const double tau = time - times_[0]; tau <= maxTau_)
+    {
+        taus_.push_back(time - times_[0]);
+    }
 
     for (MsdGroupData& msdData : groupData_)
     {
@@ -655,10 +673,19 @@ void Msd::analyzeFrame(int gmx_unused                frameNumber,
         ArrayRef<const RVec> coords = msdData.coordinateManager_.buildCoordinates(sel, pbc);
 
         // For each preceding frame, calculate tau and do comparison.
-        for (size_t i = 0; i < msdData.frames.size(); i++)
+        for (size_t i = firstValidFrame_; i < msdData.frames.size(); i++)
         {
             // If dt > trestart, the tau increment will be dt rather than trestart.
-            double  tau      = time - (t0_ + std::max<double>(trestart_, *dt_) * i);
+            const double tau = time - (t0_ + std::max<double>(trestart_, *dt_) * i);
+            if (tau > maxTau_)
+            {
+                // The (now empty) entry is no longer needed, so over time the outer vector will
+                // grow with extraneous empty elements persisting, but the alternative would require
+                // some more complicated remapping of tau to frame index.
+                msdData.frames[i].clear();
+                firstValidFrame_ = i + 1;
+                continue;
+            }
             int64_t tauIndex = gmx::roundToInt64(tau / *dt_);
             msdData.msds[tauIndex].push_back(calcMsd_(coords, msdData.frames[i]));
 
index 16220eafd58a332eeb77a58834fc5fb64e9d6465..08d489fc2bd4bdef1951180d3033c9ac25bde8a3 100644 (file)
@@ -225,6 +225,16 @@ TEST_F(MsdModuleTest, oneDimensionalDiffusion)
     runTest(CommandLine(cmdline));
 }
 
+TEST_F(MsdModuleTest, oneDimensionalDiffusionWithMaxTau)
+{
+    setInputFile("-f", "msd_traj.xtc");
+    setInputFile("-s", "msd_coords.gro");
+    setInputFile("-n", "msd.ndx");
+    const char* const cmdline[] = { "-trestart", "200", "-type", "x", "-sel", "0", "-maxtau", "5" };
+    runTest(CommandLine(cmdline));
+}
+
+
 // -------------------------------------------------------------------------
 // These tests operate on a more realistic trajectory, with a solvated protein,
 // with 20 frames at a 2 ps dt. Note that this box is also non-square, so we're validating
diff --git a/src/gromacs/trajectoryanalysis/tests/refdata/MsdModuleTest_oneDimensionalDiffusionWithMaxTau.xml b/src/gromacs/trajectoryanalysis/tests/refdata/MsdModuleTest_oneDimensionalDiffusionWithMaxTau.xml
new file mode 100644 (file)
index 0000000..c74497d
--- /dev/null
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <String Name="CommandLine">-trestart 200 -type x -sel 0 -maxtau 5</String>
+  <OutputFiles Name="Files">
+    <File Name="-o">
+      <XvgLegend Name="DiffusionCoefficient"></XvgLegend>
+      <XvgLegend Name="Legend">
+        <String>title "Mean Squared Displacement"</String>
+        <String>xaxis  label "tau (ps)"</String>
+        <String>yaxis  label "MSD (nm\\S2\\N)"</String>
+        <String>TYPE xy</String>
+        <String>s0 legend "D[ particles] = 8.0000 (+/- 0.0000) (1e-5 cm^2/s)"</String>
+      </XvgLegend>
+      <XvgData Name="Data">
+        <Sequence Name="Row0">
+          <Int Name="Length">2</Int>
+          <Real>0.000</Real>
+          <Real>0</Real>
+        </Sequence>
+        <Sequence Name="Row1">
+          <Int Name="Length">2</Int>
+          <Real>1.000</Real>
+          <Real>0.016</Real>
+        </Sequence>
+        <Sequence Name="Row2">
+          <Int Name="Length">2</Int>
+          <Real>2.000</Real>
+          <Real>0.032</Real>
+        </Sequence>
+        <Sequence Name="Row3">
+          <Int Name="Length">2</Int>
+          <Real>3.000</Real>
+          <Real>0.048</Real>
+        </Sequence>
+        <Sequence Name="Row4">
+          <Int Name="Length">2</Int>
+          <Real>4.000</Real>
+          <Real>0.064</Real>
+        </Sequence>
+        <Sequence Name="Row5">
+          <Int Name="Length">2</Int>
+          <Real>5.000</Real>
+          <Real>0.08</Real>
+        </Sequence>
+      </XvgData>
+    </File>
+  </OutputFiles>
+</ReferenceData>