Updated g_cluster

author David van der Spoel <spoel@xray.bmc.uu.se>

Sat, 3 Aug 2013 17:28:27 +0000 (19:28 +0200)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Fri, 30 Aug 2013 12:45:17 +0000 (14:45 +0200)
author David van der Spoel <spoel@xray.bmc.uu.se>
Sat, 3 Aug 2013 17:28:27 +0000 (19:28 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Fri, 30 Aug 2013 12:45:17 +0000 (14:45 +0200)
diff --git a/src/gromacs/gmxana/cmat.c b/src/gromacs/gmxana/cmat.c

index 5fba0be20760857b83dbdbf4ec8e05691b55e236..788d4e791d2c8b53d4b235c6d33e5a51f4f32f1a 100644 (file)
--- a/src/gromacs/gmxana/cmat.c
+++ b/src/gromacs/gmxana/cmat.c
@@ -39,6 +39,7 @@
  #include "cmat.h"
  #include "smalloc.h"
  #include "macros.h"
+#include "vec.h"
  #include "xvgr.h"
  #include "matio.h"
  #include "futil.h"
@@ -51,11 +52,9 @@ t_mat *init_mat(int n1, gmx_bool b1D)
      m->n1     = n1;
      m->nn     = 0;
      m->b1D    = b1D;
-    m->emat   = 0;
      m->maxrms = 0;
      m->minrms = 1e20;
      m->sumrms = 0;
-    m->nn     = 0;
      m->mat    = mk_matrix(n1, n1, b1D);
  
      snew(m->erow, n1);
@@ -65,6 +64,29 @@ t_mat *init_mat(int n1, gmx_bool b1D)
      return m;
  }
  
+void copy_t_mat(t_mat *dst, t_mat *src)
+{
+    int i, j;
+
+    if (dst->nn != src->nn)
+    {
+        fprintf(stderr, "t_mat structures not identical in size dst %d src %d\n",dst->nn,src->nn);
+        return;
+    }
+    dst->maxrms = src->maxrms;
+    dst->minrms = src->minrms;
+    dst->sumrms = src->sumrms;
+    for(i = 0; (i < src->nn); i++)
+    {
+        for(j = 0; (j < src->nn); j++)
+        {
+            dst->mat[i][j] = src->mat[i][j];
+        }
+        dst->erow[i] = src->erow[i];
+        dst->m_ind[i] = src->m_ind[i];
+    }
+}
+
  void enlarge_mat(t_mat *m, int deltan)
  {
      int i, j;
@@ -122,50 +144,39 @@ void done_mat(t_mat **m)
      *m = NULL;
  }
  
-real row_energy(int nn, int row, real *mat)
-{
-    real re = 0;
-    int  i;
-
-    for (i = 0; (i < nn); i++)
-    {
-        re += abs(i-row)*mat[i];
-    }
-    return re/nn;
-}
-
  real mat_energy(t_mat *m)
  {
-    real re, retot;
-    int  j, jj;
+    int  j;
+    real emat = 0;
  
-    retot = 0;
-    for (j = 0; (j < m->nn); j++)
+    for (j = 0; (j < m->nn-1); j++)
      {
-        jj         = m->m_ind[j];
-        re         = row_energy(m->nn, jj, m->mat[j]);
-        m->erow[j] = re;
-        retot     += re;
+        emat += sqr(m->mat[j][j+1]);
      }
-    m->emat = retot/m->nn;
-    return m->emat;
+    return emat;
  }
  
-void swap_rows(t_mat *m, int isw, int jsw)
+void swap_rows(t_mat *m, int iswap, int jswap)
  {
      real *tmp, ttt;
-    int   i;
+    int   i, itmp;
+
+    /* Swap indices */
+    itmp            = m->m_ind[iswap];
+    m->m_ind[iswap] = m->m_ind[jswap];
+    m->m_ind[jswap] = itmp;
+
+    /* Swap rows (since the matrix is an array of pointers) */
+    tmp           = m->mat[iswap];
+    m->mat[iswap] = m->mat[jswap];
+    m->mat[jswap] = tmp;
  
-    /* Swap rows */
-    tmp         = m->mat[isw];
-    m->mat[isw] = m->mat[jsw];
-    m->mat[jsw] = tmp;
      /* Swap columns */
      for (i = 0; (i < m->nn); i++)
      {
-        ttt            = m->mat[isw][i];
-        m->mat[isw][i] = m->mat[jsw][i];
-        m->mat[jsw][i] = ttt;
+        ttt              = m->mat[i][iswap];
+        m->mat[i][iswap] = m->mat[i][jswap];
+        m->mat[i][jswap] = ttt;
      }
  }
  
diff --git a/src/gromacs/gmxana/cmat.h b/src/gromacs/gmxana/cmat.h

index 613f0971dd014436d27247ac995193c8b871ec26..69930a48ec670f527343175350ab7c9bd936984a 100644 (file)
--- a/src/gromacs/gmxana/cmat.h
+++ b/src/gromacs/gmxana/cmat.h
@@ -51,7 +51,7 @@ typedef struct {
      int      n1, nn;
      int     *m_ind;
      gmx_bool b1D;
-    real     emat, minrms, maxrms, sumrms;
+    real     minrms, maxrms, sumrms;
      real    *erow;
      real   **mat;
  } t_mat;
@@ -61,18 +61,18 @@ typedef struct {
  
  extern t_mat *init_mat(int n1, gmx_bool b1D);
  
+extern void copy_t_mat(t_mat *dst, t_mat *src);
+
  extern void enlarge_mat(t_mat *m, int deltan);
  
  extern void reset_index(t_mat *m);
  
-extern void swap_rows(t_mat *m, int isw, int jsw);
+extern void swap_rows(t_mat *m, int iswap, int jswap);
  
  extern void set_mat_entry(t_mat *m, int i, int j, real val);
  
  extern void done_mat(t_mat **m);
  
-extern real row_energy(int n1, int row, real *mat);
-
  extern real mat_energy(t_mat *mat);
  
  extern void swap_mat(t_mat *m);
diff --git a/src/gromacs/gmxana/gmx_cluster.c b/src/gromacs/gmxana/gmx_cluster.c

index d4e4af2bb27828474bdf7867e8840dca438888b9..0a8be51c4ca8b789a1743611b79e49540b8b34bf 100644 (file)
--- a/src/gromacs/gmxana/gmx_cluster.c
+++ b/src/gromacs/gmxana/gmx_cluster.c
@@ -47,7 +47,7 @@
  #include "vec.h"
  #include "macros.h"
  #include "index.h"
-#include "random.h"
+#include "gmx_random.h"
  #include "pbc.h"
  #include "rmpbc.h"
  #include "xvgr.h"
@@ -149,81 +149,137 @@ void cp_index(int nn, int from[], int to[])
      }
  }
  
-void mc_optimize(FILE *log, t_mat *m, int maxiter, int *seed, real kT)
+void mc_optimize(FILE *log, t_mat *m, real *time,
+                 int maxiter, int nrandom,
+                 int seed, real kT,
+                 const char *conv, output_env_t oenv)
  {
-    real  e[2], ei, ej, efac;
-    int  *low_index;
-    int   cur = 0;
-#define next (1-cur)
-    int   i, isw, jsw, iisw, jjsw, nn;
+    FILE      *fp = NULL;
+    real       ecur, enext, emin, prob, enorm;
+    int        i, j, iswap, jswap, nn, nuphill = 0;
+    gmx_rng_t  rng;
+    t_mat     *minimum;
  
-    fprintf(stderr, "\nDoing Monte Carlo clustering\n");
-    nn = m->nn;
-    snew(low_index, nn);
-    cp_index(nn, m->m_ind, low_index);
-    if (getenv("TESTMC"))
+    if (m->n1 != m->nn)
      {
-        e[cur] = mat_energy(m);
-        pr_energy(log, e[cur]);
-        fprintf(log, "Doing 1000 random swaps\n");
-        for (i = 0; (i < 1000); i++)
+        fprintf(stderr,"Can not do Monte Carlo optimization with a non-square matrix.\n");
+        return;
+    }
+    printf("\nDoing Monte Carlo optimization to find the smoothest trajectory\n");
+    printf("by reordering the frames to minimize the path between the two structures\n");
+    printf("that have the largest pairwise RMSD.\n");
+
+    iswap = jswap = -1;
+    enorm = m->mat[0][0];
+    for(i = 0; (i < m->n1); i++)
+    {
+        for(j = 0; (j < m->nn); j++)
          {
-            do
+            if (m->mat[i][j] > enorm) 
              {
-                isw = nn*rando(seed);
-                jsw = nn*rando(seed);
+                enorm = m->mat[i][j];
+                iswap   = i;
+                jswap   = j;
              }
-            while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
-            iisw          = m->m_ind[isw];
-            jjsw          = m->m_ind[jsw];
-            m->m_ind[isw] = jjsw;
-            m->m_ind[jsw] = iisw;
          }
      }
-    e[cur] = mat_energy(m);
-    pr_energy(log, e[cur]);
+    if ((iswap == -1) || (jswap == -1))
+    {
+        fprintf(stderr, "Matrix contains identical values in all fields\n");
+        return;
+    }
+    swap_rows(m, 0, iswap);
+    swap_rows(m, m->n1-1, jswap);
+    emin = ecur = mat_energy(m);
+    printf("Largest distance %g between %d and %d. Energy: %g.\n",
+           enorm, iswap, jswap, emin);
+
+    rng = gmx_rng_init(seed);
+    nn = m->nn;
+
+    /* Initiate and store global minimum */
+    minimum = init_mat(nn, m->b1D);
+    minimum->nn = nn;
+    copy_t_mat(minimum, m);
+
+    if (NULL != conv)
+    {
+        fp = xvgropen(conv, "Convergence of the MC optimization",
+                      "Energy", "Step", oenv);
+    }
      for (i = 0; (i < maxiter); i++)
      {
+        /* Generate new swapping candidates */
          do
          {
-            isw = nn*rando(seed);
-            jsw = nn*rando(seed);
+            iswap = 1+(nn-2)*gmx_rng_uniform_real(rng);
+            jswap = 1+(nn-2)*gmx_rng_uniform_real(rng);
          }
-        while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
-
-        iisw = m->m_ind[isw];
-        jjsw = m->m_ind[jsw];
-        ei   = row_energy(nn, iisw, m->mat[jsw]);
-        ej   = row_energy(nn, jjsw, m->mat[isw]);
+        while ((iswap == jswap) || (iswap >= nn-1) || (jswap >= nn-1));
  
-        e[next] = e[cur] + (ei+ej-EROW(m, isw)-EROW(m, jsw))/nn;
+        /* Apply swap and compute energy */
+        swap_rows(m, iswap, jswap);
+        enext = mat_energy(m);
  
-        efac = kT ? exp((e[next]-e[cur])/kT) : -1;
-        if ((e[next] > e[cur]) || (efac > rando(seed)))
+        /* Compute probability */
+        prob = 0;
+        if ((enext < ecur) || (i < nrandom))
          {
-
-            if (e[next] > e[cur])
+            prob = 1;
+            if (enext < emin)
              {
-                cp_index(nn, m->m_ind, low_index);
+                /* Store global minimum */
+                copy_t_mat(minimum, m);
+                emin = enext;
              }
-            else
+        }
+        else if (kT > 0)
+        {
+            /* Try Monte Carlo step */
+            prob = exp(-(enext-ecur)/(enorm*kT));
+        }
+
+        if ((prob == 1) || (gmx_rng_uniform_real(rng) < prob))
+        {
+            if (enext > ecur)
              {
-                fprintf(log, "Taking uphill step\n");
+                nuphill++;
              }
  
-            /* Now swapping rows */
-            m->m_ind[isw]  = jjsw;
-            m->m_ind[jsw]  = iisw;
-            EROW(m, isw)   = ei;
-            EROW(m, jsw)   = ej;
-            cur            = next;
-            fprintf(log, "Iter: %d Swapped %4d and %4d (now %g)",
-                    i, isw, jsw, mat_energy(m));
-            pr_energy(log, e[cur]);
+            fprintf(log, "Iter: %d Swapped %4d and %4d (energy: %g prob: %g)\n",
+                    i, iswap, jswap, enext, prob);
+            if (NULL != fp)
+            {
+                fprintf(fp, "%6d  %10g\n", i, enext);
+            }
+            ecur = enext;
          }
+        else
+        {
+            swap_rows(m, jswap, iswap);
+        }
+    }
+    fprintf(log, "%d uphill steps were taken during optimization\n",
+            nuphill);
+
+    /* Now swap the matrix to get it into global minimum mode */
+    copy_t_mat(m, minimum);
+
+    fprintf(log, "Global minimum energy %g\n", mat_energy(minimum));
+    fprintf(log, "Global minimum energy %g\n", mat_energy(m));
+    fprintf(log, "Swapped time and frame indices and RMSD to next neighbor:\n");
+    for(i=0; (i<m->nn); i++)
+    {
+        fprintf(log, "%10g  %5d  %10g\n",
+                time[m->m_ind[i]],
+                m->m_ind[i],
+                (i<m->nn-1) ? m->mat[m->m_ind[i]][m->m_ind[i+1]] : 0);
+    }
+
+    if (NULL != fp)
+    {
+        fclose(fp);
      }
-    /* Now restore the highest energy index */
-    cp_index(nn, low_index, m->m_ind);
  }
  
  static void calc_dist(int nind, rvec x[], real **d)
@@ -1290,7 +1346,16 @@ int gmx_cluster(int argc, char *argv[])
          "of a structure are the M closest structures or all structures within",
          "[TT]cutoff[tt].[PAR]",
  
-        "Monte Carlo: reorder the RMSD matrix using Monte Carlo.[PAR]",
+        "Monte Carlo: reorder the RMSD matrix using Monte Carlo such that",
+        "the order of the frames is using the smallest possible increments.",
+        "With this it is possible to make a smooth animation going from one",
+        "structure to another with the largest possible (e.g.) RMSD between",
+        "them, however the intermediate steps should be as small as possible.",
+        "Applications could be to visualize a potential of mean force",
+        "ensemble of simulations or a pulling simulation. Obviously the user",
+        "has to prepare the trajectory well (e.g. by not superimposing frames).",
+        "The final result can be inspect visually by looking at the matrix",
+        "[TT].xpm[tt] file, which should vary smoothly from bottom to top.[PAR]",
  
          "diagonalization: diagonalize the RMSD matrix.[PAR]",
  
@@ -1344,7 +1409,7 @@ int gmx_cluster(int argc, char *argv[])
      rvec              *xtps, *usextps, *x1, **xx = NULL;
      const char        *fn, *trx_out_fn;
      t_clusters         clust;
-    t_mat             *rms;
+    t_mat             *rms, *orig=NULL;
      real              *eigenvalues;
      t_topology         top;
      int                ePBC;
@@ -1376,7 +1441,7 @@ int gmx_cluster(int argc, char *argv[])
      static int   nlevels  = 40, skip = 1;
      static real  scalemax = -1.0, rmsdcut = 0.1, rmsmin = 0.0;
      gmx_bool     bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;
-    static int   niter    = 10000, seed = 1993, write_ncl = 0, write_nst = 1, minstruct = 1;
+    static int   niter    = 10000, nrandom = 0, seed = 1993, write_ncl = 0, write_nst = 1, minstruct = 1;
      static real  kT       = 1e-3;
      static int   M        = 10, P = 3;
      output_env_t oenv;
@@ -1416,9 +1481,11 @@ int gmx_cluster(int argc, char *argv[])
          { "-P",     FALSE, etINT,  {&P},
            "Number of identical nearest neighbors required to form a cluster" },
          { "-seed",  FALSE, etINT,  {&seed},
-          "Random number seed for Monte Carlo clustering algorithm" },
+          "Random number seed for Monte Carlo clustering algorithm: <= 0 means generate" },
          { "-niter", FALSE, etINT,  {&niter},
            "Number of iterations for MC" },
+        { "-nrandom", FALSE, etINT,  {&nrandom},
+          "The first iterations for MC may be done complete random, to shuffle the frames" },
          { "-kT",    FALSE, etREAL, {&kT},
            "Boltzmann weighting factor for Monte Carlo optimization "
            "(zero turns off uphill steps)" },
@@ -1430,10 +1497,12 @@ int gmx_cluster(int argc, char *argv[])
          { efTPS, "-s",     NULL,        ffOPTRD },
          { efNDX, NULL,     NULL,        ffOPTRD },
          { efXPM, "-dm",   "rmsd",       ffOPTRD },
+        { efXPM, "-om",   "rmsd-raw",   ffWRITE },
          { efXPM, "-o",    "rmsd-clust", ffWRITE },
          { efLOG, "-g",    "cluster",    ffWRITE },
          { efXVG, "-dist", "rmsd-dist",  ffOPTWR },
          { efXVG, "-ev",   "rmsd-eig",   ffOPTWR },
+        { efXVG, "-conv", "mc-conv",    ffOPTWR },
          { efXVG, "-sz",   "clust-size", ffOPTWR},
          { efXPM, "-tr",   "clust-trans", ffOPTWR},
          { efXVG, "-ntr",  "clust-trans", ffOPTWR},
@@ -1719,7 +1788,7 @@ int gmx_cluster(int argc, char *argv[])
                  rms->minrms, rms->maxrms);
      ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2*rms->sumrms/(nf*(nf-1)));
      ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);
-    ffprintf_g(stderr, log, buf, "Energy of the matrix is %g nm\n", mat_energy(rms));
+    ffprintf_g(stderr, log, buf, "Energy of the matrix is %g.\n", mat_energy(rms));
      if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
      {
          fprintf(stderr, "WARNING: rmsd cutoff %g is outside range of rmsd values "
@@ -1781,9 +1850,11 @@ int gmx_cluster(int argc, char *argv[])
              ffclose(fp);
              break;
          case m_monte_carlo:
-            mc_optimize(log, rms, niter, &seed, kT);
-            swap_mat(rms);
-            reset_index(rms);
+            orig = init_mat(rms->nn, FALSE);
+            orig->nn = rms->nn;
+            copy_t_mat(orig, rms);
+            mc_optimize(log, rms, time, niter, nrandom, seed, kT, 
+                        opt2fn_null("-conv",NFILE,fnm), oenv);
              break;
          case m_jarvis_patrick:
              jarvis_patrick(rms->nn, rms->mat, M, P, bJP_RMSD ? rmsdcut : -1, &clust);
@@ -1797,7 +1868,7 @@ int gmx_cluster(int argc, char *argv[])
  
      if (method == m_monte_carlo || method == m_diagonalize)
      {
-        fprintf(stderr, "Energy of the matrix after clustering is %g nm\n",
+        fprintf(stderr, "Energy of the matrix after clustering is %g.\n",
                  mat_energy(rms));
      }
  
@@ -1878,7 +1949,18 @@ int gmx_cluster(int argc, char *argv[])
      }
      fprintf(stderr, "\n");
      ffclose(fp);
-
+    if (NULL != orig)
+    {
+        fp = opt2FILE("-om", NFILE, fnm, "w");
+        sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
+        sprintf(title, "RMS%sDeviation", bRMSdist ? " Distance " : " ");
+        write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
+                  nf, nf, time, time, orig->mat, 0.0, orig->maxrms,
+                  rlo_top, rhi_top, &nlevels);
+        ffclose(fp);
+        done_mat(&orig);
+        sfree(orig);
+    }
      /* now show what we've done */
      do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
      do_view(oenv, opt2fn_null("-sz", NFILE, fnm), "-nxy");
@@ -1893,6 +1975,7 @@ int gmx_cluster(int argc, char *argv[])
          do_view(oenv, opt2fn_null("-ntr", NFILE, fnm), "-nxy");
          do_view(oenv, opt2fn_null("-clid", NFILE, fnm), "-nxy");
      }
+    do_view(oenv, opt2fn_null("-conv", NFILE, fnm), NULL);
  
      return 0;
  }
author	David van der Spoel <spoel@xray.bmc.uu.se>
	Sat, 3 Aug 2013 17:28:27 +0000 (19:28 +0200)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Fri, 30 Aug 2013 12:45:17 +0000 (14:45 +0200)
src/gromacs/gmxana/cmat.c		patch \| blob \| history
src/gromacs/gmxana/cmat.h		patch \| blob \| history
src/gromacs/gmxana/gmx_cluster.c		patch \| blob \| history