src/gromacs/gmxana/gmx_spatial.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <math.h>
  40 #include <stdlib.h>
  41
  42 #include "gromacs/commandline/pargs.h"
  43 #include "typedefs.h"
  44 #include "gromacs/utility/smalloc.h"
  45 #include "gromacs/math/vec.h"
  46 #include "gromacs/fileio/tpxio.h"
  47 #include "gromacs/fileio/trxio.h"
  48 #include "gromacs/topology/index.h"
  49 #include "gromacs/pbcutil/pbc.h"
  50 #include "gromacs/pbcutil/rmpbc.h"
  51 #include "gmx_ana.h"
  52 #include "macros.h"
  53
  54 static const double bohr = 0.529177249;  /* conversion factor to compensate for VMD plugin conversion... */
  55
  56 int gmx_spatial(int argc, char *argv[])
  57 {
  58     const char     *desc[] = {
  59         "[THISMODULE] calculates the spatial distribution function and ",
  60         "outputs it in a form that can be read by VMD as Gaussian98 cube format. ",
  61         "For a system of 32,000 atoms and a 50 ns trajectory, the SDF can be generated ",
  62         "in about 30 minutes, with most of the time dedicated to the two runs through ",
  63         "[TT]trjconv[tt] that are required to center everything properly. ",
  64         "This also takes a whole bunch of space (3 copies of the trajectory file). ",
  65         "Still, the pictures are pretty and very informative when the fitted selection is properly made. ",
  66         "3-4 atoms in a widely mobile group (like a free amino acid in solution) works ",
  67         "well, or select the protein backbone in a stable folded structure to get the SDF ",
  68         "of solvent and look at the time-averaged solvation shell. ",
  69         "It is also possible using this program to generate the SDF based on some arbitrary ",
  70         "Cartesian coordinate. To do that, simply omit the preliminary [gmx-trjconv] steps. \n",
  71         "USAGE: \n",
  72         "1. Use [gmx-make_ndx] to create a group containing the atoms around which you want the SDF \n",
  73         "2. [TT]gmx trjconv -s a.tpr -f a.tng -o b.tng -boxcenter tric -ur compact -pbc none[tt] \n",
  74         "3. [TT]gmx trjconv -s a.tpr -f b.tng -o c.tng -fit rot+trans[tt] \n",
  75         "4. run [THISMODULE] on the [TT]c.tng[tt] output of step #3. \n",
  76         "5. Load [TT]grid.cube[tt] into VMD and view as an isosurface. \n",
  77         "[BB]Note[bb] that systems such as micelles will require [TT]gmx trjconv -pbc cluster[tt] between steps 1 and 2\n",
  78         "WARNINGS:[BR]",
  79         "The SDF will be generated for a cube that contains all bins that have some non-zero occupancy. ",
  80         "However, the preparatory [TT]-fit rot+trans[tt] option to [gmx-trjconv] implies that your system will be rotating ",
  81         "and translating in space (in order that the selected group does not). Therefore the values that are ",
  82         "returned will only be valid for some region around your central group/coordinate that has full overlap ",
  83         "with system volume throughout the entire translated/rotated system over the course of the trajectory. ",
  84         "It is up to the user to ensure that this is the case. \n",
  85         "BUGS:[BR]",
  86         "When the allocated memory is not large enough, a segmentation fault may occur. This is usually detected ",
  87         "and the program is halted prior to the fault while displaying a warning message suggesting the use of the [TT]-nab[tt] (Number of Additional Bins)",
  88         "option. However, the program does not detect all such events. If you encounter a segmentation fault, run it again ",
  89         "with an increased [TT]-nab[tt] value. \n",
  90         "RISKY OPTIONS:[BR]",
  91         "To reduce the amount of space and time required, you can output only the coords ",
  92         "that are going to be used in the first and subsequent run through [gmx-trjconv]. ",
  93         "However, be sure to set the [TT]-nab[tt] option to a sufficiently high value since ",
  94         "memory is allocated for cube bins based on the initial coordinates and the [TT]-nab[tt] ",
  95         "option value. \n"
  96     };
  97
  98     static gmx_bool bPBC         = FALSE;
  99     static gmx_bool bSHIFT       = FALSE;
 100     static int      iIGNOREOUTER = -1;   /*Positive values may help if the surface is spikey */
 101     static gmx_bool bCUTDOWN     = TRUE;
 102     static real     rBINWIDTH    = 0.05; /* nm */
 103     static gmx_bool bCALCDIV     = TRUE;
 104     static int      iNAB         = 4;
 105
 106     t_pargs         pa[] = {
 107         { "-pbc",      FALSE, etBOOL, {&bPBC},
 108           "Use periodic boundary conditions for computing distances" },
 109         { "-div",      FALSE, etBOOL, {&bCALCDIV},
 110           "Calculate and apply the divisor for bin occupancies based on atoms/minimal cube size. Set as TRUE for visualization and as FALSE ([TT]-nodiv[tt]) to get accurate counts per frame" },
 111         { "-ign",      FALSE, etINT, {&iIGNOREOUTER},
 112           "Do not display this number of outer cubes (positive values may reduce boundary speckles; -1 ensures outer surface is visible)" },
 113         /*    { "-cut",      bCUTDOWN, etBOOL, {&bCUTDOWN},*/
 114         /*      "Display a total cube that is of minimal size" }, */
 115         { "-bin",      FALSE, etREAL, {&rBINWIDTH},
 116           "Width of the bins (nm)" },
 117         { "-nab",      FALSE, etINT, {&iNAB},
 118           "Number of additional bins to ensure proper memory allocation" }
 119     };
 120
 121     double          MINBIN[3];
 122     double          MAXBIN[3];
 123     t_topology      top;
 124     int             ePBC;
 125     char            title[STRLEN];
 126     t_trxframe      fr;
 127     rvec           *xtop, *shx[26];
 128     matrix          box, box_pbc;
 129     t_trxstatus    *status;
 130     int             flags = TRX_READ_X;
 131     t_pbc           pbc;
 132     t_atoms        *atoms;
 133     int             natoms;
 134     char           *grpnm, *grpnmp;
 135     atom_id        *index, *indexp;
 136     int             i, nidx, nidxp;
 137     int             v;
 138     int             j, k;
 139     long         ***bin = (long ***)NULL;
 140     long            nbin[3];
 141     FILE           *flp;
 142     long            x, y, z, minx, miny, minz, maxx, maxy, maxz;
 143     long            numfr, numcu;
 144     long            tot, max, min;
 145     double          norm;
 146     output_env_t    oenv;
 147     gmx_rmpbc_t     gpbc = NULL;
 148
 149     t_filenm        fnm[] = {
 150         { efTPS,  NULL,  NULL, ffREAD }, /* this is for the topology */
 151         { efTRX, "-f", NULL, ffREAD },   /* and this for the trajectory */
 152         { efNDX, NULL, NULL, ffOPTRD }
 153     };
 154
 155 #define NFILE asize(fnm)
 156
 157     /* This is the routine responsible for adding default options,
 158      * calling the X/motif interface, etc. */
 159     if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW,
 160                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
 161     {
 162         return 0;
 163     }
 164
 165     read_tps_conf(ftp2fn(efTPS, NFILE, fnm), title, &top, &ePBC, &xtop, NULL, box, TRUE);
 166     sfree(xtop);
 167
 168     atoms = &(top.atoms);
 169     printf("Select group to generate SDF:\n");
 170     get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &nidx, &index, &grpnm);
 171     printf("Select group to output coords (e.g. solute):\n");
 172     get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &nidxp, &indexp, &grpnmp);
 173
 174     /* The first time we read data is a little special */
 175     natoms = read_first_frame(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &fr, flags);
 176
 177     /* Memory Allocation */
 178     MINBIN[XX] = MAXBIN[XX] = fr.x[0][XX];
 179     MINBIN[YY] = MAXBIN[YY] = fr.x[0][YY];
 180     MINBIN[ZZ] = MAXBIN[ZZ] = fr.x[0][ZZ];
 181     for (i = 1; i < top.atoms.nr; ++i)
 182     {
 183         if (fr.x[i][XX] < MINBIN[XX])
 184         {
 185             MINBIN[XX] = fr.x[i][XX];
 186         }
 187         if (fr.x[i][XX] > MAXBIN[XX])
 188         {
 189             MAXBIN[XX] = fr.x[i][XX];
 190         }
 191         if (fr.x[i][YY] < MINBIN[YY])
 192         {
 193             MINBIN[YY] = fr.x[i][YY];
 194         }
 195         if (fr.x[i][YY] > MAXBIN[YY])
 196         {
 197             MAXBIN[YY] = fr.x[i][YY];
 198         }
 199         if (fr.x[i][ZZ] < MINBIN[ZZ])
 200         {
 201             MINBIN[ZZ] = fr.x[i][ZZ];
 202         }
 203         if (fr.x[i][ZZ] > MAXBIN[ZZ])
 204         {
 205             MAXBIN[ZZ] = fr.x[i][ZZ];
 206         }
 207     }
 208     for (i = ZZ; i >= XX; --i)
 209     {
 210         MAXBIN[i]  = (ceil((MAXBIN[i]-MINBIN[i])/rBINWIDTH)+(double)iNAB)*rBINWIDTH+MINBIN[i];
 211         MINBIN[i] -= (double)iNAB*rBINWIDTH;
 212         nbin[i]    = (long)ceil((MAXBIN[i]-MINBIN[i])/rBINWIDTH);
 213     }
 214     snew(bin, nbin[XX]);
 215     for (i = 0; i < nbin[XX]; ++i)
 216     {
 217         snew(bin[i], nbin[YY]);
 218         for (j = 0; j < nbin[YY]; ++j)
 219         {
 220             snew(bin[i][j], nbin[ZZ]);
 221         }
 222     }
 223     copy_mat(box, box_pbc);
 224     numfr = 0;
 225     minx  = miny = minz = 999;
 226     maxx  = maxy = maxz = 0;
 227
 228     if (bPBC)
 229     {
 230         gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
 231     }
 232     /* This is the main loop over frames */
 233     do
 234     {
 235         /* Must init pbc every step because of pressure coupling */
 236
 237         copy_mat(box, box_pbc);
 238         if (bPBC)
 239         {
 240             gmx_rmpbc_trxfr(gpbc, &fr);
 241             set_pbc(&pbc, ePBC, box_pbc);
 242         }
 243
 244         for (i = 0; i < nidx; i++)
 245         {
 246             if (fr.x[index[i]][XX] < MINBIN[XX] || fr.x[index[i]][XX] > MAXBIN[XX] ||
 247                 fr.x[index[i]][YY] < MINBIN[YY] || fr.x[index[i]][YY] > MAXBIN[YY] ||
 248                 fr.x[index[i]][ZZ] < MINBIN[ZZ] || fr.x[index[i]][ZZ] > MAXBIN[ZZ])
 249             {
 250                 printf("There was an item outside of the allocated memory. Increase the value given with the -nab option.\n");
 251                 printf("Memory was allocated for [%f,%f,%f]\tto\t[%f,%f,%f]\n", MINBIN[XX], MINBIN[YY], MINBIN[ZZ], MAXBIN[XX], MAXBIN[YY], MAXBIN[ZZ]);
 252                 printf("Memory was required for [%f,%f,%f]\n", fr.x[index[i]][XX], fr.x[index[i]][YY], fr.x[index[i]][ZZ]);
 253                 exit(1);
 254             }
 255             x = (long)ceil((fr.x[index[i]][XX]-MINBIN[XX])/rBINWIDTH);
 256             y = (long)ceil((fr.x[index[i]][YY]-MINBIN[YY])/rBINWIDTH);
 257             z = (long)ceil((fr.x[index[i]][ZZ]-MINBIN[ZZ])/rBINWIDTH);
 258             ++bin[x][y][z];
 259             if (x < minx)
 260             {
 261                 minx = x;
 262             }
 263             if (x > maxx)
 264             {
 265                 maxx = x;
 266             }
 267             if (y < miny)
 268             {
 269                 miny = y;
 270             }
 271             if (y > maxy)
 272             {
 273                 maxy = y;
 274             }
 275             if (z < minz)
 276             {
 277                 minz = z;
 278             }
 279             if (z > maxz)
 280             {
 281                 maxz = z;
 282             }
 283         }
 284         numfr++;
 285         /* printf("%f\t%f\t%f\n",box[XX][XX],box[YY][YY],box[ZZ][ZZ]); */
 286
 287     }
 288     while (read_next_frame(oenv, status, &fr));
 289
 290     if (bPBC)
 291     {
 292         gmx_rmpbc_done(gpbc);
 293     }
 294
 295     if (!bCUTDOWN)
 296     {
 297         minx = miny = minz = 0;
 298         maxx = nbin[XX];
 299         maxy = nbin[YY];
 300         maxz = nbin[ZZ];
 301     }
 302
 303     /* OUTPUT */
 304     flp = gmx_ffopen("grid.cube", "w");
 305     fprintf(flp, "Spatial Distribution Function\n");
 306     fprintf(flp, "test\n");
 307     fprintf(flp, "%5d%12.6f%12.6f%12.6f\n", nidxp, (MINBIN[XX]+(minx+iIGNOREOUTER)*rBINWIDTH)*10./bohr, (MINBIN[YY]+(miny+iIGNOREOUTER)*rBINWIDTH)*10./bohr, (MINBIN[ZZ]+(minz+iIGNOREOUTER)*rBINWIDTH)*10./bohr);
 308     fprintf(flp, "%5ld%12.6f%12.6f%12.6f\n", maxx-minx+1-(2*iIGNOREOUTER), rBINWIDTH*10./bohr, 0., 0.);
 309     fprintf(flp, "%5ld%12.6f%12.6f%12.6f\n", maxy-miny+1-(2*iIGNOREOUTER), 0., rBINWIDTH*10./bohr, 0.);
 310     fprintf(flp, "%5ld%12.6f%12.6f%12.6f\n", maxz-minz+1-(2*iIGNOREOUTER), 0., 0., rBINWIDTH*10./bohr);
 311     for (i = 0; i < nidxp; i++)
 312     {
 313         v = 2;
 314         if (*(top.atoms.atomname[indexp[i]][0]) == 'C')
 315         {
 316             v = 6;
 317         }
 318         if (*(top.atoms.atomname[indexp[i]][0]) == 'N')
 319         {
 320             v = 7;
 321         }
 322         if (*(top.atoms.atomname[indexp[i]][0]) == 'O')
 323         {
 324             v = 8;
 325         }
 326         if (*(top.atoms.atomname[indexp[i]][0]) == 'H')
 327         {
 328             v = 1;
 329         }
 330         if (*(top.atoms.atomname[indexp[i]][0]) == 'S')
 331         {
 332             v = 16;
 333         }
 334         fprintf(flp, "%5d%12.6f%12.6f%12.6f%12.6f\n", v, 0., (double)fr.x[indexp[i]][XX]*10./bohr, (double)fr.x[indexp[i]][YY]*10./bohr, (double)fr.x[indexp[i]][ZZ]*10./bohr);
 335     }
 336
 337     tot = 0;
 338     for (k = 0; k < nbin[XX]; k++)
 339     {
 340         if (!(k < minx || k > maxx))
 341         {
 342             continue;
 343         }
 344         for (j = 0; j < nbin[YY]; j++)
 345         {
 346             if (!(j < miny || j > maxy))
 347             {
 348                 continue;
 349             }
 350             for (i = 0; i < nbin[ZZ]; i++)
 351             {
 352                 if (!(i < minz || i > maxz))
 353                 {
 354                     continue;
 355                 }
 356                 if (bin[k][j][i] != 0)
 357                 {
 358                     printf("A bin was not empty when it should have been empty. Programming error.\n");
 359                     printf("bin[%d][%d][%d] was = %ld\n", k, j, i, bin[k][j][i]);
 360                     exit(1);
 361                 }
 362             }
 363         }
 364     }
 365
 366     min = 999;
 367     max = 0;
 368     for (k = 0; k < nbin[XX]; k++)
 369     {
 370         if (k < minx+iIGNOREOUTER || k > maxx-iIGNOREOUTER)
 371         {
 372             continue;
 373         }
 374         for (j = 0; j < nbin[YY]; j++)
 375         {
 376             if (j < miny+iIGNOREOUTER || j > maxy-iIGNOREOUTER)
 377             {
 378                 continue;
 379             }
 380             for (i = 0; i < nbin[ZZ]; i++)
 381             {
 382                 if (i < minz+iIGNOREOUTER || i > maxz-iIGNOREOUTER)
 383                 {
 384                     continue;
 385                 }
 386                 tot += bin[k][j][i];
 387                 if (bin[k][j][i] > max)
 388                 {
 389                     max = bin[k][j][i];
 390                 }
 391                 if (bin[k][j][i] < min)
 392                 {
 393                     min = bin[k][j][i];
 394                 }
 395             }
 396         }
 397     }
 398
 399     numcu = (maxx-minx+1-(2*iIGNOREOUTER))*(maxy-miny+1-(2*iIGNOREOUTER))*(maxz-minz+1-(2*iIGNOREOUTER));
 400     if (bCALCDIV)
 401     {
 402         norm = ((double)numcu*(double)numfr) / (double)tot;
 403     }
 404     else
 405     {
 406         norm = 1.0;
 407     }
 408
 409     for (k = 0; k < nbin[XX]; k++)
 410     {
 411         if (k < minx+iIGNOREOUTER || k > maxx-iIGNOREOUTER)
 412         {
 413             continue;
 414         }
 415         for (j = 0; j < nbin[YY]; j++)
 416         {
 417             if (j < miny+iIGNOREOUTER || j > maxy-iIGNOREOUTER)
 418             {
 419                 continue;
 420             }
 421             for (i = 0; i < nbin[ZZ]; i++)
 422             {
 423                 if (i < minz+iIGNOREOUTER || i > maxz-iIGNOREOUTER)
 424                 {
 425                     continue;
 426                 }
 427                 fprintf(flp, "%12.6f ", norm*(double)bin[k][j][i]/(double)numfr);
 428             }
 429             fprintf(flp, "\n");
 430         }
 431         fprintf(flp, "\n");
 432     }
 433     gmx_ffclose(flp);
 434
 435     /* printf("x=%d to %d\n",minx,maxx); */
 436     /* printf("y=%d to %d\n",miny,maxy); */
 437     /* printf("z=%d to %d\n",minz,maxz); */
 438
 439     if (bCALCDIV)
 440     {
 441         printf("Counts per frame in all %ld cubes divided by %le\n", numcu, 1.0/norm);
 442         printf("Normalized data: average %le, min %le, max %le\n", 1.0, norm*(double)min/(double)numfr, norm*(double)max/(double)numfr);
 443     }
 444     else
 445     {
 446         printf("grid.cube contains counts per frame in all %ld cubes\n", numcu);
 447         printf("Raw data: average %le, min %le, max %le\n", 1.0/norm, (double)min/(double)numfr, (double)max/(double)numfr);
 448     }
 449
 450     return 0;
 451 }