2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/matio.h"
45 #include "gromacs/fileio/pdbio.h"
46 #include "gromacs/fileio/strdb.h"
47 #include "gromacs/fileio/tpxio.h"
48 #include "gromacs/fileio/trxio.h"
49 #include "gromacs/fileio/xvgr.h"
50 #include "gromacs/gmxana/gstat.h"
51 #include "gromacs/legacyheaders/macros.h"
52 #include "gromacs/legacyheaders/typedefs.h"
53 #include "gromacs/legacyheaders/viewit.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/utility/cstringutil.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/smalloc.h"
60 static int strip_dssp(char *dsspfile, int nres,
61 gmx_bool bPhobres[], real t,
62 real *acc, FILE *fTArea,
63 t_matrix *mat, int average_area[],
64 const output_env_t oenv)
66 static gmx_bool bFirst = TRUE;
69 static int xsize, frame;
72 int i, nr, iacc, nresidues;
73 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
77 tapeout = gmx_ffopen(dsspfile, "r");
82 fgets2(buf, STRLEN, tapeout);
84 while (strstr(buf, "KAPPA") == NULL);
87 /* Since we also have empty lines in the dssp output (temp) file,
88 * and some line content is saved to the ssbuf variable,
89 * we need more memory than just nres elements. To be shure,
90 * we allocate 2*nres-1, since for each chain there is a
91 * separating line in the temp file. (At most each residue
92 * could have been defined as a separate chain.) */
93 snew(ssbuf, 2*nres-1);
100 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != NULL); nr++)
102 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
104 SSTP = '='; /* Chain separator sign '=' */
108 SSTP = buf[16] == ' ' ? '~' : buf[16];
114 /* Only calculate solvent accessible area if needed */
115 if ((NULL != acc) && (buf[13] != '!'))
117 sscanf(&(buf[34]), "%d", &iacc);
119 /* average_area and bPhobres are counted from 0...nres-1 */
120 average_area[nresidues] += iacc;
121 if (bPhobres[nresidues])
131 /* Keep track of the residue number (do not count chain separator lines '!') */
141 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
144 sprintf(mat->title, "Secondary structure");
146 sprintf(mat->label_x, "%s", output_env_get_time_label(oenv));
147 sprintf(mat->label_y, "Residue");
148 mat->bDiscrete = TRUE;
150 snew(mat->axis_y, nr);
151 for (i = 0; i < nr; i++)
153 mat->axis_y[i] = i+1;
164 srenew(mat->axis_x, xsize);
165 srenew(mat->matrix, xsize);
167 mat->axis_x[frame] = t;
168 snew(mat->matrix[frame], nr);
170 for (i = 0; i < nr; i++)
173 mat->matrix[frame][i] = max(0, searchcmap(mat->nmap, mat->map, c));
180 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01*iaccb, 0.01*iaccf);
182 gmx_ffclose(tapeout);
184 /* Return the number of lines found in the dssp file (i.e. number
185 * of redidues plus chain separator lines).
186 * This is the number of y elements needed for the area xpm file */
190 static gmx_bool *bPhobics(t_atoms *atoms)
197 nb = get_strings("phbres.dat", &cb);
198 snew(bb, atoms->nres);
200 for (i = 0; (i < atoms->nres); i++)
202 if (-1 != search_str(nb, cb, *atoms->resinfo[i].name) )
210 static void check_oo(t_atoms *atoms)
216 OOO = gmx_strdup("O");
218 for (i = 0; (i < atoms->nr); i++)
220 if (strcmp(*(atoms->atomname[i]), "OXT") == 0)
222 *atoms->atomname[i] = OOO;
224 else if (strcmp(*(atoms->atomname[i]), "O1") == 0)
226 *atoms->atomname[i] = OOO;
228 else if (strcmp(*(atoms->atomname[i]), "OC1") == 0)
230 *atoms->atomname[i] = OOO;
235 static void norm_acc(t_atoms *atoms, int nres,
236 real av_area[], real norm_av_area[])
240 char surffn[] = "surface.dat";
241 char **surf_res, **surf_lines;
244 n_surf = get_lines(surffn, &surf_lines);
246 snew(surf_res, n_surf);
247 for (i = 0; (i < n_surf); i++)
249 snew(surf_res[i], 5);
250 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
253 for (i = 0; (i < nres); i++)
255 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
258 norm_av_area[i] = av_area[i] / surf[n];
262 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
263 *atoms->resinfo[i].name, surffn);
268 void prune_ss_legend(t_matrix *mat)
272 int i, r, f, newnmap;
275 snew(present, mat->nmap);
276 snew(newnum, mat->nmap);
278 for (f = 0; f < mat->nx; f++)
280 for (r = 0; r < mat->ny; r++)
282 present[mat->matrix[f][r]] = TRUE;
288 for (i = 0; i < mat->nmap; i++)
295 srenew(newmap, newnmap);
296 newmap[newnmap-1] = mat->map[i];
299 if (newnmap != mat->nmap)
303 for (f = 0; f < mat->nx; f++)
305 for (r = 0; r < mat->ny; r++)
307 mat->matrix[f][r] = newnum[mat->matrix[f][r]];
313 void write_sas_mat(const char *fn, real **accr, int nframe, int nres, t_matrix *mat)
317 t_rgb rlo = {1, 1, 1}, rhi = {0, 0, 0};
322 hi = lo = accr[0][0];
323 for (i = 0; i < nframe; i++)
325 for (j = 0; j < nres; j++)
327 lo = min(lo, accr[i][j]);
328 hi = max(hi, accr[i][j]);
331 fp = gmx_ffopen(fn, "w");
333 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)",
334 "Time", "Residue Index", nframe, nres,
335 mat->axis_x, mat->axis_y, accr, lo, hi, rlo, rhi, &nlev);
340 void analyse_ss(const char *outfile, t_matrix *mat, const char *ss_string,
341 const output_env_t oenv)
345 int f, r, *count, *total, ss_count, total_count;
350 snew(count, mat->nmap);
351 snew(total, mat->nmap);
352 snew(leg, mat->nmap+1);
353 leg[0] = "Structure";
354 for (s = 0; s < (size_t)mat->nmap; s++)
356 leg[s+1] = gmx_strdup(map[s].desc);
359 fp = xvgropen(outfile, "Secondary Structure",
360 output_env_get_xvgr_tlabel(oenv), "Number of Residues", oenv);
361 if (output_env_get_print_xvgr_codes(oenv))
363 fprintf(fp, "@ subtitle \"Structure = ");
365 for (s = 0; s < strlen(ss_string); s++)
371 for (f = 0; f < mat->nmap; f++)
373 if (ss_string[s] == map[f].code.c1)
375 fprintf(fp, "%s", map[f].desc);
380 xvgr_legend(fp, mat->nmap+1, leg, oenv);
383 for (s = 0; s < (size_t)mat->nmap; s++)
387 for (f = 0; f < mat->nx; f++)
390 for (s = 0; s < (size_t)mat->nmap; s++)
394 for (r = 0; r < mat->ny; r++)
396 count[mat->matrix[f][r]]++;
397 total[mat->matrix[f][r]]++;
399 for (s = 0; s < (size_t)mat->nmap; s++)
401 if (strchr(ss_string, map[s].code.c1))
403 ss_count += count[s];
404 total_count += count[s];
407 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
408 for (s = 0; s < (size_t)mat->nmap; s++)
410 fprintf(fp, " %5d", count[s]);
414 /* now print column totals */
415 fprintf(fp, "%-8s %5d", "# Totals", total_count);
416 for (s = 0; s < (size_t)mat->nmap; s++)
418 fprintf(fp, " %5d", total[s]);
422 /* now print percentages */
423 fprintf(fp, "%-8s %5.2f", "# SS %", total_count / (real) (mat->nx * mat->ny));
424 for (s = 0; s < (size_t)mat->nmap; s++)
426 fprintf(fp, " %5.2f", total[s] / (real) (mat->nx * mat->ny));
435 int gmx_do_dssp(int argc, char *argv[])
437 const char *desc[] = {
439 "reads a trajectory file and computes the secondary structure for",
441 "calling the dssp program. If you do not have the dssp program,",
442 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
443 "that the dssp executable is located in ",
444 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
445 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
446 "executable, e.g.: [PAR]",
447 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
448 "Since version 2.0.0, dssp is invoked with a syntax that differs",
449 "from earlier versions. If you have an older version of dssp,",
450 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
451 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
452 "Even newer versions (which at the time of writing are not yet released)",
453 "are assumed to have the same syntax as 2.0.0.[PAR]",
454 "The structure assignment for each residue and time is written to an",
455 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
456 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
457 "Individual chains are separated by light grey lines in the [TT].xpm[tt] and",
459 "The number of residues with each secondary structure type and the",
460 "total secondary structure ([TT]-sss[tt]) count as a function of",
461 "time are also written to file ([TT]-sc[tt]).[PAR]",
462 "Solvent accessible surface (SAS) per residue can be calculated, both in",
463 "absolute values (A^2) and in fractions of the maximal accessible",
464 "surface of a residue. The maximal accessible surface is defined as",
465 "the accessible surface of a residue in a chain of glycines.",
466 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
467 "and that is more efficient.[PAR]",
468 "Finally, this program can dump the secondary structure in a special file",
469 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
470 "these two programs can be used to analyze dihedral properties as a",
471 "function of secondary structure type."
473 static gmx_bool bVerbose;
474 static const char *ss_string = "HEBT";
475 static int dsspVersion = 2;
477 { "-v", FALSE, etBOOL, {&bVerbose},
478 "HIDDENGenerate miles of useless information" },
479 { "-sss", FALSE, etSTR, {&ss_string},
480 "Secondary structures for structure count"},
481 { "-ver", FALSE, etINT, {&dsspVersion},
482 "DSSP major version. Syntax changed with version 2"}
487 FILE *ss, *acc, *fTArea, *tmpf;
488 const char *fnSCount, *fnArea, *fnTArea, *fnAArea;
489 const char *leg[] = { "Phobic", "Phylic" };
494 int nres, nr0, naccr, nres_plus_separators;
495 gmx_bool *bPhbres, bDoAccSurf;
497 int i, j, natoms, nframe = 0;
500 char *grpnm, *ss_str;
504 real **accr, *accr_ptr = NULL, *av_area, *norm_av_area;
505 char pdbfile[32], tmpfile[32], title[256];
509 gmx_rmpbc_t gpbc = NULL;
512 { efTRX, "-f", NULL, ffREAD },
513 { efTPS, NULL, NULL, ffREAD },
514 { efNDX, NULL, NULL, ffOPTRD },
515 { efDAT, "-ssdump", "ssdump", ffOPTWR },
516 { efMAP, "-map", "ss", ffLIBRD },
517 { efXPM, "-o", "ss", ffWRITE },
518 { efXVG, "-sc", "scount", ffWRITE },
519 { efXPM, "-a", "area", ffOPTWR },
520 { efXVG, "-ta", "totarea", ffOPTWR },
521 { efXVG, "-aa", "averarea", ffOPTWR }
523 #define NFILE asize(fnm)
525 if (!parse_common_args(&argc, argv,
526 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT,
527 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
531 fnSCount = opt2fn("-sc", NFILE, fnm);
532 fnArea = opt2fn_null("-a", NFILE, fnm);
533 fnTArea = opt2fn_null("-ta", NFILE, fnm);
534 fnAArea = opt2fn_null("-aa", NFILE, fnm);
535 bDoAccSurf = (fnArea || fnTArea || fnAArea);
537 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), title, &top, &ePBC, &xp, NULL, box, FALSE);
538 atoms = &(top.atoms);
540 bPhbres = bPhobics(atoms);
542 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
545 for (i = 0; (i < gnx); i++)
547 if (atoms->atom[index[i]].resind != nr0)
549 nr0 = atoms->atom[index[i]].resind;
553 fprintf(stderr, "There are %d residues in your selected group\n", nres);
555 strcpy(pdbfile, "ddXXXXXX");
557 if ((tmpf = fopen(pdbfile, "w")) == NULL)
559 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
561 if ((tmpf = fopen(pdbfile, "w")) == NULL)
563 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
571 strcpy(tmpfile, "ddXXXXXX");
573 if ((tmpf = fopen(tmpfile, "w")) == NULL)
575 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
577 if ((tmpf = fopen(tmpfile, "w")) == NULL)
579 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
587 if ((dptr = getenv("DSSP")) == NULL)
589 dptr = "/usr/local/bin/dssp";
591 if (!gmx_fexist(dptr))
593 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)",
596 if (dsspVersion >= 2)
600 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by do_dssp. Assuming version 2 syntax.\n\n", dsspVersion);
603 sprintf(dssp, "%s -i %s -o %s > /dev/null %s",
604 dptr, pdbfile, tmpfile, bVerbose ? "" : "2> /dev/null");
608 sprintf(dssp, "%s %s %s %s > /dev/null %s",
609 dptr, bDoAccSurf ? "" : "-na", pdbfile, tmpfile, bVerbose ? "" : "2> /dev/null");
612 fprintf(stderr, "dssp cmd='%s'\n", dssp);
616 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
617 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
618 xvgr_legend(fTArea, 2, leg, oenv);
626 mat.nmap = readcmap(opt2fn("-map", NFILE, fnm), &(mat.map));
628 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
629 if (natoms > atoms->nr)
631 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
635 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
638 snew(average_area, atoms->nres);
639 snew(av_area, atoms->nres);
640 snew(norm_av_area, atoms->nres);
644 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
647 t = output_env_conv_time(oenv, t);
648 if (bDoAccSurf && nframe >= naccr)
652 for (i = naccr-10; i < naccr; i++)
654 snew(accr[i], 2*atoms->nres-1);
657 gmx_rmpbc(gpbc, natoms, box, x);
658 tapein = gmx_ffopen(pdbfile, "w");
659 write_pdbfile_indexed(tapein, NULL, atoms, x, ePBC, box, ' ', -1, gnx, index, NULL, TRUE);
663 printf("Warning-- No calls to system(3) supported on this platform.");
664 printf("Warning-- Skipping execution of 'system(\"%s\")'.", dssp);
667 if (0 != system(dssp))
669 gmx_fatal(FARGS, "Failed to execute command: %s\n",
670 "Try specifying your dssp version with the -ver option.", dssp);
674 /* strip_dssp returns the number of lines found in the dssp file, i.e.
675 * the number of residues plus the separator lines */
679 accr_ptr = accr[nframe];
682 nres_plus_separators = strip_dssp(tmpfile, nres, bPhbres, t,
683 accr_ptr, fTArea, &mat, average_area, oenv);
688 while (read_next_x(oenv, status, &t, x, box));
689 fprintf(stderr, "\n");
695 gmx_rmpbc_done(gpbc);
697 prune_ss_legend(&mat);
699 ss = opt2FILE("-o", NFILE, fnm, "w");
701 write_xpm_m(ss, mat);
704 if (opt2bSet("-ssdump", NFILE, fnm))
706 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
707 snew(ss_str, nres+1);
708 fprintf(ss, "%d\n", nres);
709 for (j = 0; j < mat.nx; j++)
711 for (i = 0; (i < mat.ny); i++)
713 ss_str[i] = mat.map[mat.matrix[j][i]].code.c1;
716 fprintf(ss, "%s\n", ss_str);
721 analyse_ss(fnSCount, &mat, ss_string, oenv);
725 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
727 for (i = 0; i < atoms->nres; i++)
729 av_area[i] = (average_area[i] / (real)nframe);
732 norm_acc(atoms, nres, av_area, norm_av_area);
736 acc = xvgropen(fnAArea, "Average Accessible Area",
737 "Residue", "A\\S2", oenv);
738 for (i = 0; (i < nres); i++)
740 fprintf(acc, "%5d %10g %10g\n", i+1, av_area[i], norm_av_area[i]);
746 view_all(oenv, NFILE, fnm);