2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
41 #include "gromacs/commandline/pargs.h"
42 #include "gromacs/fileio/matio.h"
43 #include "gromacs/fileio/pdbio.h"
44 #include "gromacs/fileio/strdb.h"
45 #include "gromacs/fileio/tpxio.h"
46 #include "gromacs/fileio/trxio.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/gstat.h"
49 #include "gromacs/legacyheaders/macros.h"
50 #include "gromacs/legacyheaders/typedefs.h"
51 #include "gromacs/legacyheaders/viewit.h"
52 #include "gromacs/pbcutil/rmpbc.h"
53 #include "gromacs/topology/index.h"
54 #include "gromacs/utility/cstringutil.h"
55 #include "gromacs/utility/fatalerror.h"
56 #include "gromacs/utility/smalloc.h"
58 static int strip_dssp(char *dsspfile, int nres,
59 gmx_bool bPhobres[], real t,
60 real *acc, FILE *fTArea,
61 t_matrix *mat, int average_area[],
62 const output_env_t oenv)
64 static gmx_bool bFirst = TRUE;
67 static int xsize, frame;
70 int i, nr, iacc, nresidues;
71 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
75 tapeout = gmx_ffopen(dsspfile, "r");
80 fgets2(buf, STRLEN, tapeout);
82 while (strstr(buf, "KAPPA") == NULL);
85 /* Since we also have empty lines in the dssp output (temp) file,
86 * and some line content is saved to the ssbuf variable,
87 * we need more memory than just nres elements. To be shure,
88 * we allocate 2*nres-1, since for each chain there is a
89 * separating line in the temp file. (At most each residue
90 * could have been defined as a separate chain.) */
91 snew(ssbuf, 2*nres-1);
98 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != NULL); nr++)
100 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
102 SSTP = '='; /* Chain separator sign '=' */
106 SSTP = buf[16] == ' ' ? '~' : buf[16];
112 /* Only calculate solvent accessible area if needed */
113 if ((NULL != acc) && (buf[13] != '!'))
115 sscanf(&(buf[34]), "%d", &iacc);
117 /* average_area and bPhobres are counted from 0...nres-1 */
118 average_area[nresidues] += iacc;
119 if (bPhobres[nresidues])
129 /* Keep track of the residue number (do not count chain separator lines '!') */
139 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
142 sprintf(mat->title, "Secondary structure");
144 sprintf(mat->label_x, "%s", output_env_get_time_label(oenv));
145 sprintf(mat->label_y, "Residue");
146 mat->bDiscrete = TRUE;
148 snew(mat->axis_y, nr);
149 for (i = 0; i < nr; i++)
151 mat->axis_y[i] = i+1;
162 srenew(mat->axis_x, xsize);
163 srenew(mat->matrix, xsize);
165 mat->axis_x[frame] = t;
166 snew(mat->matrix[frame], nr);
168 for (i = 0; i < nr; i++)
171 mat->matrix[frame][i] = max(0, searchcmap(mat->nmap, mat->map, c));
178 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01*iaccb, 0.01*iaccf);
180 gmx_ffclose(tapeout);
182 /* Return the number of lines found in the dssp file (i.e. number
183 * of redidues plus chain separator lines).
184 * This is the number of y elements needed for the area xpm file */
188 static gmx_bool *bPhobics(t_atoms *atoms)
195 nb = get_strings("phbres.dat", &cb);
196 snew(bb, atoms->nres);
198 for (i = 0; (i < atoms->nres); i++)
200 if (-1 != search_str(nb, cb, *atoms->resinfo[i].name) )
208 static void check_oo(t_atoms *atoms)
214 OOO = gmx_strdup("O");
216 for (i = 0; (i < atoms->nr); i++)
218 if (strcmp(*(atoms->atomname[i]), "OXT") == 0)
220 *atoms->atomname[i] = OOO;
222 else if (strcmp(*(atoms->atomname[i]), "O1") == 0)
224 *atoms->atomname[i] = OOO;
226 else if (strcmp(*(atoms->atomname[i]), "OC1") == 0)
228 *atoms->atomname[i] = OOO;
233 static void norm_acc(t_atoms *atoms, int nres,
234 real av_area[], real norm_av_area[])
238 char surffn[] = "surface.dat";
239 char **surf_res, **surf_lines;
242 n_surf = get_lines(surffn, &surf_lines);
244 snew(surf_res, n_surf);
245 for (i = 0; (i < n_surf); i++)
247 snew(surf_res[i], 5);
248 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
251 for (i = 0; (i < nres); i++)
253 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
256 norm_av_area[i] = av_area[i] / surf[n];
260 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
261 *atoms->resinfo[i].name, surffn);
266 void prune_ss_legend(t_matrix *mat)
270 int i, r, f, newnmap;
273 snew(present, mat->nmap);
274 snew(newnum, mat->nmap);
276 for (f = 0; f < mat->nx; f++)
278 for (r = 0; r < mat->ny; r++)
280 present[mat->matrix[f][r]] = TRUE;
286 for (i = 0; i < mat->nmap; i++)
293 srenew(newmap, newnmap);
294 newmap[newnmap-1] = mat->map[i];
297 if (newnmap != mat->nmap)
301 for (f = 0; f < mat->nx; f++)
303 for (r = 0; r < mat->ny; r++)
305 mat->matrix[f][r] = newnum[mat->matrix[f][r]];
311 void write_sas_mat(const char *fn, real **accr, int nframe, int nres, t_matrix *mat)
315 t_rgb rlo = {1, 1, 1}, rhi = {0, 0, 0};
320 hi = lo = accr[0][0];
321 for (i = 0; i < nframe; i++)
323 for (j = 0; j < nres; j++)
325 lo = min(lo, accr[i][j]);
326 hi = max(hi, accr[i][j]);
329 fp = gmx_ffopen(fn, "w");
331 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)",
332 "Time", "Residue Index", nframe, nres,
333 mat->axis_x, mat->axis_y, accr, lo, hi, rlo, rhi, &nlev);
338 void analyse_ss(const char *outfile, t_matrix *mat, const char *ss_string,
339 const output_env_t oenv)
343 int f, r, *count, *total, ss_count, total_count;
348 snew(count, mat->nmap);
349 snew(total, mat->nmap);
350 snew(leg, mat->nmap+1);
351 leg[0] = "Structure";
352 for (s = 0; s < (size_t)mat->nmap; s++)
354 leg[s+1] = gmx_strdup(map[s].desc);
357 fp = xvgropen(outfile, "Secondary Structure",
358 output_env_get_xvgr_tlabel(oenv), "Number of Residues", oenv);
359 if (output_env_get_print_xvgr_codes(oenv))
361 fprintf(fp, "@ subtitle \"Structure = ");
363 for (s = 0; s < strlen(ss_string); s++)
369 for (f = 0; f < mat->nmap; f++)
371 if (ss_string[s] == map[f].code.c1)
373 fprintf(fp, "%s", map[f].desc);
378 xvgr_legend(fp, mat->nmap+1, leg, oenv);
381 for (s = 0; s < (size_t)mat->nmap; s++)
385 for (f = 0; f < mat->nx; f++)
388 for (s = 0; s < (size_t)mat->nmap; s++)
392 for (r = 0; r < mat->ny; r++)
394 count[mat->matrix[f][r]]++;
395 total[mat->matrix[f][r]]++;
397 for (s = 0; s < (size_t)mat->nmap; s++)
399 if (strchr(ss_string, map[s].code.c1))
401 ss_count += count[s];
402 total_count += count[s];
405 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
406 for (s = 0; s < (size_t)mat->nmap; s++)
408 fprintf(fp, " %5d", count[s]);
412 /* now print column totals */
413 fprintf(fp, "%-8s %5d", "# Totals", total_count);
414 for (s = 0; s < (size_t)mat->nmap; s++)
416 fprintf(fp, " %5d", total[s]);
420 /* now print percentages */
421 fprintf(fp, "%-8s %5.2f", "# SS %", total_count / (real) (mat->nx * mat->ny));
422 for (s = 0; s < (size_t)mat->nmap; s++)
424 fprintf(fp, " %5.2f", total[s] / (real) (mat->nx * mat->ny));
433 int gmx_do_dssp(int argc, char *argv[])
435 const char *desc[] = {
437 "reads a trajectory file and computes the secondary structure for",
439 "calling the dssp program. If you do not have the dssp program,",
440 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
441 "that the dssp executable is located in ",
442 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
443 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
444 "executable, e.g.: [PAR]",
445 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
446 "Since version 2.0.0, dssp is invoked with a syntax that differs",
447 "from earlier versions. If you have an older version of dssp,",
448 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
449 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
450 "Even newer versions (which at the time of writing are not yet released)",
451 "are assumed to have the same syntax as 2.0.0.[PAR]",
452 "The structure assignment for each residue and time is written to an",
453 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
454 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
455 "Individual chains are separated by light grey lines in the [TT].xpm[tt] and",
457 "The number of residues with each secondary structure type and the",
458 "total secondary structure ([TT]-sss[tt]) count as a function of",
459 "time are also written to file ([TT]-sc[tt]).[PAR]",
460 "Solvent accessible surface (SAS) per residue can be calculated, both in",
461 "absolute values (A^2) and in fractions of the maximal accessible",
462 "surface of a residue. The maximal accessible surface is defined as",
463 "the accessible surface of a residue in a chain of glycines.",
464 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
465 "and that is more efficient.[PAR]",
466 "Finally, this program can dump the secondary structure in a special file",
467 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
468 "these two programs can be used to analyze dihedral properties as a",
469 "function of secondary structure type."
471 static gmx_bool bVerbose;
472 static const char *ss_string = "HEBT";
473 static int dsspVersion = 2;
475 { "-v", FALSE, etBOOL, {&bVerbose},
476 "HIDDENGenerate miles of useless information" },
477 { "-sss", FALSE, etSTR, {&ss_string},
478 "Secondary structures for structure count"},
479 { "-ver", FALSE, etINT, {&dsspVersion},
480 "DSSP major version. Syntax changed with version 2"}
485 FILE *ss, *acc, *fTArea, *tmpf;
486 const char *fnSCount, *fnArea, *fnTArea, *fnAArea;
487 const char *leg[] = { "Phobic", "Phylic" };
492 int nres, nr0, naccr, nres_plus_separators;
493 gmx_bool *bPhbres, bDoAccSurf;
495 int i, j, natoms, nframe = 0;
498 char *grpnm, *ss_str;
502 real **accr, *accr_ptr = NULL, *av_area, *norm_av_area;
503 char pdbfile[32], tmpfile[32], title[256];
507 gmx_rmpbc_t gpbc = NULL;
510 { efTRX, "-f", NULL, ffREAD },
511 { efTPS, NULL, NULL, ffREAD },
512 { efNDX, NULL, NULL, ffOPTRD },
513 { efDAT, "-ssdump", "ssdump", ffOPTWR },
514 { efMAP, "-map", "ss", ffLIBRD },
515 { efXPM, "-o", "ss", ffWRITE },
516 { efXVG, "-sc", "scount", ffWRITE },
517 { efXPM, "-a", "area", ffOPTWR },
518 { efXVG, "-ta", "totarea", ffOPTWR },
519 { efXVG, "-aa", "averarea", ffOPTWR }
521 #define NFILE asize(fnm)
523 if (!parse_common_args(&argc, argv,
524 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT,
525 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
529 fnSCount = opt2fn("-sc", NFILE, fnm);
530 fnArea = opt2fn_null("-a", NFILE, fnm);
531 fnTArea = opt2fn_null("-ta", NFILE, fnm);
532 fnAArea = opt2fn_null("-aa", NFILE, fnm);
533 bDoAccSurf = (fnArea || fnTArea || fnAArea);
535 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), title, &top, &ePBC, &xp, NULL, box, FALSE);
536 atoms = &(top.atoms);
538 bPhbres = bPhobics(atoms);
540 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
543 for (i = 0; (i < gnx); i++)
545 if (atoms->atom[index[i]].resind != nr0)
547 nr0 = atoms->atom[index[i]].resind;
551 fprintf(stderr, "There are %d residues in your selected group\n", nres);
553 strcpy(pdbfile, "ddXXXXXX");
555 if ((tmpf = fopen(pdbfile, "w")) == NULL)
557 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
559 if ((tmpf = fopen(pdbfile, "w")) == NULL)
561 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
569 strcpy(tmpfile, "ddXXXXXX");
571 if ((tmpf = fopen(tmpfile, "w")) == NULL)
573 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
575 if ((tmpf = fopen(tmpfile, "w")) == NULL)
577 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
585 if ((dptr = getenv("DSSP")) == NULL)
587 dptr = "/usr/local/bin/dssp";
589 if (!gmx_fexist(dptr))
591 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)",
594 if (dsspVersion >= 2)
598 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by do_dssp. Assuming version 2 syntax.\n\n", dsspVersion);
601 sprintf(dssp, "%s -i %s -o %s > /dev/null %s",
602 dptr, pdbfile, tmpfile, bVerbose ? "" : "2> /dev/null");
606 sprintf(dssp, "%s %s %s %s > /dev/null %s",
607 dptr, bDoAccSurf ? "" : "-na", pdbfile, tmpfile, bVerbose ? "" : "2> /dev/null");
610 fprintf(stderr, "dssp cmd='%s'\n", dssp);
614 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
615 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
616 xvgr_legend(fTArea, 2, leg, oenv);
624 mat.nmap = readcmap(opt2fn("-map", NFILE, fnm), &(mat.map));
626 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
627 if (natoms > atoms->nr)
629 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
633 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
636 snew(average_area, atoms->nres);
637 snew(av_area, atoms->nres);
638 snew(norm_av_area, atoms->nres);
642 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
645 t = output_env_conv_time(oenv, t);
646 if (bDoAccSurf && nframe >= naccr)
650 for (i = naccr-10; i < naccr; i++)
652 snew(accr[i], 2*atoms->nres-1);
655 gmx_rmpbc(gpbc, natoms, box, x);
656 tapein = gmx_ffopen(pdbfile, "w");
657 write_pdbfile_indexed(tapein, NULL, atoms, x, ePBC, box, ' ', -1, gnx, index, NULL, TRUE);
660 if (0 != system(dssp))
662 gmx_fatal(FARGS, "Failed to execute command: %s\n",
663 "Try specifying your dssp version with the -ver option.", dssp);
666 /* strip_dssp returns the number of lines found in the dssp file, i.e.
667 * the number of residues plus the separator lines */
671 accr_ptr = accr[nframe];
674 nres_plus_separators = strip_dssp(tmpfile, nres, bPhbres, t,
675 accr_ptr, fTArea, &mat, average_area, oenv);
680 while (read_next_x(oenv, status, &t, x, box));
681 fprintf(stderr, "\n");
687 gmx_rmpbc_done(gpbc);
689 prune_ss_legend(&mat);
691 ss = opt2FILE("-o", NFILE, fnm, "w");
693 write_xpm_m(ss, mat);
696 if (opt2bSet("-ssdump", NFILE, fnm))
698 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
699 snew(ss_str, nres+1);
700 fprintf(ss, "%d\n", nres);
701 for (j = 0; j < mat.nx; j++)
703 for (i = 0; (i < mat.ny); i++)
705 ss_str[i] = mat.map[mat.matrix[j][i]].code.c1;
708 fprintf(ss, "%s\n", ss_str);
713 analyse_ss(fnSCount, &mat, ss_string, oenv);
717 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
719 for (i = 0; i < atoms->nres; i++)
721 av_area[i] = (average_area[i] / (real)nframe);
724 norm_acc(atoms, nres, av_area, norm_av_area);
728 acc = xvgropen(fnAArea, "Average Accessible Area",
729 "Residue", "A\\S2", oenv);
730 for (i = 0; (i < nres); i++)
732 fprintf(acc, "%5d %10g %10g\n", i+1, av_area[i], norm_av_area[i]);
738 view_all(oenv, NFILE, fnm);