2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/legacyheaders/typedefs.h"
44 #include "gromacs/legacyheaders/macros.h"
45 #include "gromacs/fileio/pdbio.h"
46 #include "gromacs/topology/index.h"
48 #include "gromacs/fileio/tpxio.h"
49 #include "gromacs/fileio/trxio.h"
50 #include "gromacs/legacyheaders/viewit.h"
52 #include "gromacs/commandline/pargs.h"
53 #include "gromacs/fileio/matio.h"
54 #include "gromacs/fileio/strdb.h"
55 #include "gromacs/fileio/xvgr.h"
56 #include "gromacs/pbcutil/rmpbc.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/smalloc.h"
61 static int strip_dssp(char *dsspfile, int nres,
62 gmx_bool bPhobres[], real t,
63 real *acc, FILE *fTArea,
64 t_matrix *mat, int average_area[],
65 const output_env_t oenv)
67 static gmx_bool bFirst = TRUE;
70 static int xsize, frame;
73 int i, nr, iacc, nresidues;
74 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
78 tapeout = gmx_ffopen(dsspfile, "r");
83 fgets2(buf, STRLEN, tapeout);
85 while (strstr(buf, "KAPPA") == NULL);
88 /* Since we also have empty lines in the dssp output (temp) file,
89 * and some line content is saved to the ssbuf variable,
90 * we need more memory than just nres elements. To be shure,
91 * we allocate 2*nres-1, since for each chain there is a
92 * separating line in the temp file. (At most each residue
93 * could have been defined as a separate chain.) */
94 snew(ssbuf, 2*nres-1);
101 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != NULL); nr++)
103 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
105 SSTP = '='; /* Chain separator sign '=' */
109 SSTP = buf[16] == ' ' ? '~' : buf[16];
115 /* Only calculate solvent accessible area if needed */
116 if ((NULL != acc) && (buf[13] != '!'))
118 sscanf(&(buf[34]), "%d", &iacc);
120 /* average_area and bPhobres are counted from 0...nres-1 */
121 average_area[nresidues] += iacc;
122 if (bPhobres[nresidues])
132 /* Keep track of the residue number (do not count chain separator lines '!') */
142 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
145 sprintf(mat->title, "Secondary structure");
147 sprintf(mat->label_x, "%s", output_env_get_time_label(oenv));
148 sprintf(mat->label_y, "Residue");
149 mat->bDiscrete = TRUE;
151 snew(mat->axis_y, nr);
152 for (i = 0; i < nr; i++)
154 mat->axis_y[i] = i+1;
165 srenew(mat->axis_x, xsize);
166 srenew(mat->matrix, xsize);
168 mat->axis_x[frame] = t;
169 snew(mat->matrix[frame], nr);
171 for (i = 0; i < nr; i++)
174 mat->matrix[frame][i] = max(0, searchcmap(mat->nmap, mat->map, c));
181 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01*iaccb, 0.01*iaccf);
183 gmx_ffclose(tapeout);
185 /* Return the number of lines found in the dssp file (i.e. number
186 * of redidues plus chain separator lines).
187 * This is the number of y elements needed for the area xpm file */
191 static gmx_bool *bPhobics(t_atoms *atoms)
198 nb = get_strings("phbres.dat", &cb);
199 snew(bb, atoms->nres);
201 for (i = 0; (i < atoms->nres); i++)
203 if (-1 != search_str(nb, cb, *atoms->resinfo[i].name) )
211 static void check_oo(t_atoms *atoms)
217 OOO = gmx_strdup("O");
219 for (i = 0; (i < atoms->nr); i++)
221 if (strcmp(*(atoms->atomname[i]), "OXT") == 0)
223 *atoms->atomname[i] = OOO;
225 else if (strcmp(*(atoms->atomname[i]), "O1") == 0)
227 *atoms->atomname[i] = OOO;
229 else if (strcmp(*(atoms->atomname[i]), "OC1") == 0)
231 *atoms->atomname[i] = OOO;
236 static void norm_acc(t_atoms *atoms, int nres,
237 real av_area[], real norm_av_area[])
241 char surffn[] = "surface.dat";
242 char **surf_res, **surf_lines;
245 n_surf = get_lines(surffn, &surf_lines);
247 snew(surf_res, n_surf);
248 for (i = 0; (i < n_surf); i++)
250 snew(surf_res[i], 5);
251 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
254 for (i = 0; (i < nres); i++)
256 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
259 norm_av_area[i] = av_area[i] / surf[n];
263 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
264 *atoms->resinfo[i].name, surffn);
269 void prune_ss_legend(t_matrix *mat)
273 int i, r, f, newnmap;
276 snew(present, mat->nmap);
277 snew(newnum, mat->nmap);
279 for (f = 0; f < mat->nx; f++)
281 for (r = 0; r < mat->ny; r++)
283 present[mat->matrix[f][r]] = TRUE;
289 for (i = 0; i < mat->nmap; i++)
296 srenew(newmap, newnmap);
297 newmap[newnmap-1] = mat->map[i];
300 if (newnmap != mat->nmap)
304 for (f = 0; f < mat->nx; f++)
306 for (r = 0; r < mat->ny; r++)
308 mat->matrix[f][r] = newnum[mat->matrix[f][r]];
314 void write_sas_mat(const char *fn, real **accr, int nframe, int nres, t_matrix *mat)
318 t_rgb rlo = {1, 1, 1}, rhi = {0, 0, 0};
323 hi = lo = accr[0][0];
324 for (i = 0; i < nframe; i++)
326 for (j = 0; j < nres; j++)
328 lo = min(lo, accr[i][j]);
329 hi = max(hi, accr[i][j]);
332 fp = gmx_ffopen(fn, "w");
334 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)",
335 "Time", "Residue Index", nframe, nres,
336 mat->axis_x, mat->axis_y, accr, lo, hi, rlo, rhi, &nlev);
341 void analyse_ss(const char *outfile, t_matrix *mat, const char *ss_string,
342 const output_env_t oenv)
346 int f, r, *count, *total, ss_count, total_count;
351 snew(count, mat->nmap);
352 snew(total, mat->nmap);
353 snew(leg, mat->nmap+1);
354 leg[0] = "Structure";
355 for (s = 0; s < (size_t)mat->nmap; s++)
357 leg[s+1] = gmx_strdup(map[s].desc);
360 fp = xvgropen(outfile, "Secondary Structure",
361 output_env_get_xvgr_tlabel(oenv), "Number of Residues", oenv);
362 if (output_env_get_print_xvgr_codes(oenv))
364 fprintf(fp, "@ subtitle \"Structure = ");
366 for (s = 0; s < strlen(ss_string); s++)
372 for (f = 0; f < mat->nmap; f++)
374 if (ss_string[s] == map[f].code.c1)
376 fprintf(fp, "%s", map[f].desc);
381 xvgr_legend(fp, mat->nmap+1, leg, oenv);
384 for (s = 0; s < (size_t)mat->nmap; s++)
388 for (f = 0; f < mat->nx; f++)
391 for (s = 0; s < (size_t)mat->nmap; s++)
395 for (r = 0; r < mat->ny; r++)
397 count[mat->matrix[f][r]]++;
398 total[mat->matrix[f][r]]++;
400 for (s = 0; s < (size_t)mat->nmap; s++)
402 if (strchr(ss_string, map[s].code.c1))
404 ss_count += count[s];
405 total_count += count[s];
408 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
409 for (s = 0; s < (size_t)mat->nmap; s++)
411 fprintf(fp, " %5d", count[s]);
415 /* now print column totals */
416 fprintf(fp, "%-8s %5d", "# Totals", total_count);
417 for (s = 0; s < (size_t)mat->nmap; s++)
419 fprintf(fp, " %5d", total[s]);
423 /* now print percentages */
424 fprintf(fp, "%-8s %5.2f", "# SS %", total_count / (real) (mat->nx * mat->ny));
425 for (s = 0; s < (size_t)mat->nmap; s++)
427 fprintf(fp, " %5.2f", total[s] / (real) (mat->nx * mat->ny));
436 int gmx_do_dssp(int argc, char *argv[])
438 const char *desc[] = {
440 "reads a trajectory file and computes the secondary structure for",
442 "calling the dssp program. If you do not have the dssp program,",
443 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
444 "that the dssp executable is located in ",
445 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
446 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
447 "executable, e.g.: [PAR]",
448 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
449 "Since version 2.0.0, dssp is invoked with a syntax that differs",
450 "from earlier versions. If you have an older version of dssp,",
451 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
452 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
453 "Even newer versions (which at the time of writing are not yet released)",
454 "are assumed to have the same syntax as 2.0.0.[PAR]",
455 "The structure assignment for each residue and time is written to an",
456 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
457 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
458 "Individual chains are separated by light grey lines in the [TT].xpm[tt] and",
460 "The number of residues with each secondary structure type and the",
461 "total secondary structure ([TT]-sss[tt]) count as a function of",
462 "time are also written to file ([TT]-sc[tt]).[PAR]",
463 "Solvent accessible surface (SAS) per residue can be calculated, both in",
464 "absolute values (A^2) and in fractions of the maximal accessible",
465 "surface of a residue. The maximal accessible surface is defined as",
466 "the accessible surface of a residue in a chain of glycines.",
467 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
468 "and that is more efficient.[PAR]",
469 "Finally, this program can dump the secondary structure in a special file",
470 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
471 "these two programs can be used to analyze dihedral properties as a",
472 "function of secondary structure type."
474 static gmx_bool bVerbose;
475 static const char *ss_string = "HEBT";
476 static int dsspVersion = 2;
478 { "-v", FALSE, etBOOL, {&bVerbose},
479 "HIDDENGenerate miles of useless information" },
480 { "-sss", FALSE, etSTR, {&ss_string},
481 "Secondary structures for structure count"},
482 { "-ver", FALSE, etINT, {&dsspVersion},
483 "DSSP major version. Syntax changed with version 2"}
488 FILE *ss, *acc, *fTArea, *tmpf;
489 const char *fnSCount, *fnArea, *fnTArea, *fnAArea;
490 const char *leg[] = { "Phobic", "Phylic" };
495 int nres, nr0, naccr, nres_plus_separators;
496 gmx_bool *bPhbres, bDoAccSurf;
498 int i, j, natoms, nframe = 0;
501 char *grpnm, *ss_str;
505 real **accr, *accr_ptr = NULL, *av_area, *norm_av_area;
506 char pdbfile[32], tmpfile[32], title[256];
510 gmx_rmpbc_t gpbc = NULL;
513 { efTRX, "-f", NULL, ffREAD },
514 { efTPS, NULL, NULL, ffREAD },
515 { efNDX, NULL, NULL, ffOPTRD },
516 { efDAT, "-ssdump", "ssdump", ffOPTWR },
517 { efMAP, "-map", "ss", ffLIBRD },
518 { efXPM, "-o", "ss", ffWRITE },
519 { efXVG, "-sc", "scount", ffWRITE },
520 { efXPM, "-a", "area", ffOPTWR },
521 { efXVG, "-ta", "totarea", ffOPTWR },
522 { efXVG, "-aa", "averarea", ffOPTWR }
524 #define NFILE asize(fnm)
526 if (!parse_common_args(&argc, argv,
527 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT,
528 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
532 fnSCount = opt2fn("-sc", NFILE, fnm);
533 fnArea = opt2fn_null("-a", NFILE, fnm);
534 fnTArea = opt2fn_null("-ta", NFILE, fnm);
535 fnAArea = opt2fn_null("-aa", NFILE, fnm);
536 bDoAccSurf = (fnArea || fnTArea || fnAArea);
538 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), title, &top, &ePBC, &xp, NULL, box, FALSE);
539 atoms = &(top.atoms);
541 bPhbres = bPhobics(atoms);
543 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
546 for (i = 0; (i < gnx); i++)
548 if (atoms->atom[index[i]].resind != nr0)
550 nr0 = atoms->atom[index[i]].resind;
554 fprintf(stderr, "There are %d residues in your selected group\n", nres);
556 strcpy(pdbfile, "ddXXXXXX");
558 if ((tmpf = fopen(pdbfile, "w")) == NULL)
560 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
562 if ((tmpf = fopen(pdbfile, "w")) == NULL)
564 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
572 strcpy(tmpfile, "ddXXXXXX");
574 if ((tmpf = fopen(tmpfile, "w")) == NULL)
576 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
578 if ((tmpf = fopen(tmpfile, "w")) == NULL)
580 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
588 if ((dptr = getenv("DSSP")) == NULL)
590 dptr = "/usr/local/bin/dssp";
592 if (!gmx_fexist(dptr))
594 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)",
597 if (dsspVersion >= 2)
601 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by do_dssp. Assuming version 2 syntax.\n\n", dsspVersion);
604 sprintf(dssp, "%s -i %s -o %s > /dev/null %s",
605 dptr, pdbfile, tmpfile, bVerbose ? "" : "2> /dev/null");
609 sprintf(dssp, "%s %s %s %s > /dev/null %s",
610 dptr, bDoAccSurf ? "" : "-na", pdbfile, tmpfile, bVerbose ? "" : "2> /dev/null");
613 fprintf(stderr, "dssp cmd='%s'\n", dssp);
617 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
618 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
619 xvgr_legend(fTArea, 2, leg, oenv);
627 mat.nmap = readcmap(opt2fn("-map", NFILE, fnm), &(mat.map));
629 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
630 if (natoms > atoms->nr)
632 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
636 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
639 snew(average_area, atoms->nres);
640 snew(av_area, atoms->nres);
641 snew(norm_av_area, atoms->nres);
645 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
648 t = output_env_conv_time(oenv, t);
649 if (bDoAccSurf && nframe >= naccr)
653 for (i = naccr-10; i < naccr; i++)
655 snew(accr[i], 2*atoms->nres-1);
658 gmx_rmpbc(gpbc, natoms, box, x);
659 tapein = gmx_ffopen(pdbfile, "w");
660 write_pdbfile_indexed(tapein, NULL, atoms, x, ePBC, box, ' ', -1, gnx, index, NULL, TRUE);
664 printf("Warning-- No calls to system(3) supported on this platform.");
665 printf("Warning-- Skipping execution of 'system(\"%s\")'.", dssp);
668 if (0 != system(dssp))
670 gmx_fatal(FARGS, "Failed to execute command: %s\n",
671 "Try specifying your dssp version with the -ver option.", dssp);
675 /* strip_dssp returns the number of lines found in the dssp file, i.e.
676 * the number of residues plus the separator lines */
680 accr_ptr = accr[nframe];
683 nres_plus_separators = strip_dssp(tmpfile, nres, bPhbres, t,
684 accr_ptr, fTArea, &mat, average_area, oenv);
689 while (read_next_x(oenv, status, &t, x, box));
690 fprintf(stderr, "\n");
696 gmx_rmpbc_done(gpbc);
698 prune_ss_legend(&mat);
700 ss = opt2FILE("-o", NFILE, fnm, "w");
702 write_xpm_m(ss, mat);
705 if (opt2bSet("-ssdump", NFILE, fnm))
707 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
708 snew(ss_str, nres+1);
709 fprintf(ss, "%d\n", nres);
710 for (j = 0; j < mat.nx; j++)
712 for (i = 0; (i < mat.ny); i++)
714 ss_str[i] = mat.map[mat.matrix[j][i]].code.c1;
717 fprintf(ss, "%s\n", ss_str);
722 analyse_ss(fnSCount, &mat, ss_string, oenv);
726 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
728 for (i = 0; i < atoms->nres; i++)
730 av_area[i] = (average_area[i] / (real)nframe);
733 norm_acc(atoms, nres, av_area, norm_av_area);
737 acc = xvgropen(fnAArea, "Average Accessible Area",
738 "Residue", "A\\S2", oenv);
739 for (i = 0; (i < nres); i++)
741 fprintf(acc, "%5d %10g %10g\n", i+1, av_area[i], norm_av_area[i]);
747 view_all(oenv, NFILE, fnm);