2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "gromacs/fileio/pdbio.h"
49 #include "gromacs/fileio/tpxio.h"
50 #include "gromacs/fileio/trxio.h"
53 #include "gromacs/commandline/pargs.h"
54 #include "gromacs/fileio/matio.h"
55 #include "gromacs/fileio/strdb.h"
56 #include "gromacs/fileio/xvgr.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/smalloc.h"
62 static int strip_dssp(char *dsspfile, int nres,
63 gmx_bool bPhobres[], real t,
64 real *acc, FILE *fTArea,
65 t_matrix *mat, int average_area[],
66 const output_env_t oenv)
68 static gmx_bool bFirst = TRUE;
71 static int xsize, frame;
74 int i, nr, iacc, nresidues;
75 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
79 tapeout = gmx_ffopen(dsspfile, "r");
84 fgets2(buf, STRLEN, tapeout);
86 while (strstr(buf, "KAPPA") == NULL);
89 /* Since we also have empty lines in the dssp output (temp) file,
90 * and some line content is saved to the ssbuf variable,
91 * we need more memory than just nres elements. To be shure,
92 * we allocate 2*nres-1, since for each chain there is a
93 * separating line in the temp file. (At most each residue
94 * could have been defined as a separate chain.) */
95 snew(ssbuf, 2*nres-1);
102 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != NULL); nr++)
104 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
106 SSTP = '='; /* Chain separator sign '=' */
110 SSTP = buf[16] == ' ' ? '~' : buf[16];
116 /* Only calculate solvent accessible area if needed */
117 if ((NULL != acc) && (buf[13] != '!'))
119 sscanf(&(buf[34]), "%d", &iacc);
121 /* average_area and bPhobres are counted from 0...nres-1 */
122 average_area[nresidues] += iacc;
123 if (bPhobres[nresidues])
133 /* Keep track of the residue number (do not count chain separator lines '!') */
143 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
146 sprintf(mat->title, "Secondary structure");
148 sprintf(mat->label_x, "%s", output_env_get_time_label(oenv));
149 sprintf(mat->label_y, "Residue");
150 mat->bDiscrete = TRUE;
152 snew(mat->axis_y, nr);
153 for (i = 0; i < nr; i++)
155 mat->axis_y[i] = i+1;
166 srenew(mat->axis_x, xsize);
167 srenew(mat->matrix, xsize);
169 mat->axis_x[frame] = t;
170 snew(mat->matrix[frame], nr);
172 for (i = 0; i < nr; i++)
175 mat->matrix[frame][i] = max(0, searchcmap(mat->nmap, mat->map, c));
182 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01*iaccb, 0.01*iaccf);
184 gmx_ffclose(tapeout);
186 /* Return the number of lines found in the dssp file (i.e. number
187 * of redidues plus chain separator lines).
188 * This is the number of y elements needed for the area xpm file */
192 static gmx_bool *bPhobics(t_atoms *atoms)
199 nb = get_strings("phbres.dat", &cb);
200 snew(bb, atoms->nres);
202 for (i = 0; (i < atoms->nres); i++)
204 if (-1 != search_str(nb, cb, *atoms->resinfo[i].name) )
212 static void check_oo(t_atoms *atoms)
220 for (i = 0; (i < atoms->nr); i++)
222 if (strcmp(*(atoms->atomname[i]), "OXT") == 0)
224 *atoms->atomname[i] = OOO;
226 else if (strcmp(*(atoms->atomname[i]), "O1") == 0)
228 *atoms->atomname[i] = OOO;
230 else if (strcmp(*(atoms->atomname[i]), "OC1") == 0)
232 *atoms->atomname[i] = OOO;
237 static void norm_acc(t_atoms *atoms, int nres,
238 real av_area[], real norm_av_area[])
242 char surffn[] = "surface.dat";
243 char **surf_res, **surf_lines;
246 n_surf = get_lines(surffn, &surf_lines);
248 snew(surf_res, n_surf);
249 for (i = 0; (i < n_surf); i++)
251 snew(surf_res[i], 5);
252 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
255 for (i = 0; (i < nres); i++)
257 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
260 norm_av_area[i] = av_area[i] / surf[n];
264 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
265 *atoms->resinfo[i].name, surffn);
270 void prune_ss_legend(t_matrix *mat)
274 int i, r, f, newnmap;
277 snew(present, mat->nmap);
278 snew(newnum, mat->nmap);
280 for (f = 0; f < mat->nx; f++)
282 for (r = 0; r < mat->ny; r++)
284 present[mat->matrix[f][r]] = TRUE;
290 for (i = 0; i < mat->nmap; i++)
297 srenew(newmap, newnmap);
298 newmap[newnmap-1] = mat->map[i];
301 if (newnmap != mat->nmap)
305 for (f = 0; f < mat->nx; f++)
307 for (r = 0; r < mat->ny; r++)
309 mat->matrix[f][r] = newnum[mat->matrix[f][r]];
315 void write_sas_mat(const char *fn, real **accr, int nframe, int nres, t_matrix *mat)
319 t_rgb rlo = {1, 1, 1}, rhi = {0, 0, 0};
324 hi = lo = accr[0][0];
325 for (i = 0; i < nframe; i++)
327 for (j = 0; j < nres; j++)
329 lo = min(lo, accr[i][j]);
330 hi = max(hi, accr[i][j]);
333 fp = gmx_ffopen(fn, "w");
335 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)",
336 "Time", "Residue Index", nframe, nres,
337 mat->axis_x, mat->axis_y, accr, lo, hi, rlo, rhi, &nlev);
342 void analyse_ss(const char *outfile, t_matrix *mat, const char *ss_string,
343 const output_env_t oenv)
347 int f, r, *count, *total, ss_count, total_count;
352 snew(count, mat->nmap);
353 snew(total, mat->nmap);
354 snew(leg, mat->nmap+1);
355 leg[0] = "Structure";
356 for (s = 0; s < (size_t)mat->nmap; s++)
358 leg[s+1] = strdup(map[s].desc);
361 fp = xvgropen(outfile, "Secondary Structure",
362 output_env_get_xvgr_tlabel(oenv), "Number of Residues", oenv);
363 if (output_env_get_print_xvgr_codes(oenv))
365 fprintf(fp, "@ subtitle \"Structure = ");
367 for (s = 0; s < strlen(ss_string); s++)
373 for (f = 0; f < mat->nmap; f++)
375 if (ss_string[s] == map[f].code.c1)
377 fprintf(fp, "%s", map[f].desc);
382 xvgr_legend(fp, mat->nmap+1, leg, oenv);
385 for (s = 0; s < (size_t)mat->nmap; s++)
389 for (f = 0; f < mat->nx; f++)
392 for (s = 0; s < (size_t)mat->nmap; s++)
396 for (r = 0; r < mat->ny; r++)
398 count[mat->matrix[f][r]]++;
399 total[mat->matrix[f][r]]++;
401 for (s = 0; s < (size_t)mat->nmap; s++)
403 if (strchr(ss_string, map[s].code.c1))
405 ss_count += count[s];
406 total_count += count[s];
409 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
410 for (s = 0; s < (size_t)mat->nmap; s++)
412 fprintf(fp, " %5d", count[s]);
416 /* now print column totals */
417 fprintf(fp, "%-8s %5d", "# Totals", total_count);
418 for (s = 0; s < (size_t)mat->nmap; s++)
420 fprintf(fp, " %5d", total[s]);
424 /* now print percentages */
425 fprintf(fp, "%-8s %5.2f", "# SS %", total_count / (real) (mat->nx * mat->ny));
426 for (s = 0; s < (size_t)mat->nmap; s++)
428 fprintf(fp, " %5.2f", total[s] / (real) (mat->nx * mat->ny));
437 int gmx_do_dssp(int argc, char *argv[])
439 const char *desc[] = {
441 "reads a trajectory file and computes the secondary structure for",
443 "calling the dssp program. If you do not have the dssp program,",
444 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
445 "that the dssp executable is located in ",
446 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
447 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
448 "executable, e.g.: [PAR]",
449 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
450 "Since version 2.0.0, dssp is invoked with a syntax that differs",
451 "from earlier versions. If you have an older version of dssp,",
452 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
453 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
454 "Even newer versions (which at the time of writing are not yet released)",
455 "are assumed to have the same syntax as 2.0.0.[PAR]",
456 "The structure assignment for each residue and time is written to an",
457 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
458 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
459 "Individual chains are separated by light grey lines in the [TT].xpm[tt] and",
461 "The number of residues with each secondary structure type and the",
462 "total secondary structure ([TT]-sss[tt]) count as a function of",
463 "time are also written to file ([TT]-sc[tt]).[PAR]",
464 "Solvent accessible surface (SAS) per residue can be calculated, both in",
465 "absolute values (A^2) and in fractions of the maximal accessible",
466 "surface of a residue. The maximal accessible surface is defined as",
467 "the accessible surface of a residue in a chain of glycines.",
468 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
469 "and that is more efficient.[PAR]",
470 "Finally, this program can dump the secondary structure in a special file",
471 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
472 "these two programs can be used to analyze dihedral properties as a",
473 "function of secondary structure type."
475 static gmx_bool bVerbose;
476 static const char *ss_string = "HEBT";
477 static int dsspVersion = 2;
479 { "-v", FALSE, etBOOL, {&bVerbose},
480 "HIDDENGenerate miles of useless information" },
481 { "-sss", FALSE, etSTR, {&ss_string},
482 "Secondary structures for structure count"},
483 { "-ver", FALSE, etINT, {&dsspVersion},
484 "DSSP major version. Syntax changed with version 2"}
489 FILE *ss, *acc, *fTArea, *tmpf;
490 const char *fnSCount, *fnArea, *fnTArea, *fnAArea;
491 const char *leg[] = { "Phobic", "Phylic" };
496 int nres, nr0, naccr, nres_plus_separators;
497 gmx_bool *bPhbres, bDoAccSurf;
499 int i, j, natoms, nframe = 0;
502 char *grpnm, *ss_str;
506 real **accr, *accr_ptr = NULL, *av_area, *norm_av_area;
507 char pdbfile[32], tmpfile[32], title[256];
511 gmx_rmpbc_t gpbc = NULL;
514 { efTRX, "-f", NULL, ffREAD },
515 { efTPS, NULL, NULL, ffREAD },
516 { efNDX, NULL, NULL, ffOPTRD },
517 { efDAT, "-ssdump", "ssdump", ffOPTWR },
518 { efMAP, "-map", "ss", ffLIBRD },
519 { efXPM, "-o", "ss", ffWRITE },
520 { efXVG, "-sc", "scount", ffWRITE },
521 { efXPM, "-a", "area", ffOPTWR },
522 { efXVG, "-ta", "totarea", ffOPTWR },
523 { efXVG, "-aa", "averarea", ffOPTWR }
525 #define NFILE asize(fnm)
527 if (!parse_common_args(&argc, argv,
528 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT | PCA_BE_NICE,
529 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
533 fnSCount = opt2fn("-sc", NFILE, fnm);
534 fnArea = opt2fn_null("-a", NFILE, fnm);
535 fnTArea = opt2fn_null("-ta", NFILE, fnm);
536 fnAArea = opt2fn_null("-aa", NFILE, fnm);
537 bDoAccSurf = (fnArea || fnTArea || fnAArea);
539 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), title, &top, &ePBC, &xp, NULL, box, FALSE);
540 atoms = &(top.atoms);
542 bPhbres = bPhobics(atoms);
544 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
547 for (i = 0; (i < gnx); i++)
549 if (atoms->atom[index[i]].resind != nr0)
551 nr0 = atoms->atom[index[i]].resind;
555 fprintf(stderr, "There are %d residues in your selected group\n", nres);
557 strcpy(pdbfile, "ddXXXXXX");
559 if ((tmpf = fopen(pdbfile, "w")) == NULL)
561 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
563 if ((tmpf = fopen(pdbfile, "w")) == NULL)
565 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
573 strcpy(tmpfile, "ddXXXXXX");
575 if ((tmpf = fopen(tmpfile, "w")) == NULL)
577 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
579 if ((tmpf = fopen(tmpfile, "w")) == NULL)
581 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
589 if ((dptr = getenv("DSSP")) == NULL)
591 dptr = "/usr/local/bin/dssp";
593 if (!gmx_fexist(dptr))
595 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)",
598 if (dsspVersion >= 2)
602 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by do_dssp. Assuming version 2 syntax.\n\n", dsspVersion);
605 sprintf(dssp, "%s -i %s -o %s > /dev/null %s",
606 dptr, pdbfile, tmpfile, bVerbose ? "" : "2> /dev/null");
610 sprintf(dssp, "%s %s %s %s > /dev/null %s",
611 dptr, bDoAccSurf ? "" : "-na", pdbfile, tmpfile, bVerbose ? "" : "2> /dev/null");
614 fprintf(stderr, "dssp cmd='%s'\n", dssp);
618 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
619 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
620 xvgr_legend(fTArea, 2, leg, oenv);
628 mat.nmap = readcmap(opt2fn("-map", NFILE, fnm), &(mat.map));
630 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
631 if (natoms > atoms->nr)
633 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
637 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
640 snew(average_area, atoms->nres);
641 snew(av_area, atoms->nres);
642 snew(norm_av_area, atoms->nres);
646 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
649 t = output_env_conv_time(oenv, t);
650 if (bDoAccSurf && nframe >= naccr)
654 for (i = naccr-10; i < naccr; i++)
656 snew(accr[i], 2*atoms->nres-1);
659 gmx_rmpbc(gpbc, natoms, box, x);
660 tapein = gmx_ffopen(pdbfile, "w");
661 write_pdbfile_indexed(tapein, NULL, atoms, x, ePBC, box, ' ', -1, gnx, index, NULL, TRUE);
665 printf("Warning-- No calls to system(3) supported on this platform.");
666 printf("Warning-- Skipping execution of 'system(\"%s\")'.", dssp);
669 if (0 != system(dssp))
671 gmx_fatal(FARGS, "Failed to execute command: %s\n",
672 "Try specifying your dssp version with the -ver option.", dssp);
676 /* strip_dssp returns the number of lines found in the dssp file, i.e.
677 * the number of residues plus the separator lines */
681 accr_ptr = accr[nframe];
684 nres_plus_separators = strip_dssp(tmpfile, nres, bPhbres, t,
685 accr_ptr, fTArea, &mat, average_area, oenv);
690 while (read_next_x(oenv, status, &t, x, box));
691 fprintf(stderr, "\n");
697 gmx_rmpbc_done(gpbc);
699 prune_ss_legend(&mat);
701 ss = opt2FILE("-o", NFILE, fnm, "w");
703 write_xpm_m(ss, mat);
706 if (opt2bSet("-ssdump", NFILE, fnm))
708 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
709 snew(ss_str, nres+1);
710 fprintf(ss, "%d\n", nres);
711 for (j = 0; j < mat.nx; j++)
713 for (i = 0; (i < mat.ny); i++)
715 ss_str[i] = mat.map[mat.matrix[j][i]].code.c1;
718 fprintf(ss, "%s\n", ss_str);
723 analyse_ss(fnSCount, &mat, ss_string, oenv);
727 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
729 for (i = 0; i < atoms->nres; i++)
731 av_area[i] = (average_area[i] / (real)nframe);
734 norm_acc(atoms, nres, av_area, norm_av_area);
738 acc = xvgropen(fnAArea, "Average Accessible Area",
739 "Residue", "A\\S2", oenv);
740 for (i = 0; (i < nres); i++)
742 fprintf(acc, "%5d %10g %10g\n", i+1, av_area[i], norm_av_area[i]);
748 view_all(oenv, NFILE, fnm);