2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/commandline/pargs.h"
48 #include "gromacs/commandline/viewit.h"
49 #include "gromacs/fileio/confio.h"
50 #include "gromacs/fileio/matio.h"
51 #include "gromacs/fileio/pdbio.h"
52 #include "gromacs/fileio/trxio.h"
53 #include "gromacs/fileio/xvgr.h"
54 #include "gromacs/gmxana/gmx_ana.h"
55 #include "gromacs/gmxana/gstat.h"
56 #include "gromacs/pbcutil/rmpbc.h"
57 #include "gromacs/topology/index.h"
58 #include "gromacs/topology/topology.h"
59 #include "gromacs/utility/arraysize.h"
60 #include "gromacs/utility/cstringutil.h"
61 #include "gromacs/utility/dir_separator.h"
62 #include "gromacs/utility/fatalerror.h"
63 #include "gromacs/utility/futil.h"
64 #include "gromacs/utility/smalloc.h"
65 #include "gromacs/utility/strdb.h"
67 #if GMX_NATIVE_WINDOWS
68 # define NULL_DEVICE "nul"
70 # define pclose _pclose
72 # define NULL_DEVICE "/dev/null"
75 struct DsspInputStrings
82 static const char* prepareToRedirectStdout(bool bVerbose)
84 return bVerbose ? "" : "2>" NULL_DEVICE;
87 static void printDsspResult(char* dssp, const DsspInputStrings& strings, const std::string& redirectionString)
89 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
90 sprintf(dssp, "%s -i %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(), redirectionString.c_str());
92 sprintf(dssp, "%s -i %s -o %s > %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(),
93 strings.tmpfile.c_str(), NULL_DEVICE, redirectionString.c_str());
98 static int strip_dssp(FILE* tapeout,
100 const gmx_bool bPhobres[],
106 const gmx_output_env_t* oenv)
108 static gmx_bool bFirst = TRUE;
110 char buf[STRLEN + 1];
112 int nr, iacc, nresidues;
113 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
120 fgets2(buf, STRLEN, tapeout);
121 } while (std::strstr(buf, "KAPPA") == nullptr);
124 /* Since we also have empty lines in the dssp output (temp) file,
125 * and some line content is saved to the ssbuf variable,
126 * we need more memory than just nres elements. To be shure,
127 * we allocate 2*nres-1, since for each chain there is a
128 * separating line in the temp file. (At most each residue
129 * could have been defined as a separate chain.) */
130 snew(ssbuf, 2 * nres - 1);
137 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != nullptr); nr++)
139 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
141 SSTP = '='; /* Chain separator sign '=' */
145 SSTP = buf[16] == ' ' ? '~' : buf[16];
151 /* Only calculate solvent accessible area if needed */
152 if ((nullptr != acc) && (buf[13] != '!'))
154 sscanf(&(buf[34]), "%d", &iacc);
156 /* average_area and bPhobres are counted from 0...nres-1 */
157 average_area[nresidues] += iacc;
158 if (bPhobres[nresidues])
168 /* Keep track of the residue number (do not count chain separator lines '!') */
178 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n",
182 mat->title = "Secondary structure";
184 mat->label_x = output_env_get_time_label(oenv);
185 mat->label_y = "Residue";
186 mat->bDiscrete = true;
188 mat->axis_y.resize(nr);
189 std::iota(mat->axis_y.begin(), mat->axis_y.end(), 1);
190 mat->axis_x.resize(0);
191 mat->matrix.resize(1, 1);
194 mat->axis_x.push_back(t);
195 mat->matrix.resize(mat->matrix.extent(0), nr);
196 mat->nx = mat->matrix.extent(0);
197 auto columnIndex = mat->nx - 1;
198 for (int i = 0; i < nr; i++)
200 t_xpmelmt c = { ssbuf[i], 0 };
201 mat->matrix(columnIndex, i) = std::max(static_cast<t_matelmt>(0), searchcmap(mat->map, c));
206 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01 * iaccb, 0.01 * iaccf);
209 /* Return the number of lines found in the dssp file (i.e. number
210 * of redidues plus chain separator lines).
211 * This is the number of y elements needed for the area xpm file */
215 static gmx_bool* bPhobics(t_atoms* atoms)
221 char surffn[] = "surface.dat";
222 char ** surf_res, **surf_lines;
225 nb = get_lines("phbres.dat", &cb);
226 snew(bb, atoms->nres);
228 n_surf = get_lines(surffn, &surf_lines);
229 snew(surf_res, n_surf);
230 for (i = 0; (i < n_surf); i++)
232 snew(surf_res[i], 5);
233 sscanf(surf_lines[i], "%s", surf_res[i]);
237 for (i = 0, j = 0; (i < atoms->nres); i++)
239 if (-1 != search_str(n_surf, surf_res, *atoms->resinfo[i].name))
241 bb[j++] = (-1 != search_str(nb, cb, *atoms->resinfo[i].name));
248 "Not all residues were recognized (%d from %d), the result may be inaccurate!\n", j, i);
251 for (i = 0; (i < n_surf); i++)
260 static void check_oo(t_atoms* atoms)
266 OOO = gmx_strdup("O");
268 for (i = 0; (i < atoms->nr); i++)
270 if (std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
272 *atoms->atomname[i] = OOO;
274 else if (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
276 *atoms->atomname[i] = OOO;
278 else if (std::strcmp(*(atoms->atomname[i]), "OC1") == 0)
280 *atoms->atomname[i] = OOO;
285 static void norm_acc(t_atoms* atoms, int nres, const real av_area[], real norm_av_area[])
289 char surffn[] = "surface.dat";
290 char ** surf_res, **surf_lines;
293 n_surf = get_lines(surffn, &surf_lines);
295 snew(surf_res, n_surf);
296 for (i = 0; (i < n_surf); i++)
298 snew(surf_res[i], 5);
299 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
302 for (i = 0; (i < nres); i++)
304 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
307 norm_av_area[i] = av_area[i] / surf[n];
311 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
312 *atoms->resinfo[i].name, surffn);
317 static void prune_ss_legend(t_matrix* mat)
319 std::vector<bool> isPresent(mat->map.size());
320 std::vector<int> newnum(mat->map.size());
321 std::vector<t_mapping> newmap;
323 for (int f = 0; f < mat->nx; f++)
325 for (int r = 0; r < mat->ny; r++)
327 isPresent[mat->matrix(f, r)] = true;
331 for (size_t i = 0; i < mat->map.size(); i++)
336 newnum[i] = newmap.size();
337 newmap.emplace_back(mat->map[i]);
340 if (newmap.size() != mat->map.size())
342 std::swap(mat->map, newmap);
343 for (int f = 0; f < mat->nx; f++)
345 for (int r = 0; r < mat->ny; r++)
347 mat->matrix(f, r) = newnum[mat->matrix(f, r)];
353 static void write_sas_mat(const char* fn, real** accr, int nframe, int nres, t_matrix* mat)
357 t_rgb rlo = { 1, 1, 1 }, rhi = { 0, 0, 0 };
362 hi = lo = accr[0][0];
363 for (i = 0; i < nframe; i++)
365 for (j = 0; j < nres; j++)
367 lo = std::min(lo, accr[i][j]);
368 hi = std::max(hi, accr[i][j]);
371 fp = gmx_ffopen(fn, "w");
372 nlev = static_cast<int>(hi - lo + 1);
373 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)", "Time", "Residue Index",
374 nframe, nres, mat->axis_x.data(), mat->axis_y.data(), accr, lo, hi, rlo, rhi, &nlev);
379 static void analyse_ss(const char* outfile, t_matrix* mat, const char* ss_string, const gmx_output_env_t* oenv)
382 int ss_count, total_count;
384 gmx::ArrayRef<t_mapping> map = mat->map;
385 std::vector<int> count(map.size());
386 std::vector<int> total(map.size(), 0);
387 // This copying would not be necessary if xvgr_legend could take a
388 // view of string views
389 std::vector<std::string> leg;
390 leg.reserve(map.size() + 1);
391 leg.emplace_back("Structure");
392 for (const auto& m : map)
394 leg.emplace_back(m.desc);
397 fp = xvgropen(outfile, "Secondary Structure", output_env_get_xvgr_tlabel(oenv),
398 "Number of Residues", oenv);
399 if (output_env_get_print_xvgr_codes(oenv))
401 fprintf(fp, "@ subtitle \"Structure = ");
403 for (size_t s = 0; s < std::strlen(ss_string); s++)
409 for (const auto& m : map)
411 if (ss_string[s] == m.code.c1)
413 fprintf(fp, "%s", m.desc);
418 xvgrLegend(fp, leg, oenv);
421 for (int f = 0; f < mat->nx; f++)
424 for (auto& c : count)
428 for (int r = 0; r < mat->ny; r++)
430 count[mat->matrix(f, r)]++;
431 total[mat->matrix(f, r)]++;
433 for (gmx::index s = 0; s != gmx::ssize(map); ++s)
435 if (std::strchr(ss_string, map[s].code.c1))
437 ss_count += count[s];
438 total_count += count[s];
441 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
442 for (const auto& c : count)
444 fprintf(fp, " %5d", c);
448 /* now print column totals */
449 fprintf(fp, "%-8s %5d", "# Totals", total_count);
450 for (const auto& t : total)
452 fprintf(fp, " %5d", t);
456 /* now print probabilities */
457 fprintf(fp, "%-8s %5.2f", "# SS pr.", total_count / static_cast<real>(mat->nx * mat->ny));
458 for (const auto& t : total)
460 fprintf(fp, " %5.2f", t / static_cast<real>(mat->nx * mat->ny));
467 int gmx_do_dssp(int argc, char* argv[])
469 const char* desc[] = {
471 "reads a trajectory file and computes the secondary structure for",
473 "calling the dssp program. If you do not have the dssp program,",
474 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
475 "that the dssp executable is located in ",
476 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
477 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
478 "executable, e.g.: [PAR]",
479 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
480 "Since version 2.0.0, dssp is invoked with a syntax that differs",
481 "from earlier versions. If you have an older version of dssp,",
482 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
483 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
484 "Even newer versions (which at the time of writing are not yet released)",
485 "are assumed to have the same syntax as 2.0.0.[PAR]",
486 "The structure assignment for each residue and time is written to an",
487 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
488 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
489 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
491 "The number of residues with each secondary structure type and the",
492 "total secondary structure ([TT]-sss[tt]) count as a function of",
493 "time are also written to file ([TT]-sc[tt]).[PAR]",
494 "Solvent accessible surface (SAS) per residue can be calculated, both in",
495 "absolute values (A^2) and in fractions of the maximal accessible",
496 "surface of a residue. The maximal accessible surface is defined as",
497 "the accessible surface of a residue in a chain of glycines.",
498 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
499 "and that is more efficient.[PAR]",
500 "Finally, this program can dump the secondary structure in a special file",
501 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
502 "these two programs can be used to analyze dihedral properties as a",
503 "function of secondary structure type."
505 static gmx_bool bVerbose;
506 static const char* ss_string = "HEBT";
507 static int dsspVersion = 2;
509 { "-v", FALSE, etBOOL, { &bVerbose }, "HIDDENGenerate miles of useless information" },
510 { "-sss", FALSE, etSTR, { &ss_string }, "Secondary structures for structure count" },
515 "DSSP major version. Syntax changed with version 2" }
519 FILE * tapein, *tapeout;
520 FILE * ss, *acc, *fTArea, *tmpf;
521 const char * fnSCount, *fnArea, *fnTArea, *fnAArea;
522 const char* leg[] = { "Phobic", "Phylic" };
527 int nres, nr0, naccr, nres_plus_separators;
528 gmx_bool * bPhbres, bDoAccSurf;
530 int natoms, nframe = 0;
531 matrix box = { { 0 } };
537 real ** accr, *accr_ptr = nullptr, *av_area, *norm_av_area;
538 char pdbfile[32], tmpfile[32];
541 gmx_output_env_t* oenv;
542 gmx_rmpbc_t gpbc = nullptr;
545 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
546 { efNDX, nullptr, nullptr, ffOPTRD }, { efDAT, "-ssdump", "ssdump", ffOPTWR },
547 { efMAP, "-map", "ss", ffLIBRD }, { efXPM, "-o", "ss", ffWRITE },
548 { efXVG, "-sc", "scount", ffWRITE }, { efXPM, "-a", "area", ffOPTWR },
549 { efXVG, "-ta", "totarea", ffOPTWR }, { efXVG, "-aa", "averarea", ffOPTWR }
551 #define NFILE asize(fnm)
553 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT, NFILE, fnm,
554 asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
558 fnSCount = opt2fn("-sc", NFILE, fnm);
559 fnArea = opt2fn_null("-a", NFILE, fnm);
560 fnTArea = opt2fn_null("-ta", NFILE, fnm);
561 fnAArea = opt2fn_null("-aa", NFILE, fnm);
562 bDoAccSurf = ((fnArea != nullptr) || (fnTArea != nullptr) || (fnAArea != nullptr));
564 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xp, nullptr, box, FALSE);
565 atoms = &(top.atoms);
567 bPhbres = bPhobics(atoms);
569 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
572 for (int i = 0; (i < gnx); i++)
574 if (atoms->atom[index[i]].resind != nr0)
576 nr0 = atoms->atom[index[i]].resind;
580 fprintf(stderr, "There are %d residues in your selected group\n", nres);
582 std::strcpy(pdbfile, "ddXXXXXX");
584 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
586 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
588 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
590 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
595 std::strcpy(tmpfile, "ddXXXXXX");
597 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
599 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
601 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
603 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
608 if ((dptr = getenv("DSSP")) == nullptr)
610 dptr = "/usr/local/bin/dssp";
612 if (!gmx_fexist(dptr))
614 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)", dptr);
616 std::string redirectionString;
617 redirectionString = prepareToRedirectStdout(bVerbose);
618 DsspInputStrings dsspStrings;
619 dsspStrings.dptr = dptr;
620 dsspStrings.pdbfile = pdbfile;
621 dsspStrings.tmpfile = tmpfile;
622 if (dsspVersion >= 2)
626 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by "
627 "do_dssp. Assuming version 2 syntax.\n\n",
631 printDsspResult(dssp, dsspStrings, redirectionString);
637 dsspStrings.dptr.clear();
641 dsspStrings.dptr = "-na";
643 printDsspResult(dssp, dsspStrings, redirectionString);
645 fprintf(stderr, "dssp cmd='%s'\n", dssp);
649 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
650 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
651 xvgr_legend(fTArea, 2, leg, oenv);
658 mat.map = readcmap(opt2fn("-map", NFILE, fnm));
660 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
661 if (natoms > atoms->nr)
663 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
667 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
670 snew(average_area, atoms->nres);
671 snew(av_area, atoms->nres);
672 snew(norm_av_area, atoms->nres);
676 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
679 t = output_env_conv_time(oenv, t);
680 if (bDoAccSurf && nframe >= naccr)
684 for (int i = naccr - 10; i < naccr; i++)
686 snew(accr[i], 2 * atoms->nres - 1);
689 gmx_rmpbc(gpbc, natoms, box, x);
690 tapein = gmx_ffopen(pdbfile, "w");
691 write_pdbfile_indexed(tapein, nullptr, atoms, x, ePBC, box, ' ', -1, gnx, index, nullptr, FALSE);
693 /* strip_dssp returns the number of lines found in the dssp file, i.e.
694 * the number of residues plus the separator lines */
696 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
697 if (nullptr == (tapeout = popen(dssp, "r")))
699 if (0 != system(dssp) || nullptr == (tapeout = gmx_ffopen(tmpfile, "r")))
705 "Failed to execute command: %s\n"
706 "Try specifying your dssp version with the -ver option.",
711 accr_ptr = accr[nframe];
713 /* strip_dssp returns the number of lines found in the dssp file, i.e.
714 * the number of residues plus the separator lines */
715 nres_plus_separators =
716 strip_dssp(tapeout, nres, bPhbres, t, accr_ptr, fTArea, &mat, average_area, oenv);
717 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
720 gmx_ffclose(tapeout);
725 } while (read_next_x(oenv, status, &t, x, box));
726 fprintf(stderr, "\n");
732 gmx_rmpbc_done(gpbc);
734 prune_ss_legend(&mat);
736 ss = opt2FILE("-o", NFILE, fnm, "w");
738 write_xpm_m(ss, mat);
741 if (opt2bSet("-ssdump", NFILE, fnm))
743 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
744 fprintf(ss, "%d\n", nres);
745 for (gmx::index j = 0; j != mat.matrix.extent(0); ++j)
747 auto row = mat.matrix.asView()[j];
748 for (gmx::index i = 0; i != row.extent(0); ++i)
750 fputc(mat.map[row[i]].code.c1, ss);
756 analyse_ss(fnSCount, &mat, ss_string, oenv);
760 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
762 for (int i = 0; i < atoms->nres; i++)
764 av_area[i] = (average_area[i] / static_cast<real>(nframe));
767 norm_acc(atoms, nres, av_area, norm_av_area);
771 acc = xvgropen(fnAArea, "Average Accessible Area", "Residue", "A\\S2", oenv);
772 for (int i = 0; (i < nres); i++)
774 fprintf(acc, "%5d %10g %10g\n", i + 1, av_area[i], norm_av_area[i]);
780 view_all(oenv, NFILE, fnm);