2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/commandline/viewit.h"
50 #include "gromacs/fileio/confio.h"
51 #include "gromacs/fileio/matio.h"
52 #include "gromacs/fileio/pdbio.h"
53 #include "gromacs/fileio/trxio.h"
54 #include "gromacs/fileio/xvgr.h"
55 #include "gromacs/gmxana/gmx_ana.h"
56 #include "gromacs/gmxana/gstat.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/topology/index.h"
59 #include "gromacs/topology/topology.h"
60 #include "gromacs/utility/arraysize.h"
61 #include "gromacs/utility/cstringutil.h"
62 #include "gromacs/utility/dir_separator.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/futil.h"
65 #include "gromacs/utility/smalloc.h"
66 #include "gromacs/utility/strdb.h"
68 #if GMX_NATIVE_WINDOWS
69 # define NULL_DEVICE "nul"
71 # define pclose _pclose
73 # define NULL_DEVICE "/dev/null"
76 struct DsspInputStrings
83 static const char* prepareToRedirectStdout(bool bVerbose)
85 return bVerbose ? "" : "2>" NULL_DEVICE;
88 static void printDsspResult(char* dssp, const DsspInputStrings& strings, const std::string& redirectionString)
90 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
91 sprintf(dssp, "%s -i %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(), redirectionString.c_str());
94 "%s -i %s -o %s > %s %s",
96 strings.pdbfile.c_str(),
97 strings.tmpfile.c_str(),
99 redirectionString.c_str());
104 static int strip_dssp(FILE* tapeout,
106 const gmx_bool bPhobres[],
112 const gmx_output_env_t* oenv)
114 static gmx_bool bFirst = TRUE;
115 static std::string ssbuf;
116 char buf[STRLEN + 1];
118 int nr, iacc, nresidues;
119 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
126 fgets2(buf, STRLEN, tapeout);
127 } while (std::strstr(buf, "KAPPA") == nullptr);
130 /* Since we also have empty lines in the dssp output (temp) file,
131 * and some line content is saved to the ssbuf variable,
132 * we need more memory than just nres elements. To be shure,
133 * we allocate 2*nres-1, since for each chain there is a
134 * separating line in the temp file. (At most each residue
135 * could have been defined as a separate chain.) */
136 ssbuf.resize(2 * nres - 1);
143 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != nullptr); nr++)
145 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
147 SSTP = '='; /* Chain separator sign '=' */
151 SSTP = buf[16] == ' ' ? '~' : buf[16];
157 /* Only calculate solvent accessible area if needed */
158 if ((nullptr != acc) && (buf[13] != '!'))
160 sscanf(&(buf[34]), "%d", &iacc);
162 /* average_area and bPhobres are counted from 0...nres-1 */
163 average_area[nresidues] += iacc;
164 if (bPhobres[nresidues])
174 /* Keep track of the residue number (do not count chain separator lines '!') */
184 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
187 mat->title = "Secondary structure";
189 mat->label_x = output_env_get_time_label(oenv);
190 mat->label_y = "Residue";
191 mat->bDiscrete = true;
193 mat->axis_y.resize(nr);
194 std::iota(mat->axis_y.begin(), mat->axis_y.end(), 1);
195 mat->axis_x.resize(0);
196 mat->matrix.resize(1, 1);
199 mat->axis_x.push_back(t);
200 mat->matrix.resize(++(mat->nx), nr);
201 auto columnIndex = mat->nx - 1;
202 for (int i = 0; i < nr; i++)
204 t_xpmelmt c = { ssbuf[i], 0 };
205 mat->matrix(columnIndex, i) = std::max(static_cast<t_matelmt>(0), searchcmap(mat->map, c));
210 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01 * iaccb, 0.01 * iaccf);
213 /* Return the number of lines found in the dssp file (i.e. number
214 * of redidues plus chain separator lines).
215 * This is the number of y elements needed for the area xpm file */
219 static gmx_bool* bPhobics(t_atoms* atoms)
225 char surffn[] = "surface.dat";
226 char ** surf_res, **surf_lines;
229 nb = get_lines("phbres.dat", &cb);
230 snew(bb, atoms->nres);
232 n_surf = get_lines(surffn, &surf_lines);
233 snew(surf_res, n_surf);
234 for (i = 0; (i < n_surf); i++)
236 snew(surf_res[i], 5);
237 sscanf(surf_lines[i], "%s", surf_res[i]);
241 for (i = 0, j = 0; (i < atoms->nres); i++)
243 if (-1 != search_str(n_surf, surf_res, *atoms->resinfo[i].name))
245 bb[j++] = (-1 != search_str(nb, cb, *atoms->resinfo[i].name));
252 "Not all residues were recognized (%d from %d), the result may be inaccurate!\n",
257 for (i = 0; (i < n_surf); i++)
266 static void check_oo(t_atoms* atoms)
268 char* OOO = gmx_strdup("O");
270 for (int i = 0; (i < atoms->nr); i++)
272 if ((std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
273 || (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
274 || (std::strcmp(*(atoms->atomname[i]), "OC1") == 0)
275 || (std::strcmp(*(atoms->atomname[i]), "OT1") == 0))
277 *atoms->atomname[i] = OOO;
282 static void norm_acc(t_atoms* atoms, int nres, const real av_area[], real norm_av_area[])
286 char surffn[] = "surface.dat";
287 char ** surf_res, **surf_lines;
290 n_surf = get_lines(surffn, &surf_lines);
292 snew(surf_res, n_surf);
293 for (i = 0; (i < n_surf); i++)
295 snew(surf_res[i], 5);
296 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
299 for (i = 0; (i < nres); i++)
301 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
304 norm_av_area[i] = av_area[i] / surf[n];
308 fprintf(stderr, "Residue %s not found in surface database (%s)\n", *atoms->resinfo[i].name, surffn);
313 static void prune_ss_legend(t_matrix* mat)
315 std::vector<bool> isPresent(mat->map.size());
316 std::vector<int> newnum(mat->map.size());
317 std::vector<t_mapping> newmap;
319 for (int f = 0; f < mat->nx; f++)
321 for (int r = 0; r < mat->ny; r++)
323 isPresent[mat->matrix(f, r)] = true;
327 for (size_t i = 0; i < mat->map.size(); i++)
332 newnum[i] = newmap.size();
333 newmap.emplace_back(mat->map[i]);
336 if (newmap.size() != mat->map.size())
338 std::swap(mat->map, newmap);
339 for (int f = 0; f < mat->nx; f++)
341 for (int r = 0; r < mat->ny; r++)
343 mat->matrix(f, r) = newnum[mat->matrix(f, r)];
349 static void write_sas_mat(const char* fn, real** accr, int nframe, int nres, t_matrix* mat)
353 t_rgb rlo = { 1, 1, 1 }, rhi = { 0, 0, 0 };
358 hi = lo = accr[0][0];
359 for (i = 0; i < nframe; i++)
361 for (j = 0; j < nres; j++)
363 lo = std::min(lo, accr[i][j]);
364 hi = std::max(hi, accr[i][j]);
367 fp = gmx_ffopen(fn, "w");
368 nlev = static_cast<int>(hi - lo + 1);
371 "Solvent Accessible Surface",
389 static void analyse_ss(const char* outfile, t_matrix* mat, const char* ss_string, const gmx_output_env_t* oenv)
392 int ss_count, total_count;
394 gmx::ArrayRef<t_mapping> map = mat->map;
395 std::vector<int> count(map.size());
396 std::vector<int> total(map.size(), 0);
397 // This copying would not be necessary if xvgr_legend could take a
398 // view of string views
399 std::vector<std::string> leg;
400 leg.reserve(map.size() + 1);
401 leg.emplace_back("Structure");
402 for (const auto& m : map)
404 leg.emplace_back(m.desc);
408 outfile, "Secondary Structure", output_env_get_xvgr_tlabel(oenv), "Number of Residues", oenv);
409 if (output_env_get_print_xvgr_codes(oenv))
411 fprintf(fp, "@ subtitle \"Structure = ");
413 for (size_t s = 0; s < std::strlen(ss_string); s++)
419 for (const auto& m : map)
421 if (ss_string[s] == m.code.c1)
423 fprintf(fp, "%s", m.desc);
428 xvgrLegend(fp, leg, oenv);
431 for (int f = 0; f < mat->nx; f++)
434 for (auto& c : count)
438 for (int r = 0; r < mat->ny; r++)
440 count[mat->matrix(f, r)]++;
441 total[mat->matrix(f, r)]++;
443 for (gmx::index s = 0; s != gmx::ssize(map); ++s)
445 if (std::strchr(ss_string, map[s].code.c1))
447 ss_count += count[s];
448 total_count += count[s];
451 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
452 for (const auto& c : count)
454 fprintf(fp, " %5d", c);
458 /* now print column totals */
459 fprintf(fp, "%-8s %5d", "# Totals", total_count);
460 for (const auto& t : total)
462 fprintf(fp, " %5d", t);
466 /* now print probabilities */
467 fprintf(fp, "%-8s %5.2f", "# SS pr.", total_count / static_cast<real>(mat->nx * mat->ny));
468 for (const auto& t : total)
470 fprintf(fp, " %5.2f", t / static_cast<real>(mat->nx * mat->ny));
477 int gmx_do_dssp(int argc, char* argv[])
479 const char* desc[] = {
481 "reads a trajectory file and computes the secondary structure for",
483 "calling the dssp program. If you do not have the dssp program,",
484 "get it from https://swift.cmbi.umcn.nl/gv/dssp. [THISMODULE] assumes ",
485 "that the dssp executable is located in ",
486 // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
487 "[TT]" GMX_DSSP_PROGRAM_PATH "[tt]. If this is not the case, then you should",
488 "set an environment variable [TT]DSSP[tt] pointing to the dssp ",
489 "executable, e.g.: [PAR]",
490 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
491 "The dssp program is invoked with a syntax that differs",
492 "depending on version. Version 1, 2 and 4 are supported, and the correct",
493 "invocation format can be selected using the [TT]-ver[tt] option.",
494 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
495 "Newer versions might also have executable name [TT]mkdssp[tt] instead",
496 "of [TT]dssp[tt].[PAR]",
497 "The structure assignment for each residue and time is written to an",
498 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
499 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
500 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
502 "The number of residues with each secondary structure type and the",
503 "total secondary structure ([TT]-sss[tt]) count as a function of",
504 "time are also written to file ([TT]-sc[tt]).[PAR]",
505 "Solvent accessible surface (SAS) per residue can be calculated, both in",
506 "absolute values (A^2) and in fractions of the maximal accessible",
507 "surface of a residue. The maximal accessible surface is defined as",
508 "the accessible surface of a residue in a chain of glycines.",
509 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
510 "and that is more efficient.[PAR]",
511 "Finally, this program can dump the secondary structure in a special file",
512 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
513 "these two programs can be used to analyze dihedral properties as a",
514 "function of secondary structure type."
516 static gmx_bool bVerbose;
517 static const char* ss_string = "HEBT";
518 static int dsspVersion = 2;
520 { "-v", FALSE, etBOOL, { &bVerbose }, "HIDDENGenerate miles of useless information" },
521 { "-sss", FALSE, etSTR, { &ss_string }, "Secondary structures for structure count" },
526 "DSSP major version. Syntax changed with version 2 and 4." }
530 FILE * tapein, *tapeout;
531 FILE * ss, *acc, *fTArea, *tmpf;
532 const char * fnSCount, *fnArea, *fnTArea, *fnAArea;
533 const char* leg[] = { "Phobic", "Phylic" };
538 int nres, nr0, naccr, nres_plus_separators;
539 gmx_bool * bPhbres, bDoAccSurf;
541 int natoms, nframe = 0;
542 matrix box = { { 0 } };
548 real ** accr, *accr_ptr = nullptr, *av_area, *norm_av_area;
549 char pdbfile[32], tmpfile[32];
552 gmx_output_env_t* oenv;
553 gmx_rmpbc_t gpbc = nullptr;
556 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
557 { efNDX, nullptr, nullptr, ffOPTRD }, { efDAT, "-ssdump", "ssdump", ffOPTWR },
558 { efMAP, "-map", "ss", ffLIBRD }, { efXPM, "-o", "ss", ffWRITE },
559 { efXVG, "-sc", "scount", ffWRITE }, { efXPM, "-a", "area", ffOPTWR },
560 { efXVG, "-ta", "totarea", ffOPTWR }, { efXVG, "-aa", "averarea", ffOPTWR }
562 #define NFILE asize(fnm)
564 if (!parse_common_args(&argc,
566 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT,
579 fnSCount = opt2fn("-sc", NFILE, fnm);
580 fnArea = opt2fn_null("-a", NFILE, fnm);
581 fnTArea = opt2fn_null("-ta", NFILE, fnm);
582 fnAArea = opt2fn_null("-aa", NFILE, fnm);
583 bDoAccSurf = ((fnArea != nullptr) || (fnTArea != nullptr) || (fnAArea != nullptr));
585 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &pbcType, &xp, nullptr, box, FALSE);
586 atoms = &(top.atoms);
588 bPhbres = bPhobics(atoms);
590 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
593 for (int i = 0; (i < gnx); i++)
595 if (atoms->atom[index[i]].resind != nr0)
597 nr0 = atoms->atom[index[i]].resind;
601 fprintf(stderr, "There are %d residues in your selected group\n", nres);
603 std::strcpy(pdbfile, "ddXXXXXX");
605 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
607 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
609 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
611 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
616 std::strcpy(tmpfile, "ddXXXXXX");
618 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
620 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
622 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
624 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
629 const std::string defpathenv = GMX_DSSP_PROGRAM_PATH;
630 if ((dptr = getenv("DSSP")) == nullptr)
632 dptr = defpathenv.c_str();
634 if (!gmx_fexist(dptr))
636 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)", dptr);
638 std::string redirectionString;
639 redirectionString = prepareToRedirectStdout(bVerbose);
640 DsspInputStrings dsspStrings;
641 dsspStrings.dptr = dptr;
642 dsspStrings.pdbfile = pdbfile;
643 dsspStrings.tmpfile = tmpfile;
644 if (dsspVersion >= 2)
646 if (dsspVersion == 4)
648 std::string mkdsspCommandLine = dsspStrings.dptr;
649 mkdsspCommandLine += " --output-format dssp ";
650 mkdsspCommandLine += dsspStrings.pdbfile;
652 #if not HAVE_PIPES && not GMX_NATIVE_WINDOWS
653 // Without pipe/popen, rely on temporary file for output
654 mkdsspCommandLine += " " + dsspStrings.tmpfile;
657 GMX_RELEASE_ASSERT(mkdsspCommandLine.size() < 255, "DSSP v4 command line too long");
658 strcpy(dssp, mkdsspCommandLine.c_str());
660 else if (dsspVersion == 2)
662 printDsspResult(dssp, dsspStrings, redirectionString);
666 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by "
667 "do_dssp. Assuming version 2 syntax.\n\n",
675 dsspStrings.dptr.clear();
679 dsspStrings.dptr = "-na";
681 printDsspResult(dssp, dsspStrings, redirectionString);
683 fprintf(stderr, "dssp cmd='%s'\n", dssp);
687 fTArea = xvgropen(fnTArea,
688 "Solvent Accessible Surface Area",
689 output_env_get_xvgr_tlabel(oenv),
692 xvgr_legend(fTArea, 2, leg, oenv);
699 mat.map = readcmap(opt2fn("-map", NFILE, fnm));
701 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
702 if (natoms > atoms->nr)
704 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
708 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
711 snew(average_area, atoms->nres);
712 snew(av_area, atoms->nres);
713 snew(norm_av_area, atoms->nres);
717 gpbc = gmx_rmpbc_init(&top.idef, pbcType, natoms);
720 t = output_env_conv_time(oenv, t);
721 if (bDoAccSurf && nframe >= naccr)
725 for (int i = naccr - 10; i < naccr; i++)
727 snew(accr[i], 2 * atoms->nres - 1);
730 gmx_rmpbc(gpbc, natoms, box, x);
731 tapein = gmx_ffopen(pdbfile, "w");
732 write_pdbfile_indexed(
733 tapein, nullptr, atoms, x, pbcType, box, 'A', -1, gnx, index, nullptr, FALSE, true);
735 /* strip_dssp returns the number of lines found in the dssp file, i.e.
736 * the number of residues plus the separator lines */
738 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
739 if (nullptr == (tapeout = popen(dssp, "r")))
741 if (0 != system(dssp) || nullptr == (tapeout = gmx_ffopen(tmpfile, "r")))
747 "Failed to execute command: %s\n"
748 "Try specifying your dssp version with the -ver option.",
753 accr_ptr = accr[nframe];
755 /* strip_dssp returns the number of lines found in the dssp file, i.e.
756 * the number of residues plus the separator lines */
757 nres_plus_separators =
758 strip_dssp(tapeout, nres, bPhbres, t, accr_ptr, fTArea, &mat, average_area, oenv);
759 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
762 gmx_ffclose(tapeout);
767 } while (read_next_x(oenv, status, &t, x, box));
768 fprintf(stderr, "\n");
774 gmx_rmpbc_done(gpbc);
776 prune_ss_legend(&mat);
778 ss = opt2FILE("-o", NFILE, fnm, "w");
780 write_xpm_m(ss, mat);
783 if (opt2bSet("-ssdump", NFILE, fnm))
785 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
786 fprintf(ss, "%d\n", nres);
787 for (gmx::index j = 0; j != mat.matrix.extent(0); ++j)
789 auto row = mat.matrix.asView()[j];
790 for (gmx::index i = 0; i != row.extent(0); ++i)
792 fputc(mat.map[row[i]].code.c1, ss);
798 analyse_ss(fnSCount, &mat, ss_string, oenv);
802 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
804 for (int i = 0; i < atoms->nres; i++)
806 av_area[i] = (average_area[i] / static_cast<real>(nframe));
809 norm_acc(atoms, nres, av_area, norm_av_area);
813 acc = xvgropen(fnAArea, "Average Accessible Area", "Residue", "A\\S2", oenv);
814 for (int i = 0; (i < nres); i++)
816 fprintf(acc, "%5d %10g %10g\n", i + 1, av_area[i], norm_av_area[i]);
822 view_all(oenv, NFILE, fnm);