2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/commandline/viewit.h"
50 #include "gromacs/fileio/confio.h"
51 #include "gromacs/fileio/matio.h"
52 #include "gromacs/fileio/pdbio.h"
53 #include "gromacs/fileio/trxio.h"
54 #include "gromacs/fileio/xvgr.h"
55 #include "gromacs/gmxana/gmx_ana.h"
56 #include "gromacs/gmxana/gstat.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/topology/index.h"
59 #include "gromacs/topology/topology.h"
60 #include "gromacs/utility/arraysize.h"
61 #include "gromacs/utility/cstringutil.h"
62 #include "gromacs/utility/dir_separator.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/futil.h"
65 #include "gromacs/utility/smalloc.h"
66 #include "gromacs/utility/strdb.h"
68 #if GMX_NATIVE_WINDOWS
69 # define NULL_DEVICE "nul"
71 # define pclose _pclose
73 # define NULL_DEVICE "/dev/null"
76 struct DsspInputStrings
83 static const char* prepareToRedirectStdout(bool bVerbose)
85 return bVerbose ? "" : "2>" NULL_DEVICE;
88 static void printDsspResult(char* dssp, const DsspInputStrings& strings, const std::string& redirectionString)
90 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
91 sprintf(dssp, "%s -i %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(), redirectionString.c_str());
94 "%s -i %s -o %s > %s %s",
96 strings.pdbfile.c_str(),
97 strings.tmpfile.c_str(),
99 redirectionString.c_str());
104 static int strip_dssp(FILE* tapeout,
106 const gmx_bool bPhobres[],
112 const gmx_output_env_t* oenv)
114 static gmx_bool bFirst = TRUE;
115 static std::string ssbuf;
116 char buf[STRLEN + 1];
118 int nr, iacc, nresidues;
119 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
126 fgets2(buf, STRLEN, tapeout);
127 } while (std::strstr(buf, "KAPPA") == nullptr);
130 /* Since we also have empty lines in the dssp output (temp) file,
131 * and some line content is saved to the ssbuf variable,
132 * we need more memory than just nres elements. To be shure,
133 * we allocate 2*nres-1, since for each chain there is a
134 * separating line in the temp file. (At most each residue
135 * could have been defined as a separate chain.) */
136 ssbuf.resize(2 * nres - 1);
143 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != nullptr); nr++)
145 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
147 SSTP = '='; /* Chain separator sign '=' */
151 SSTP = buf[16] == ' ' ? '~' : buf[16];
157 /* Only calculate solvent accessible area if needed */
158 if ((nullptr != acc) && (buf[13] != '!'))
160 sscanf(&(buf[34]), "%d", &iacc);
162 /* average_area and bPhobres are counted from 0...nres-1 */
163 average_area[nresidues] += iacc;
164 if (bPhobres[nresidues])
174 /* Keep track of the residue number (do not count chain separator lines '!') */
184 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
187 mat->title = "Secondary structure";
189 mat->label_x = output_env_get_time_label(oenv);
190 mat->label_y = "Residue";
191 mat->bDiscrete = true;
193 mat->axis_y.resize(nr);
194 std::iota(mat->axis_y.begin(), mat->axis_y.end(), 1);
195 mat->axis_x.resize(0);
196 mat->matrix.resize(1, 1);
199 mat->axis_x.push_back(t);
200 mat->matrix.resize(++(mat->nx), nr);
201 auto columnIndex = mat->nx - 1;
202 for (int i = 0; i < nr; i++)
204 t_xpmelmt c = { ssbuf[i], 0 };
205 mat->matrix(columnIndex, i) = std::max(static_cast<t_matelmt>(0), searchcmap(mat->map, c));
210 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01 * iaccb, 0.01 * iaccf);
213 /* Return the number of lines found in the dssp file (i.e. number
214 * of redidues plus chain separator lines).
215 * This is the number of y elements needed for the area xpm file */
219 static gmx_bool* bPhobics(t_atoms* atoms)
225 char surffn[] = "surface.dat";
226 char ** surf_res, **surf_lines;
229 nb = get_lines("phbres.dat", &cb);
230 snew(bb, atoms->nres);
232 n_surf = get_lines(surffn, &surf_lines);
233 snew(surf_res, n_surf);
234 for (i = 0; (i < n_surf); i++)
236 snew(surf_res[i], 5);
237 sscanf(surf_lines[i], "%s", surf_res[i]);
241 for (i = 0, j = 0; (i < atoms->nres); i++)
243 if (-1 != search_str(n_surf, surf_res, *atoms->resinfo[i].name))
245 bb[j++] = (-1 != search_str(nb, cb, *atoms->resinfo[i].name));
252 "Not all residues were recognized (%d from %d), the result may be inaccurate!\n",
257 for (i = 0; (i < n_surf); i++)
266 static void check_oo(t_atoms* atoms)
268 char* OOO = gmx_strdup("O");
270 for (int i = 0; (i < atoms->nr); i++)
272 if ((std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
273 || (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
274 || (std::strcmp(*(atoms->atomname[i]), "OC1") == 0)
275 || (std::strcmp(*(atoms->atomname[i]), "OT1") == 0))
277 *atoms->atomname[i] = OOO;
282 static void norm_acc(t_atoms* atoms, int nres, const real av_area[], real norm_av_area[])
286 char surffn[] = "surface.dat";
287 char ** surf_res, **surf_lines;
290 n_surf = get_lines(surffn, &surf_lines);
292 snew(surf_res, n_surf);
293 for (i = 0; (i < n_surf); i++)
295 snew(surf_res[i], 5);
296 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
299 for (i = 0; (i < nres); i++)
301 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
304 norm_av_area[i] = av_area[i] / surf[n];
308 fprintf(stderr, "Residue %s not found in surface database (%s)\n", *atoms->resinfo[i].name, surffn);
313 static void prune_ss_legend(t_matrix* mat)
315 std::vector<bool> isPresent(mat->map.size());
316 std::vector<int> newnum(mat->map.size());
317 std::vector<t_mapping> newmap;
319 for (int f = 0; f < mat->nx; f++)
321 for (int r = 0; r < mat->ny; r++)
323 isPresent[mat->matrix(f, r)] = true;
327 for (size_t i = 0; i < mat->map.size(); i++)
332 newnum[i] = newmap.size();
333 newmap.emplace_back(mat->map[i]);
336 if (newmap.size() != mat->map.size())
338 std::swap(mat->map, newmap);
339 for (int f = 0; f < mat->nx; f++)
341 for (int r = 0; r < mat->ny; r++)
343 mat->matrix(f, r) = newnum[mat->matrix(f, r)];
349 static void write_sas_mat(const char* fn, real** accr, int nframe, int nres, t_matrix* mat)
353 t_rgb rlo = { 1, 1, 1 }, rhi = { 0, 0, 0 };
358 hi = lo = accr[0][0];
359 for (i = 0; i < nframe; i++)
361 for (j = 0; j < nres; j++)
363 lo = std::min(lo, accr[i][j]);
364 hi = std::max(hi, accr[i][j]);
367 fp = gmx_ffopen(fn, "w");
368 nlev = static_cast<int>(hi - lo + 1);
371 "Solvent Accessible Surface",
389 static void analyse_ss(const char* outfile, t_matrix* mat, const char* ss_string, const gmx_output_env_t* oenv)
392 int ss_count, total_count;
394 gmx::ArrayRef<t_mapping> map = mat->map;
395 std::vector<int> count(map.size());
396 std::vector<int> total(map.size(), 0);
397 // This copying would not be necessary if xvgr_legend could take a
398 // view of string views
399 std::vector<std::string> leg;
400 leg.reserve(map.size() + 1);
401 leg.emplace_back("Structure");
402 for (const auto& m : map)
404 leg.emplace_back(m.desc);
408 outfile, "Secondary Structure", output_env_get_xvgr_tlabel(oenv), "Number of Residues", oenv);
409 if (output_env_get_print_xvgr_codes(oenv))
411 fprintf(fp, "@ subtitle \"Structure = ");
413 for (size_t s = 0; s < std::strlen(ss_string); s++)
419 for (const auto& m : map)
421 if (ss_string[s] == m.code.c1)
423 fprintf(fp, "%s", m.desc);
428 xvgrLegend(fp, leg, oenv);
431 for (int f = 0; f < mat->nx; f++)
434 for (auto& c : count)
438 for (int r = 0; r < mat->ny; r++)
440 count[mat->matrix(f, r)]++;
441 total[mat->matrix(f, r)]++;
443 for (gmx::index s = 0; s != gmx::ssize(map); ++s)
445 if (std::strchr(ss_string, map[s].code.c1))
447 ss_count += count[s];
448 total_count += count[s];
451 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
452 for (const auto& c : count)
454 fprintf(fp, " %5d", c);
458 /* now print column totals */
459 fprintf(fp, "%-8s %5d", "# Totals", total_count);
460 for (const auto& t : total)
462 fprintf(fp, " %5d", t);
466 /* now print probabilities */
467 fprintf(fp, "%-8s %5.2f", "# SS pr.", total_count / static_cast<real>(mat->nx * mat->ny));
468 for (const auto& t : total)
470 fprintf(fp, " %5.2f", t / static_cast<real>(mat->nx * mat->ny));
477 int gmx_do_dssp(int argc, char* argv[])
479 const char* desc[] = {
481 "reads a trajectory file and computes the secondary structure for",
483 "calling the dssp program. If you do not have the dssp program,",
484 "get it from https://swift.cmbi.umcn.nl/gv/dssp. [THISMODULE] assumes ",
485 "that the dssp executable is located in ",
486 // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
487 "[TT]" GMX_DSSP_PROGRAM_PATH "[tt]. If this is not the case, then you should",
488 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
489 "executable, e.g.: [PAR]",
490 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
491 "Since version 2.0.0, dssp is invoked with a syntax that differs",
492 "from earlier versions. If you have an older version of dssp,",
493 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
494 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
495 "Even newer versions (which at the time of writing are not yet released)",
496 "are assumed to have the same syntax as 2.0.0.[PAR]",
497 "The structure assignment for each residue and time is written to an",
498 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
499 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
500 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
502 "The number of residues with each secondary structure type and the",
503 "total secondary structure ([TT]-sss[tt]) count as a function of",
504 "time are also written to file ([TT]-sc[tt]).[PAR]",
505 "Solvent accessible surface (SAS) per residue can be calculated, both in",
506 "absolute values (A^2) and in fractions of the maximal accessible",
507 "surface of a residue. The maximal accessible surface is defined as",
508 "the accessible surface of a residue in a chain of glycines.",
509 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
510 "and that is more efficient.[PAR]",
511 "Finally, this program can dump the secondary structure in a special file",
512 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
513 "these two programs can be used to analyze dihedral properties as a",
514 "function of secondary structure type."
516 static gmx_bool bVerbose;
517 static const char* ss_string = "HEBT";
518 static int dsspVersion = 2;
520 { "-v", FALSE, etBOOL, { &bVerbose }, "HIDDENGenerate miles of useless information" },
521 { "-sss", FALSE, etSTR, { &ss_string }, "Secondary structures for structure count" },
526 "DSSP major version. Syntax changed with version 2" }
530 FILE * tapein, *tapeout;
531 FILE * ss, *acc, *fTArea, *tmpf;
532 const char * fnSCount, *fnArea, *fnTArea, *fnAArea;
533 const char* leg[] = { "Phobic", "Phylic" };
538 int nres, nr0, naccr, nres_plus_separators;
539 gmx_bool * bPhbres, bDoAccSurf;
541 int natoms, nframe = 0;
542 matrix box = { { 0 } };
548 real ** accr, *accr_ptr = nullptr, *av_area, *norm_av_area;
549 char pdbfile[32], tmpfile[32];
552 gmx_output_env_t* oenv;
553 gmx_rmpbc_t gpbc = nullptr;
556 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
557 { efNDX, nullptr, nullptr, ffOPTRD }, { efDAT, "-ssdump", "ssdump", ffOPTWR },
558 { efMAP, "-map", "ss", ffLIBRD }, { efXPM, "-o", "ss", ffWRITE },
559 { efXVG, "-sc", "scount", ffWRITE }, { efXPM, "-a", "area", ffOPTWR },
560 { efXVG, "-ta", "totarea", ffOPTWR }, { efXVG, "-aa", "averarea", ffOPTWR }
562 #define NFILE asize(fnm)
564 if (!parse_common_args(&argc,
566 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT,
579 fnSCount = opt2fn("-sc", NFILE, fnm);
580 fnArea = opt2fn_null("-a", NFILE, fnm);
581 fnTArea = opt2fn_null("-ta", NFILE, fnm);
582 fnAArea = opt2fn_null("-aa", NFILE, fnm);
583 bDoAccSurf = ((fnArea != nullptr) || (fnTArea != nullptr) || (fnAArea != nullptr));
585 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &pbcType, &xp, nullptr, box, FALSE);
586 atoms = &(top.atoms);
588 bPhbres = bPhobics(atoms);
590 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
593 for (int i = 0; (i < gnx); i++)
595 if (atoms->atom[index[i]].resind != nr0)
597 nr0 = atoms->atom[index[i]].resind;
601 fprintf(stderr, "There are %d residues in your selected group\n", nres);
603 std::strcpy(pdbfile, "ddXXXXXX");
605 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
607 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
609 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
611 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
616 std::strcpy(tmpfile, "ddXXXXXX");
618 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
620 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
622 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
624 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
629 const std::string defpathenv = GMX_DSSP_PROGRAM_PATH;
630 if ((dptr = getenv("DSSP")) == nullptr)
632 dptr = defpathenv.c_str();
634 if (!gmx_fexist(dptr))
636 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)", dptr);
638 std::string redirectionString;
639 redirectionString = prepareToRedirectStdout(bVerbose);
640 DsspInputStrings dsspStrings;
641 dsspStrings.dptr = dptr;
642 dsspStrings.pdbfile = pdbfile;
643 dsspStrings.tmpfile = tmpfile;
644 if (dsspVersion >= 2)
648 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by "
649 "do_dssp. Assuming version 2 syntax.\n\n",
653 printDsspResult(dssp, dsspStrings, redirectionString);
659 dsspStrings.dptr.clear();
663 dsspStrings.dptr = "-na";
665 printDsspResult(dssp, dsspStrings, redirectionString);
667 fprintf(stderr, "dssp cmd='%s'\n", dssp);
671 fTArea = xvgropen(fnTArea,
672 "Solvent Accessible Surface Area",
673 output_env_get_xvgr_tlabel(oenv),
676 xvgr_legend(fTArea, 2, leg, oenv);
683 mat.map = readcmap(opt2fn("-map", NFILE, fnm));
685 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
686 if (natoms > atoms->nr)
688 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
692 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
695 snew(average_area, atoms->nres);
696 snew(av_area, atoms->nres);
697 snew(norm_av_area, atoms->nres);
701 gpbc = gmx_rmpbc_init(&top.idef, pbcType, natoms);
704 t = output_env_conv_time(oenv, t);
705 if (bDoAccSurf && nframe >= naccr)
709 for (int i = naccr - 10; i < naccr; i++)
711 snew(accr[i], 2 * atoms->nres - 1);
714 gmx_rmpbc(gpbc, natoms, box, x);
715 tapein = gmx_ffopen(pdbfile, "w");
716 write_pdbfile_indexed(tapein, nullptr, atoms, x, pbcType, box, ' ', -1, gnx, index, nullptr, FALSE);
718 /* strip_dssp returns the number of lines found in the dssp file, i.e.
719 * the number of residues plus the separator lines */
721 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
722 if (nullptr == (tapeout = popen(dssp, "r")))
724 if (0 != system(dssp) || nullptr == (tapeout = gmx_ffopen(tmpfile, "r")))
730 "Failed to execute command: %s\n"
731 "Try specifying your dssp version with the -ver option.",
736 accr_ptr = accr[nframe];
738 /* strip_dssp returns the number of lines found in the dssp file, i.e.
739 * the number of residues plus the separator lines */
740 nres_plus_separators =
741 strip_dssp(tapeout, nres, bPhbres, t, accr_ptr, fTArea, &mat, average_area, oenv);
742 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
745 gmx_ffclose(tapeout);
750 } while (read_next_x(oenv, status, &t, x, box));
751 fprintf(stderr, "\n");
757 gmx_rmpbc_done(gpbc);
759 prune_ss_legend(&mat);
761 ss = opt2FILE("-o", NFILE, fnm, "w");
763 write_xpm_m(ss, mat);
766 if (opt2bSet("-ssdump", NFILE, fnm))
768 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
769 fprintf(ss, "%d\n", nres);
770 for (gmx::index j = 0; j != mat.matrix.extent(0); ++j)
772 auto row = mat.matrix.asView()[j];
773 for (gmx::index i = 0; i != row.extent(0); ++i)
775 fputc(mat.map[row[i]].code.c1, ss);
781 analyse_ss(fnSCount, &mat, ss_string, oenv);
785 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
787 for (int i = 0; i < atoms->nres; i++)
789 av_area[i] = (average_area[i] / static_cast<real>(nframe));
792 norm_acc(atoms, nres, av_area, norm_av_area);
796 acc = xvgropen(fnAArea, "Average Accessible Area", "Residue", "A\\S2", oenv);
797 for (int i = 0; (i < nres); i++)
799 fprintf(acc, "%5d %10g %10g\n", i + 1, av_area[i], norm_av_area[i]);
805 view_all(oenv, NFILE, fnm);