2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/commandline/pargs.h"
48 #include "gromacs/commandline/viewit.h"
49 #include "gromacs/fileio/confio.h"
50 #include "gromacs/fileio/matio.h"
51 #include "gromacs/fileio/pdbio.h"
52 #include "gromacs/fileio/trxio.h"
53 #include "gromacs/fileio/xvgr.h"
54 #include "gromacs/gmxana/gmx_ana.h"
55 #include "gromacs/gmxana/gstat.h"
56 #include "gromacs/pbcutil/rmpbc.h"
57 #include "gromacs/topology/index.h"
58 #include "gromacs/topology/topology.h"
59 #include "gromacs/utility/arraysize.h"
60 #include "gromacs/utility/cstringutil.h"
61 #include "gromacs/utility/dir_separator.h"
62 #include "gromacs/utility/fatalerror.h"
63 #include "gromacs/utility/futil.h"
64 #include "gromacs/utility/smalloc.h"
65 #include "gromacs/utility/strdb.h"
67 #if GMX_NATIVE_WINDOWS
68 # define NULL_DEVICE "nul"
70 # define pclose _pclose
72 # define NULL_DEVICE "/dev/null"
75 struct DsspInputStrings
82 static const char* prepareToRedirectStdout(bool bVerbose)
84 return bVerbose ? "" : "2>" NULL_DEVICE;
87 static void printDsspResult(char* dssp, const DsspInputStrings& strings, const std::string& redirectionString)
89 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
90 sprintf(dssp, "%s -i %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(), redirectionString.c_str());
92 sprintf(dssp, "%s -i %s -o %s > %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(),
93 strings.tmpfile.c_str(), NULL_DEVICE, redirectionString.c_str());
98 static int strip_dssp(FILE* tapeout,
100 const gmx_bool bPhobres[],
106 const gmx_output_env_t* oenv)
108 static gmx_bool bFirst = TRUE;
110 char buf[STRLEN + 1];
112 int nr, iacc, nresidues;
113 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
120 fgets2(buf, STRLEN, tapeout);
121 } while (std::strstr(buf, "KAPPA") == nullptr);
124 /* Since we also have empty lines in the dssp output (temp) file,
125 * and some line content is saved to the ssbuf variable,
126 * we need more memory than just nres elements. To be shure,
127 * we allocate 2*nres-1, since for each chain there is a
128 * separating line in the temp file. (At most each residue
129 * could have been defined as a separate chain.) */
130 snew(ssbuf, 2 * nres - 1);
137 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != nullptr); nr++)
139 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
141 SSTP = '='; /* Chain separator sign '=' */
145 SSTP = buf[16] == ' ' ? '~' : buf[16];
151 /* Only calculate solvent accessible area if needed */
152 if ((nullptr != acc) && (buf[13] != '!'))
154 sscanf(&(buf[34]), "%d", &iacc);
156 /* average_area and bPhobres are counted from 0...nres-1 */
157 average_area[nresidues] += iacc;
158 if (bPhobres[nresidues])
168 /* Keep track of the residue number (do not count chain separator lines '!') */
178 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n",
182 mat->title = "Secondary structure";
184 mat->label_x = output_env_get_time_label(oenv);
185 mat->label_y = "Residue";
186 mat->bDiscrete = true;
188 mat->axis_y.resize(nr);
189 std::iota(mat->axis_y.begin(), mat->axis_y.end(), 1);
190 mat->axis_x.resize(0);
191 mat->matrix.resize(1, 1);
194 mat->axis_x.push_back(t);
195 mat->matrix.resize(++(mat->nx), nr);
196 auto columnIndex = mat->nx - 1;
197 for (int i = 0; i < nr; i++)
199 t_xpmelmt c = { ssbuf[i], 0 };
200 mat->matrix(columnIndex, i) = std::max(static_cast<t_matelmt>(0), searchcmap(mat->map, c));
205 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01 * iaccb, 0.01 * iaccf);
208 /* Return the number of lines found in the dssp file (i.e. number
209 * of redidues plus chain separator lines).
210 * This is the number of y elements needed for the area xpm file */
214 static gmx_bool* bPhobics(t_atoms* atoms)
220 char surffn[] = "surface.dat";
221 char ** surf_res, **surf_lines;
224 nb = get_lines("phbres.dat", &cb);
225 snew(bb, atoms->nres);
227 n_surf = get_lines(surffn, &surf_lines);
228 snew(surf_res, n_surf);
229 for (i = 0; (i < n_surf); i++)
231 snew(surf_res[i], 5);
232 sscanf(surf_lines[i], "%s", surf_res[i]);
236 for (i = 0, j = 0; (i < atoms->nres); i++)
238 if (-1 != search_str(n_surf, surf_res, *atoms->resinfo[i].name))
240 bb[j++] = (-1 != search_str(nb, cb, *atoms->resinfo[i].name));
247 "Not all residues were recognized (%d from %d), the result may be inaccurate!\n", j, i);
250 for (i = 0; (i < n_surf); i++)
259 static void check_oo(t_atoms* atoms)
261 char* OOO = gmx_strdup("O");
263 for (int i = 0; (i < atoms->nr); i++)
265 if ((std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
266 || (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
267 || (std::strcmp(*(atoms->atomname[i]), "OC1") == 0)
268 || (std::strcmp(*(atoms->atomname[i]), "OT1") == 0))
270 *atoms->atomname[i] = OOO;
275 static void norm_acc(t_atoms* atoms, int nres, const real av_area[], real norm_av_area[])
279 char surffn[] = "surface.dat";
280 char ** surf_res, **surf_lines;
283 n_surf = get_lines(surffn, &surf_lines);
285 snew(surf_res, n_surf);
286 for (i = 0; (i < n_surf); i++)
288 snew(surf_res[i], 5);
289 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
292 for (i = 0; (i < nres); i++)
294 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
297 norm_av_area[i] = av_area[i] / surf[n];
301 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
302 *atoms->resinfo[i].name, surffn);
307 static void prune_ss_legend(t_matrix* mat)
309 std::vector<bool> isPresent(mat->map.size());
310 std::vector<int> newnum(mat->map.size());
311 std::vector<t_mapping> newmap;
313 for (int f = 0; f < mat->nx; f++)
315 for (int r = 0; r < mat->ny; r++)
317 isPresent[mat->matrix(f, r)] = true;
321 for (size_t i = 0; i < mat->map.size(); i++)
326 newnum[i] = newmap.size();
327 newmap.emplace_back(mat->map[i]);
330 if (newmap.size() != mat->map.size())
332 std::swap(mat->map, newmap);
333 for (int f = 0; f < mat->nx; f++)
335 for (int r = 0; r < mat->ny; r++)
337 mat->matrix(f, r) = newnum[mat->matrix(f, r)];
343 static void write_sas_mat(const char* fn, real** accr, int nframe, int nres, t_matrix* mat)
347 t_rgb rlo = { 1, 1, 1 }, rhi = { 0, 0, 0 };
352 hi = lo = accr[0][0];
353 for (i = 0; i < nframe; i++)
355 for (j = 0; j < nres; j++)
357 lo = std::min(lo, accr[i][j]);
358 hi = std::max(hi, accr[i][j]);
361 fp = gmx_ffopen(fn, "w");
362 nlev = static_cast<int>(hi - lo + 1);
363 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)", "Time", "Residue Index",
364 nframe, nres, mat->axis_x.data(), mat->axis_y.data(), accr, lo, hi, rlo, rhi, &nlev);
369 static void analyse_ss(const char* outfile, t_matrix* mat, const char* ss_string, const gmx_output_env_t* oenv)
372 int ss_count, total_count;
374 gmx::ArrayRef<t_mapping> map = mat->map;
375 std::vector<int> count(map.size());
376 std::vector<int> total(map.size(), 0);
377 // This copying would not be necessary if xvgr_legend could take a
378 // view of string views
379 std::vector<std::string> leg;
380 leg.reserve(map.size() + 1);
381 leg.emplace_back("Structure");
382 for (const auto& m : map)
384 leg.emplace_back(m.desc);
387 fp = xvgropen(outfile, "Secondary Structure", output_env_get_xvgr_tlabel(oenv),
388 "Number of Residues", oenv);
389 if (output_env_get_print_xvgr_codes(oenv))
391 fprintf(fp, "@ subtitle \"Structure = ");
393 for (size_t s = 0; s < std::strlen(ss_string); s++)
399 for (const auto& m : map)
401 if (ss_string[s] == m.code.c1)
403 fprintf(fp, "%s", m.desc);
408 xvgrLegend(fp, leg, oenv);
411 for (int f = 0; f < mat->nx; f++)
414 for (auto& c : count)
418 for (int r = 0; r < mat->ny; r++)
420 count[mat->matrix(f, r)]++;
421 total[mat->matrix(f, r)]++;
423 for (gmx::index s = 0; s != gmx::ssize(map); ++s)
425 if (std::strchr(ss_string, map[s].code.c1))
427 ss_count += count[s];
428 total_count += count[s];
431 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
432 for (const auto& c : count)
434 fprintf(fp, " %5d", c);
438 /* now print column totals */
439 fprintf(fp, "%-8s %5d", "# Totals", total_count);
440 for (const auto& t : total)
442 fprintf(fp, " %5d", t);
446 /* now print probabilities */
447 fprintf(fp, "%-8s %5.2f", "# SS pr.", total_count / static_cast<real>(mat->nx * mat->ny));
448 for (const auto& t : total)
450 fprintf(fp, " %5.2f", t / static_cast<real>(mat->nx * mat->ny));
457 int gmx_do_dssp(int argc, char* argv[])
459 const char* desc[] = {
460 "[THISMODULE] ", "reads a trajectory file and computes the secondary structure for",
461 "each time frame ", "calling the dssp program. If you do not have the dssp program,",
462 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
463 "that the dssp executable is located in ",
464 // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
465 "[TT]" GMX_DSSP_PROGRAM_PATH "[tt]. If this is not the case, then you should",
466 "set an environment variable [TT]DSSP[tt] pointing to the dssp", "executable, e.g.: [PAR]",
467 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
468 "Since version 2.0.0, dssp is invoked with a syntax that differs",
469 "from earlier versions. If you have an older version of dssp,",
470 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
471 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
472 "Even newer versions (which at the time of writing are not yet released)",
473 "are assumed to have the same syntax as 2.0.0.[PAR]",
474 "The structure assignment for each residue and time is written to an",
475 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
476 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
477 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
478 "postscript files.", "The number of residues with each secondary structure type and the",
479 "total secondary structure ([TT]-sss[tt]) count as a function of",
480 "time are also written to file ([TT]-sc[tt]).[PAR]",
481 "Solvent accessible surface (SAS) per residue can be calculated, both in",
482 "absolute values (A^2) and in fractions of the maximal accessible",
483 "surface of a residue. The maximal accessible surface is defined as",
484 "the accessible surface of a residue in a chain of glycines.",
485 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
486 "and that is more efficient.[PAR]",
487 "Finally, this program can dump the secondary structure in a special file",
488 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
489 "these two programs can be used to analyze dihedral properties as a",
490 "function of secondary structure type."
492 static gmx_bool bVerbose;
493 static const char* ss_string = "HEBT";
494 static int dsspVersion = 2;
496 { "-v", FALSE, etBOOL, { &bVerbose }, "HIDDENGenerate miles of useless information" },
497 { "-sss", FALSE, etSTR, { &ss_string }, "Secondary structures for structure count" },
502 "DSSP major version. Syntax changed with version 2" }
506 FILE * tapein, *tapeout;
507 FILE * ss, *acc, *fTArea, *tmpf;
508 const char * fnSCount, *fnArea, *fnTArea, *fnAArea;
509 const char* leg[] = { "Phobic", "Phylic" };
514 int nres, nr0, naccr, nres_plus_separators;
515 gmx_bool * bPhbres, bDoAccSurf;
517 int natoms, nframe = 0;
518 matrix box = { { 0 } };
524 real ** accr, *accr_ptr = nullptr, *av_area, *norm_av_area;
525 char pdbfile[32], tmpfile[32];
528 gmx_output_env_t* oenv;
529 gmx_rmpbc_t gpbc = nullptr;
532 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
533 { efNDX, nullptr, nullptr, ffOPTRD }, { efDAT, "-ssdump", "ssdump", ffOPTWR },
534 { efMAP, "-map", "ss", ffLIBRD }, { efXPM, "-o", "ss", ffWRITE },
535 { efXVG, "-sc", "scount", ffWRITE }, { efXPM, "-a", "area", ffOPTWR },
536 { efXVG, "-ta", "totarea", ffOPTWR }, { efXVG, "-aa", "averarea", ffOPTWR }
538 #define NFILE asize(fnm)
540 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT, NFILE, fnm,
541 asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
545 fnSCount = opt2fn("-sc", NFILE, fnm);
546 fnArea = opt2fn_null("-a", NFILE, fnm);
547 fnTArea = opt2fn_null("-ta", NFILE, fnm);
548 fnAArea = opt2fn_null("-aa", NFILE, fnm);
549 bDoAccSurf = ((fnArea != nullptr) || (fnTArea != nullptr) || (fnAArea != nullptr));
551 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xp, nullptr, box, FALSE);
552 atoms = &(top.atoms);
554 bPhbres = bPhobics(atoms);
556 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
559 for (int i = 0; (i < gnx); i++)
561 if (atoms->atom[index[i]].resind != nr0)
563 nr0 = atoms->atom[index[i]].resind;
567 fprintf(stderr, "There are %d residues in your selected group\n", nres);
569 std::strcpy(pdbfile, "ddXXXXXX");
571 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
573 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
575 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
577 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
582 std::strcpy(tmpfile, "ddXXXXXX");
584 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
586 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
588 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
590 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
595 const std::string defpathenv = GMX_DSSP_PROGRAM_PATH;
596 if ((dptr = getenv("DSSP")) == nullptr)
598 dptr = defpathenv.c_str();
600 if (!gmx_fexist(dptr))
602 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)", dptr);
604 std::string redirectionString;
605 redirectionString = prepareToRedirectStdout(bVerbose);
606 DsspInputStrings dsspStrings;
607 dsspStrings.dptr = dptr;
608 dsspStrings.pdbfile = pdbfile;
609 dsspStrings.tmpfile = tmpfile;
610 if (dsspVersion >= 2)
614 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by "
615 "do_dssp. Assuming version 2 syntax.\n\n",
619 printDsspResult(dssp, dsspStrings, redirectionString);
625 dsspStrings.dptr.clear();
629 dsspStrings.dptr = "-na";
631 printDsspResult(dssp, dsspStrings, redirectionString);
633 fprintf(stderr, "dssp cmd='%s'\n", dssp);
637 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
638 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
639 xvgr_legend(fTArea, 2, leg, oenv);
646 mat.map = readcmap(opt2fn("-map", NFILE, fnm));
648 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
649 if (natoms > atoms->nr)
651 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
655 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
658 snew(average_area, atoms->nres);
659 snew(av_area, atoms->nres);
660 snew(norm_av_area, atoms->nres);
664 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
667 t = output_env_conv_time(oenv, t);
668 if (bDoAccSurf && nframe >= naccr)
672 for (int i = naccr - 10; i < naccr; i++)
674 snew(accr[i], 2 * atoms->nres - 1);
677 gmx_rmpbc(gpbc, natoms, box, x);
678 tapein = gmx_ffopen(pdbfile, "w");
679 write_pdbfile_indexed(tapein, nullptr, atoms, x, ePBC, box, ' ', -1, gnx, index, nullptr, FALSE);
681 /* strip_dssp returns the number of lines found in the dssp file, i.e.
682 * the number of residues plus the separator lines */
684 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
685 if (nullptr == (tapeout = popen(dssp, "r")))
687 if (0 != system(dssp) || nullptr == (tapeout = gmx_ffopen(tmpfile, "r")))
693 "Failed to execute command: %s\n"
694 "Try specifying your dssp version with the -ver option.",
699 accr_ptr = accr[nframe];
701 /* strip_dssp returns the number of lines found in the dssp file, i.e.
702 * the number of residues plus the separator lines */
703 nres_plus_separators =
704 strip_dssp(tapeout, nres, bPhbres, t, accr_ptr, fTArea, &mat, average_area, oenv);
705 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
708 gmx_ffclose(tapeout);
713 } while (read_next_x(oenv, status, &t, x, box));
714 fprintf(stderr, "\n");
720 gmx_rmpbc_done(gpbc);
722 prune_ss_legend(&mat);
724 ss = opt2FILE("-o", NFILE, fnm, "w");
726 write_xpm_m(ss, mat);
729 if (opt2bSet("-ssdump", NFILE, fnm))
731 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
732 fprintf(ss, "%d\n", nres);
733 for (gmx::index j = 0; j != mat.matrix.extent(0); ++j)
735 auto row = mat.matrix.asView()[j];
736 for (gmx::index i = 0; i != row.extent(0); ++i)
738 fputc(mat.map[row[i]].code.c1, ss);
744 analyse_ss(fnSCount, &mat, ss_string, oenv);
748 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
750 for (int i = 0; i < atoms->nres; i++)
752 av_area[i] = (average_area[i] / static_cast<real>(nframe));
755 norm_acc(atoms, nres, av_area, norm_av_area);
759 acc = xvgropen(fnAArea, "Average Accessible Area", "Residue", "A\\S2", oenv);
760 for (int i = 0; (i < nres); i++)
762 fprintf(acc, "%5d %10g %10g\n", i + 1, av_area[i], norm_av_area[i]);
768 view_all(oenv, NFILE, fnm);