2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/commandline/pargs.h"
48 #include "gromacs/commandline/viewit.h"
49 #include "gromacs/fileio/confio.h"
50 #include "gromacs/fileio/matio.h"
51 #include "gromacs/fileio/pdbio.h"
52 #include "gromacs/fileio/trxio.h"
53 #include "gromacs/fileio/xvgr.h"
54 #include "gromacs/gmxana/gmx_ana.h"
55 #include "gromacs/gmxana/gstat.h"
56 #include "gromacs/pbcutil/rmpbc.h"
57 #include "gromacs/topology/index.h"
58 #include "gromacs/topology/topology.h"
59 #include "gromacs/utility/arraysize.h"
60 #include "gromacs/utility/cstringutil.h"
61 #include "gromacs/utility/dir_separator.h"
62 #include "gromacs/utility/fatalerror.h"
63 #include "gromacs/utility/futil.h"
64 #include "gromacs/utility/smalloc.h"
65 #include "gromacs/utility/strdb.h"
67 #if GMX_NATIVE_WINDOWS
68 # define NULL_DEVICE "nul"
70 # define pclose _pclose
72 # define NULL_DEVICE "/dev/null"
75 struct DsspInputStrings
82 static const char* prepareToRedirectStdout(bool bVerbose)
84 return bVerbose ? "" : "2>" NULL_DEVICE;
87 static void printDsspResult(char* dssp, const DsspInputStrings& strings, const std::string& redirectionString)
89 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
90 sprintf(dssp, "%s -i %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(), redirectionString.c_str());
92 sprintf(dssp, "%s -i %s -o %s > %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(),
93 strings.tmpfile.c_str(), NULL_DEVICE, redirectionString.c_str());
98 static int strip_dssp(FILE* tapeout,
100 const gmx_bool bPhobres[],
106 const gmx_output_env_t* oenv)
108 static gmx_bool bFirst = TRUE;
110 char buf[STRLEN + 1];
112 int nr, iacc, nresidues;
113 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
120 fgets2(buf, STRLEN, tapeout);
121 } while (std::strstr(buf, "KAPPA") == nullptr);
124 /* Since we also have empty lines in the dssp output (temp) file,
125 * and some line content is saved to the ssbuf variable,
126 * we need more memory than just nres elements. To be shure,
127 * we allocate 2*nres-1, since for each chain there is a
128 * separating line in the temp file. (At most each residue
129 * could have been defined as a separate chain.) */
130 snew(ssbuf, 2 * nres - 1);
137 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != nullptr); nr++)
139 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
141 SSTP = '='; /* Chain separator sign '=' */
145 SSTP = buf[16] == ' ' ? '~' : buf[16];
151 /* Only calculate solvent accessible area if needed */
152 if ((nullptr != acc) && (buf[13] != '!'))
154 sscanf(&(buf[34]), "%d", &iacc);
156 /* average_area and bPhobres are counted from 0...nres-1 */
157 average_area[nresidues] += iacc;
158 if (bPhobres[nresidues])
168 /* Keep track of the residue number (do not count chain separator lines '!') */
178 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n",
182 mat->title = "Secondary structure";
184 mat->label_x = output_env_get_time_label(oenv);
185 mat->label_y = "Residue";
186 mat->bDiscrete = true;
188 mat->axis_y.resize(nr);
189 std::iota(mat->axis_y.begin(), mat->axis_y.end(), 1);
190 mat->axis_x.resize(0);
191 mat->matrix.resize(1, 1);
194 mat->axis_x.push_back(t);
195 mat->matrix.resize(++(mat->nx), nr);
196 auto columnIndex = mat->nx - 1;
197 for (int i = 0; i < nr; i++)
199 t_xpmelmt c = { ssbuf[i], 0 };
200 mat->matrix(columnIndex, i) = std::max(static_cast<t_matelmt>(0), searchcmap(mat->map, c));
205 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01 * iaccb, 0.01 * iaccf);
208 /* Return the number of lines found in the dssp file (i.e. number
209 * of redidues plus chain separator lines).
210 * This is the number of y elements needed for the area xpm file */
214 static gmx_bool* bPhobics(t_atoms* atoms)
220 char surffn[] = "surface.dat";
221 char ** surf_res, **surf_lines;
224 nb = get_lines("phbres.dat", &cb);
225 snew(bb, atoms->nres);
227 n_surf = get_lines(surffn, &surf_lines);
228 snew(surf_res, n_surf);
229 for (i = 0; (i < n_surf); i++)
231 snew(surf_res[i], 5);
232 sscanf(surf_lines[i], "%s", surf_res[i]);
236 for (i = 0, j = 0; (i < atoms->nres); i++)
238 if (-1 != search_str(n_surf, surf_res, *atoms->resinfo[i].name))
240 bb[j++] = (-1 != search_str(nb, cb, *atoms->resinfo[i].name));
247 "Not all residues were recognized (%d from %d), the result may be inaccurate!\n", j, i);
250 for (i = 0; (i < n_surf); i++)
259 static void check_oo(t_atoms* atoms)
265 OOO = gmx_strdup("O");
267 for (i = 0; (i < atoms->nr); i++)
269 if (std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
271 *atoms->atomname[i] = OOO;
273 else if (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
275 *atoms->atomname[i] = OOO;
277 else if (std::strcmp(*(atoms->atomname[i]), "OC1") == 0)
279 *atoms->atomname[i] = OOO;
284 static void norm_acc(t_atoms* atoms, int nres, const real av_area[], real norm_av_area[])
288 char surffn[] = "surface.dat";
289 char ** surf_res, **surf_lines;
292 n_surf = get_lines(surffn, &surf_lines);
294 snew(surf_res, n_surf);
295 for (i = 0; (i < n_surf); i++)
297 snew(surf_res[i], 5);
298 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
301 for (i = 0; (i < nres); i++)
303 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
306 norm_av_area[i] = av_area[i] / surf[n];
310 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
311 *atoms->resinfo[i].name, surffn);
316 static void prune_ss_legend(t_matrix* mat)
318 std::vector<bool> isPresent(mat->map.size());
319 std::vector<int> newnum(mat->map.size());
320 std::vector<t_mapping> newmap;
322 for (int f = 0; f < mat->nx; f++)
324 for (int r = 0; r < mat->ny; r++)
326 isPresent[mat->matrix(f, r)] = true;
330 for (size_t i = 0; i < mat->map.size(); i++)
335 newnum[i] = newmap.size();
336 newmap.emplace_back(mat->map[i]);
339 if (newmap.size() != mat->map.size())
341 std::swap(mat->map, newmap);
342 for (int f = 0; f < mat->nx; f++)
344 for (int r = 0; r < mat->ny; r++)
346 mat->matrix(f, r) = newnum[mat->matrix(f, r)];
352 static void write_sas_mat(const char* fn, real** accr, int nframe, int nres, t_matrix* mat)
356 t_rgb rlo = { 1, 1, 1 }, rhi = { 0, 0, 0 };
361 hi = lo = accr[0][0];
362 for (i = 0; i < nframe; i++)
364 for (j = 0; j < nres; j++)
366 lo = std::min(lo, accr[i][j]);
367 hi = std::max(hi, accr[i][j]);
370 fp = gmx_ffopen(fn, "w");
371 nlev = static_cast<int>(hi - lo + 1);
372 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)", "Time", "Residue Index",
373 nframe, nres, mat->axis_x.data(), mat->axis_y.data(), accr, lo, hi, rlo, rhi, &nlev);
378 static void analyse_ss(const char* outfile, t_matrix* mat, const char* ss_string, const gmx_output_env_t* oenv)
381 int ss_count, total_count;
383 gmx::ArrayRef<t_mapping> map = mat->map;
384 std::vector<int> count(map.size());
385 std::vector<int> total(map.size(), 0);
386 // This copying would not be necessary if xvgr_legend could take a
387 // view of string views
388 std::vector<std::string> leg;
389 leg.reserve(map.size() + 1);
390 leg.emplace_back("Structure");
391 for (const auto& m : map)
393 leg.emplace_back(m.desc);
396 fp = xvgropen(outfile, "Secondary Structure", output_env_get_xvgr_tlabel(oenv),
397 "Number of Residues", oenv);
398 if (output_env_get_print_xvgr_codes(oenv))
400 fprintf(fp, "@ subtitle \"Structure = ");
402 for (size_t s = 0; s < std::strlen(ss_string); s++)
408 for (const auto& m : map)
410 if (ss_string[s] == m.code.c1)
412 fprintf(fp, "%s", m.desc);
417 xvgrLegend(fp, leg, oenv);
420 for (int f = 0; f < mat->nx; f++)
423 for (auto& c : count)
427 for (int r = 0; r < mat->ny; r++)
429 count[mat->matrix(f, r)]++;
430 total[mat->matrix(f, r)]++;
432 for (gmx::index s = 0; s != gmx::ssize(map); ++s)
434 if (std::strchr(ss_string, map[s].code.c1))
436 ss_count += count[s];
437 total_count += count[s];
440 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
441 for (const auto& c : count)
443 fprintf(fp, " %5d", c);
447 /* now print column totals */
448 fprintf(fp, "%-8s %5d", "# Totals", total_count);
449 for (const auto& t : total)
451 fprintf(fp, " %5d", t);
455 /* now print probabilities */
456 fprintf(fp, "%-8s %5.2f", "# SS pr.", total_count / static_cast<real>(mat->nx * mat->ny));
457 for (const auto& t : total)
459 fprintf(fp, " %5.2f", t / static_cast<real>(mat->nx * mat->ny));
466 int gmx_do_dssp(int argc, char* argv[])
468 const char* desc[] = {
469 "[THISMODULE] ", "reads a trajectory file and computes the secondary structure for",
470 "each time frame ", "calling the dssp program. If you do not have the dssp program,",
471 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
472 "that the dssp executable is located in ",
473 // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
474 "[TT]" GMX_DSSP_PROGRAM_PATH "[tt]. If this is not the case, then you should",
475 "set an environment variable [TT]DSSP[tt] pointing to the dssp", "executable, e.g.: [PAR]",
476 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
477 "Since version 2.0.0, dssp is invoked with a syntax that differs",
478 "from earlier versions. If you have an older version of dssp,",
479 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
480 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
481 "Even newer versions (which at the time of writing are not yet released)",
482 "are assumed to have the same syntax as 2.0.0.[PAR]",
483 "The structure assignment for each residue and time is written to an",
484 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
485 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
486 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
487 "postscript files.", "The number of residues with each secondary structure type and the",
488 "total secondary structure ([TT]-sss[tt]) count as a function of",
489 "time are also written to file ([TT]-sc[tt]).[PAR]",
490 "Solvent accessible surface (SAS) per residue can be calculated, both in",
491 "absolute values (A^2) and in fractions of the maximal accessible",
492 "surface of a residue. The maximal accessible surface is defined as",
493 "the accessible surface of a residue in a chain of glycines.",
494 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
495 "and that is more efficient.[PAR]",
496 "Finally, this program can dump the secondary structure in a special file",
497 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
498 "these two programs can be used to analyze dihedral properties as a",
499 "function of secondary structure type."
501 static gmx_bool bVerbose;
502 static const char* ss_string = "HEBT";
503 static int dsspVersion = 2;
505 { "-v", FALSE, etBOOL, { &bVerbose }, "HIDDENGenerate miles of useless information" },
506 { "-sss", FALSE, etSTR, { &ss_string }, "Secondary structures for structure count" },
511 "DSSP major version. Syntax changed with version 2" }
515 FILE * tapein, *tapeout;
516 FILE * ss, *acc, *fTArea, *tmpf;
517 const char * fnSCount, *fnArea, *fnTArea, *fnAArea;
518 const char* leg[] = { "Phobic", "Phylic" };
523 int nres, nr0, naccr, nres_plus_separators;
524 gmx_bool * bPhbres, bDoAccSurf;
526 int natoms, nframe = 0;
527 matrix box = { { 0 } };
533 real ** accr, *accr_ptr = nullptr, *av_area, *norm_av_area;
534 char pdbfile[32], tmpfile[32];
537 gmx_output_env_t* oenv;
538 gmx_rmpbc_t gpbc = nullptr;
541 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
542 { efNDX, nullptr, nullptr, ffOPTRD }, { efDAT, "-ssdump", "ssdump", ffOPTWR },
543 { efMAP, "-map", "ss", ffLIBRD }, { efXPM, "-o", "ss", ffWRITE },
544 { efXVG, "-sc", "scount", ffWRITE }, { efXPM, "-a", "area", ffOPTWR },
545 { efXVG, "-ta", "totarea", ffOPTWR }, { efXVG, "-aa", "averarea", ffOPTWR }
547 #define NFILE asize(fnm)
549 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT, NFILE, fnm,
550 asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
554 fnSCount = opt2fn("-sc", NFILE, fnm);
555 fnArea = opt2fn_null("-a", NFILE, fnm);
556 fnTArea = opt2fn_null("-ta", NFILE, fnm);
557 fnAArea = opt2fn_null("-aa", NFILE, fnm);
558 bDoAccSurf = ((fnArea != nullptr) || (fnTArea != nullptr) || (fnAArea != nullptr));
560 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xp, nullptr, box, FALSE);
561 atoms = &(top.atoms);
563 bPhbres = bPhobics(atoms);
565 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
568 for (int i = 0; (i < gnx); i++)
570 if (atoms->atom[index[i]].resind != nr0)
572 nr0 = atoms->atom[index[i]].resind;
576 fprintf(stderr, "There are %d residues in your selected group\n", nres);
578 std::strcpy(pdbfile, "ddXXXXXX");
580 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
582 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
584 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
586 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
591 std::strcpy(tmpfile, "ddXXXXXX");
593 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
595 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
597 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
599 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
604 const std::string defpathenv = GMX_DSSP_PROGRAM_PATH;
605 if ((dptr = getenv("DSSP")) == nullptr)
607 dptr = defpathenv.c_str();
609 if (!gmx_fexist(dptr))
611 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)", dptr);
613 std::string redirectionString;
614 redirectionString = prepareToRedirectStdout(bVerbose);
615 DsspInputStrings dsspStrings;
616 dsspStrings.dptr = dptr;
617 dsspStrings.pdbfile = pdbfile;
618 dsspStrings.tmpfile = tmpfile;
619 if (dsspVersion >= 2)
623 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by "
624 "do_dssp. Assuming version 2 syntax.\n\n",
628 printDsspResult(dssp, dsspStrings, redirectionString);
634 dsspStrings.dptr.clear();
638 dsspStrings.dptr = "-na";
640 printDsspResult(dssp, dsspStrings, redirectionString);
642 fprintf(stderr, "dssp cmd='%s'\n", dssp);
646 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
647 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
648 xvgr_legend(fTArea, 2, leg, oenv);
655 mat.map = readcmap(opt2fn("-map", NFILE, fnm));
657 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
658 if (natoms > atoms->nr)
660 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
664 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
667 snew(average_area, atoms->nres);
668 snew(av_area, atoms->nres);
669 snew(norm_av_area, atoms->nres);
673 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
676 t = output_env_conv_time(oenv, t);
677 if (bDoAccSurf && nframe >= naccr)
681 for (int i = naccr - 10; i < naccr; i++)
683 snew(accr[i], 2 * atoms->nres - 1);
686 gmx_rmpbc(gpbc, natoms, box, x);
687 tapein = gmx_ffopen(pdbfile, "w");
688 write_pdbfile_indexed(tapein, nullptr, atoms, x, ePBC, box, ' ', -1, gnx, index, nullptr, FALSE);
690 /* strip_dssp returns the number of lines found in the dssp file, i.e.
691 * the number of residues plus the separator lines */
693 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
694 if (nullptr == (tapeout = popen(dssp, "r")))
696 if (0 != system(dssp) || nullptr == (tapeout = gmx_ffopen(tmpfile, "r")))
702 "Failed to execute command: %s\n"
703 "Try specifying your dssp version with the -ver option.",
708 accr_ptr = accr[nframe];
710 /* strip_dssp returns the number of lines found in the dssp file, i.e.
711 * the number of residues plus the separator lines */
712 nres_plus_separators =
713 strip_dssp(tapeout, nres, bPhbres, t, accr_ptr, fTArea, &mat, average_area, oenv);
714 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
717 gmx_ffclose(tapeout);
722 } while (read_next_x(oenv, status, &t, x, box));
723 fprintf(stderr, "\n");
729 gmx_rmpbc_done(gpbc);
731 prune_ss_legend(&mat);
733 ss = opt2FILE("-o", NFILE, fnm, "w");
735 write_xpm_m(ss, mat);
738 if (opt2bSet("-ssdump", NFILE, fnm))
740 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
741 fprintf(ss, "%d\n", nres);
742 for (gmx::index j = 0; j != mat.matrix.extent(0); ++j)
744 auto row = mat.matrix.asView()[j];
745 for (gmx::index i = 0; i != row.extent(0); ++i)
747 fputc(mat.map[row[i]].code.c1, ss);
753 analyse_ss(fnSCount, &mat, ss_string, oenv);
757 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
759 for (int i = 0; i < atoms->nres; i++)
761 av_area[i] = (average_area[i] / static_cast<real>(nframe));
764 norm_acc(atoms, nres, av_area, norm_av_area);
768 acc = xvgropen(fnAArea, "Average Accessible Area", "Residue", "A\\S2", oenv);
769 for (int i = 0; (i < nres); i++)
771 fprintf(acc, "%5d %10g %10g\n", i + 1, av_area[i], norm_av_area[i]);
777 view_all(oenv, NFILE, fnm);