2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/commandline/viewit.h"
50 #include "gromacs/fileio/confio.h"
51 #include "gromacs/fileio/matio.h"
52 #include "gromacs/fileio/pdbio.h"
53 #include "gromacs/fileio/trxio.h"
54 #include "gromacs/fileio/xvgr.h"
55 #include "gromacs/gmxana/gmx_ana.h"
56 #include "gromacs/gmxana/gstat.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/topology/index.h"
59 #include "gromacs/topology/topology.h"
60 #include "gromacs/utility/arraysize.h"
61 #include "gromacs/utility/cstringutil.h"
62 #include "gromacs/utility/dir_separator.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/futil.h"
65 #include "gromacs/utility/smalloc.h"
66 #include "gromacs/utility/strdb.h"
68 #if GMX_NATIVE_WINDOWS
69 # define NULL_DEVICE "nul"
71 # define pclose _pclose
73 # define NULL_DEVICE "/dev/null"
76 struct DsspInputStrings
83 static const char* prepareToRedirectStdout(bool bVerbose)
85 return bVerbose ? "" : "2>" NULL_DEVICE;
88 static void printDsspResult(char* dssp, const DsspInputStrings& strings, const std::string& redirectionString)
90 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
91 sprintf(dssp, "%s -i %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(), redirectionString.c_str());
93 sprintf(dssp, "%s -i %s -o %s > %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(),
94 strings.tmpfile.c_str(), NULL_DEVICE, redirectionString.c_str());
99 static int strip_dssp(FILE* tapeout,
101 const gmx_bool bPhobres[],
107 const gmx_output_env_t* oenv)
109 static gmx_bool bFirst = TRUE;
111 char buf[STRLEN + 1];
113 int nr, iacc, nresidues;
114 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
121 fgets2(buf, STRLEN, tapeout);
122 } while (std::strstr(buf, "KAPPA") == nullptr);
125 /* Since we also have empty lines in the dssp output (temp) file,
126 * and some line content is saved to the ssbuf variable,
127 * we need more memory than just nres elements. To be shure,
128 * we allocate 2*nres-1, since for each chain there is a
129 * separating line in the temp file. (At most each residue
130 * could have been defined as a separate chain.) */
131 snew(ssbuf, 2 * nres - 1);
138 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != nullptr); nr++)
140 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
142 SSTP = '='; /* Chain separator sign '=' */
146 SSTP = buf[16] == ' ' ? '~' : buf[16];
152 /* Only calculate solvent accessible area if needed */
153 if ((nullptr != acc) && (buf[13] != '!'))
155 sscanf(&(buf[34]), "%d", &iacc);
157 /* average_area and bPhobres are counted from 0...nres-1 */
158 average_area[nresidues] += iacc;
159 if (bPhobres[nresidues])
169 /* Keep track of the residue number (do not count chain separator lines '!') */
179 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n",
183 mat->title = "Secondary structure";
185 mat->label_x = output_env_get_time_label(oenv);
186 mat->label_y = "Residue";
187 mat->bDiscrete = true;
189 mat->axis_y.resize(nr);
190 std::iota(mat->axis_y.begin(), mat->axis_y.end(), 1);
191 mat->axis_x.resize(0);
192 mat->matrix.resize(1, 1);
195 mat->axis_x.push_back(t);
196 mat->matrix.resize(++(mat->nx), nr);
197 auto columnIndex = mat->nx - 1;
198 for (int i = 0; i < nr; i++)
200 t_xpmelmt c = { ssbuf[i], 0 };
201 mat->matrix(columnIndex, i) = std::max(static_cast<t_matelmt>(0), searchcmap(mat->map, c));
206 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01 * iaccb, 0.01 * iaccf);
209 /* Return the number of lines found in the dssp file (i.e. number
210 * of redidues plus chain separator lines).
211 * This is the number of y elements needed for the area xpm file */
215 static gmx_bool* bPhobics(t_atoms* atoms)
221 char surffn[] = "surface.dat";
222 char ** surf_res, **surf_lines;
225 nb = get_lines("phbres.dat", &cb);
226 snew(bb, atoms->nres);
228 n_surf = get_lines(surffn, &surf_lines);
229 snew(surf_res, n_surf);
230 for (i = 0; (i < n_surf); i++)
232 snew(surf_res[i], 5);
233 sscanf(surf_lines[i], "%s", surf_res[i]);
237 for (i = 0, j = 0; (i < atoms->nres); i++)
239 if (-1 != search_str(n_surf, surf_res, *atoms->resinfo[i].name))
241 bb[j++] = (-1 != search_str(nb, cb, *atoms->resinfo[i].name));
248 "Not all residues were recognized (%d from %d), the result may be inaccurate!\n", j, i);
251 for (i = 0; (i < n_surf); i++)
260 static void check_oo(t_atoms* atoms)
266 OOO = gmx_strdup("O");
268 for (i = 0; (i < atoms->nr); i++)
270 if ((std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
271 || (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
272 || (std::strcmp(*(atoms->atomname[i]), "OC1") == 0))
274 *atoms->atomname[i] = OOO;
279 static void norm_acc(t_atoms* atoms, int nres, const real av_area[], real norm_av_area[])
283 char surffn[] = "surface.dat";
284 char ** surf_res, **surf_lines;
287 n_surf = get_lines(surffn, &surf_lines);
289 snew(surf_res, n_surf);
290 for (i = 0; (i < n_surf); i++)
292 snew(surf_res[i], 5);
293 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
296 for (i = 0; (i < nres); i++)
298 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
301 norm_av_area[i] = av_area[i] / surf[n];
305 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
306 *atoms->resinfo[i].name, surffn);
311 static void prune_ss_legend(t_matrix* mat)
313 std::vector<bool> isPresent(mat->map.size());
314 std::vector<int> newnum(mat->map.size());
315 std::vector<t_mapping> newmap;
317 for (int f = 0; f < mat->nx; f++)
319 for (int r = 0; r < mat->ny; r++)
321 isPresent[mat->matrix(f, r)] = true;
325 for (size_t i = 0; i < mat->map.size(); i++)
330 newnum[i] = newmap.size();
331 newmap.emplace_back(mat->map[i]);
334 if (newmap.size() != mat->map.size())
336 std::swap(mat->map, newmap);
337 for (int f = 0; f < mat->nx; f++)
339 for (int r = 0; r < mat->ny; r++)
341 mat->matrix(f, r) = newnum[mat->matrix(f, r)];
347 static void write_sas_mat(const char* fn, real** accr, int nframe, int nres, t_matrix* mat)
351 t_rgb rlo = { 1, 1, 1 }, rhi = { 0, 0, 0 };
356 hi = lo = accr[0][0];
357 for (i = 0; i < nframe; i++)
359 for (j = 0; j < nres; j++)
361 lo = std::min(lo, accr[i][j]);
362 hi = std::max(hi, accr[i][j]);
365 fp = gmx_ffopen(fn, "w");
366 nlev = static_cast<int>(hi - lo + 1);
367 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)", "Time", "Residue Index",
368 nframe, nres, mat->axis_x.data(), mat->axis_y.data(), accr, lo, hi, rlo, rhi, &nlev);
373 static void analyse_ss(const char* outfile, t_matrix* mat, const char* ss_string, const gmx_output_env_t* oenv)
376 int ss_count, total_count;
378 gmx::ArrayRef<t_mapping> map = mat->map;
379 std::vector<int> count(map.size());
380 std::vector<int> total(map.size(), 0);
381 // This copying would not be necessary if xvgr_legend could take a
382 // view of string views
383 std::vector<std::string> leg;
384 leg.reserve(map.size() + 1);
385 leg.emplace_back("Structure");
386 for (const auto& m : map)
388 leg.emplace_back(m.desc);
391 fp = xvgropen(outfile, "Secondary Structure", output_env_get_xvgr_tlabel(oenv),
392 "Number of Residues", oenv);
393 if (output_env_get_print_xvgr_codes(oenv))
395 fprintf(fp, "@ subtitle \"Structure = ");
397 for (size_t s = 0; s < std::strlen(ss_string); s++)
403 for (const auto& m : map)
405 if (ss_string[s] == m.code.c1)
407 fprintf(fp, "%s", m.desc);
412 xvgrLegend(fp, leg, oenv);
415 for (int f = 0; f < mat->nx; f++)
418 for (auto& c : count)
422 for (int r = 0; r < mat->ny; r++)
424 count[mat->matrix(f, r)]++;
425 total[mat->matrix(f, r)]++;
427 for (gmx::index s = 0; s != gmx::ssize(map); ++s)
429 if (std::strchr(ss_string, map[s].code.c1))
431 ss_count += count[s];
432 total_count += count[s];
435 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
436 for (const auto& c : count)
438 fprintf(fp, " %5d", c);
442 /* now print column totals */
443 fprintf(fp, "%-8s %5d", "# Totals", total_count);
444 for (const auto& t : total)
446 fprintf(fp, " %5d", t);
450 /* now print probabilities */
451 fprintf(fp, "%-8s %5.2f", "# SS pr.", total_count / static_cast<real>(mat->nx * mat->ny));
452 for (const auto& t : total)
454 fprintf(fp, " %5.2f", t / static_cast<real>(mat->nx * mat->ny));
461 int gmx_do_dssp(int argc, char* argv[])
463 const char* desc[] = {
464 "[THISMODULE] ", "reads a trajectory file and computes the secondary structure for",
465 "each time frame ", "calling the dssp program. If you do not have the dssp program,",
466 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
467 "that the dssp executable is located in ",
468 // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
469 "[TT]" GMX_DSSP_PROGRAM_PATH "[tt]. If this is not the case, then you should",
470 "set an environment variable [TT]DSSP[tt] pointing to the dssp", "executable, e.g.: [PAR]",
471 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
472 "Since version 2.0.0, dssp is invoked with a syntax that differs",
473 "from earlier versions. If you have an older version of dssp,",
474 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
475 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
476 "Even newer versions (which at the time of writing are not yet released)",
477 "are assumed to have the same syntax as 2.0.0.[PAR]",
478 "The structure assignment for each residue and time is written to an",
479 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
480 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
481 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
482 "postscript files.", "The number of residues with each secondary structure type and the",
483 "total secondary structure ([TT]-sss[tt]) count as a function of",
484 "time are also written to file ([TT]-sc[tt]).[PAR]",
485 "Solvent accessible surface (SAS) per residue can be calculated, both in",
486 "absolute values (A^2) and in fractions of the maximal accessible",
487 "surface of a residue. The maximal accessible surface is defined as",
488 "the accessible surface of a residue in a chain of glycines.",
489 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
490 "and that is more efficient.[PAR]",
491 "Finally, this program can dump the secondary structure in a special file",
492 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
493 "these two programs can be used to analyze dihedral properties as a",
494 "function of secondary structure type."
496 static gmx_bool bVerbose;
497 static const char* ss_string = "HEBT";
498 static int dsspVersion = 2;
500 { "-v", FALSE, etBOOL, { &bVerbose }, "HIDDENGenerate miles of useless information" },
501 { "-sss", FALSE, etSTR, { &ss_string }, "Secondary structures for structure count" },
506 "DSSP major version. Syntax changed with version 2" }
510 FILE * tapein, *tapeout;
511 FILE * ss, *acc, *fTArea, *tmpf;
512 const char * fnSCount, *fnArea, *fnTArea, *fnAArea;
513 const char* leg[] = { "Phobic", "Phylic" };
518 int nres, nr0, naccr, nres_plus_separators;
519 gmx_bool * bPhbres, bDoAccSurf;
521 int natoms, nframe = 0;
522 matrix box = { { 0 } };
528 real ** accr, *accr_ptr = nullptr, *av_area, *norm_av_area;
529 char pdbfile[32], tmpfile[32];
532 gmx_output_env_t* oenv;
533 gmx_rmpbc_t gpbc = nullptr;
536 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
537 { efNDX, nullptr, nullptr, ffOPTRD }, { efDAT, "-ssdump", "ssdump", ffOPTWR },
538 { efMAP, "-map", "ss", ffLIBRD }, { efXPM, "-o", "ss", ffWRITE },
539 { efXVG, "-sc", "scount", ffWRITE }, { efXPM, "-a", "area", ffOPTWR },
540 { efXVG, "-ta", "totarea", ffOPTWR }, { efXVG, "-aa", "averarea", ffOPTWR }
542 #define NFILE asize(fnm)
544 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT, NFILE, fnm,
545 asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
549 fnSCount = opt2fn("-sc", NFILE, fnm);
550 fnArea = opt2fn_null("-a", NFILE, fnm);
551 fnTArea = opt2fn_null("-ta", NFILE, fnm);
552 fnAArea = opt2fn_null("-aa", NFILE, fnm);
553 bDoAccSurf = ((fnArea != nullptr) || (fnTArea != nullptr) || (fnAArea != nullptr));
555 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &pbcType, &xp, nullptr, box, FALSE);
556 atoms = &(top.atoms);
558 bPhbres = bPhobics(atoms);
560 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
563 for (int i = 0; (i < gnx); i++)
565 if (atoms->atom[index[i]].resind != nr0)
567 nr0 = atoms->atom[index[i]].resind;
571 fprintf(stderr, "There are %d residues in your selected group\n", nres);
573 std::strcpy(pdbfile, "ddXXXXXX");
575 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
577 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
579 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
581 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
586 std::strcpy(tmpfile, "ddXXXXXX");
588 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
590 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
592 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
594 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
599 const std::string defpathenv = GMX_DSSP_PROGRAM_PATH;
600 if ((dptr = getenv("DSSP")) == nullptr)
602 dptr = defpathenv.c_str();
604 if (!gmx_fexist(dptr))
606 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)", dptr);
608 std::string redirectionString;
609 redirectionString = prepareToRedirectStdout(bVerbose);
610 DsspInputStrings dsspStrings;
611 dsspStrings.dptr = dptr;
612 dsspStrings.pdbfile = pdbfile;
613 dsspStrings.tmpfile = tmpfile;
614 if (dsspVersion >= 2)
618 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by "
619 "do_dssp. Assuming version 2 syntax.\n\n",
623 printDsspResult(dssp, dsspStrings, redirectionString);
629 dsspStrings.dptr.clear();
633 dsspStrings.dptr = "-na";
635 printDsspResult(dssp, dsspStrings, redirectionString);
637 fprintf(stderr, "dssp cmd='%s'\n", dssp);
641 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
642 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
643 xvgr_legend(fTArea, 2, leg, oenv);
650 mat.map = readcmap(opt2fn("-map", NFILE, fnm));
652 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
653 if (natoms > atoms->nr)
655 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
659 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
662 snew(average_area, atoms->nres);
663 snew(av_area, atoms->nres);
664 snew(norm_av_area, atoms->nres);
668 gpbc = gmx_rmpbc_init(&top.idef, pbcType, natoms);
671 t = output_env_conv_time(oenv, t);
672 if (bDoAccSurf && nframe >= naccr)
676 for (int i = naccr - 10; i < naccr; i++)
678 snew(accr[i], 2 * atoms->nres - 1);
681 gmx_rmpbc(gpbc, natoms, box, x);
682 tapein = gmx_ffopen(pdbfile, "w");
683 write_pdbfile_indexed(tapein, nullptr, atoms, x, pbcType, box, ' ', -1, gnx, index, nullptr, FALSE);
685 /* strip_dssp returns the number of lines found in the dssp file, i.e.
686 * the number of residues plus the separator lines */
688 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
689 if (nullptr == (tapeout = popen(dssp, "r")))
691 if (0 != system(dssp) || nullptr == (tapeout = gmx_ffopen(tmpfile, "r")))
697 "Failed to execute command: %s\n"
698 "Try specifying your dssp version with the -ver option.",
703 accr_ptr = accr[nframe];
705 /* strip_dssp returns the number of lines found in the dssp file, i.e.
706 * the number of residues plus the separator lines */
707 nres_plus_separators =
708 strip_dssp(tapeout, nres, bPhbres, t, accr_ptr, fTArea, &mat, average_area, oenv);
709 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
712 gmx_ffclose(tapeout);
717 } while (read_next_x(oenv, status, &t, x, box));
718 fprintf(stderr, "\n");
724 gmx_rmpbc_done(gpbc);
726 prune_ss_legend(&mat);
728 ss = opt2FILE("-o", NFILE, fnm, "w");
730 write_xpm_m(ss, mat);
733 if (opt2bSet("-ssdump", NFILE, fnm))
735 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
736 fprintf(ss, "%d\n", nres);
737 for (gmx::index j = 0; j != mat.matrix.extent(0); ++j)
739 auto row = mat.matrix.asView()[j];
740 for (gmx::index i = 0; i != row.extent(0); ++i)
742 fputc(mat.map[row[i]].code.c1, ss);
748 analyse_ss(fnSCount, &mat, ss_string, oenv);
752 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
754 for (int i = 0; i < atoms->nres; i++)
756 av_area[i] = (average_area[i] / static_cast<real>(nframe));
759 norm_acc(atoms, nres, av_area, norm_av_area);
763 acc = xvgropen(fnAArea, "Average Accessible Area", "Residue", "A\\S2", oenv);
764 for (int i = 0; (i < nres); i++)
766 fprintf(acc, "%5d %10g %10g\n", i + 1, av_area[i], norm_av_area[i]);
772 view_all(oenv, NFILE, fnm);