2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/commandline/viewit.h"
50 #include "gromacs/fileio/confio.h"
51 #include "gromacs/fileio/matio.h"
52 #include "gromacs/fileio/pdbio.h"
53 #include "gromacs/fileio/trxio.h"
54 #include "gromacs/fileio/xvgr.h"
55 #include "gromacs/gmxana/gmx_ana.h"
56 #include "gromacs/gmxana/gstat.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/topology/index.h"
59 #include "gromacs/topology/topology.h"
60 #include "gromacs/utility/arraysize.h"
61 #include "gromacs/utility/cstringutil.h"
62 #include "gromacs/utility/dir_separator.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/futil.h"
65 #include "gromacs/utility/smalloc.h"
66 #include "gromacs/utility/strdb.h"
68 #if GMX_NATIVE_WINDOWS
69 # define NULL_DEVICE "nul"
71 # define pclose _pclose
73 # define NULL_DEVICE "/dev/null"
76 struct DsspInputStrings
83 static const char* prepareToRedirectStdout(bool bVerbose)
85 return bVerbose ? "" : "2>" NULL_DEVICE;
88 static void printDsspResult(char* dssp, const DsspInputStrings& strings, const std::string& redirectionString)
90 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
91 sprintf(dssp, "%s -i %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(), redirectionString.c_str());
93 sprintf(dssp, "%s -i %s -o %s > %s %s", strings.dptr.c_str(), strings.pdbfile.c_str(),
94 strings.tmpfile.c_str(), NULL_DEVICE, redirectionString.c_str());
99 static int strip_dssp(FILE* tapeout,
101 const gmx_bool bPhobres[],
107 const gmx_output_env_t* oenv)
109 static gmx_bool bFirst = TRUE;
111 char buf[STRLEN + 1];
113 int nr, iacc, nresidues;
114 int naccf, naccb; /* Count hydrophobic and hydrophilic residues */
121 fgets2(buf, STRLEN, tapeout);
122 } while (std::strstr(buf, "KAPPA") == nullptr);
125 /* Since we also have empty lines in the dssp output (temp) file,
126 * and some line content is saved to the ssbuf variable,
127 * we need more memory than just nres elements. To be shure,
128 * we allocate 2*nres-1, since for each chain there is a
129 * separating line in the temp file. (At most each residue
130 * could have been defined as a separate chain.) */
131 snew(ssbuf, 2 * nres - 1);
138 for (nr = 0; (fgets2(buf, STRLEN, tapeout) != nullptr); nr++)
140 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
142 SSTP = '='; /* Chain separator sign '=' */
146 SSTP = buf[16] == ' ' ? '~' : buf[16];
152 /* Only calculate solvent accessible area if needed */
153 if ((nullptr != acc) && (buf[13] != '!'))
155 sscanf(&(buf[34]), "%d", &iacc);
157 /* average_area and bPhobres are counted from 0...nres-1 */
158 average_area[nresidues] += iacc;
159 if (bPhobres[nresidues])
169 /* Keep track of the residue number (do not count chain separator lines '!') */
179 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n",
183 mat->title = "Secondary structure";
185 mat->label_x = output_env_get_time_label(oenv);
186 mat->label_y = "Residue";
187 mat->bDiscrete = true;
189 mat->axis_y.resize(nr);
190 std::iota(mat->axis_y.begin(), mat->axis_y.end(), 1);
191 mat->axis_x.resize(0);
192 mat->matrix.resize(0, 0);
195 mat->axis_x.push_back(t);
196 mat->matrix.resize(mat->matrix.extent(0), nr);
197 mat->nx = mat->matrix.extent(0);
198 auto columnIndex = mat->nx - 1;
199 for (int i = 0; i < nr; i++)
201 t_xpmelmt c = { ssbuf[i], 0 };
202 mat->matrix(columnIndex, i) = std::max(static_cast<t_matelmt>(0), searchcmap(mat->map, c));
207 fprintf(fTArea, "%10g %10g %10g\n", t, 0.01 * iaccb, 0.01 * iaccf);
210 /* Return the number of lines found in the dssp file (i.e. number
211 * of redidues plus chain separator lines).
212 * This is the number of y elements needed for the area xpm file */
216 static gmx_bool* bPhobics(t_atoms* atoms)
222 char surffn[] = "surface.dat";
223 char ** surf_res, **surf_lines;
226 nb = get_lines("phbres.dat", &cb);
227 snew(bb, atoms->nres);
229 n_surf = get_lines(surffn, &surf_lines);
230 snew(surf_res, n_surf);
231 for (i = 0; (i < n_surf); i++)
233 snew(surf_res[i], 5);
234 sscanf(surf_lines[i], "%s", surf_res[i]);
238 for (i = 0, j = 0; (i < atoms->nres); i++)
240 if (-1 != search_str(n_surf, surf_res, *atoms->resinfo[i].name))
242 bb[j++] = (-1 != search_str(nb, cb, *atoms->resinfo[i].name));
249 "Not all residues were recognized (%d from %d), the result may be inaccurate!\n", j, i);
252 for (i = 0; (i < n_surf); i++)
261 static void check_oo(t_atoms* atoms)
267 OOO = gmx_strdup("O");
269 for (i = 0; (i < atoms->nr); i++)
271 if (std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
273 *atoms->atomname[i] = OOO;
275 else if (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
277 *atoms->atomname[i] = OOO;
279 else if (std::strcmp(*(atoms->atomname[i]), "OC1") == 0)
281 *atoms->atomname[i] = OOO;
286 static void norm_acc(t_atoms* atoms, int nres, const real av_area[], real norm_av_area[])
290 char surffn[] = "surface.dat";
291 char ** surf_res, **surf_lines;
294 n_surf = get_lines(surffn, &surf_lines);
296 snew(surf_res, n_surf);
297 for (i = 0; (i < n_surf); i++)
299 snew(surf_res[i], 5);
300 sscanf(surf_lines[i], "%s %lf", surf_res[i], &surf[i]);
303 for (i = 0; (i < nres); i++)
305 n = search_str(n_surf, surf_res, *atoms->resinfo[i].name);
308 norm_av_area[i] = av_area[i] / surf[n];
312 fprintf(stderr, "Residue %s not found in surface database (%s)\n",
313 *atoms->resinfo[i].name, surffn);
318 static void prune_ss_legend(t_matrix* mat)
320 std::vector<bool> isPresent(mat->map.size());
321 std::vector<int> newnum(mat->map.size());
322 std::vector<t_mapping> newmap;
324 for (int f = 0; f < mat->nx; f++)
326 for (int r = 0; r < mat->ny; r++)
328 isPresent[mat->matrix(f, r)] = true;
332 for (size_t i = 0; i < mat->map.size(); i++)
337 newnum[i] = newmap.size();
338 newmap.emplace_back(mat->map[i]);
341 if (newmap.size() != mat->map.size())
343 std::swap(mat->map, newmap);
344 for (int f = 0; f < mat->nx; f++)
346 for (int r = 0; r < mat->ny; r++)
348 mat->matrix(f, r) = newnum[mat->matrix(f, r)];
354 static void write_sas_mat(const char* fn, real** accr, int nframe, int nres, t_matrix* mat)
358 t_rgb rlo = { 1, 1, 1 }, rhi = { 0, 0, 0 };
363 hi = lo = accr[0][0];
364 for (i = 0; i < nframe; i++)
366 for (j = 0; j < nres; j++)
368 lo = std::min(lo, accr[i][j]);
369 hi = std::max(hi, accr[i][j]);
372 fp = gmx_ffopen(fn, "w");
373 nlev = static_cast<int>(hi - lo + 1);
374 write_xpm(fp, 0, "Solvent Accessible Surface", "Surface (A^2)", "Time", "Residue Index",
375 nframe, nres, mat->axis_x.data(), mat->axis_y.data(), accr, lo, hi, rlo, rhi, &nlev);
380 static void analyse_ss(const char* outfile, t_matrix* mat, const char* ss_string, const gmx_output_env_t* oenv)
383 int ss_count, total_count;
385 gmx::ArrayRef<t_mapping> map = mat->map;
386 std::vector<int> count(map.size());
387 std::vector<int> total(map.size(), 0);
388 // This copying would not be necessary if xvgr_legend could take a
389 // view of string views
390 std::vector<std::string> leg;
391 leg.reserve(map.size() + 1);
392 leg.emplace_back("Structure");
393 for (const auto& m : map)
395 leg.emplace_back(m.desc);
398 fp = xvgropen(outfile, "Secondary Structure", output_env_get_xvgr_tlabel(oenv),
399 "Number of Residues", oenv);
400 if (output_env_get_print_xvgr_codes(oenv))
402 fprintf(fp, "@ subtitle \"Structure = ");
404 for (size_t s = 0; s < std::strlen(ss_string); s++)
410 for (const auto& m : map)
412 if (ss_string[s] == m.code.c1)
414 fprintf(fp, "%s", m.desc);
419 xvgrLegend(fp, leg, oenv);
422 for (int f = 0; f < mat->nx; f++)
425 for (auto& c : count)
429 for (int r = 0; r < mat->ny; r++)
431 count[mat->matrix(f, r)]++;
432 total[mat->matrix(f, r)]++;
434 for (gmx::index s = 0; s != gmx::ssize(map); ++s)
436 if (std::strchr(ss_string, map[s].code.c1))
438 ss_count += count[s];
439 total_count += count[s];
442 fprintf(fp, "%8g %5d", mat->axis_x[f], ss_count);
443 for (const auto& c : count)
445 fprintf(fp, " %5d", c);
449 /* now print column totals */
450 fprintf(fp, "%-8s %5d", "# Totals", total_count);
451 for (const auto& t : total)
453 fprintf(fp, " %5d", t);
457 /* now print probabilities */
458 fprintf(fp, "%-8s %5.2f", "# SS pr.", total_count / static_cast<real>(mat->nx * mat->ny));
459 for (const auto& t : total)
461 fprintf(fp, " %5.2f", t / static_cast<real>(mat->nx * mat->ny));
468 int gmx_do_dssp(int argc, char* argv[])
470 const char* desc[] = {
472 "reads a trajectory file and computes the secondary structure for",
474 "calling the dssp program. If you do not have the dssp program,",
475 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
476 "that the dssp executable is located in ",
477 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
478 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
479 "executable, e.g.: [PAR]",
480 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
481 "Since version 2.0.0, dssp is invoked with a syntax that differs",
482 "from earlier versions. If you have an older version of dssp,",
483 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
484 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
485 "Even newer versions (which at the time of writing are not yet released)",
486 "are assumed to have the same syntax as 2.0.0.[PAR]",
487 "The structure assignment for each residue and time is written to an",
488 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
489 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
490 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
492 "The number of residues with each secondary structure type and the",
493 "total secondary structure ([TT]-sss[tt]) count as a function of",
494 "time are also written to file ([TT]-sc[tt]).[PAR]",
495 "Solvent accessible surface (SAS) per residue can be calculated, both in",
496 "absolute values (A^2) and in fractions of the maximal accessible",
497 "surface of a residue. The maximal accessible surface is defined as",
498 "the accessible surface of a residue in a chain of glycines.",
499 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
500 "and that is more efficient.[PAR]",
501 "Finally, this program can dump the secondary structure in a special file",
502 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
503 "these two programs can be used to analyze dihedral properties as a",
504 "function of secondary structure type."
506 static gmx_bool bVerbose;
507 static const char* ss_string = "HEBT";
508 static int dsspVersion = 2;
510 { "-v", FALSE, etBOOL, { &bVerbose }, "HIDDENGenerate miles of useless information" },
511 { "-sss", FALSE, etSTR, { &ss_string }, "Secondary structures for structure count" },
516 "DSSP major version. Syntax changed with version 2" }
520 FILE * tapein, *tapeout;
521 FILE * ss, *acc, *fTArea, *tmpf;
522 const char * fnSCount, *fnArea, *fnTArea, *fnAArea;
523 const char* leg[] = { "Phobic", "Phylic" };
528 int nres, nr0, naccr, nres_plus_separators;
529 gmx_bool * bPhbres, bDoAccSurf;
531 int natoms, nframe = 0;
532 matrix box = { { 0 } };
538 real ** accr, *accr_ptr = nullptr, *av_area, *norm_av_area;
539 char pdbfile[32], tmpfile[32];
542 gmx_output_env_t* oenv;
543 gmx_rmpbc_t gpbc = nullptr;
546 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
547 { efNDX, nullptr, nullptr, ffOPTRD }, { efDAT, "-ssdump", "ssdump", ffOPTWR },
548 { efMAP, "-map", "ss", ffLIBRD }, { efXPM, "-o", "ss", ffWRITE },
549 { efXVG, "-sc", "scount", ffWRITE }, { efXPM, "-a", "area", ffOPTWR },
550 { efXVG, "-ta", "totarea", ffOPTWR }, { efXVG, "-aa", "averarea", ffOPTWR }
552 #define NFILE asize(fnm)
554 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT, NFILE, fnm,
555 asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
559 fnSCount = opt2fn("-sc", NFILE, fnm);
560 fnArea = opt2fn_null("-a", NFILE, fnm);
561 fnTArea = opt2fn_null("-ta", NFILE, fnm);
562 fnAArea = opt2fn_null("-aa", NFILE, fnm);
563 bDoAccSurf = ((fnArea != nullptr) || (fnTArea != nullptr) || (fnAArea != nullptr));
565 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xp, nullptr, box, FALSE);
566 atoms = &(top.atoms);
568 bPhbres = bPhobics(atoms);
570 get_index(atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpnm);
573 for (int i = 0; (i < gnx); i++)
575 if (atoms->atom[index[i]].resind != nr0)
577 nr0 = atoms->atom[index[i]].resind;
581 fprintf(stderr, "There are %d residues in your selected group\n", nres);
583 std::strcpy(pdbfile, "ddXXXXXX");
585 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
587 sprintf(pdbfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
589 if ((tmpf = fopen(pdbfile, "w")) == nullptr)
591 gmx_fatal(FARGS, "Can not open tmp file %s", pdbfile);
596 std::strcpy(tmpfile, "ddXXXXXX");
598 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
600 sprintf(tmpfile, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR, DIR_SEPARATOR);
602 if ((tmpf = fopen(tmpfile, "w")) == nullptr)
604 gmx_fatal(FARGS, "Can not open tmp file %s", tmpfile);
609 if ((dptr = getenv("DSSP")) == nullptr)
611 dptr = "/usr/local/bin/dssp";
613 if (!gmx_fexist(dptr))
615 gmx_fatal(FARGS, "DSSP executable (%s) does not exist (use setenv DSSP)", dptr);
617 std::string redirectionString;
618 redirectionString = prepareToRedirectStdout(bVerbose);
619 DsspInputStrings dsspStrings;
620 dsspStrings.dptr = dptr;
621 dsspStrings.pdbfile = pdbfile;
622 dsspStrings.tmpfile = tmpfile;
623 if (dsspVersion >= 2)
627 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by "
628 "do_dssp. Assuming version 2 syntax.\n\n",
632 printDsspResult(dssp, dsspStrings, redirectionString);
638 dsspStrings.dptr.clear();
642 dsspStrings.dptr = "-na";
644 printDsspResult(dssp, dsspStrings, redirectionString);
646 fprintf(stderr, "dssp cmd='%s'\n", dssp);
650 fTArea = xvgropen(fnTArea, "Solvent Accessible Surface Area",
651 output_env_get_xvgr_tlabel(oenv), "Area (nm\\S2\\N)", oenv);
652 xvgr_legend(fTArea, 2, leg, oenv);
659 mat.map = readcmap(opt2fn("-map", NFILE, fnm));
661 natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
662 if (natoms > atoms->nr)
664 gmx_fatal(FARGS, "\nTrajectory does not match topology!");
668 gmx_fatal(FARGS, "\nTrajectory does not match selected group!");
671 snew(average_area, atoms->nres);
672 snew(av_area, atoms->nres);
673 snew(norm_av_area, atoms->nres);
677 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
680 t = output_env_conv_time(oenv, t);
681 if (bDoAccSurf && nframe >= naccr)
685 for (int i = naccr - 10; i < naccr; i++)
687 snew(accr[i], 2 * atoms->nres - 1);
690 gmx_rmpbc(gpbc, natoms, box, x);
691 tapein = gmx_ffopen(pdbfile, "w");
692 write_pdbfile_indexed(tapein, nullptr, atoms, x, ePBC, box, ' ', -1, gnx, index, nullptr, FALSE);
694 /* strip_dssp returns the number of lines found in the dssp file, i.e.
695 * the number of residues plus the separator lines */
697 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
698 if (nullptr == (tapeout = popen(dssp, "r")))
700 if (0 != system(dssp) || nullptr == (tapeout = gmx_ffopen(tmpfile, "r")))
706 "Failed to execute command: %s\n"
707 "Try specifying your dssp version with the -ver option.",
712 accr_ptr = accr[nframe];
714 /* strip_dssp returns the number of lines found in the dssp file, i.e.
715 * the number of residues plus the separator lines */
716 nres_plus_separators =
717 strip_dssp(tapeout, nres, bPhbres, t, accr_ptr, fTArea, &mat, average_area, oenv);
718 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
721 gmx_ffclose(tapeout);
726 } while (read_next_x(oenv, status, &t, x, box));
727 fprintf(stderr, "\n");
733 gmx_rmpbc_done(gpbc);
735 prune_ss_legend(&mat);
737 ss = opt2FILE("-o", NFILE, fnm, "w");
739 write_xpm_m(ss, mat);
742 if (opt2bSet("-ssdump", NFILE, fnm))
744 ss = opt2FILE("-ssdump", NFILE, fnm, "w");
745 fprintf(ss, "%d\n", nres);
746 for (gmx::index j = 0; j != mat.matrix.extent(0); ++j)
748 auto row = mat.matrix.asView()[j];
749 for (gmx::index i = 0; i != row.extent(0); ++i)
751 fputc(mat.map[row[i]].code.c1, ss);
757 analyse_ss(fnSCount, &mat, ss_string, oenv);
761 write_sas_mat(fnArea, accr, nframe, nres_plus_separators, &mat);
763 for (int i = 0; i < atoms->nres; i++)
765 av_area[i] = (average_area[i] / static_cast<real>(nframe));
768 norm_acc(atoms, nres, av_area, norm_av_area);
772 acc = xvgropen(fnAArea, "Average Accessible Area", "Residue", "A\\S2", oenv);
773 for (int i = 0; (i < nres); i++)
775 fprintf(acc, "%5d %10g %10g\n", i + 1, av_area[i], norm_av_area[i]);
781 view_all(oenv, NFILE, fnm);