2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "gromacs/fileio/pdbio.h"
50 #include "gromacs/fileio/gmxfio.h"
51 #include "gromacs/math/units.h"
52 #include "gromacs/math/utilities.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/pbcutil/pbc.h"
55 #include "gromacs/topology/atomprop.h"
56 #include "gromacs/topology/ifunc.h"
57 #include "gromacs/topology/residuetypes.h"
58 #include "gromacs/topology/symtab.h"
59 #include "gromacs/topology/topology.h"
60 #include "gromacs/utility/coolstuff.h"
61 #include "gromacs/utility/cstringutil.h"
62 #include "gromacs/utility/enumerationhelpers.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/futil.h"
65 #include "gromacs/utility/smalloc.h"
66 #include "gromacs/utility/snprintf.h"
67 #include "gromacs/utility/stringtoenumvalueconverter.h"
74 typedef struct gmx_conect_t
78 gmx_conection_t* conect;
81 const char* enumValueToString(PdbRecordType enumValue)
83 static constexpr gmx::EnumerationArray<PdbRecordType, const char*> pdbRecordTypeName = {
84 "ATOM ", "HETATM", "ANISOU", "CRYST1", "COMPND", "MODEL",
85 "ENDMDL", "TER", "HEADER", "TITLE", "REMARK", "CONECT"
87 return pdbRecordTypeName[enumValue];
90 #define REMARK_SIM_BOX "REMARK THIS IS A SIMULATION BOX"
92 void gmx_write_pdb_box(FILE* out, PbcType pbcType, const matrix box)
94 real alpha, beta, gamma;
96 if (pbcType == PbcType::Unset)
98 pbcType = guessPbcType(box);
101 if (pbcType == PbcType::No)
106 if (norm2(box[YY]) * norm2(box[ZZ]) != 0)
108 alpha = gmx::c_rad2Deg * gmx_angle(box[YY], box[ZZ]);
114 if (norm2(box[XX]) * norm2(box[ZZ]) != 0)
116 beta = gmx::c_rad2Deg * gmx_angle(box[XX], box[ZZ]);
122 if (norm2(box[XX]) * norm2(box[YY]) != 0)
124 gamma = gmx::c_rad2Deg * gmx_angle(box[XX], box[YY]);
130 fprintf(out, "REMARK THIS IS A SIMULATION BOX\n");
131 if (pbcType != PbcType::Screw)
134 "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d\n",
146 /* Double the a-vector length and write the correct space group */
148 "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d\n",
160 static void read_cryst1(char* line, PbcType* pbcType, matrix box)
163 char sa[12], sb[12], sc[12], sg[SG_SIZE + 1], ident;
164 double fa, fb, fc, alpha, beta, gamma, cosa, cosb, cosg, sing;
165 int syma, symb, symc;
168 sscanf(line, "%*s%s%s%s%lf%lf%lf", sa, sb, sc, &alpha, &beta, &gamma);
170 pbcTypeFile = PbcType::Unset;
171 if (strlen(line) >= 55)
173 strncpy(sg, line + 55, SG_SIZE);
179 sscanf(sg, "%c %d %d %d", &ident, &syma, &symb, &symc);
180 if (ident == 'P' && syma == 1 && symb <= 1 && symc <= 1)
182 fc = strtod(sc, nullptr) * 0.1;
183 pbcTypeFile = (fc > 0 ? PbcType::Xyz : PbcType::XY);
185 if (ident == 'P' && syma == 21 && symb == 1 && symc == 1)
187 pbcTypeFile = PbcType::Screw;
192 *pbcType = pbcTypeFile;
197 fa = strtod(sa, nullptr) * 0.1;
198 fb = strtod(sb, nullptr) * 0.1;
199 fc = strtod(sc, nullptr) * 0.1;
200 if (pbcTypeFile == PbcType::Screw)
206 if ((alpha != 90.0) || (beta != 90.0) || (gamma != 90.0))
210 cosa = std::cos(alpha * gmx::c_deg2Rad);
218 cosb = std::cos(beta * gmx::c_deg2Rad);
226 cosg = std::cos(gamma * gmx::c_deg2Rad);
227 sing = std::sin(gamma * gmx::c_deg2Rad);
234 box[YY][XX] = fb * cosg;
235 box[YY][YY] = fb * sing;
236 box[ZZ][XX] = fc * cosb;
237 box[ZZ][YY] = fc * (cosa - cosb * cosg) / sing;
238 box[ZZ][ZZ] = std::sqrt(fc * fc - box[ZZ][XX] * box[ZZ][XX] - box[ZZ][YY] * box[ZZ][YY]);
248 static int gmx_fprintf_pqr_atomline(FILE* fp,
249 PdbRecordType record,
251 const char* atom_name,
252 const char* res_name,
261 GMX_RELEASE_ASSERT(record == PdbRecordType::Atom || record == PdbRecordType::Hetatm,
262 "Can only print PQR atom lines as ATOM or HETATM records");
264 /* Check atom name */
265 GMX_RELEASE_ASSERT(atom_name != nullptr, "Need atom information to print pqr");
267 /* Check residue name */
268 GMX_RELEASE_ASSERT(res_name != nullptr, "Need residue information to print pqr");
270 /* Truncate integers so they fit */
271 atom_seq_number = atom_seq_number % 100000;
272 res_seq_number = res_seq_number % 10000;
275 "%-6s%5d %-4.4s%4.4s%c%4d %8.3f %8.3f %8.3f %6.2f %6.2f\n",
276 enumValueToString(record),
291 void write_pdbfile_indexed(FILE* out,
293 const t_atoms* atoms,
304 gmx_conect_t* gc = static_cast<gmx_conect_t*>(conect);
311 fprintf(out, "TITLE %s\n", (title && title[0]) ? title : gmx::bromacs().c_str());
312 if (box && ((norm2(box[XX]) != 0.0F) || (norm2(box[YY]) != 0.0F) || (norm2(box[ZZ]) != 0.0F)))
314 gmx_write_pdb_box(out, pbcType, box);
316 if (atoms->havePdbInfo)
318 /* Check whether any occupancies are set, in that case leave it as is,
319 * otherwise set them all to one
322 for (int ii = 0; (ii < nindex) && bOccup; ii++)
325 bOccup = bOccup && (atoms->pdbinfo[i].occup == 0.0);
333 fprintf(out, "MODEL %8d\n", model_nr > 0 ? model_nr : 1);
336 for (int ii = 0; ii < nindex; ii++)
339 int resind = atoms->atom[i].resind;
340 std::string resnm = *atoms->resinfo[resind].name;
341 std::string nm = *atoms->atomname[i];
343 int resnr = atoms->resinfo[resind].nr;
344 unsigned char resic = atoms->resinfo[resind].ic;
352 ch = atoms->resinfo[resind].chainid;
361 resnr = resnr % 10000;
364 if (atoms->pdbinfo != nullptr)
366 pdbinfo = atoms->pdbinfo[i];
370 gmx_pdbinfo_init_default(&pdbinfo);
373 altloc = pdbinfo.altloc;
374 if (!isalnum(altloc))
378 occup = bOccup ? 1.0 : pdbinfo.occup;
382 gmx_fprintf_pdb_atomline(out,
396 atoms->atom[i].elem);
398 if (atoms->pdbinfo && atoms->pdbinfo[i].bAnisotropic)
401 "ANISOU%5d %-4.4s%4.4s%c%4d%c %7d%7d%7d%7d%7d%7d\n",
407 (resic == '\0') ? ' ' : resic,
408 atoms->pdbinfo[i].uij[0],
409 atoms->pdbinfo[i].uij[1],
410 atoms->pdbinfo[i].uij[2],
411 atoms->pdbinfo[i].uij[3],
412 atoms->pdbinfo[i].uij[4],
413 atoms->pdbinfo[i].uij[5]);
418 gmx_fprintf_pqr_atomline(out,
433 fprintf(out, "TER\n");
434 fprintf(out, "ENDMDL\n");
438 /* Write conect records */
439 for (int i = 0; (i < gc->nconect); i++)
441 fprintf(out, "CONECT%5d%5d\n", gc->conect[i].ai + 1, gc->conect[i].aj + 1);
446 void write_pdbfile(FILE* out,
448 const t_atoms* atoms,
458 snew(index, atoms->nr);
459 for (i = 0; i < atoms->nr; i++)
463 write_pdbfile_indexed(
464 out, title, atoms, x, pbcType, box, chainid, model_nr, atoms->nr, index, conect, false);
468 static void read_anisou(char line[], int natom, t_atoms* atoms)
472 char anr[12], anm[12];
476 for (k = 0; (k < 5); k++, j++)
482 for (k = 0; (k < 4); k++, j++)
489 /* Strip off spaces */
492 /* Search backwards for number and name only */
493 atomnr = std::strtol(anr, nullptr, 10);
494 for (i = natom - 1; (i >= 0); i--)
496 if ((std::strcmp(anm, *(atoms->atomname[i])) == 0) && (atomnr == atoms->pdbinfo[i].atomnr))
503 fprintf(stderr, "Skipping ANISOU record (atom %s %d not found)\n", anm, atomnr);
507 if (sscanf(line + 29,
509 &atoms->pdbinfo[i].uij[U11],
510 &atoms->pdbinfo[i].uij[U22],
511 &atoms->pdbinfo[i].uij[U33],
512 &atoms->pdbinfo[i].uij[U12],
513 &atoms->pdbinfo[i].uij[U13],
514 &atoms->pdbinfo[i].uij[U23])
517 atoms->pdbinfo[i].bAnisotropic = TRUE;
521 fprintf(stderr, "Invalid ANISOU record for atom %d\n", i);
522 atoms->pdbinfo[i].bAnisotropic = FALSE;
527 void get_pdb_atomnumber(const t_atoms* atoms, AtomProperties* aps)
529 int i, atomnumber, len;
531 char anm[6], anm_copy[6];
537 gmx_incons("Trying to deduce atomnumbers when no pdb information is present");
539 for (i = 0; (i < atoms->nr); i++)
541 std::strcpy(anm, atoms->pdbinfo[i].atomnm);
542 std::strcpy(anm_copy, atoms->pdbinfo[i].atomnm);
543 bool atomNumberSet = false;
545 if ((anm[0] != ' ') && ((len <= 2) || !std::isdigit(anm[2])))
548 if (aps->setAtomProperty(epropElement, "???", anm_copy, &eval))
550 atomnumber = gmx::roundToInt(eval);
551 atomNumberSet = true;
556 if (aps->setAtomProperty(epropElement, "???", anm_copy, &eval))
558 atomnumber = gmx::roundToInt(eval);
559 atomNumberSet = true;
566 while ((k < std::strlen(anm)) && (std::isspace(anm[k]) || std::isdigit(anm[k])))
570 anm_copy[0] = anm[k];
572 if (aps->setAtomProperty(epropElement, "???", anm_copy, &eval))
574 atomnumber = gmx::roundToInt(eval);
575 atomNumberSet = true;
578 static constexpr size_t sc_maxElementNameLength = 3;
579 static_assert(sizeof(atoms->atom[i].elem) >= sc_maxElementNameLength + 1);
583 atoms->atom[i].atomnumber = atomnumber;
584 element = aps->elementFromAtomNumber(atomnumber);
587 fprintf(debug, "Atomnumber for atom '%s' is %d\n", anm, atomnumber);
590 element.resize(sc_maxElementNameLength);
591 std::strcpy(atoms->atom[i].elem, element.c_str());
596 read_atom(t_symtab* symtab, const char line[], PdbRecordType type, int natom, t_atoms* atoms, rvec x[], int chainnum)
601 char anr[12], anm[12], anm_copy[12], altloc, resnm[12], rnr[12], elem[3];
602 char xc[12], yc[12], zc[12], occup[12], bfac[12];
605 int resnr, atomnumber;
607 if (natom >= atoms->nr)
609 gmx_fatal(FARGS, "\nFound more atoms (%d) in pdb file than expected (%d)", natom + 1, atoms->nr);
614 for (k = 0; (k < 5); k++, j++)
621 for (k = 0; (k < 4); k++, j++)
626 std::strcpy(anm_copy, anm);
632 for (k = 0; (k < 4); k++, j++)
642 for (k = 0; (k < 4); k++, j++)
648 resnr = std::strtol(rnr, nullptr, 10);
652 /* X,Y,Z Coordinate */
653 for (k = 0; (k < 8); k++, j++)
658 for (k = 0; (k < 8); k++, j++)
663 for (k = 0; (k < 8); k++, j++)
670 for (k = 0; (k < 6); k++, j++)
677 for (k = 0; (k < 7); k++, j++)
687 for (k = 0; (k < 2); k++, j++)
696 atomn = &(atoms->atom[natom]);
697 if ((natom == 0) || atoms->resinfo[atoms->atom[natom - 1].resind].nr != resnr
698 || atoms->resinfo[atoms->atom[natom - 1].resind].ic != resic
699 || (strcmp(*atoms->resinfo[atoms->atom[natom - 1].resind].name, resnm) != 0))
707 atomn->resind = atoms->atom[natom - 1].resind + 1;
709 atoms->nres = atomn->resind + 1;
710 t_atoms_set_resinfo(atoms, natom, symtab, resnm, resnr, resic, chainnum, chainid);
714 atomn->resind = atoms->atom[natom - 1].resind;
716 atoms->atomname[natom] = put_symtab(symtab, anm);
719 atomn->atomnumber = atomnumber;
720 strncpy(atomn->elem, elem, 4);
722 x[natom][XX] = strtod(xc, nullptr) * 0.1;
723 x[natom][YY] = strtod(yc, nullptr) * 0.1;
724 x[natom][ZZ] = strtod(zc, nullptr) * 0.1;
727 atoms->pdbinfo[natom].type = type;
728 atoms->pdbinfo[natom].atomnr = strtol(anr, nullptr, 10);
729 atoms->pdbinfo[natom].altloc = altloc;
730 strcpy(atoms->pdbinfo[natom].atomnm, anm_copy);
731 atoms->pdbinfo[natom].bfac = strtod(bfac, nullptr);
732 atoms->pdbinfo[natom].occup = strtod(occup, nullptr);
739 gmx_bool is_hydrogen(const char* nm)
743 std::strcpy(buf, nm);
746 return ((buf[0] == 'H') || ((std::isdigit(buf[0]) != 0) && (buf[1] == 'H')));
749 gmx_bool is_dummymass(const char* nm)
753 std::strcpy(buf, nm);
756 return (buf[0] == 'M') && (std::isdigit(buf[strlen(buf) - 1]) != 0);
759 static void gmx_conect_addline(gmx_conect_t* con, char* line)
763 std::string form2 = "%*s";
764 std::string format = form2 + "%d";
765 if (sscanf(line, format.c_str(), &ai) == 1)
770 format = form2 + "%d";
771 n = sscanf(line, format.c_str(), &aj);
774 gmx_conect_add(con, ai - 1, aj - 1); /* to prevent duplicated records */
780 void gmx_conect_dump(FILE* fp, gmx_conect conect)
782 gmx_conect_t* gc = static_cast<gmx_conect_t*>(conect);
785 for (i = 0; (i < gc->nconect); i++)
787 fprintf(fp, "%6s%5d%5d\n", "CONECT", gc->conect[i].ai + 1, gc->conect[i].aj + 1);
791 gmx_conect gmx_conect_init()
800 void gmx_conect_done(gmx_conect conect)
802 gmx_conect_t* gc = conect;
807 gmx_bool gmx_conect_exist(gmx_conect conect, int ai, int aj)
809 gmx_conect_t* gc = conect;
815 for (i = 0; (i < gc->nconect); i++)
817 if (((gc->conect[i].ai == ai) && (gc->conect[i].aj == aj))
818 || ((gc->conect[i].aj == ai) && (gc->conect[i].ai == aj)))
826 void gmx_conect_add(gmx_conect conect, int ai, int aj)
828 gmx_conect_t* gc = static_cast<gmx_conect_t*>(conect);
833 if (!gmx_conect_exist(conect, ai, aj))
835 srenew(gc->conect, ++gc->nconect);
836 gc->conect[gc->nconect - 1].ai = ai;
837 gc->conect[gc->nconect - 1].aj = aj;
841 int read_pdbfile(FILE* in,
851 gmx_conect_t* gc = conect;
853 gmx_bool bConnWarn = FALSE;
854 char line[STRLEN + 1];
857 gmx_bool bStop = FALSE;
861 /* Only assume pbc when there is a CRYST1 entry */
862 *pbcType = PbcType::No;
869 atoms->haveMass = FALSE;
870 atoms->haveCharge = FALSE;
871 atoms->haveType = FALSE;
872 atoms->haveBState = FALSE;
873 atoms->havePdbInfo = (atoms->pdbinfo != nullptr);
879 static const gmx::StringToEnumValueConverter<PdbRecordType, enumValueToString, gmx::StringCompareType::Exact, gmx::StripStrings::Yes>
880 sc_pdbRecordTypeIdentifier;
881 while (!bStop && (fgets2(line, STRLEN, in) != nullptr))
883 // Convert line to a null-terminated string, then take a substring of at most 6 chars
884 std::string lineAsString(line);
885 std::optional<PdbRecordType> line_type =
886 sc_pdbRecordTypeIdentifier.valueFrom(lineAsString.substr(0, 6));
889 // Skip this line because it does not start with a
890 // recognized leading string describing a PDB record type.
894 switch (line_type.value())
896 case PdbRecordType::Atom:
897 case PdbRecordType::Hetatm:
898 natom = read_atom(symtab, line, line_type.value(), natom, atoms, x, chainnum);
901 case PdbRecordType::Anisou:
902 if (atoms->havePdbInfo)
904 read_anisou(line, natom, atoms);
908 case PdbRecordType::Cryst1: read_cryst1(line, pbcType, box); break;
910 case PdbRecordType::Title:
911 case PdbRecordType::Header:
912 if (std::strlen(line) > 6)
915 /* skip HEADER or TITLE and spaces */
924 /* truncate after title */
925 d = std::strstr(c, " ");
930 if (std::strlen(c) > 0)
932 std::strcpy(title, c);
937 case PdbRecordType::Compound:
938 if ((!std::strstr(line, ": ")) || (std::strstr(line + 6, "MOLECULE:")))
940 if (!(c = std::strstr(line + 6, "MOLECULE:")))
944 /* skip 'MOLECULE:' and spaces */
953 /* truncate after title */
957 while ((d[-1] == ';') && d > c)
967 std::strcat(title, "; ");
968 std::strcat(title, c);
972 std::strcpy(title, c);
979 case PdbRecordType::Ter: chainnum++; break;
981 case PdbRecordType::Model:
984 sscanf(line, "%*s%d", model_nr);
988 case PdbRecordType::EndModel: bStop = TRUE; break;
989 case PdbRecordType::Conect:
992 gmx_conect_addline(gc, line);
996 fprintf(stderr, "WARNING: all CONECT records are ignored\n");
1008 void get_pdb_coordnum(FILE* in, int* natoms)
1013 while (fgets2(line, STRLEN, in))
1015 if (std::strncmp(line, "ENDMDL", 6) == 0)
1019 if ((std::strncmp(line, "ATOM ", 6) == 0) || (std::strncmp(line, "HETATM", 6) == 0))
1026 void gmx_pdb_read_conf(const char* infile, t_symtab* symtab, char** name, t_atoms* atoms, rvec x[], PbcType* pbcType, matrix box)
1028 FILE* in = gmx_fio_fopen(infile, "r");
1030 read_pdbfile(in, title, nullptr, atoms, symtab, x, pbcType, box, nullptr);
1031 if (name != nullptr)
1033 *name = gmx_strdup(title);
1038 gmx_conect gmx_conect_generate(const t_topology* top)
1043 /* Fill the conect records */
1044 gc = gmx_conect_init();
1046 for (f = 0; (f < F_NRE); f++)
1050 for (i = 0; (i < top->idef.il[f].nr); i += interaction_function[f].nratoms + 1)
1052 gmx_conect_add(gc, top->idef.il[f].iatoms[i + 1], top->idef.il[f].iatoms[i + 2]);
1059 int gmx_fprintf_pdb_atomline(FILE* fp,
1060 PdbRecordType record,
1061 int atom_seq_number,
1062 const char* atom_name,
1063 char alternate_location,
1064 const char* res_name,
1067 char res_insertion_code,
1073 const char* element)
1075 char tmp_atomname[6], tmp_resname[6];
1076 gmx_bool start_name_in_col13;
1079 if (record != PdbRecordType::Atom && record != PdbRecordType::Hetatm)
1081 gmx_fatal(FARGS, "Can only print PDB atom lines as ATOM or HETATM records");
1084 /* Format atom name */
1085 if (atom_name != nullptr)
1087 /* If the atom name is an element name with two chars, it should start already in column 13.
1088 * Otherwise it should start in column 14, unless the name length is 4 chars.
1090 if ((element != nullptr) && (std::strlen(element) >= 2)
1091 && (gmx_strncasecmp(atom_name, element, 2) == 0))
1093 start_name_in_col13 = TRUE;
1097 start_name_in_col13 = (std::strlen(atom_name) >= 4);
1099 snprintf(tmp_atomname, sizeof(tmp_atomname), start_name_in_col13 ? "" : " ");
1100 std::strncat(tmp_atomname, atom_name, 4);
1101 tmp_atomname[5] = '\0';
1105 tmp_atomname[0] = '\0';
1108 /* Format residue name */
1109 std::strncpy(tmp_resname, (res_name != nullptr) ? res_name : "", 4);
1110 /* Make sure the string is terminated if strlen was > 4 */
1111 tmp_resname[4] = '\0';
1112 /* String is properly terminated, so now we can use strcat. By adding a
1113 * space we can write it right-justified, and if the original name was
1114 * three characters or less there will be a space added on the right side.
1116 std::strcat(tmp_resname, " ");
1118 /* Truncate integers so they fit */
1119 atom_seq_number = atom_seq_number % 100000;
1120 res_seq_number = res_seq_number % 10000;
1123 "%-6s%5d %-4.4s%c%4.4s%c%4d%c %8.3f%8.3f%8.3f%6.2f%6.2f %2s\n",
1124 enumValueToString(record),
1137 (element != nullptr) ? element : "");