2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/fileio/gmxfio.h"
46 #include "gromacs/legacyheaders/copyrite.h"
47 #include "gromacs/legacyheaders/typedefs.h"
48 #include "gromacs/legacyheaders/types/ifunc.h"
49 #include "gromacs/math/units.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/pbcutil/pbc.h"
52 #include "gromacs/topology/atomprop.h"
53 #include "gromacs/topology/residuetypes.h"
54 #include "gromacs/topology/symtab.h"
55 #include "gromacs/topology/topology.h"
56 #include "gromacs/utility/cstringutil.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/smalloc.h"
65 typedef struct gmx_conect_t {
68 gmx_conection_t *conect;
71 static const char *pdbtp[epdbNR] = {
72 "ATOM ", "HETATM", "ANISOU", "CRYST1",
73 "COMPND", "MODEL", "ENDMDL", "TER", "HEADER", "TITLE", "REMARK",
78 #define REMARK_SIM_BOX "REMARK THIS IS A SIMULATION BOX"
80 static void xlate_atomname_pdb2gmx(char *name)
85 length = strlen(name);
86 if (length > 3 && isdigit(name[0]))
89 for (i = 1; i < length; i++)
93 name[length-1] = temp;
97 static void xlate_atomname_gmx2pdb(char *name)
102 length = strlen(name);
103 if (length > 3 && isdigit(name[length-1]))
105 temp = name[length-1];
106 for (i = length-1; i > 0; --i)
115 void gmx_write_pdb_box(FILE *out, int ePBC, matrix box)
117 real alpha, beta, gamma;
121 ePBC = guess_ePBC(box);
124 if (ePBC == epbcNONE)
129 if (norm2(box[YY])*norm2(box[ZZ]) != 0)
131 alpha = RAD2DEG*acos(cos_angle_no_table(box[YY], box[ZZ]));
137 if (norm2(box[XX])*norm2(box[ZZ]) != 0)
139 beta = RAD2DEG*acos(cos_angle_no_table(box[XX], box[ZZ]));
145 if (norm2(box[XX])*norm2(box[YY]) != 0)
147 gamma = RAD2DEG*acos(cos_angle_no_table(box[XX], box[YY]));
153 fprintf(out, "REMARK THIS IS A SIMULATION BOX\n");
154 if (ePBC != epbcSCREW)
156 fprintf(out, "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d\n",
157 10*norm(box[XX]), 10*norm(box[YY]), 10*norm(box[ZZ]),
158 alpha, beta, gamma, "P 1", 1);
162 /* Double the a-vector length and write the correct space group */
163 fprintf(out, "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d\n",
164 20*norm(box[XX]), 10*norm(box[YY]), 10*norm(box[ZZ]),
165 alpha, beta, gamma, "P 21 1 1", 1);
170 static void read_cryst1(char *line, int *ePBC, matrix box)
173 char sa[12], sb[12], sc[12], sg[SG_SIZE+1], ident;
174 double fa, fb, fc, alpha, beta, gamma, cosa, cosb, cosg, sing;
175 int syma, symb, symc;
178 sscanf(line, "%*s%s%s%s%lf%lf%lf", sa, sb, sc, &alpha, &beta, &gamma);
181 if (strlen(line) >= 55)
183 strncpy(sg, line+55, SG_SIZE);
189 sscanf(sg, "%c %d %d %d", &ident, &syma, &symb, &symc);
190 if (ident == 'P' && syma == 1 && symb <= 1 && symc <= 1)
192 fc = strtod(sc, NULL)*0.1;
193 ePBC_file = (fc > 0 ? epbcXYZ : epbcXY);
195 if (ident == 'P' && syma == 21 && symb == 1 && symc == 1)
197 ePBC_file = epbcSCREW;
207 fa = strtod(sa, NULL)*0.1;
208 fb = strtod(sb, NULL)*0.1;
209 fc = strtod(sc, NULL)*0.1;
210 if (ePBC_file == epbcSCREW)
216 if ((alpha != 90.0) || (beta != 90.0) || (gamma != 90.0))
220 cosa = cos(alpha*DEG2RAD);
228 cosb = cos(beta*DEG2RAD);
236 cosg = cos(gamma*DEG2RAD);
237 sing = sin(gamma*DEG2RAD);
244 box[YY][XX] = fb*cosg;
245 box[YY][YY] = fb*sing;
246 box[ZZ][XX] = fc*cosb;
247 box[ZZ][YY] = fc*(cosa - cosb*cosg)/sing;
248 box[ZZ][ZZ] = sqrt(fc*fc
249 - box[ZZ][XX]*box[ZZ][XX] - box[ZZ][YY]*box[ZZ][YY]);
259 void write_pdbfile_indexed(FILE *out, const char *title,
260 t_atoms *atoms, rvec x[],
261 int ePBC, matrix box, char chainid,
262 int model_nr, atom_id nindex, const atom_id index[],
263 gmx_conect conect, gmx_bool bTerSepChains)
265 gmx_conect_t *gc = (gmx_conect_t *)conect;
266 char resnm[6], nm[6], pukestring[100];
269 enum PDB_record type;
270 unsigned char resic, ch;
275 int chainnum, lastchainnum;
276 int lastresind, lastchainresind;
277 gmx_residuetype_t*rt;
278 const char *p_restype;
279 const char *p_lastrestype;
281 gmx_residuetype_init(&rt);
283 bromacs(pukestring, 99);
284 fprintf(out, "TITLE %s\n", (title && title[0]) ? title : pukestring);
285 if (box && ( norm2(box[XX]) || norm2(box[YY]) || norm2(box[ZZ]) ) )
287 gmx_write_pdb_box(out, ePBC, box);
291 /* Check whether any occupancies are set, in that case leave it as is,
292 * otherwise set them all to one
295 for (ii = 0; (ii < nindex) && bOccup; ii++)
298 bOccup = bOccup && (atoms->pdbinfo[i].occup == 0.0);
306 fprintf(out, "MODEL %8d\n", model_nr > 0 ? model_nr : 1);
308 lastchainresind = -1;
313 for (ii = 0; ii < nindex; ii++)
317 resind = atoms->atom[i].resind;
318 chainnum = atoms->resinfo[resind].chainnum;
319 p_lastrestype = p_restype;
320 gmx_residuetype_get_type(rt, *atoms->resinfo[resind].name, &p_restype);
322 /* Add a TER record if we changed chain, and if either the previous or this chain is protein/DNA/RNA. */
323 if (bTerSepChains && ii > 0 && chainnum != lastchainnum)
325 /* Only add TER if the previous chain contained protein/DNA/RNA. */
326 if (gmx_residuetype_is_protein(rt, p_lastrestype) || gmx_residuetype_is_dna(rt, p_lastrestype) || gmx_residuetype_is_rna(rt, p_lastrestype))
328 fprintf(out, "TER\n");
330 lastchainnum = chainnum;
331 lastchainresind = lastresind;
334 strncpy(resnm, *atoms->resinfo[resind].name, sizeof(resnm)-1);
335 resnm[sizeof(resnm)-1] = 0;
336 strncpy(nm, *atoms->atomname[i], sizeof(nm)-1);
337 nm[sizeof(nm)-1] = 0;
339 /* rename HG12 to 2HG1, etc. */
340 xlate_atomname_gmx2pdb(nm);
341 resnr = atoms->resinfo[resind].nr;
342 resic = atoms->resinfo[resind].ic;
349 ch = atoms->resinfo[resind].chainid;
358 resnr = resnr % 10000;
362 type = (enum PDB_record)(atoms->pdbinfo[i].type);
363 altloc = atoms->pdbinfo[i].altloc;
364 if (!isalnum(altloc))
368 occup = bOccup ? 1.0 : atoms->pdbinfo[i].occup;
369 bfac = atoms->pdbinfo[i].bfac;
379 gmx_fprintf_pdb_atomline(out,
388 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ],
391 atoms->atom[i].elem);
393 if (atoms->pdbinfo && atoms->pdbinfo[i].bAnisotropic)
395 fprintf(out, "ANISOU%5u %-4.4s%4.4s%c%4d%c %7d%7d%7d%7d%7d%7d\n",
396 (i+1)%100000, nm, resnm, ch, resnr,
397 (resic == '\0') ? ' ' : resic,
398 atoms->pdbinfo[i].uij[0], atoms->pdbinfo[i].uij[1],
399 atoms->pdbinfo[i].uij[2], atoms->pdbinfo[i].uij[3],
400 atoms->pdbinfo[i].uij[4], atoms->pdbinfo[i].uij[5]);
404 fprintf(out, "TER\n");
405 fprintf(out, "ENDMDL\n");
409 /* Write conect records */
410 for (i = 0; (i < gc->nconect); i++)
412 fprintf(out, "CONECT%5d%5d\n", gc->conect[i].ai+1, gc->conect[i].aj+1);
416 gmx_residuetype_destroy(rt);
419 void write_pdbfile(FILE *out, const char *title, t_atoms *atoms, rvec x[],
420 int ePBC, matrix box, char chainid, int model_nr, gmx_conect conect, gmx_bool bTerSepChains)
424 snew(index, atoms->nr);
425 for (i = 0; i < atoms->nr; i++)
429 write_pdbfile_indexed(out, title, atoms, x, ePBC, box, chainid, model_nr,
430 atoms->nr, index, conect, bTerSepChains);
434 int line2type(char *line)
439 for (k = 0; (k < 6); k++)
445 for (k = 0; (k < epdbNR); k++)
447 if (strncmp(type, pdbtp[k], strlen(pdbtp[k])) == 0)
456 static void read_anisou(char line[], int natom, t_atoms *atoms)
460 char anr[12], anm[12];
464 for (k = 0; (k < 5); k++, j++)
470 for (k = 0; (k < 4); k++, j++)
477 /* Strip off spaces */
480 /* Search backwards for number and name only */
481 atomnr = strtol(anr, NULL, 10);
482 for (i = natom-1; (i >= 0); i--)
484 if ((strcmp(anm, *(atoms->atomname[i])) == 0) &&
485 (atomnr == atoms->pdbinfo[i].atomnr))
492 fprintf(stderr, "Skipping ANISOU record (atom %s %d not found)\n",
497 if (sscanf(line+29, "%d%d%d%d%d%d",
498 &atoms->pdbinfo[i].uij[U11], &atoms->pdbinfo[i].uij[U22],
499 &atoms->pdbinfo[i].uij[U33], &atoms->pdbinfo[i].uij[U12],
500 &atoms->pdbinfo[i].uij[U13], &atoms->pdbinfo[i].uij[U23])
503 atoms->pdbinfo[i].bAnisotropic = TRUE;
507 fprintf(stderr, "Invalid ANISOU record for atom %d\n", i);
508 atoms->pdbinfo[i].bAnisotropic = FALSE;
513 void get_pdb_atomnumber(t_atoms *atoms, gmx_atomprop_t aps)
515 int i, atomnumber, len;
517 char anm[6], anm_copy[6], *ptr;
523 gmx_incons("Trying to deduce atomnumbers when no pdb information is present");
525 for (i = 0; (i < atoms->nr); i++)
527 strcpy(anm, atoms->pdbinfo[i].atomnm);
528 strcpy(anm_copy, atoms->pdbinfo[i].atomnm);
531 if ((anm[0] != ' ') && ((len <= 2) || ((len > 2) && !isdigit(anm[2]))))
534 if (gmx_atomprop_query(aps, epropElement, "???", anm_copy, &eval))
536 atomnumber = gmx_nint(eval);
541 if (gmx_atomprop_query(aps, epropElement, "???", anm_copy, &eval))
543 atomnumber = gmx_nint(eval);
547 if (atomnumber == NOTSET)
550 while ((k < strlen(anm)) && (isspace(anm[k]) || isdigit(anm[k])))
554 anm_copy[0] = anm[k];
556 if (gmx_atomprop_query(aps, epropElement, "???", anm_copy, &eval))
558 atomnumber = gmx_nint(eval);
561 atoms->atom[i].atomnumber = atomnumber;
562 ptr = gmx_atomprop_element(aps, atomnumber);
563 strncpy(atoms->atom[i].elem, ptr == NULL ? "" : ptr, 4);
566 fprintf(debug, "Atomnumber for atom '%s' is %d\n", anm, atomnumber);
571 static int read_atom(t_symtab *symtab,
572 char line[], int type, int natom,
573 t_atoms *atoms, rvec x[], int chainnum, gmx_bool bChange)
578 char anr[12], anm[12], anm_copy[12], altloc, resnm[12], rnr[12], elem[3];
579 char xc[12], yc[12], zc[12], occup[12], bfac[12];
582 int resnr, atomnumber;
584 if (natom >= atoms->nr)
586 gmx_fatal(FARGS, "\nFound more atoms (%d) in pdb file than expected (%d)",
592 for (k = 0; (k < 5); k++, j++)
599 for (k = 0; (k < 4); k++, j++)
604 strcpy(anm_copy, anm);
610 for (k = 0; (k < 4); k++, j++)
620 for (k = 0; (k < 4); k++, j++)
626 resnr = strtol(rnr, NULL, 10);
630 /* X,Y,Z Coordinate */
631 for (k = 0; (k < 8); k++, j++)
636 for (k = 0; (k < 8); k++, j++)
641 for (k = 0; (k < 8); k++, j++)
648 for (k = 0; (k < 6); k++, j++)
655 for (k = 0; (k < 7); k++, j++)
665 for (k = 0; (k < 2); k++, j++)
674 atomn = &(atoms->atom[natom]);
676 atoms->resinfo[atoms->atom[natom-1].resind].nr != resnr ||
677 atoms->resinfo[atoms->atom[natom-1].resind].ic != resic ||
678 (strcmp(*atoms->resinfo[atoms->atom[natom-1].resind].name, resnm) != 0))
686 atomn->resind = atoms->atom[natom-1].resind + 1;
688 atoms->nres = atomn->resind + 1;
689 t_atoms_set_resinfo(atoms, natom, symtab, resnm, resnr, resic, chainnum, chainid);
693 atomn->resind = atoms->atom[natom-1].resind;
697 xlate_atomname_pdb2gmx(anm);
699 atoms->atomname[natom] = put_symtab(symtab, anm);
702 atomn->atomnumber = atomnumber;
703 strncpy(atomn->elem, elem, 4);
705 x[natom][XX] = strtod(xc, NULL)*0.1;
706 x[natom][YY] = strtod(yc, NULL)*0.1;
707 x[natom][ZZ] = strtod(zc, NULL)*0.1;
710 atoms->pdbinfo[natom].type = type;
711 atoms->pdbinfo[natom].atomnr = strtol(anr, NULL, 10);
712 atoms->pdbinfo[natom].altloc = altloc;
713 strcpy(atoms->pdbinfo[natom].atomnm, anm_copy);
714 atoms->pdbinfo[natom].bfac = strtod(bfac, NULL);
715 atoms->pdbinfo[natom].occup = strtod(occup, NULL);
722 gmx_bool is_hydrogen(const char *nm)
733 else if ((isdigit(buf[0])) && (buf[1] == 'H'))
740 gmx_bool is_dummymass(const char *nm)
747 if ((buf[0] == 'M') && isdigit(buf[strlen(buf)-1]))
755 static void gmx_conect_addline(gmx_conect_t *con, char *line)
758 char format[32], form2[32];
760 sprintf(form2, "%%*s");
761 sprintf(format, "%s%%d", form2);
762 if (sscanf(line, format, &ai) == 1)
766 strcat(form2, "%*s");
767 sprintf(format, "%s%%d", form2);
768 n = sscanf(line, format, &aj);
771 srenew(con->conect, ++con->nconect);
772 con->conect[con->nconect-1].ai = ai-1;
773 con->conect[con->nconect-1].aj = aj-1;
780 void gmx_conect_dump(FILE *fp, gmx_conect conect)
782 gmx_conect_t *gc = (gmx_conect_t *)conect;
785 for (i = 0; (i < gc->nconect); i++)
787 fprintf(fp, "%6s%5d%5d\n", "CONECT",
788 gc->conect[i].ai+1, gc->conect[i].aj+1);
792 gmx_conect gmx_conect_init()
798 return (gmx_conect) gc;
801 void gmx_conect_done(gmx_conect conect)
803 gmx_conect_t *gc = (gmx_conect_t *)conect;
808 gmx_bool gmx_conect_exist(gmx_conect conect, int ai, int aj)
810 gmx_conect_t *gc = (gmx_conect_t *)conect;
816 for (i = 0; (i < gc->nconect); i++)
818 if (((gc->conect[i].ai == ai) &&
819 (gc->conect[i].aj == aj)) ||
820 ((gc->conect[i].aj == ai) &&
821 (gc->conect[i].ai == aj)))
829 void gmx_conect_add(gmx_conect conect, int ai, int aj)
831 gmx_conect_t *gc = (gmx_conect_t *)conect;
837 if (!gmx_conect_exist(conect, ai, aj))
839 srenew(gc->conect, ++gc->nconect);
840 gc->conect[gc->nconect-1].ai = ai;
841 gc->conect[gc->nconect-1].aj = aj;
845 int read_pdbfile(FILE *in, char *title, int *model_nr,
846 t_atoms *atoms, rvec x[], int *ePBC, matrix box, gmx_bool bChange,
849 gmx_conect_t *gc = (gmx_conect_t *)conect;
852 gmx_bool bConnWarn = FALSE;
856 int natom, chainnum, nres_ter_prev = 0;
858 gmx_bool bStop = FALSE;
862 /* Only assume pbc when there is a CRYST1 entry */
870 open_symtab(&symtab);
876 while (!bStop && (fgets2(line, STRLEN, in) != NULL))
878 line_type = line2type(line);
884 natom = read_atom(&symtab, line, line_type, natom, atoms, x, chainnum, bChange);
890 read_anisou(line, natom, atoms);
895 read_cryst1(line, ePBC, box);
900 if (strlen(line) > 6)
903 /* skip HEADER or TITLE and spaces */
912 /* truncate after title */
926 if ((!strstr(line, ": ")) || (strstr(line+6, "MOLECULE:")))
928 if (!(c = strstr(line+6, "MOLECULE:")) )
932 /* skip 'MOLECULE:' and spaces */
941 /* truncate after title */
945 while ( (d[-1] == ';') && d > c)
974 sscanf(line, "%*s%d", model_nr);
984 gmx_conect_addline(gc, line);
988 fprintf(stderr, "WARNING: all CONECT records are ignored\n");
998 free_symtab(&symtab);
1002 void get_pdb_coordnum(FILE *in, int *natoms)
1007 while (fgets2(line, STRLEN, in))
1009 if (strncmp(line, "ENDMDL", 6) == 0)
1013 if ((strncmp(line, "ATOM ", 6) == 0) || (strncmp(line, "HETATM", 6) == 0))
1020 void read_pdb_conf(const char *infile, char *title,
1021 t_atoms *atoms, rvec x[], int *ePBC, matrix box, gmx_bool bChange,
1026 in = gmx_fio_fopen(infile, "r");
1027 read_pdbfile(in, title, NULL, atoms, x, ePBC, box, bChange, conect);
1031 gmx_conect gmx_conect_generate(t_topology *top)
1036 /* Fill the conect records */
1037 gc = gmx_conect_init();
1039 for (f = 0; (f < F_NRE); f++)
1043 for (i = 0; (i < top->idef.il[f].nr); i += interaction_function[f].nratoms+1)
1045 gmx_conect_add(gc, top->idef.il[f].iatoms[i+1],
1046 top->idef.il[f].iatoms[i+2]);
1054 gmx_fprintf_pdb_atomline(FILE * fp,
1055 enum PDB_record record,
1056 int atom_seq_number,
1057 const char * atom_name,
1058 char alternate_location,
1059 const char * res_name,
1062 char res_insertion_code,
1068 const char * element)
1070 char tmp_atomname[6], tmp_resname[6];
1071 gmx_bool start_name_in_col13;
1074 if (record != epdbATOM && record != epdbHETATM)
1076 gmx_fatal(FARGS, "Can only print PDB atom lines as ATOM or HETATM records");
1079 /* Format atom name */
1080 if (atom_name != NULL)
1082 /* If the atom name is an element name with two chars, it should start already in column 13.
1083 * Otherwise it should start in column 14, unless the name length is 4 chars.
1085 if ( (element != NULL) && (strlen(element) >= 2) && (gmx_strncasecmp(atom_name, element, 2) == 0) )
1087 start_name_in_col13 = TRUE;
1091 start_name_in_col13 = (strlen(atom_name) >= 4);
1093 sprintf(tmp_atomname, start_name_in_col13 ? "" : " ");
1094 strncat(tmp_atomname, atom_name, 4);
1095 tmp_atomname[5] = '\0';
1099 tmp_atomname[0] = '\0';
1102 /* Format residue name */
1103 strncpy(tmp_resname, (res_name != NULL) ? res_name : "", 4);
1104 /* Make sure the string is terminated if strlen was > 4 */
1105 tmp_resname[4] = '\0';
1106 /* String is properly terminated, so now we can use strcat. By adding a
1107 * space we can write it right-justified, and if the original name was
1108 * three characters or less there will be a space added on the right side.
1110 strcat(tmp_resname, " ");
1112 /* Truncate integers so they fit */
1113 atom_seq_number = atom_seq_number % 100000;
1114 res_seq_number = res_seq_number % 10000;
1117 "%-6s%5d %-4.4s%c%4.4s%c%4d%c %8.3f%8.3f%8.3f%6.2f%6.2f %2s\n",
1129 (element != NULL) ? element : "");