Add DSSP v4 support to do_dssp
[alexxy/gromacs.git] / src / gromacs / fileio / pdbio.cpp
index d2f373c63d78b88e0e0497ac394ee0d82a04232b..c3c3995039774070b923690aa6330de820617a77 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -39,6 +39,7 @@
 
 #include "gromacs/fileio/pdbio.h"
 
+#include <algorithm>
 #include <cctype>
 #include <cmath>
 #include <cstdlib>
 
 #include "gromacs/fileio/gmxfio.h"
 #include "gromacs/math/units.h"
+#include "gromacs/math/utilities.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/topology/atomprop.h"
 #include "gromacs/topology/ifunc.h"
-#include "gromacs/topology/residuetypes.h"
 #include "gromacs/topology/symtab.h"
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/coolstuff.h"
 #include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/enumerationhelpers.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/futil.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/snprintf.h"
+#include "gromacs/utility/stringtoenumvalueconverter.h"
 
 typedef struct
 {
@@ -74,10 +77,14 @@ typedef struct gmx_conect_t
     gmx_conection_t* conect;
 } gmx_conect_t;
 
-static const char* pdbtp[epdbNR] = { "ATOM  ", "HETATM", "ANISOU", "CRYST1", "COMPND", "MODEL",
-                                     "ENDMDL", "TER",    "HEADER", "TITLE",  "REMARK", "CONECT" };
-
-#define REMARK_SIM_BOX "REMARK    THIS IS A SIMULATION BOX"
+const char* enumValueToString(PdbRecordType enumValue)
+{
+    static constexpr gmx::EnumerationArray<PdbRecordType, const char*> pdbRecordTypeName = {
+        "ATOM  ", "HETATM", "ANISOU", "CRYST1", "COMPND", "MODEL",
+        "ENDMDL", "TER",    "HEADER", "TITLE",  "REMARK", "CONECT"
+    };
+    return pdbRecordTypeName[enumValue];
+}
 
 void gmx_write_pdb_box(FILE* out, PbcType pbcType, const matrix box)
 {
@@ -95,7 +102,7 @@ void gmx_write_pdb_box(FILE* out, PbcType pbcType, const matrix box)
 
     if (norm2(box[YY]) * norm2(box[ZZ]) != 0)
     {
-        alpha = RAD2DEG * gmx_angle(box[YY], box[ZZ]);
+        alpha = gmx::c_rad2Deg * gmx_angle(box[YY], box[ZZ]);
     }
     else
     {
@@ -103,7 +110,7 @@ void gmx_write_pdb_box(FILE* out, PbcType pbcType, const matrix box)
     }
     if (norm2(box[XX]) * norm2(box[ZZ]) != 0)
     {
-        beta = RAD2DEG * gmx_angle(box[XX], box[ZZ]);
+        beta = gmx::c_rad2Deg * gmx_angle(box[XX], box[ZZ]);
     }
     else
     {
@@ -111,7 +118,7 @@ void gmx_write_pdb_box(FILE* out, PbcType pbcType, const matrix box)
     }
     if (norm2(box[XX]) * norm2(box[YY]) != 0)
     {
-        gamma = RAD2DEG * gmx_angle(box[XX], box[YY]);
+        gamma = gmx::c_rad2Deg * gmx_angle(box[XX], box[YY]);
     }
     else
     {
@@ -197,7 +204,7 @@ static void read_cryst1(char* line, PbcType* pbcType, matrix box)
         {
             if (alpha != 90.0)
             {
-                cosa = std::cos(alpha * DEG2RAD);
+                cosa = std::cos(alpha * gmx::c_deg2Rad);
             }
             else
             {
@@ -205,7 +212,7 @@ static void read_cryst1(char* line, PbcType* pbcType, matrix box)
             }
             if (beta != 90.0)
             {
-                cosb = std::cos(beta * DEG2RAD);
+                cosb = std::cos(beta * gmx::c_deg2Rad);
             }
             else
             {
@@ -213,8 +220,8 @@ static void read_cryst1(char* line, PbcType* pbcType, matrix box)
             }
             if (gamma != 90.0)
             {
-                cosg = std::cos(gamma * DEG2RAD);
-                sing = std::sin(gamma * DEG2RAD);
+                cosg = std::cos(gamma * gmx::c_deg2Rad);
+                sing = std::sin(gamma * gmx::c_deg2Rad);
             }
             else
             {
@@ -235,20 +242,20 @@ static void read_cryst1(char* line, PbcType* pbcType, matrix box)
     }
 }
 
-static int gmx_fprintf_pqr_atomline(FILE*           fp,
-                                    enum PDB_record record,
-                                    int             atom_seq_number,
-                                    const char*     atom_name,
-                                    const char*     res_name,
-                                    char            chain_id,
-                                    int             res_seq_number,
-                                    real            x,
-                                    real            y,
-                                    real            z,
-                                    real            occupancy,
-                                    real            b_factor)
+static int gmx_fprintf_pqr_atomline(FILE*         fp,
+                                    PdbRecordType record,
+                                    int           atom_seq_number,
+                                    const char*   atom_name,
+                                    const char*   res_name,
+                                    char          chain_id,
+                                    int           res_seq_number,
+                                    real          x,
+                                    real          y,
+                                    real          z,
+                                    real          occupancy,
+                                    real          b_factor)
 {
-    GMX_RELEASE_ASSERT(record == epdbATOM || record == epdbHETATM,
+    GMX_RELEASE_ASSERT(record == PdbRecordType::Atom || record == PdbRecordType::Hetatm,
                        "Can only print PQR atom lines as ATOM or HETATM records");
 
     /* Check atom name */
@@ -263,7 +270,7 @@ static int gmx_fprintf_pqr_atomline(FILE*           fp,
 
     int n = fprintf(fp,
                     "%-6s%5d %-4.4s%4.4s%c%4d %8.3f %8.3f %8.3f %6.2f %6.2f\n",
-                    pdbtp[record],
+                    enumValueToString(record),
                     atom_seq_number,
                     atom_name,
                     res_name,
@@ -289,16 +296,36 @@ void write_pdbfile_indexed(FILE*          out,
                            int            nindex,
                            const int      index[],
                            gmx_conect     conect,
-                           bool           usePqrFormat)
+                           bool           usePqrFormat,
+                           bool           standardCompliantMode)
 {
-    gmx_conect_t*   gc = static_cast<gmx_conect_t*>(conect);
-    enum PDB_record type;
-    char            altloc;
-    real            occup, bfac;
-    gmx_bool        bOccup;
+    gmx_conect_t* gc = static_cast<gmx_conect_t*>(conect);
+    PdbRecordType type;
+    char          altloc;
+    real          occup, bfac;
+    gmx_bool      bOccup;
+
+    if (standardCompliantMode)
+    {
+        fprintf(out, "HEADER    GROMACS SIMULATION BOX                  01-JAN-00   0000\n");
+        fprintf(out,
+                "TITLE     Gromacs simulation box\n"
+                "COMPND    MOL_ID:  1;                                                           \n"
+                "COMPND   2 MOLECULE:  GROMACS SIMULATION BOX;                                   \n"
+                "COMPND   3 CHAIN: A;  \n"
+                "SOURCE    MOL_ID: 1;\n"
+                "SOURCE   2 SYNTHETIC\n"
+                "KEYWDS    GROMACS\n"
+                "EXPDTA    PURE PRODUCT OF COMPUTER SIMULATION\n"
+                "AUTHOR    GROMACS\n"
+                "REVDAT   1   01-JAN-00 0000    0\n");
+    }
+    else
+    {
+        fprintf(out, "TITLE     %s\n", (title && title[0]) ? title : gmx::bromacs().c_str());
+    }
 
 
-    fprintf(out, "TITLE     %s\n", (title && title[0]) ? title : gmx::bromacs().c_str());
     if (box && ((norm2(box[XX]) != 0.0F) || (norm2(box[YY]) != 0.0F) || (norm2(box[ZZ]) != 0.0F)))
     {
         gmx_write_pdb_box(out, pbcType, box);
@@ -322,7 +349,11 @@ void write_pdbfile_indexed(FILE*          out,
 
     fprintf(out, "MODEL %8d\n", model_nr > 0 ? model_nr : 1);
 
-    ResidueType rt;
+    // Collect last printed values for TER record
+    int         lastAtomNumber = 0;
+    std::string lastResName;
+    int         lastResNr = 0;
+
     for (int ii = 0; ii < nindex; ii++)
     {
         int         i      = index[ii];
@@ -359,7 +390,7 @@ void write_pdbfile_indexed(FILE*          out,
         {
             gmx_pdbinfo_init_default(&pdbinfo);
         }
-        type   = static_cast<enum PDB_record>(pdbinfo.type);
+        type   = pdbinfo.type;
         altloc = pdbinfo.altloc;
         if (!isalnum(altloc))
         {
@@ -385,6 +416,10 @@ void write_pdbfile_indexed(FILE*          out,
                                      bfac,
                                      atoms->atom[i].elem);
 
+            lastAtomNumber = i + 1;
+            lastResName    = resnm;
+            lastResNr      = resnr;
+
             if (atoms->pdbinfo && atoms->pdbinfo[i].bAnisotropic)
             {
                 fprintf(out,
@@ -420,7 +455,15 @@ void write_pdbfile_indexed(FILE*          out,
         }
     }
 
-    fprintf(out, "TER\n");
+    if (standardCompliantMode)
+    {
+        fprintf(out, "TER   %5d      %4.4s%c%4d\n", lastAtomNumber, lastResName.c_str(), chainid, lastResNr);
+    }
+    else
+    {
+        fprintf(out, "TER\n");
+    }
+
     fprintf(out, "ENDMDL\n");
 
     if (nullptr != gc)
@@ -455,28 +498,6 @@ void write_pdbfile(FILE*          out,
     sfree(index);
 }
 
-static int line2type(const char* line)
-{
-    int  k;
-    char type[8];
-
-    for (k = 0; (k < 6); k++)
-    {
-        type[k] = line[k];
-    }
-    type[k] = '\0';
-
-    for (k = 0; (k < epdbNR); k++)
-    {
-        if (std::strncmp(type, pdbtp[k], strlen(pdbtp[k])) == 0)
-        {
-            break;
-        }
-    }
-
-    return k;
-}
-
 static void read_anisou(char line[], int natom, t_atoms* atoms)
 {
     int  i, j, k, atomnr;
@@ -587,22 +608,25 @@ void get_pdb_atomnumber(const t_atoms* atoms, AtomProperties* aps)
                 atomNumberSet = true;
             }
         }
-        std::string buf;
+        static constexpr size_t sc_maxElementNameLength = 3;
+        static_assert(sizeof(atoms->atom[i].elem) >= sc_maxElementNameLength + 1);
+        std::string element;
         if (atomNumberSet)
         {
             atoms->atom[i].atomnumber = atomnumber;
-            buf                       = aps->elementFromAtomNumber(atomnumber);
+            element                   = aps->elementFromAtomNumber(atomnumber);
             if (debug)
             {
                 fprintf(debug, "Atomnumber for atom '%s' is %d\n", anm, atomnumber);
             }
         }
-        buf.resize(3);
-        std::strncpy(atoms->atom[i].elem, buf.c_str(), 4);
+        element.resize(sc_maxElementNameLength);
+        std::strcpy(atoms->atom[i].elem, element.c_str());
     }
 }
 
-static int read_atom(t_symtab* symtab, const char line[], int type, int natom, t_atoms* atoms, rvec x[], int chainnum)
+static int
+read_atom(t_symtab* symtab, const char line[], PdbRecordType type, int natom, t_atoms* atoms, rvec x[], int chainnum)
 {
     t_atom*       atomn;
     int           j, k;
@@ -861,7 +885,6 @@ int read_pdbfile(FILE*      in,
     gmx_bool      bCOMPND;
     gmx_bool      bConnWarn = FALSE;
     char          line[STRLEN + 1];
-    int           line_type;
     char *        c, *d;
     int           natom, chainnum;
     gmx_bool      bStop = FALSE;
@@ -886,28 +909,39 @@ int read_pdbfile(FILE*      in,
     title[0] = '\0';
     natom    = 0;
     chainnum = 0;
+    static const gmx::StringToEnumValueConverter<PdbRecordType, enumValueToString, gmx::StringCompareType::Exact, gmx::StripStrings::Yes>
+            sc_pdbRecordTypeIdentifier;
     while (!bStop && (fgets2(line, STRLEN, in) != nullptr))
     {
-        line_type = line2type(line);
+        // Convert line to a null-terminated string, then take a substring of at most 6 chars
+        std::string                  lineAsString(line);
+        std::optional<PdbRecordType> line_type =
+                sc_pdbRecordTypeIdentifier.valueFrom(lineAsString.substr(0, 6));
+        if (!line_type)
+        {
+            // Skip this line because it does not start with a
+            // recognized leading string describing a PDB record type.
+            continue;
+        }
 
-        switch (line_type)
+        switch (line_type.value())
         {
-            case epdbATOM:
-            case epdbHETATM:
-                natom = read_atom(symtab, line, line_type, natom, atoms, x, chainnum);
+            case PdbRecordType::Atom:
+            case PdbRecordType::Hetatm:
+                natom = read_atom(symtab, line, line_type.value(), natom, atoms, x, chainnum);
                 break;
 
-            case epdbANISOU:
+            case PdbRecordType::Anisou:
                 if (atoms->havePdbInfo)
                 {
                     read_anisou(line, natom, atoms);
                 }
                 break;
 
-            case epdbCRYST1: read_cryst1(line, pbcType, box); break;
+            case PdbRecordType::Cryst1: read_cryst1(line, pbcType, box); break;
 
-            case epdbTITLE:
-            case epdbHEADER:
+            case PdbRecordType::Title:
+            case PdbRecordType::Header:
                 if (std::strlen(line) > 6)
                 {
                     c = line + 6;
@@ -933,7 +967,7 @@ int read_pdbfile(FILE*      in,
                 }
                 break;
 
-            case epdbCOMPND:
+            case PdbRecordType::Compound:
                 if ((!std::strstr(line, ": ")) || (std::strstr(line + 6, "MOLECULE:")))
                 {
                     if (!(c = std::strstr(line + 6, "MOLECULE:")))
@@ -975,17 +1009,17 @@ int read_pdbfile(FILE*      in,
                 }
                 break;
 
-            case epdbTER: chainnum++; break;
+            case PdbRecordType::Ter: chainnum++; break;
 
-            case epdbMODEL:
+            case PdbRecordType::Model:
                 if (model_nr)
                 {
                     sscanf(line, "%*s%d", model_nr);
                 }
                 break;
 
-            case epdbENDMDL: bStop = TRUE; break;
-            case epdbCONECT:
+            case PdbRecordType::EndModel: bStop = TRUE; break;
+            case PdbRecordType::Conect:
                 if (gc)
                 {
                     gmx_conect_addline(gc, line);
@@ -1055,27 +1089,27 @@ gmx_conect gmx_conect_generate(const t_topology* top)
     return gc;
 }
 
-int gmx_fprintf_pdb_atomline(FILE*           fp,
-                             enum PDB_record record,
-                             int             atom_seq_number,
-                             const char*     atom_name,
-                             char            alternate_location,
-                             const char*     res_name,
-                             char            chain_id,
-                             int             res_seq_number,
-                             char            res_insertion_code,
-                             real            x,
-                             real            y,
-                             real            z,
-                             real            occupancy,
-                             real            b_factor,
-                             const char*     element)
+int gmx_fprintf_pdb_atomline(FILE*         fp,
+                             PdbRecordType record,
+                             int           atom_seq_number,
+                             const char*   atom_name,
+                             char          alternate_location,
+                             const char*   res_name,
+                             char          chain_id,
+                             int           res_seq_number,
+                             char          res_insertion_code,
+                             real          x,
+                             real          y,
+                             real          z,
+                             real          occupancy,
+                             real          b_factor,
+                             const char*   element)
 {
     char     tmp_atomname[6], tmp_resname[6];
     gmx_bool start_name_in_col13;
     int      n;
 
-    if (record != epdbATOM && record != epdbHETATM)
+    if (record != PdbRecordType::Atom && record != PdbRecordType::Hetatm)
     {
         gmx_fatal(FARGS, "Can only print PDB atom lines as ATOM or HETATM records");
     }
@@ -1120,7 +1154,7 @@ int gmx_fprintf_pdb_atomline(FILE*           fp,
 
     n = fprintf(fp,
                 "%-6s%5d %-4.4s%c%4.4s%c%4d%c   %8.3f%8.3f%8.3f%6.2f%6.2f          %2s\n",
-                pdbtp[record],
+                enumValueToString(record),
                 atom_seq_number,
                 tmp_atomname,
                 alternate_location,