e31a25c9679b9e7f8a209fe73702552ff03b333f
[alexxy/gromacs.git] / src / gromacs / gmxpreprocess / pdb2top.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "pdb2top.h"
41
42 #include <cctype>
43 #include <cmath>
44 #include <cstdio>
45 #include <cstring>
46
47 #include <algorithm>
48 #include <string>
49 #include <vector>
50
51 #include "gromacs/fileio/pdbio.h"
52 #include "gromacs/gmxpreprocess/add_par.h"
53 #include "gromacs/gmxpreprocess/fflibutil.h"
54 #include "gromacs/gmxpreprocess/gen_ad.h"
55 #include "gromacs/gmxpreprocess/gen_vsite.h"
56 #include "gromacs/gmxpreprocess/grompp_impl.h"
57 #include "gromacs/gmxpreprocess/h_db.h"
58 #include "gromacs/gmxpreprocess/notset.h"
59 #include "gromacs/gmxpreprocess/pgutil.h"
60 #include "gromacs/gmxpreprocess/specbond.h"
61 #include "gromacs/gmxpreprocess/topdirs.h"
62 #include "gromacs/gmxpreprocess/topio.h"
63 #include "gromacs/gmxpreprocess/toputil.h"
64 #include "gromacs/math/functions.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/topology/residuetypes.h"
67 #include "gromacs/topology/symtab.h"
68 #include "gromacs/utility/binaryinformation.h"
69 #include "gromacs/utility/cstringutil.h"
70 #include "gromacs/utility/dir_separator.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/fatalerror.h"
73 #include "gromacs/utility/futil.h"
74 #include "gromacs/utility/logger.h"
75 #include "gromacs/utility/niceheader.h"
76 #include "gromacs/utility/path.h"
77 #include "gromacs/utility/programcontext.h"
78 #include "gromacs/utility/smalloc.h"
79 #include "gromacs/utility/strconvert.h"
80 #include "gromacs/utility/strdb.h"
81 #include "gromacs/utility/stringutil.h"
82 #include "gromacs/utility/textwriter.h"
83
84 #include "hackblock.h"
85 #include "resall.h"
86
87 /* this must correspond to enum in pdb2top.h */
88 const char* hh[ehisNR] = { "HISD", "HISE", "HISH", "HIS1" };
89
90 static int missing_atoms(const PreprocessResidue* rp, int resind, t_atoms* at, int i0, int i, const gmx::MDLogger& logger)
91 {
92     int nmiss = 0;
93     for (int j = 0; j < rp->natom(); j++)
94     {
95         const char* name   = *(rp->atomname[j]);
96         bool        bFound = false;
97         for (int k = i0; k < i; k++)
98         {
99             bFound = (bFound || (gmx_strcasecmp(*(at->atomname[k]), name) == 0));
100         }
101         if (!bFound)
102         {
103             nmiss++;
104             GMX_LOG(logger.warning)
105                     .asParagraph()
106                     .appendTextFormatted("atom %s is missing in residue %s %d in the pdb file",
107                                          name,
108                                          *(at->resinfo[resind].name),
109                                          at->resinfo[resind].nr);
110             if (name[0] == 'H' || name[0] == 'h')
111             {
112                 GMX_LOG(logger.warning)
113                         .asParagraph()
114                         .appendTextFormatted(
115                                 "You might need to add atom %s to the hydrogen database of "
116                                 "building block %s "
117                                 "in the file %s.hdb (see the manual)",
118                                 name,
119                                 *(at->resinfo[resind].rtp),
120                                 rp->filebase.c_str());
121             }
122         }
123     }
124
125     return nmiss;
126 }
127
128 bool is_int(double x)
129 {
130     const double tol = 1e-4;
131     int          ix;
132
133     if (x < 0)
134     {
135         x = -x;
136     }
137     ix = gmx::roundToInt(x);
138
139     return (fabs(x - ix) < tol);
140 }
141
142 static void choose_ff_impl(const char*          ffsel,
143                            char*                forcefield,
144                            int                  ff_maxlen,
145                            char*                ffdir,
146                            int                  ffdir_maxlen,
147                            const gmx::MDLogger& logger)
148 {
149     std::vector<gmx::DataFileInfo> ffdirs = fflib_enumerate_forcefields();
150     const int                      nff    = ssize(ffdirs);
151
152     /* Replace with unix path separators */
153 #if DIR_SEPARATOR != '/'
154     for (int i = 0; i < nff; ++i)
155     {
156         std::replace(ffdirs[i].dir.begin(), ffdirs[i].dir.end(), DIR_SEPARATOR, '/');
157     }
158 #endif
159
160     /* Store the force field names in ffs */
161     std::vector<std::string> ffs;
162     ffs.reserve(ffdirs.size());
163     for (int i = 0; i < nff; ++i)
164     {
165         ffs.push_back(gmx::stripSuffixIfPresent(ffdirs[i].name, fflib_forcefield_dir_ext()));
166     }
167
168     int sel;
169     if (ffsel != nullptr)
170     {
171         sel        = -1;
172         int cwdsel = -1;
173         int nfound = 0;
174         for (int i = 0; i < nff; ++i)
175         {
176             if (ffs[i] == ffsel)
177             {
178                 /* Matching ff name */
179                 sel = i;
180                 nfound++;
181
182                 if (ffdirs[i].dir == ".")
183                 {
184                     cwdsel = i;
185                 }
186             }
187         }
188
189         if (cwdsel != -1)
190         {
191             sel = cwdsel;
192         }
193
194         if (nfound > 1)
195         {
196             if (cwdsel != -1)
197             {
198                 GMX_LOG(logger.warning)
199                         .asParagraph()
200                         .appendTextFormatted(
201                                 "Force field '%s' occurs in %d places. pdb2gmx is using the one in "
202                                 "the current directory. Use interactive selection "
203                                 "(not the -ff option) if you would prefer a different one.",
204                                 ffsel,
205                                 nfound);
206             }
207             else
208             {
209                 std::string message = gmx::formatString(
210                         "Force field '%s' occurs in %d places, but not in "
211                         "the current directory.\n"
212                         "Run without the -ff switch and select the force "
213                         "field interactively.",
214                         ffsel,
215                         nfound);
216                 GMX_THROW(gmx::InconsistentInputError(message));
217             }
218         }
219         else if (nfound == 0)
220         {
221             std::string message = gmx::formatString(
222                     "Could not find force field '%s' in current directory, "
223                     "install tree or GMXLIB path.",
224                     ffsel);
225             GMX_THROW(gmx::InconsistentInputError(message));
226         }
227     }
228     else if (nff > 1)
229     {
230         std::vector<std::string> desc;
231         desc.reserve(ffdirs.size());
232         for (int i = 0; i < nff; ++i)
233         {
234             std::string docFileName(gmx::Path::join(ffdirs[i].dir, ffdirs[i].name, fflib_forcefield_doc()));
235             // TODO: Just try to open the file with a method that does not
236             // throw/bail out with a fatal error instead of multiple checks.
237             if (gmx::File::exists(docFileName, gmx::File::returnFalseOnError))
238             {
239                 // TODO: Use a C++ API without such an intermediate/fixed-length buffer.
240                 char buf[STRLEN];
241                 /* We don't use fflib_open, because we don't want printf's */
242                 FILE* fp = gmx_ffopen(docFileName, "r");
243                 get_a_line(fp, buf, STRLEN);
244                 gmx_ffclose(fp);
245                 desc.emplace_back(buf);
246             }
247             else
248             {
249                 desc.push_back(ffs[i]);
250             }
251         }
252         /* Order force fields from the same dir alphabetically
253          * and put deprecated force fields at the end.
254          */
255         for (int i = 0; i < nff; ++i)
256         {
257             for (int j = i + 1; j < nff; ++j)
258             {
259                 if (ffdirs[i].dir == ffdirs[j].dir
260                     && ((desc[i][0] == '[' && desc[j][0] != '[')
261                         || ((desc[i][0] == '[' || desc[j][0] != '[')
262                             && gmx_strcasecmp(desc[i].c_str(), desc[j].c_str()) > 0)))
263                 {
264                     std::swap(ffdirs[i].name, ffdirs[j].name);
265                     std::swap(ffs[i], ffs[j]);
266                     std::swap(desc[i], desc[j]);
267                 }
268             }
269         }
270
271         GMX_LOG(logger.info).asParagraph().appendTextFormatted("Select the Force Field:");
272         for (int i = 0; i < nff; ++i)
273         {
274             if (i == 0 || ffdirs[i - 1].dir != ffdirs[i].dir)
275             {
276                 if (ffdirs[i].dir == ".")
277                 {
278                     GMX_LOG(logger.info)
279                             .asParagraph()
280                             .appendTextFormatted("From current directory:");
281                 }
282                 else
283                 {
284                     GMX_LOG(logger.info)
285                             .asParagraph()
286                             .appendTextFormatted("From '%s':", ffdirs[i].dir.c_str());
287                 }
288             }
289             GMX_LOG(logger.info).asParagraph().appendTextFormatted("%2d: %s", i + 1, desc[i].c_str());
290         }
291
292         sel = -1;
293         // TODO: Add a C++ API for this.
294         char  buf[STRLEN];
295         char* pret;
296         do
297         {
298             pret = fgets(buf, STRLEN, stdin);
299
300             if (pret != nullptr)
301             {
302                 sel = strtol(buf, nullptr, 10);
303                 sel--;
304             }
305         } while (pret == nullptr || (sel < 0) || (sel >= nff));
306
307         /* Check for a current limitation of the fflib code.
308          * It will always read from the first ff directory in the list.
309          * This check assumes that the order of ffs matches the order
310          * in which fflib_open searches ff library files.
311          */
312         for (int i = 0; i < sel; i++)
313         {
314             if (ffs[i] == ffs[sel])
315             {
316                 std::string message = gmx::formatString(
317                         "Can only select the first of multiple force "
318                         "field entries with directory name '%s%s' in "
319                         "the list. If you want to use the next entry, "
320                         "run pdb2gmx in a different directory, set GMXLIB "
321                         "to point to the desired force field first, and/or "
322                         "rename or move the force field directory present "
323                         "in the current working directory.",
324                         ffs[sel].c_str(),
325                         fflib_forcefield_dir_ext());
326                 GMX_THROW(gmx::NotImplementedError(message));
327             }
328         }
329     }
330     else
331     {
332         sel = 0;
333     }
334
335     if (ffs[sel].length() >= static_cast<size_t>(ff_maxlen))
336     {
337         std::string message = gmx::formatString("Length of force field name (%d) >= maxlen (%d)",
338                                                 static_cast<int>(ffs[sel].length()),
339                                                 ff_maxlen);
340         GMX_THROW(gmx::InvalidInputError(message));
341     }
342     strcpy(forcefield, ffs[sel].c_str());
343
344     std::string ffpath;
345     if (ffdirs[sel].bFromDefaultDir)
346     {
347         ffpath = ffdirs[sel].name;
348     }
349     else
350     {
351         ffpath = gmx::Path::join(ffdirs[sel].dir, ffdirs[sel].name);
352     }
353     if (ffpath.length() >= static_cast<size_t>(ffdir_maxlen))
354     {
355         std::string message = gmx::formatString("Length of force field dir (%d) >= maxlen (%d)",
356                                                 static_cast<int>(ffpath.length()),
357                                                 ffdir_maxlen);
358         GMX_THROW(gmx::InvalidInputError(message));
359     }
360     strcpy(ffdir, ffpath.c_str());
361 }
362
363 void choose_ff(const char* ffsel, char* forcefield, int ff_maxlen, char* ffdir, int ffdir_maxlen, const gmx::MDLogger& logger)
364 {
365     try
366     {
367         choose_ff_impl(ffsel, forcefield, ff_maxlen, ffdir, ffdir_maxlen, logger);
368     }
369     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
370 }
371
372 void choose_watermodel(const char* wmsel, const char* ffdir, char** watermodel, const gmx::MDLogger& logger)
373 {
374     const char* fn_watermodels = "watermodels.dat";
375     FILE*       fp;
376     char        buf[STRLEN];
377     int         nwm, sel, i;
378     char**      model;
379     char*       pret;
380
381     if (strcmp(wmsel, "none") == 0)
382     {
383         *watermodel = nullptr;
384
385         return;
386     }
387     else if (strcmp(wmsel, "select") != 0)
388     {
389         *watermodel = gmx_strdup(wmsel);
390
391         return;
392     }
393
394     std::string filename = gmx::Path::join(ffdir, fn_watermodels);
395     if (!fflib_fexist(filename))
396     {
397         GMX_LOG(logger.warning)
398                 .asParagraph()
399                 .appendTextFormatted("No file '%s' found, will not include a water model", fn_watermodels);
400         *watermodel = nullptr;
401
402         return;
403     }
404
405     fp = fflib_open(filename);
406     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Select the Water Model:");
407     nwm   = 0;
408     model = nullptr;
409     while (get_a_line(fp, buf, STRLEN))
410     {
411         srenew(model, nwm + 1);
412         snew(model[nwm], STRLEN);
413         sscanf(buf, "%s%n", model[nwm], &i);
414         if (i > 0)
415         {
416             ltrim(buf + i);
417             GMX_LOG(logger.info).asParagraph().appendTextFormatted("%2d: %s", nwm + 1, buf + i);
418             nwm++;
419         }
420         else
421         {
422             sfree(model[nwm]);
423         }
424     }
425     gmx_ffclose(fp);
426     GMX_LOG(logger.info).asParagraph().appendTextFormatted("%2d: %s", nwm + 1, "None");
427
428     sel = -1;
429     do
430     {
431         pret = fgets(buf, STRLEN, stdin);
432
433         if (pret != nullptr)
434         {
435             sel = strtol(buf, nullptr, 10);
436             sel--;
437         }
438     } while (pret == nullptr || sel < 0 || sel > nwm);
439
440     if (sel == nwm)
441     {
442         *watermodel = nullptr;
443     }
444     else
445     {
446         *watermodel = gmx_strdup(model[sel]);
447     }
448
449     for (i = 0; i < nwm; i++)
450     {
451         sfree(model[i]);
452     }
453     sfree(model);
454 }
455
456 static int name2type(t_atoms*                               at,
457                      int**                                  cgnr,
458                      gmx::ArrayRef<const PreprocessResidue> usedPpResidues,
459                      ResidueType*                           rt,
460                      const gmx::MDLogger&                   logger)
461 {
462     int    i, j, prevresind, i0, prevcg, cg, curcg;
463     char*  name;
464     bool   bNterm;
465     double qt;
466     int    nmissat;
467
468     nmissat = 0;
469
470     int resind = -1;
471     bNterm     = false;
472     i0         = 0;
473     snew(*cgnr, at->nr);
474     qt    = 0;
475     curcg = 0;
476     cg    = -1;
477
478     for (i = 0; (i < at->nr); i++)
479     {
480         prevresind = resind;
481         if (at->atom[i].resind != resind)
482         {
483             resind     = at->atom[i].resind;
484             bool bProt = rt->namedResidueHasType(*(at->resinfo[resind].name), "Protein");
485             bNterm     = bProt && (resind == 0);
486             if (resind > 0)
487             {
488                 nmissat += missing_atoms(&usedPpResidues[prevresind], prevresind, at, i0, i, logger);
489             }
490             i0 = i;
491         }
492         if (at->atom[i].m == 0)
493         {
494             qt               = 0;
495             prevcg           = cg;
496             name             = *(at->atomname[i]);
497             j                = search_jtype(usedPpResidues[resind], name, bNterm);
498             at->atom[i].type = usedPpResidues[resind].atom[j].type;
499             at->atom[i].q    = usedPpResidues[resind].atom[j].q;
500             at->atom[i].m    = usedPpResidues[resind].atom[j].m;
501             cg               = usedPpResidues[resind].cgnr[j];
502             /* A charge group number -1 signals a separate charge group
503              * for this atom.
504              */
505             if ((cg == -1) || (cg != prevcg) || (resind != prevresind))
506             {
507                 curcg++;
508             }
509         }
510         else
511         {
512             cg = -1;
513             if (is_int(qt))
514             {
515                 qt = 0;
516                 curcg++;
517             }
518             qt += at->atom[i].q;
519         }
520         (*cgnr)[i]        = curcg;
521         at->atom[i].typeB = at->atom[i].type;
522         at->atom[i].qB    = at->atom[i].q;
523         at->atom[i].mB    = at->atom[i].m;
524     }
525     nmissat += missing_atoms(&usedPpResidues[resind], resind, at, i0, i, logger);
526
527     return nmissat;
528 }
529
530 static void print_top_heavy_H(FILE* out, real mHmult)
531 {
532     if (mHmult == 2.0)
533     {
534         fprintf(out, "; Using deuterium instead of hydrogen\n\n");
535     }
536     else if (mHmult == 4.0)
537     {
538         fprintf(out, "#define HEAVY_H\n\n");
539     }
540     else if (mHmult != 1.0)
541     {
542         fprintf(stderr,
543                 "WARNING: unsupported proton mass multiplier (%g) "
544                 "in pdb2top\n",
545                 mHmult);
546     }
547 }
548
549 void print_top_comment(FILE* out, const char* filename, const char* ffdir, bool bITP)
550 {
551     char  ffdir_parent[STRLEN];
552     char* p;
553
554     try
555     {
556         gmx::TextWriter writer(out);
557         gmx::niceHeader(&writer, filename, ';');
558         writer.writeLine(gmx::formatString(";\tThis is a %s topology file", bITP ? "include" : "standalone"));
559         writer.writeLine(";");
560
561         gmx::BinaryInformationSettings settings;
562         settings.generatedByHeader(true);
563         settings.linePrefix(";\t");
564         gmx::printBinaryInformation(&writer, gmx::getProgramContext(), settings);
565     }
566     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
567
568     if (strchr(ffdir, '/') == nullptr)
569     {
570         fprintf(out, ";\tForce field was read from the standard GROMACS share directory.\n;\n\n");
571     }
572     else if (ffdir[0] == '.')
573     {
574         fprintf(out,
575                 ";\tForce field was read from current directory or a relative path - path "
576                 "added.\n;\n\n");
577     }
578     else
579     {
580         strncpy(ffdir_parent, ffdir, STRLEN - 1);
581         ffdir_parent[STRLEN - 1] = '\0'; /*make sure it is 0-terminated even for long string*/
582         p                        = strrchr(ffdir_parent, '/');
583
584         *p = '\0';
585
586         fprintf(out,
587                 ";\tForce field data was read from:\n"
588                 ";\t%s\n"
589                 ";\n"
590                 ";\tNote:\n"
591                 ";\tThis might be a non-standard force field location. When you use this topology, "
592                 "the\n"
593                 ";\tforce field must either be present in the current directory, or the location\n"
594                 ";\tspecified in the GMXLIB path variable or with the 'include' mdp file "
595                 "option.\n;\n\n",
596                 ffdir_parent);
597     }
598 }
599
600 void print_top_header(FILE* out, const char* filename, bool bITP, const char* ffdir, real mHmult)
601 {
602     const char* p;
603
604     print_top_comment(out, filename, ffdir, bITP);
605
606     print_top_heavy_H(out, mHmult);
607     fprintf(out, "; Include forcefield parameters\n");
608
609     p = strrchr(ffdir, '/');
610     p = (ffdir[0] == '.' || p == nullptr) ? ffdir : p + 1;
611
612     fprintf(out, "#include \"%s/%s\"\n\n", p, fflib_forcefield_itp());
613 }
614
615 static void print_top_posre(FILE* out, const char* pr)
616 {
617     fprintf(out, "; Include Position restraint file\n");
618     fprintf(out, "#ifdef POSRES\n");
619     fprintf(out, "#include \"%s\"\n", pr);
620     fprintf(out, "#endif\n\n");
621 }
622
623 static void print_top_water(FILE* out, const char* ffdir, const char* water)
624 {
625     const char* p;
626     char        buf[STRLEN];
627
628     fprintf(out, "; Include water topology\n");
629
630     p = strrchr(ffdir, '/');
631     p = (ffdir[0] == '.' || p == nullptr) ? ffdir : p + 1;
632     fprintf(out, "#include \"%s/%s.itp\"\n", p, water);
633
634     fprintf(out, "\n");
635     fprintf(out, "#ifdef POSRES_WATER\n");
636     fprintf(out, "; Position restraint for each water oxygen\n");
637     fprintf(out, "[ position_restraints ]\n");
638     fprintf(out, ";%3s %5s %9s %10s %10s\n", "i", "funct", "fcx", "fcy", "fcz");
639     fprintf(out, "%4d %4d %10g %10g %10g\n", 1, 1, 1000.0, 1000.0, 1000.0);
640     fprintf(out, "#endif\n");
641     fprintf(out, "\n");
642
643     sprintf(buf, "%s/ions.itp", p);
644
645     if (fflib_fexist(buf))
646     {
647         fprintf(out, "; Include topology for ions\n");
648         fprintf(out, "#include \"%s\"\n", buf);
649         fprintf(out, "\n");
650     }
651 }
652
653 static void print_top_system(FILE* out, const char* title)
654 {
655     fprintf(out, "[ %s ]\n", enumValueToString(Directive::d_system));
656     fprintf(out, "; Name\n");
657     fprintf(out, "%s\n\n", title[0] ? title : "Protein");
658 }
659
660 void print_top_mols(FILE*                            out,
661                     const char*                      title,
662                     const char*                      ffdir,
663                     const char*                      water,
664                     gmx::ArrayRef<const std::string> incls,
665                     gmx::ArrayRef<const t_mols>      mols)
666 {
667
668     if (!incls.empty())
669     {
670         fprintf(out, "; Include chain topologies\n");
671         for (const auto& incl : incls)
672         {
673             fprintf(out, "#include \"%s\"\n", gmx::Path::getFilename(incl).c_str());
674         }
675         fprintf(out, "\n");
676     }
677
678     if (water)
679     {
680         print_top_water(out, ffdir, water);
681     }
682     print_top_system(out, title);
683
684     if (!mols.empty())
685     {
686         fprintf(out, "[ %s ]\n", enumValueToString(Directive::d_molecules));
687         fprintf(out, "; %-15s %5s\n", "Compound", "#mols");
688         for (const auto& mol : mols)
689         {
690             fprintf(out, "%-15s %5d\n", mol.name.c_str(), mol.nr);
691         }
692     }
693 }
694
695 void write_top(FILE*                                   out,
696                const char*                             pr,
697                const char*                             molname,
698                t_atoms*                                at,
699                bool                                    bRTPresname,
700                gmx::ArrayRef<const int>                bts,
701                gmx::ArrayRef<const InteractionsOfType> plist,
702                t_excls                                 excls[],
703                PreprocessingAtomTypes*                 atype,
704                int*                                    cgnr,
705                int                                     nrexcl)
706 /* NOTE: nrexcl is not the size of *excl! */
707 {
708     if (at && atype && cgnr)
709     {
710         fprintf(out, "[ %s ]\n", enumValueToString(Directive::d_moleculetype));
711         fprintf(out, "; %-15s %5s\n", "Name", "nrexcl");
712         fprintf(out, "%-15s %5d\n\n", molname ? molname : "Protein", nrexcl);
713
714         print_atoms(out, atype, at, cgnr, bRTPresname);
715         print_bondeds(
716                 out, at->nr, Directive::d_bonds, F_BONDS, bts[static_cast<int>(BondedTypes::Bonds)], plist);
717         print_bondeds(out, at->nr, Directive::d_constraints, F_CONSTR, 0, plist);
718         print_bondeds(out, at->nr, Directive::d_constraints, F_CONSTRNC, 0, plist);
719         print_bondeds(out, at->nr, Directive::d_pairs, F_LJ14, 0, plist);
720         print_excl(out, at->nr, excls);
721         print_bondeds(
722                 out, at->nr, Directive::d_angles, F_ANGLES, bts[static_cast<int>(BondedTypes::Angles)], plist);
723         print_bondeds(out,
724                       at->nr,
725                       Directive::d_dihedrals,
726                       F_PDIHS,
727                       bts[static_cast<int>(BondedTypes::ProperDihedrals)],
728                       plist);
729         print_bondeds(out,
730                       at->nr,
731                       Directive::d_dihedrals,
732                       F_IDIHS,
733                       bts[static_cast<int>(BondedTypes::ImproperDihedrals)],
734                       plist);
735         print_bondeds(out, at->nr, Directive::d_cmap, F_CMAP, bts[static_cast<int>(BondedTypes::Cmap)], plist);
736         print_bondeds(out, at->nr, Directive::d_polarization, F_POLARIZATION, 0, plist);
737         print_bondeds(out, at->nr, Directive::d_thole_polarization, F_THOLE_POL, 0, plist);
738         print_bondeds(out, at->nr, Directive::d_vsites2, F_VSITE2, 0, plist);
739         print_bondeds(out, at->nr, Directive::d_vsites3, F_VSITE3, 0, plist);
740         print_bondeds(out, at->nr, Directive::d_vsites3, F_VSITE3FD, 0, plist);
741         print_bondeds(out, at->nr, Directive::d_vsites3, F_VSITE3FAD, 0, plist);
742         print_bondeds(out, at->nr, Directive::d_vsites3, F_VSITE3OUT, 0, plist);
743         print_bondeds(out, at->nr, Directive::d_vsites4, F_VSITE4FD, 0, plist);
744         print_bondeds(out, at->nr, Directive::d_vsites4, F_VSITE4FDN, 0, plist);
745
746         if (pr)
747         {
748             print_top_posre(out, pr);
749         }
750     }
751 }
752
753
754 static void do_ssbonds(InteractionsOfType*                ps,
755                        t_atoms*                           atoms,
756                        gmx::ArrayRef<const DisulfideBond> ssbonds,
757                        bool                               bAllowMissing)
758 {
759     for (const auto& bond : ssbonds)
760     {
761         int ri = bond.firstResidue;
762         int rj = bond.secondResidue;
763         int ai = search_res_atom(bond.firstAtom.c_str(), ri, atoms, "special bond", bAllowMissing);
764         int aj = search_res_atom(bond.secondAtom.c_str(), rj, atoms, "special bond", bAllowMissing);
765         if ((ai == -1) || (aj == -1))
766         {
767             gmx_fatal(FARGS,
768                       "Trying to make impossible special bond (%s-%s)!",
769                       bond.firstAtom.c_str(),
770                       bond.secondAtom.c_str());
771         }
772         add_param(ps, ai, aj, {}, nullptr);
773     }
774 }
775
776 static void at2bonds(InteractionsOfType*                  psb,
777                      gmx::ArrayRef<MoleculePatchDatabase> globalPatches,
778                      t_atoms*                             atoms,
779                      gmx::ArrayRef<const gmx::RVec>       x,
780                      real                                 long_bond_dist,
781                      real                                 short_bond_dist,
782                      gmx::ArrayRef<const int>             cyclicBondsIndex,
783                      const gmx::MDLogger&                 logger)
784 {
785     real        long_bond_dist2, short_bond_dist2;
786     const char* ptr;
787
788     long_bond_dist2  = gmx::square(long_bond_dist);
789     short_bond_dist2 = gmx::square(short_bond_dist);
790
791     if (debug)
792     {
793         ptr = "bond";
794     }
795     else
796     {
797         ptr = "check";
798     }
799
800     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Making bonds...");
801     int i = 0;
802     for (int resind = 0; (resind < atoms->nres) && (i < atoms->nr); resind++)
803     {
804         /* add bonds from list of bonded interactions */
805         for (const auto& patch : globalPatches[resind].rb[BondedTypes::Bonds].b)
806         {
807             /* Unfortunately we can not issue errors or warnings
808              * for missing atoms in bonds, as the hydrogens and terminal atoms
809              * have not been added yet.
810              */
811             int ai = search_atom(patch.ai().c_str(), i, atoms, ptr, TRUE, cyclicBondsIndex);
812             int aj = search_atom(patch.aj().c_str(), i, atoms, ptr, TRUE, cyclicBondsIndex);
813             if (ai != -1 && aj != -1)
814             {
815                 real dist2 = distance2(x[ai], x[aj]);
816                 if (dist2 > long_bond_dist2)
817
818                 {
819                     GMX_LOG(logger.warning)
820                             .asParagraph()
821                             .appendTextFormatted(
822                                     "Long Bond (%d-%d = %g nm)", ai + 1, aj + 1, std::sqrt(dist2));
823                 }
824                 else if (dist2 < short_bond_dist2)
825                 {
826                     GMX_LOG(logger.warning)
827                             .asParagraph()
828                             .appendTextFormatted(
829                                     "Short Bond (%d-%d = %g nm)", ai + 1, aj + 1, std::sqrt(dist2));
830                 }
831                 add_param(psb, ai, aj, {}, patch.s.c_str());
832             }
833         }
834         /* add bonds from list of hacks (each added atom gets a bond) */
835         while ((i < atoms->nr) && (atoms->atom[i].resind == resind))
836         {
837             for (const auto& patch : globalPatches[resind].hack)
838             {
839                 if ((patch.tp > 0 || patch.type() == MoleculePatchType::Add)
840                     && patch.a[0] == *(atoms->atomname[i]))
841                 {
842                     switch (patch.tp)
843                     {
844                         case 9:                                        /* COOH terminus */
845                             add_param(psb, i, i + 1, {}, nullptr);     /* C-O  */
846                             add_param(psb, i, i + 2, {}, nullptr);     /* C-OA */
847                             add_param(psb, i + 2, i + 3, {}, nullptr); /* OA-H */
848                             break;
849                         default:
850                             for (int k = 0; (k < patch.nr); k++)
851                             {
852                                 add_param(psb, i, i + k + 1, {}, nullptr);
853                             }
854                     }
855                 }
856             }
857             i++;
858         }
859         /* we're now at the start of the next residue */
860     }
861 }
862
863 static bool pcompar(const InteractionOfType& a, const InteractionOfType& b)
864 {
865     int d;
866
867     if (((d = a.ai() - b.ai()) != 0) || ((d = a.aj() - b.aj()) != 0))
868     {
869         return d < 0;
870     }
871     else
872     {
873         return a.interactionTypeName().length() > b.interactionTypeName().length();
874     }
875 }
876
877 static void clean_bonds(InteractionsOfType* ps, const gmx::MDLogger& logger)
878 {
879     if (ps->size() > 0)
880     {
881         /* Sort bonds */
882         for (auto& bond : ps->interactionTypes)
883         {
884             bond.sortAtomIds();
885         }
886         std::sort(ps->interactionTypes.begin(), ps->interactionTypes.end(), pcompar);
887
888         /* remove doubles, keep the first one always. */
889         int oldNumber = ps->size();
890         for (auto parm = ps->interactionTypes.begin() + 1; parm != ps->interactionTypes.end();)
891         {
892             auto prev = parm - 1;
893             if (parm->ai() == prev->ai() && parm->aj() == prev->aj())
894             {
895                 parm = ps->interactionTypes.erase(parm);
896             }
897             else
898             {
899                 ++parm;
900             }
901         }
902         GMX_LOG(logger.info)
903                 .asParagraph()
904                 .appendTextFormatted("Number of bonds was %d, now %zu", oldNumber, ps->size());
905     }
906     else
907     {
908         GMX_LOG(logger.info).asParagraph().appendTextFormatted("No bonds");
909     }
910 }
911
912 void print_sums(const t_atoms* atoms, bool bSystem, const gmx::MDLogger& logger)
913 {
914     double      m, qtot;
915     int         i;
916     const char* where;
917
918     if (bSystem)
919     {
920         where = " in system";
921     }
922     else
923     {
924         where = "";
925     }
926
927     m    = 0;
928     qtot = 0;
929     for (i = 0; (i < atoms->nr); i++)
930     {
931         m += atoms->atom[i].m;
932         qtot += atoms->atom[i].q;
933     }
934
935     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Total mass%s %.3f a.m.u.", where, m);
936     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Total charge%s %.3f e", where, qtot);
937 }
938
939 static void check_restp_type(const char* name, int t1, int t2)
940 {
941     if (t1 != t2)
942     {
943         gmx_fatal(FARGS, "Residues in one molecule have a different '%s' type: %d and %d", name, t1, t2);
944     }
945 }
946
947 static void check_restp_types(const PreprocessResidue& r0, const PreprocessResidue& r1)
948 {
949     check_restp_type("all dihedrals",
950                      static_cast<int>(r0.bKeepAllGeneratedDihedrals),
951                      static_cast<int>(r1.bKeepAllGeneratedDihedrals));
952     check_restp_type("nrexcl", r0.nrexcl, r1.nrexcl);
953     check_restp_type("HH14",
954                      static_cast<int>(r0.bGenerateHH14Interactions),
955                      static_cast<int>(r1.bGenerateHH14Interactions));
956     check_restp_type("remove dihedrals",
957                      static_cast<int>(r0.bRemoveDihedralIfWithImproper),
958                      static_cast<int>(r1.bRemoveDihedralIfWithImproper));
959
960     for (auto i : gmx::EnumerationWrapper<BondedTypes>{})
961     {
962         check_restp_type(enumValueToString(i), r0.rb[i].type, r1.rb[i].type);
963     }
964 }
965
966 static void add_atom_to_restp(PreprocessResidue*   usedPpResidues,
967                               t_symtab*            symtab,
968                               int                  at_start,
969                               const MoleculePatch* patch)
970 {
971     /* now add them */
972     for (int k = 0; k < patch->nr; k++)
973     {
974         /* set counter in atomname */
975         std::string buf = patch->nname;
976         if (patch->nr > 1)
977         {
978             buf.append(gmx::formatString("%d", k + 1));
979         }
980         usedPpResidues->atomname.insert(usedPpResidues->atomname.begin() + at_start + 1 + k,
981                                         put_symtab(symtab, buf.c_str()));
982         usedPpResidues->atom.insert(usedPpResidues->atom.begin() + at_start + 1 + k, patch->atom.back());
983         if (patch->cgnr != NOTSET)
984         {
985             usedPpResidues->cgnr.insert(usedPpResidues->cgnr.begin() + at_start + 1 + k, patch->cgnr);
986         }
987         else
988         {
989             usedPpResidues->cgnr.insert(usedPpResidues->cgnr.begin() + at_start + 1 + k,
990                                         usedPpResidues->cgnr[at_start]);
991         }
992     }
993 }
994
995 void get_hackblocks_rtp(std::vector<MoleculePatchDatabase>*    globalPatches,
996                         std::vector<PreprocessResidue>*        usedPpResidues,
997                         gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
998                         int                                    nres,
999                         t_resinfo*                             resinfo,
1000                         int                                    nterpairs,
1001                         t_symtab*                              symtab,
1002                         gmx::ArrayRef<MoleculePatchDatabase*>  ntdb,
1003                         gmx::ArrayRef<MoleculePatchDatabase*>  ctdb,
1004                         gmx::ArrayRef<const int>               rn,
1005                         gmx::ArrayRef<const int>               rc,
1006                         bool                                   bAllowMissing,
1007                         const gmx::MDLogger&                   logger)
1008 {
1009     char* key;
1010     bool  bRM;
1011
1012     globalPatches->resize(nres);
1013     usedPpResidues->clear();
1014     /* first the termini */
1015     for (int i = 0; i < nterpairs; i++)
1016     {
1017         if (rn[i] >= 0 && ntdb[i] != nullptr)
1018         {
1019             copyModificationBlocks(*ntdb[i], &globalPatches->at(rn[i]));
1020         }
1021         if (rc[i] >= 0 && ctdb[i] != nullptr)
1022         {
1023             mergeAtomAndBondModifications(*ctdb[i], &globalPatches->at(rc[i]));
1024         }
1025     }
1026
1027     /* then the whole rtp */
1028     for (int i = 0; i < nres; i++)
1029     {
1030         /* Here we allow a mismatch of one character when looking for the rtp entry.
1031          * For such a mismatch there should be only one mismatching name.
1032          * This is mainly useful for small molecules such as ions.
1033          * Note that this will usually not work for protein, DNA and RNA,
1034          * since there the residue names should be listed in residuetypes.dat
1035          * and an error will have been generated earlier in the process.
1036          */
1037         key = *resinfo[i].rtp;
1038
1039         resinfo[i].rtp = put_symtab(symtab, searchResidueDatabase(key, rtpFFDB, logger).c_str());
1040         auto res       = getDatabaseEntry(*resinfo[i].rtp, rtpFFDB);
1041         usedPpResidues->push_back(PreprocessResidue());
1042         PreprocessResidue* newentry = &usedPpResidues->back();
1043         copyPreprocessResidues(*res, newentry, symtab);
1044
1045         /* Check that we do not have different bonded types in one molecule */
1046         check_restp_types(usedPpResidues->front(), *newentry);
1047
1048         int tern = -1;
1049         for (int j = 0; j < nterpairs && tern == -1; j++)
1050         {
1051             if (i == rn[j])
1052             {
1053                 tern = j;
1054             }
1055         }
1056         int terc = -1;
1057         for (int j = 0; j < nterpairs && terc == -1; j++)
1058         {
1059             if (i == rc[j])
1060             {
1061                 terc = j;
1062             }
1063         }
1064         bRM = mergeBondedInteractionList(res->rb, globalPatches->at(i).rb, tern >= 0, terc >= 0);
1065
1066         if (bRM && ((tern >= 0 && ntdb[tern] == nullptr) || (terc >= 0 && ctdb[terc] == nullptr)))
1067         {
1068             const char* errString =
1069                     "There is a dangling bond at at least one of the terminal ends and the force "
1070                     "field does not provide terminal entries or files. Fix your terminal "
1071                     "residues so that they match the residue database (.rtp) entries, or provide "
1072                     "terminal database entries (.tdb).";
1073             if (bAllowMissing)
1074             {
1075                 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("%s", errString);
1076             }
1077             else
1078             {
1079                 gmx_fatal(FARGS, "%s", errString);
1080             }
1081         }
1082         else if (bRM && ((tern >= 0 && ntdb[tern]->nhack() == 0) || (terc >= 0 && ctdb[terc]->nhack() == 0)))
1083         {
1084             const char* errString =
1085                     "There is a dangling bond at at least one of the terminal ends. Fix your "
1086                     "coordinate file, add a new terminal database entry (.tdb), or select the "
1087                     "proper existing terminal entry.";
1088             if (bAllowMissing)
1089             {
1090                 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("%s", errString);
1091             }
1092             else
1093             {
1094                 gmx_fatal(FARGS, "%s", errString);
1095             }
1096         }
1097     }
1098
1099     /* Apply patchs to t_restp entries
1100        i.e. add's and deletes from termini database will be
1101        added to/removed from residue topology
1102        we'll do this on one big dirty loop, so it won't make easy reading! */
1103     for (auto modifiedResidue = globalPatches->begin(); modifiedResidue != globalPatches->end();
1104          modifiedResidue++)
1105     {
1106         const int          pos    = std::distance(globalPatches->begin(), modifiedResidue);
1107         PreprocessResidue* posres = &usedPpResidues->at(pos);
1108         for (auto patch = modifiedResidue->hack.begin(); patch != modifiedResidue->hack.end(); patch++)
1109         {
1110             if (patch->nr != 0)
1111             {
1112                 /* find atom in restp */
1113                 auto found = std::find_if(
1114                         posres->atomname.begin(), posres->atomname.end(), [&patch](char** name) {
1115                             return (patch->oname.empty() && patch->a[0] == *name)
1116                                    || (patch->oname == *name);
1117                         });
1118
1119                 if (found == posres->atomname.end())
1120                 {
1121                     /* If we are doing an atom rename only, we don't need
1122                      * to generate a fatal error if the old name is not found
1123                      * in the rtp.
1124                      */
1125                     /* Deleting can happen also only on the input atoms,
1126                      * not necessarily always on the rtp entry.
1127                      */
1128                     if (patch->type() == MoleculePatchType::Add)
1129                     {
1130                         gmx_fatal(FARGS,
1131                                   "atom %s not found in buiding block %d%s "
1132                                   "while combining tdb and rtp",
1133                                   patch->oname.empty() ? patch->a[0].c_str() : patch->oname.c_str(),
1134                                   pos + 1,
1135                                   *resinfo[pos].rtp);
1136                     }
1137                 }
1138                 else
1139                 {
1140                     int l = std::distance(posres->atomname.begin(), found);
1141                     switch (patch->type())
1142                     {
1143                         case MoleculePatchType::Add:
1144                         {
1145                             /* we're adding: */
1146                             add_atom_to_restp(posres, symtab, l, &(*patch));
1147                             break;
1148                         }
1149                         case MoleculePatchType::Delete:
1150                         { /* we're deleting */
1151                             posres->atom.erase(posres->atom.begin() + l);
1152                             posres->atomname.erase(posres->atomname.begin() + l);
1153                             posres->cgnr.erase(posres->cgnr.begin() + l);
1154                             break;
1155                         }
1156                         case MoleculePatchType::Replace:
1157                         {
1158                             /* we're replacing */
1159                             posres->atom[l]     = patch->atom.back();
1160                             posres->atomname[l] = put_symtab(symtab, patch->nname.c_str());
1161                             if (patch->cgnr != NOTSET)
1162                             {
1163                                 posres->cgnr[l] = patch->cgnr;
1164                             }
1165                             break;
1166                         }
1167                     }
1168                 }
1169             }
1170         }
1171     }
1172 }
1173
1174 static bool atomname_cmp_nr(const char* anm, const MoleculePatch* patch, int* nr)
1175 {
1176
1177     if (patch->nr == 1)
1178     {
1179         *nr = 0;
1180
1181         return (gmx::equalCaseInsensitive(anm, patch->nname));
1182     }
1183     else
1184     {
1185         if (isdigit(anm[strlen(anm) - 1]))
1186         {
1187             *nr = anm[strlen(anm) - 1] - '0';
1188         }
1189         else
1190         {
1191             *nr = 0;
1192         }
1193         if (*nr <= 0 || *nr > patch->nr)
1194         {
1195             return FALSE;
1196         }
1197         else
1198         {
1199             return (strlen(anm) == patch->nname.length() + 1
1200                     && gmx_strncasecmp(anm, patch->nname.c_str(), patch->nname.length()) == 0);
1201         }
1202     }
1203 }
1204
1205 static bool match_atomnames_with_rtp_atom(t_atoms*                     pdba,
1206                                           gmx::ArrayRef<gmx::RVec>     x,
1207                                           t_symtab*                    symtab,
1208                                           int                          atind,
1209                                           PreprocessResidue*           localPpResidue,
1210                                           const MoleculePatchDatabase& singlePatch,
1211                                           bool                         bVerbose,
1212                                           const gmx::MDLogger&         logger)
1213 {
1214     int   resnr;
1215     char* oldnm;
1216     int   anmnr;
1217     bool  bDeleted;
1218
1219     oldnm = *pdba->atomname[atind];
1220     resnr = pdba->resinfo[pdba->atom[atind].resind].nr;
1221
1222     bDeleted = FALSE;
1223     for (auto patch = singlePatch.hack.begin(); patch != singlePatch.hack.end(); patch++)
1224     {
1225         if (patch->type() == MoleculePatchType::Replace && gmx::equalCaseInsensitive(oldnm, patch->oname))
1226         {
1227             /* This is a replace entry. */
1228             /* Check if we are not replacing a replaced atom. */
1229             bool bReplaceReplace = false;
1230             for (auto selfPatch = singlePatch.hack.begin(); selfPatch != singlePatch.hack.end(); selfPatch++)
1231             {
1232                 if (patch != selfPatch && selfPatch->type() == MoleculePatchType::Replace
1233                     && gmx::equalCaseInsensitive(selfPatch->nname, patch->oname))
1234                 {
1235                     /* The replace in patch replaces an atom that
1236                      * was already replaced in selfPatch, we do not want
1237                      * second or higher level replaces at this stage.
1238                      */
1239                     bReplaceReplace = true;
1240                 }
1241             }
1242             if (bReplaceReplace)
1243             {
1244                 /* Skip this replace. */
1245                 continue;
1246             }
1247
1248             /* This atom still has the old name, rename it */
1249             std::string newnm = patch->nname;
1250             auto        found = std::find_if(
1251                     localPpResidue->atomname.begin(),
1252                     localPpResidue->atomname.end(),
1253                     [&newnm](char** name) { return gmx::equalCaseInsensitive(newnm, *name); });
1254             if (found == localPpResidue->atomname.end())
1255             {
1256                 /* The new name is not present in the rtp.
1257                  * We need to apply the replace also to the rtp entry.
1258                  */
1259
1260                 /* We need to find the add hack that can add this atom
1261                  * to find out after which atom it should be added.
1262                  */
1263                 bool bFoundInAdd = false;
1264                 for (auto rtpModification = singlePatch.hack.begin();
1265                      rtpModification != singlePatch.hack.end();
1266                      rtpModification++)
1267                 {
1268                     int         k = std::distance(localPpResidue->atomname.begin(), found);
1269                     std::string start_at;
1270                     if (rtpModification->type() == MoleculePatchType::Add
1271                         && atomname_cmp_nr(newnm.c_str(), &(*rtpModification), &anmnr))
1272                     {
1273                         if (anmnr <= 1)
1274                         {
1275                             start_at = singlePatch.hack[k].a[0];
1276                         }
1277                         else
1278                         {
1279                             start_at = gmx::formatString(
1280                                     "%s%d", singlePatch.hack[k].nname.c_str(), anmnr - 1);
1281                         }
1282                         auto found2 = std::find_if(localPpResidue->atomname.begin(),
1283                                                    localPpResidue->atomname.end(),
1284                                                    [&start_at](char** name) {
1285                                                        return gmx::equalCaseInsensitive(start_at, *name);
1286                                                    });
1287                         if (found2 == localPpResidue->atomname.end())
1288                         {
1289                             gmx_fatal(FARGS,
1290                                       "Could not find atom '%s' in residue building block '%s' to "
1291                                       "add atom '%s' to",
1292                                       start_at.c_str(),
1293                                       localPpResidue->resname.c_str(),
1294                                       newnm.c_str());
1295                         }
1296                         /* We can add the atom after atom start_nr */
1297                         add_atom_to_restp(localPpResidue,
1298                                           symtab,
1299                                           std::distance(localPpResidue->atomname.begin(), found2),
1300                                           &(*patch));
1301
1302                         bFoundInAdd = true;
1303                     }
1304                 }
1305
1306                 if (!bFoundInAdd)
1307                 {
1308                     gmx_fatal(FARGS,
1309                               "Could not find an 'add' entry for atom named '%s' corresponding to "
1310                               "the 'replace' entry from atom name '%s' to '%s' for tdb or hdb "
1311                               "database of residue type '%s'",
1312                               newnm.c_str(),
1313                               patch->oname.c_str(),
1314                               patch->nname.c_str(),
1315                               localPpResidue->resname.c_str());
1316                 }
1317             }
1318
1319             if (bVerbose)
1320             {
1321                 GMX_LOG(logger.info)
1322                         .asParagraph()
1323                         .appendTextFormatted("Renaming atom '%s' in residue '%s' %d to '%s'",
1324                                              oldnm,
1325                                              localPpResidue->resname.c_str(),
1326                                              resnr,
1327                                              newnm.c_str());
1328             }
1329             /* Rename the atom in pdba */
1330             pdba->atomname[atind] = put_symtab(symtab, newnm.c_str());
1331         }
1332         else if (patch->type() == MoleculePatchType::Delete
1333                  && gmx::equalCaseInsensitive(oldnm, patch->oname))
1334         {
1335             /* This is a delete entry, check if this atom is present
1336              * in the rtp entry of this residue.
1337              */
1338             auto found3 = std::find_if(
1339                     localPpResidue->atomname.begin(),
1340                     localPpResidue->atomname.end(),
1341                     [&oldnm](char** name) { return gmx::equalCaseInsensitive(oldnm, *name); });
1342             if (found3 == localPpResidue->atomname.end())
1343             {
1344                 /* This atom is not present in the rtp entry,
1345                  * delete is now from the input pdba.
1346                  */
1347                 if (bVerbose)
1348                 {
1349                     GMX_LOG(logger.info)
1350                             .asParagraph()
1351                             .appendTextFormatted("Deleting atom '%s' in residue '%s' %d",
1352                                                  oldnm,
1353                                                  localPpResidue->resname.c_str(),
1354                                                  resnr);
1355                 }
1356                 /* We should free the atom name,
1357                  * but it might be used multiple times in the symtab.
1358                  * sfree(pdba->atomname[atind]);
1359                  */
1360                 for (int k = atind + 1; k < pdba->nr; k++)
1361                 {
1362                     pdba->atom[k - 1]     = pdba->atom[k];
1363                     pdba->atomname[k - 1] = pdba->atomname[k];
1364                     copy_rvec(x[k], x[k - 1]);
1365                 }
1366                 pdba->nr--;
1367                 bDeleted = true;
1368             }
1369         }
1370     }
1371
1372     return bDeleted;
1373 }
1374
1375 void match_atomnames_with_rtp(gmx::ArrayRef<PreprocessResidue>     usedPpResidues,
1376                               gmx::ArrayRef<MoleculePatchDatabase> globalPatches,
1377                               t_atoms*                             pdba,
1378                               t_symtab*                            symtab,
1379                               gmx::ArrayRef<gmx::RVec>             x,
1380                               bool                                 bVerbose,
1381                               const gmx::MDLogger&                 logger)
1382 {
1383     for (int i = 0; i < pdba->nr; i++)
1384     {
1385         const char*        oldnm          = *pdba->atomname[i];
1386         PreprocessResidue* localPpResidue = &usedPpResidues[pdba->atom[i].resind];
1387         auto               found          = std::find_if(
1388                 localPpResidue->atomname.begin(), localPpResidue->atomname.end(), [&oldnm](char** name) {
1389                     return gmx::equalCaseInsensitive(oldnm, *name);
1390                 });
1391         if (found == localPpResidue->atomname.end())
1392         {
1393             /* Not found yet, check if we have to rename this atom */
1394             if (match_atomnames_with_rtp_atom(
1395                         pdba, x, symtab, i, localPpResidue, globalPatches[pdba->atom[i].resind], bVerbose, logger))
1396             {
1397                 /* We deleted this atom, decrease the atom counter by 1. */
1398                 i--;
1399             }
1400         }
1401     }
1402 }
1403
1404 #define NUM_CMAP_ATOMS 5
1405 static void gen_cmap(InteractionsOfType*                    psb,
1406                      gmx::ArrayRef<const PreprocessResidue> usedPpResidues,
1407                      t_atoms*                               atoms,
1408                      gmx::ArrayRef<const int>               cyclicBondsIndex,
1409                      const gmx::MDLogger&                   logger)
1410 {
1411     int         residx;
1412     const char* ptr;
1413     t_resinfo*  resinfo = atoms->resinfo;
1414     int         nres    = atoms->nres;
1415     int         cmap_atomid[NUM_CMAP_ATOMS];
1416     int         cmap_chainnum = -1;
1417
1418     if (debug)
1419     {
1420         ptr = "cmap";
1421     }
1422     else
1423     {
1424         ptr = "check";
1425     }
1426
1427     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Making cmap torsions...");
1428     int i = 0;
1429     /* Most cmap entries use the N atom from the next residue, so the last
1430      * residue should not have its CMAP entry in that case, but for things like
1431      * dipeptides we sometimes define a complete CMAP entry inside a residue,
1432      * and in this case we need to process everything through the last residue.
1433      */
1434     for (residx = 0; residx < nres; residx++)
1435     {
1436         /* Add CMAP terms from the list of CMAP interactions */
1437         for (const auto& b : usedPpResidues[residx].rb[BondedTypes::Cmap].b)
1438         {
1439             bool bAddCMAP = true;
1440             /* Loop over atoms in a candidate CMAP interaction and
1441              * check that they exist, are from the same chain and are
1442              * from residues labelled as protein. */
1443             for (int k = 0; k < NUM_CMAP_ATOMS && bAddCMAP; k++)
1444             {
1445                 /* Assign the pointer to the name of the next reference atom.
1446                  * This can use -/+ labels to refer to previous/next residue.
1447                  */
1448                 const char* pname = b.a[k].c_str();
1449                 /* Skip this CMAP entry if it refers to residues before the
1450                  * first or after the last residue.
1451                  */
1452                 if ((cyclicBondsIndex.empty() && ((strchr(pname, '-') != nullptr) && (residx == 0)))
1453                     || ((strchr(pname, '+') != nullptr) && (residx == nres - 1)))
1454                 {
1455                     bAddCMAP = false;
1456                     break;
1457                 }
1458
1459                 cmap_atomid[k] = search_atom(pname, i, atoms, ptr, TRUE, cyclicBondsIndex);
1460                 bAddCMAP       = bAddCMAP && (cmap_atomid[k] != -1);
1461                 if (!bAddCMAP)
1462                 {
1463                     /* This break is necessary, because cmap_atomid[k]
1464                      * == -1 cannot be safely used as an index
1465                      * into the atom array. */
1466                     break;
1467                 }
1468                 int this_residue_index = atoms->atom[cmap_atomid[k]].resind;
1469                 if (0 == k)
1470                 {
1471                     cmap_chainnum = resinfo[this_residue_index].chainnum;
1472                 }
1473                 else
1474                 {
1475                     /* Does the residue for this atom have the same
1476                      * chain number as the residues for previous
1477                      * atoms? */
1478                     bAddCMAP = bAddCMAP && cmap_chainnum == resinfo[this_residue_index].chainnum;
1479                 }
1480                 /* Here we used to check that the residuetype was protein and
1481                  * disable bAddCMAP if that was not the case. However, some
1482                  * special residues (say, alanine dipeptides) might not adhere
1483                  * to standard naming, and if we start calling them normal
1484                  * protein residues the user will be bugged to select termini.
1485                  *
1486                  * Instead, I believe that the right course of action is to
1487                  * keep the CMAP interaction if it is present in the RTP file
1488                  * and we correctly identified all atoms (which is the case
1489                  * if we got here).
1490                  */
1491             }
1492
1493             if (bAddCMAP)
1494             {
1495                 add_cmap_param(psb,
1496                                cmap_atomid[0],
1497                                cmap_atomid[1],
1498                                cmap_atomid[2],
1499                                cmap_atomid[3],
1500                                cmap_atomid[4],
1501                                b.s.c_str());
1502             }
1503         }
1504
1505         if (residx < nres - 1)
1506         {
1507             while (atoms->atom[i].resind < residx + 1)
1508             {
1509                 i++;
1510             }
1511         }
1512     }
1513     /* Start the next residue */
1514 }
1515
1516 static void scrub_charge_groups(int* cgnr, int natoms)
1517 {
1518     int i;
1519
1520     for (i = 0; i < natoms; i++)
1521     {
1522         cgnr[i] = i + 1;
1523     }
1524 }
1525
1526
1527 void pdb2top(FILE*                                  top_file,
1528              const char*                            posre_fn,
1529              const char*                            molname,
1530              t_atoms*                               atoms,
1531              std::vector<gmx::RVec>*                x,
1532              PreprocessingAtomTypes*                atype,
1533              t_symtab*                              tab,
1534              gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
1535              gmx::ArrayRef<PreprocessResidue>       usedPpResidues,
1536              gmx::ArrayRef<MoleculePatchDatabase>   globalPatches,
1537              bool                                   bAllowMissing,
1538              bool                                   bVsites,
1539              bool                                   bVsiteAromatics,
1540              const char*                            ffdir,
1541              real                                   mHmult,
1542              gmx::ArrayRef<const DisulfideBond>     ssbonds,
1543              real                                   long_bond_dist,
1544              real                                   short_bond_dist,
1545              bool                                   bDeuterate,
1546              bool                                   bChargeGroups,
1547              bool                                   bCmap,
1548              bool                                   bRenumRes,
1549              bool                                   bRTPresname,
1550              gmx::ArrayRef<const int>               cyclicBondsIndex,
1551              const gmx::MDLogger&                   logger)
1552 {
1553     std::array<InteractionsOfType, F_NRE>   plist;
1554     t_excls*                                excls;
1555     int*                                    cgnr;
1556     int*                                    vsite_type;
1557     int                                     i, nmissat;
1558     gmx::EnumerationArray<BondedTypes, int> bts;
1559
1560     ResidueType rt;
1561
1562     /* Make bonds */
1563     at2bonds(&(plist[F_BONDS]), globalPatches, atoms, *x, long_bond_dist, short_bond_dist, cyclicBondsIndex, logger);
1564
1565     /* specbonds: disulphide bonds & heme-his */
1566     do_ssbonds(&(plist[F_BONDS]), atoms, ssbonds, bAllowMissing);
1567
1568     nmissat = name2type(atoms, &cgnr, usedPpResidues, &rt, logger);
1569     if (nmissat)
1570     {
1571         if (bAllowMissing)
1572         {
1573             GMX_LOG(logger.warning)
1574                     .asParagraph()
1575                     .appendTextFormatted("There were %d missing atoms in molecule %s", nmissat, molname);
1576         }
1577         else
1578         {
1579             gmx_fatal(FARGS,
1580                       "There were %d missing atoms in molecule %s, if you want to use this "
1581                       "incomplete topology anyhow, use the option -missing",
1582                       nmissat,
1583                       molname);
1584         }
1585     }
1586
1587     /* Cleanup bonds (sort and rm doubles) */
1588     clean_bonds(&(plist[F_BONDS]), logger);
1589
1590     snew(vsite_type, atoms->nr);
1591     for (i = 0; i < atoms->nr; i++)
1592     {
1593         vsite_type[i] = NOTSET;
1594     }
1595     if (bVsites)
1596     {
1597         if (bVsiteAromatics)
1598         {
1599             GMX_LOG(logger.info)
1600                     .asParagraph()
1601                     .appendTextFormatted(
1602                             "The conversion of aromatic rings into virtual sites is deprecated "
1603                             "and may be removed in a future version of GROMACS");
1604         }
1605         /* determine which atoms will be vsites and add dummy masses
1606            also renumber atom numbers in plist[0..F_NRE]! */
1607         do_vsites(rtpFFDB, atype, atoms, tab, x, plist, &vsite_type, &cgnr, mHmult, bVsiteAromatics, ffdir);
1608     }
1609
1610     /* Make Angles and Dihedrals */
1611     GMX_LOG(logger.info)
1612             .asParagraph()
1613             .appendTextFormatted("Generating angles, dihedrals and pairs...");
1614     snew(excls, atoms->nr);
1615     gen_pad(atoms, usedPpResidues, plist, excls, globalPatches, bAllowMissing, cyclicBondsIndex);
1616
1617     /* Make CMAP */
1618     if (bCmap)
1619     {
1620         gen_cmap(&(plist[F_CMAP]), usedPpResidues, atoms, cyclicBondsIndex, logger);
1621         if (plist[F_CMAP].size() > 0)
1622         {
1623             GMX_LOG(logger.info)
1624                     .asParagraph()
1625                     .appendTextFormatted("There are %4zu cmap torsion pairs", plist[F_CMAP].size());
1626         }
1627     }
1628
1629     /* set mass of all remaining hydrogen atoms */
1630     if (mHmult != 1.0)
1631     {
1632         do_h_mass(&(plist[F_BONDS]), vsite_type, atoms, mHmult, bDeuterate);
1633     }
1634     sfree(vsite_type);
1635
1636     /* Cleanup bonds (sort and rm doubles) */
1637     /* clean_bonds(&(plist[F_BONDS]));*/
1638
1639     GMX_LOG(logger.info)
1640             .asParagraph()
1641             .appendTextFormatted(
1642                     "There are %4zu dihedrals, %4zu impropers, %4zu angles\n"
1643                     "          %4zu pairs,     %4zu bonds and  %4zu virtual sites",
1644                     plist[F_PDIHS].size(),
1645                     plist[F_IDIHS].size(),
1646                     plist[F_ANGLES].size(),
1647                     plist[F_LJ14].size(),
1648                     plist[F_BONDS].size(),
1649                     plist[F_VSITE2].size() + plist[F_VSITE3].size() + plist[F_VSITE3FD].size()
1650                             + plist[F_VSITE3FAD].size() + plist[F_VSITE3OUT].size()
1651                             + plist[F_VSITE4FD].size() + plist[F_VSITE4FDN].size());
1652
1653     print_sums(atoms, FALSE, logger);
1654
1655     if (!bChargeGroups)
1656     {
1657         scrub_charge_groups(cgnr, atoms->nr);
1658     }
1659
1660     if (bRenumRes)
1661     {
1662         for (i = 0; i < atoms->nres; i++)
1663         {
1664             atoms->resinfo[i].nr = i + 1;
1665             atoms->resinfo[i].ic = ' ';
1666         }
1667     }
1668
1669     if (top_file)
1670     {
1671         GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing topology");
1672         /* We can copy the bonded types from the first restp,
1673          * since the types have to be identical for all residues in one molecule.
1674          */
1675         for (auto i : gmx::EnumerationWrapper<BondedTypes>{})
1676         {
1677             bts[i] = usedPpResidues[0].rb[i].type;
1678         }
1679         write_top(top_file,
1680                   posre_fn,
1681                   molname,
1682                   atoms,
1683                   bRTPresname,
1684                   bts,
1685                   plist,
1686                   excls,
1687                   atype,
1688                   cgnr,
1689                   usedPpResidues[0].nrexcl);
1690     }
1691
1692
1693     /* we should clean up hb and restp here, but that is a *L*O*T* of work! */
1694     sfree(cgnr);
1695     for (i = 0; i < atoms->nr; i++)
1696     {
1697         sfree(excls[i].e);
1698     }
1699     sfree(excls);
1700 }