b9c8680c2ef9310cffb36646e6ffefea4f297764
[alexxy/gromacs.git] / src / gromacs / gmxpreprocess / genhydro.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013-2019,2020,2021, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "genhydro.h"
40
41 #include <cstring>
42 #include <ctime>
43
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/gmxlib/network.h"
46 #include "gromacs/gmxpreprocess/calch.h"
47 #include "gromacs/gmxpreprocess/h_db.h"
48 #include "gromacs/gmxpreprocess/notset.h"
49 #include "gromacs/gmxpreprocess/pgutil.h"
50 #include "gromacs/gmxpreprocess/ter_db.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/topology/atoms.h"
53 #include "gromacs/topology/symtab.h"
54 #include "gromacs/utility/cstringutil.h"
55 #include "gromacs/utility/exceptions.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/futil.h"
58 #include "gromacs/utility/smalloc.h"
59
60 #include "hackblock.h"
61 #include "resall.h"
62
63 static void copy_atom(const t_atoms* atoms1, int a1, t_atoms* atoms2, int a2, t_symtab* symtab)
64 {
65     atoms2->atom[a2]     = atoms1->atom[a1];
66     atoms2->atomname[a2] = put_symtab(symtab, *atoms1->atomname[a1]);
67 }
68
69 static int pdbasearch_atom(const char*              name,
70                            int                      resind,
71                            const t_atoms*           pdba,
72                            const char*              searchtype,
73                            bool                     bAllowMissing,
74                            gmx::ArrayRef<const int> cyclicBondsIndex)
75 {
76     int i;
77
78     for (i = 0; (i < pdba->nr) && (pdba->atom[i].resind != resind); i++) {}
79
80     return search_atom(name, i, pdba, searchtype, bAllowMissing, cyclicBondsIndex);
81 }
82
83 /*! \brief Return the index of the first atom whose residue index
84  * matches and which has a patch with the given name.
85  *
86  * \param[out] ii      Index of the first atom in the residue that matches,
87  *                       -1 if no match occurs.
88  * \param[out] jj      Index of the patch that matches,
89  *                       unchanged if no match occurs.
90  * \param[in]  name    Name of the desired patch to match
91  * \param[in]  patches The patch database to search
92  * \param[in]  resind  The residue index to match
93  * \param[in]  pdba    The atoms to work with
94  *
95  * \todo The short-circuit logic will be simpler if this returned a
96  * std::pair<int, int> as soon as the first double match is found.
97  */
98 static void hacksearch_atom(int*                                            ii,
99                             int*                                            jj,
100                             const char*                                     name,
101                             gmx::ArrayRef<const std::vector<MoleculePatch>> patches,
102                             int                                             resind,
103                             const t_atoms*                                  pdba)
104 {
105     int i;
106
107     *ii = -1;
108     if (name[0] == '-')
109     {
110         name++;
111         resind--;
112     }
113     for (i = 0; (i < pdba->nr) && (pdba->atom[i].resind != resind); i++) {}
114     for (; (i < pdba->nr) && (pdba->atom[i].resind == resind) && (*ii < 0); i++)
115     {
116         int j = 0;
117         for (const auto& patch : patches[i])
118         {
119             if (patch.nname == name)
120             {
121                 *ii = i;
122                 *jj = j;
123                 if (*ii >= 0)
124                 {
125                     break;
126                 }
127             }
128             j++;
129         }
130     }
131 }
132
133 static std::vector<MoleculePatchDatabase>
134 getMoleculePatchDatabases(const t_atoms*                              pdba,
135                           gmx::ArrayRef<const MoleculePatchDatabase>  globalPatches,
136                           int                                         nterpairs,
137                           gmx::ArrayRef<MoleculePatchDatabase* const> ntdb,
138                           gmx::ArrayRef<MoleculePatchDatabase* const> ctdb,
139                           gmx::ArrayRef<const int>                    rN,
140                           gmx::ArrayRef<const int>                    rC)
141 {
142     std::vector<MoleculePatchDatabase> modBlock(pdba->nres);
143     /* make space */
144     /* first the termini */
145     for (int i = 0; i < nterpairs; i++)
146     {
147         if (ntdb[i] != nullptr)
148         {
149             copyModificationBlocks(*ntdb[i], &modBlock[rN[i]]);
150         }
151         if (ctdb[i] != nullptr)
152         {
153             mergeAtomAndBondModifications(*ctdb[i], &modBlock[rC[i]]);
154         }
155     }
156     /* then the whole hdb */
157     for (int rnr = 0; rnr < pdba->nres; rnr++)
158     {
159         auto ahptr = search_h_db(globalPatches, *pdba->resinfo[rnr].rtp);
160         if (ahptr != globalPatches.end())
161         {
162             if (modBlock[rnr].name.empty())
163             {
164                 modBlock[rnr].name = ahptr->name;
165             }
166             mergeAtomModifications(*ahptr, &modBlock[rnr]);
167         }
168     }
169     return modBlock;
170 }
171
172 static void expand_hackblocks_one(const MoleculePatchDatabase& newPatch,
173                                   const std::string localAtomName, //NOLINT(performance-unnecessary-value-param)
174                                   std::vector<MoleculePatch>* globalPatches,
175                                   bool                        bN,
176                                   bool                        bC)
177 {
178     /* we'll recursively add atoms to atoms */
179     int pos = 0;
180     for (const auto& singlePatch : newPatch.hack)
181     {
182         /* first check if we're in the N- or C-terminus, then we should ignore
183            all hacks involving atoms from resp. previous or next residue
184            (i.e. which name begins with '-' (N) or '+' (C) */
185         bool bIgnore = false;
186         if (bN) /* N-terminus: ignore '-' */
187         {
188             for (int k = 0; k < 4 && !singlePatch.a[k].empty() && !bIgnore; k++)
189             {
190                 bIgnore = singlePatch.a[k][0] == '-';
191             }
192         }
193         if (bC) /* C-terminus: ignore '+' */
194         {
195             for (int k = 0; k < 4 && !singlePatch.a[k].empty() && !bIgnore; k++)
196             {
197                 bIgnore = singlePatch.a[k][0] == '+';
198             }
199         }
200         /* must be either hdb entry (tp>0) or add from tdb (oname==NULL)
201            and first control aton (AI) matches this atom or
202            delete/replace from tdb (oname!=NULL) and oname matches this atom */
203
204         if (!bIgnore
205             && (((singlePatch.tp > 0 || singlePatch.oname.empty()) && singlePatch.a[0] == localAtomName)
206                 || (singlePatch.oname == localAtomName)))
207         {
208             /* now expand all hacks for this atom */
209             for (int k = 0; k < singlePatch.nr; k++)
210             {
211                 globalPatches->push_back(singlePatch);
212                 MoleculePatch* patch = &globalPatches->back();
213                 patch->bXSet         = false;
214                 /* if we're adding (oname==NULL) and don't have a new name (nname)
215                    yet, build it from localAtomName */
216                 if (patch->nname.empty())
217                 {
218                     if (patch->oname.empty())
219                     {
220                         patch->nname    = localAtomName;
221                         patch->nname[0] = 'H';
222                     }
223                 }
224                 else
225                 {
226                     if (gmx_debug_at)
227                     {
228                         fprintf(debug,
229                                 "Hack '%s' %d, replacing nname '%s' with '%s' (old name '%s')\n",
230                                 localAtomName.c_str(),
231                                 pos,
232                                 patch->nname.c_str(),
233                                 singlePatch.nname.c_str(),
234                                 patch->oname.empty() ? "" : patch->oname.c_str());
235                     }
236                     patch->nname = singlePatch.nname;
237                 }
238
239                 if (singlePatch.tp == 10 && k == 2)
240                 {
241                     /* This is a water virtual site, not a hydrogen */
242                     /* Ugly hardcoded name hack to replace 'H' with 'M' */
243                     GMX_RELEASE_ASSERT(
244                             !patch->nname.empty() && patch->nname[0] == 'H',
245                             "Water virtual site should be named starting with H at this point");
246                     patch->nname[0] = 'M';
247                 }
248                 else if (singlePatch.tp == 11 && k >= 2)
249                 {
250                     /* This is a water lone pair, not a hydrogen */
251                     /* Ugly hardcoded name hack */
252                     patch->nname.assign(gmx::formatString("LP%d", 1 + k - 2));
253                 }
254                 else if (singlePatch.nr > 1)
255                 {
256                     /* adding more than one atom, number them */
257                     patch->nname.append(gmx::formatString("%d", 1 + k));
258                 }
259             }
260
261             /* add hacks to atoms we've just added */
262             if (singlePatch.tp > 0 || singlePatch.oname.empty())
263             {
264                 for (int k = 0; k < singlePatch.nr; k++)
265                 {
266                     expand_hackblocks_one(
267                             newPatch,
268                             globalPatches->at(globalPatches->size() - singlePatch.nr + k).nname,
269                             globalPatches,
270                             bN,
271                             bC);
272                 }
273             }
274         }
275         pos++;
276     }
277 }
278
279 static void expand_hackblocks(const t_atoms*                             pdba,
280                               gmx::ArrayRef<const MoleculePatchDatabase> hb,
281                               gmx::ArrayRef<std::vector<MoleculePatch>>  patches,
282                               int                                        nterpairs,
283                               gmx::ArrayRef<const int>                   rN,
284                               gmx::ArrayRef<const int>                   rC)
285 {
286     for (int i = 0; i < pdba->nr; i++)
287     {
288         bool bN = false;
289         for (int j = 0; j < nterpairs && !bN; j++)
290         {
291             bN = pdba->atom[i].resind == rN[j];
292         }
293         bool bC = false;
294         for (int j = 0; j < nterpairs && !bC; j++)
295         {
296             bC = pdba->atom[i].resind == rC[j];
297         }
298
299         /* add hacks to this atom */
300         expand_hackblocks_one(hb[pdba->atom[i].resind], *pdba->atomname[i], &patches[i], bN, bC);
301     }
302 }
303
304 static int check_atoms_present(const t_atoms*                            pdba,
305                                gmx::ArrayRef<std::vector<MoleculePatch>> patches,
306                                gmx::ArrayRef<const int>                  cyclicBondsIndex)
307 {
308     int nadd = 0;
309     for (int i = 0; i < pdba->nr; i++)
310     {
311         int rnr = pdba->atom[i].resind;
312         for (auto patch = patches[i].begin(); patch != patches[i].end(); patch++)
313         {
314             switch (patch->type())
315             {
316                 case MoleculePatchType::Add:
317                 {
318                     /* we're adding */
319                     /* check if the atom is already present */
320                     int k = pdbasearch_atom(patch->nname.c_str(), rnr, pdba, "check", TRUE, cyclicBondsIndex);
321                     if (k != -1)
322                     {
323                         /* We found the added atom. */
324                         patch->bAlreadyPresent = true;
325                     }
326                     else
327                     {
328                         patch->bAlreadyPresent = false;
329                         /* count how many atoms we'll add */
330                         nadd++;
331                     }
332                     break;
333                 }
334                 case MoleculePatchType::Delete:
335                 {
336                     /* we're deleting */
337                     nadd--;
338                     break;
339                 }
340                 case MoleculePatchType::Replace: { break;
341                 }
342                 default: { GMX_THROW(gmx::InternalError("Case not handled"));
343                 }
344             }
345         }
346     }
347     return nadd;
348 }
349
350 static void calc_all_pos(const t_atoms*                            pdba,
351                          gmx::ArrayRef<const gmx::RVec>            x,
352                          gmx::ArrayRef<std::vector<MoleculePatch>> patches,
353                          bool                                      bCheckMissing,
354                          gmx::ArrayRef<const int>                  cyclicBondsIndex)
355 {
356     int ii, l = 0;
357 #define MAXH 4
358     rvec xa[4];    /* control atoms for calc_h_pos */
359     rvec xh[MAXH]; /* hydrogen positions from calc_h_pos */
360
361     int jj = 0;
362
363     for (int i = 0; i < pdba->nr; i++)
364     {
365         int rnr = pdba->atom[i].resind;
366         for (auto patch = patches[i].begin(); patch != patches[i].end(); patch += patch->nr)
367         {
368             GMX_RELEASE_ASSERT(patch < patches[i].end(),
369                                "The number of patches in the last patch can not exceed the total "
370                                "number of patches");
371             /* check if we're adding: */
372             if (patch->type() == MoleculePatchType::Add && patch->tp > 0)
373             {
374                 bool bFoundAll = true;
375                 for (int m = 0; (m < patch->nctl && bFoundAll); m++)
376                 {
377                     int ia = pdbasearch_atom(patch->a[m].c_str(),
378                                              rnr,
379                                              pdba,
380                                              bCheckMissing ? "atom" : "check",
381                                              !bCheckMissing,
382                                              cyclicBondsIndex);
383                     if (ia < 0)
384                     {
385                         /* not found in original atoms, might still be in
386                          * the patch Instructions (patches) */
387                         hacksearch_atom(&ii, &jj, patch->a[m].c_str(), patches, rnr, pdba);
388                         if (ii >= 0)
389                         {
390                             copy_rvec(patches[ii][jj].newx, xa[m]);
391                         }
392                         else
393                         {
394                             bFoundAll = false;
395                             if (bCheckMissing)
396                             {
397                                 gmx_fatal(FARGS,
398                                           "Atom %s not found in residue %s %d"
399                                           ", rtp entry %s"
400                                           " while adding hydrogens",
401                                           patch->a[m].c_str(),
402                                           *pdba->resinfo[rnr].name,
403                                           pdba->resinfo[rnr].nr,
404                                           *pdba->resinfo[rnr].rtp);
405                             }
406                         }
407                     }
408                     else
409                     {
410                         copy_rvec(x[ia], xa[m]);
411                     }
412                 }
413                 if (bFoundAll)
414                 {
415                     for (int m = 0; (m < MAXH); m++)
416                     {
417                         for (int d = 0; d < DIM; d++)
418                         {
419                             if (m < patch->nr)
420                             {
421                                 xh[m][d] = 0;
422                             }
423                             else
424                             {
425                                 xh[m][d] = NOTSET;
426                             }
427                         }
428                     }
429                     calc_h_pos(patch->tp, xa, xh, &l);
430                     for (int m = 0; m < patch->nr; m++)
431                     {
432                         auto next = patch + m;
433                         copy_rvec(xh[m], next->newx);
434                         next->bXSet = true;
435                     }
436                 }
437             }
438         }
439     }
440 }
441
442 static int add_h_low(t_atoms**                                   initialAtoms,
443                      t_atoms**                                   modifiedAtoms,
444                      std::vector<gmx::RVec>*                     xptr,
445                      gmx::ArrayRef<const MoleculePatchDatabase>  globalPatches,
446                      t_symtab*                                   symtab,
447                      const int                                   nterpairs,
448                      gmx::ArrayRef<MoleculePatchDatabase* const> ntdb,
449                      gmx::ArrayRef<MoleculePatchDatabase* const> ctdb,
450                      gmx::ArrayRef<const int>                    rN,
451                      gmx::ArrayRef<const int>                    rC,
452                      const bool                                  bCheckMissing,
453                      gmx::ArrayRef<const int>                    cyclicBondsIndex)
454 {
455     int                                     nadd;
456     int                                     newi, natoms, nalreadypresent;
457     std::vector<std::vector<MoleculePatch>> patches;
458     std::vector<gmx::RVec>                  xn;
459
460     t_atoms* pdba = *initialAtoms;
461
462     /* set flags for adding hydrogens (according to hdb) */
463     natoms = pdba->nr;
464
465     {
466         /* We'll have to do all the hard work */
467         /* first get all the hackblocks for each residue: */
468         std::vector<MoleculePatchDatabase> hb =
469                 getMoleculePatchDatabases(pdba, globalPatches, nterpairs, ntdb, ctdb, rN, rC);
470
471         /* expand the hackblocks to atom level */
472         patches.resize(natoms);
473         expand_hackblocks(pdba, hb, patches, nterpairs, rN, rC);
474     }
475
476     /* Now calc the positions */
477     calc_all_pos(pdba, *xptr, patches, bCheckMissing, cyclicBondsIndex);
478
479     /* we don't have to add atoms that are already present in initialAtoms,
480        so we will remove them from the patches (MoleculePatch) */
481     nadd = check_atoms_present(pdba, patches, cyclicBondsIndex);
482
483     /* Copy old atoms, making space for new ones */
484     if (nadd > 0)
485     {
486         srenew(*modifiedAtoms, 1);
487         init_t_atoms(*modifiedAtoms, natoms + nadd, FALSE);
488         (*modifiedAtoms)->nres = pdba->nres;
489         srenew((*modifiedAtoms)->resinfo, pdba->nres);
490         std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*modifiedAtoms)->resinfo);
491     }
492     if (nadd == 0)
493     {
494         return natoms;
495     }
496
497     xn.resize(natoms + nadd);
498     newi = 0;
499     for (int i = 0; (i < natoms); i++)
500     {
501         /* check if this atom wasn't scheduled for deletion */
502         if (patches[i].empty() || (!patches[i][0].nname.empty()))
503         {
504             if (newi >= natoms + nadd)
505             {
506                 /*gmx_fatal(FARGS,"Not enough space for adding atoms");*/
507                 nadd += 10;
508                 xn.resize(natoms + nadd);
509                 srenew((*modifiedAtoms)->atom, natoms + nadd);
510                 srenew((*modifiedAtoms)->atomname, natoms + nadd);
511             }
512             copy_atom(pdba, i, (*modifiedAtoms), newi, symtab);
513             copy_rvec((*xptr)[i], xn[newi]);
514             /* process the hacks for this atom */
515             nalreadypresent = 0;
516             for (auto patch = patches[i].begin(); patch != patches[i].end(); patch++)
517             {
518                 if (patch->type() == MoleculePatchType::Add) /* add */
519                 {
520                     newi++;
521                     if (newi >= natoms + nadd)
522                     {
523                         /* gmx_fatal(FARGS,"Not enough space for adding atoms");*/
524                         nadd += 10;
525                         xn.resize(natoms + nadd);
526                         srenew((*modifiedAtoms)->atom, natoms + nadd);
527                         srenew((*modifiedAtoms)->atomname, natoms + nadd);
528                     }
529                     (*modifiedAtoms)->atom[newi].resind = pdba->atom[i].resind;
530                 }
531                 if (!patch->nname.empty()
532                     && (patch->oname.empty() || patch->oname == *(*modifiedAtoms)->atomname[newi]))
533                 {
534                     /* add or replace */
535                     if (patch->type() == MoleculePatchType::Add && patch->bAlreadyPresent)
536                     {
537                         /* This atom is already present, copy it from the input. */
538                         nalreadypresent++;
539                         copy_atom(pdba, i + nalreadypresent, (*modifiedAtoms), newi, symtab);
540                         copy_rvec((*xptr)[i + nalreadypresent], xn[newi]);
541                     }
542                     else
543                     {
544                         if (gmx_debug_at)
545                         {
546                             fprintf(debug,
547                                     "Replacing %d '%s' with (old name '%s') %s\n",
548                                     newi,
549                                     ((*modifiedAtoms)->atomname[newi] && *(*modifiedAtoms)->atomname[newi])
550                                             ? *(*modifiedAtoms)->atomname[newi]
551                                             : "",
552                                     patch->oname.empty() ? "" : patch->oname.c_str(),
553                                     patch->nname.c_str());
554                         }
555                         (*modifiedAtoms)->atomname[newi] = put_symtab(symtab, patch->nname.c_str());
556                         if (patch->bXSet)
557                         {
558                             copy_rvec(patch->newx, xn[newi]);
559                         }
560                     }
561                     if (debug)
562                     {
563                         fprintf(debug,
564                                 " %s %g %g",
565                                 *(*modifiedAtoms)->atomname[newi],
566                                 (*modifiedAtoms)->atom[newi].m,
567                                 (*modifiedAtoms)->atom[newi].q);
568                     }
569                 }
570             }
571             newi++;
572             i += nalreadypresent;
573         }
574     }
575     (*modifiedAtoms)->nr = newi;
576
577     done_atom(pdba);
578     *initialAtoms = *modifiedAtoms;
579
580     *xptr = xn;
581
582     return newi;
583 }
584
585 int add_h(t_atoms**                                   initialAtoms,
586           t_atoms**                                   localAtoms,
587           std::vector<gmx::RVec>*                     xptr,
588           gmx::ArrayRef<const MoleculePatchDatabase>  globalPatches,
589           t_symtab*                                   symtab,
590           const int                                   nterpairs,
591           gmx::ArrayRef<MoleculePatchDatabase* const> ntdb,
592           gmx::ArrayRef<MoleculePatchDatabase* const> ctdb,
593           gmx::ArrayRef<const int>                    rN,
594           gmx::ArrayRef<const int>                    rC,
595           const bool                                  bAllowMissing,
596           gmx::ArrayRef<const int>                    cyclicBondsIndex)
597 {
598     int nold, nnew, niter;
599
600     /* Here we loop to be able to add atoms to added atoms.
601      * We should not check for missing atoms here.
602      */
603     niter = 0;
604     nnew  = 0;
605     do
606     {
607         nold = nnew;
608         nnew = add_h_low(
609                 initialAtoms, localAtoms, xptr, globalPatches, symtab, nterpairs, ntdb, ctdb, rN, rC, FALSE, cyclicBondsIndex);
610         niter++;
611         if (niter > 100)
612         {
613             gmx_fatal(FARGS,
614                       "More than 100 iterations of add_h. Maybe you are trying to replace an added "
615                       "atom (this is not supported)?");
616         }
617     } while (nnew > nold);
618
619     if (!bAllowMissing)
620     {
621         /* Call add_h_low once more, now only for the missing atoms check */
622         add_h_low(initialAtoms, localAtoms, xptr, globalPatches, symtab, nterpairs, ntdb, ctdb, rN, rC, TRUE, cyclicBondsIndex);
623     }
624
625     return nnew;
626 }