2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/fileio/pdbio.h"
46 #include "gromacs/gmxlib/conformation-utilities.h"
47 #include "gromacs/gmxpreprocess/addconf.h"
48 #include "gromacs/gmxpreprocess/read-conformation.h"
49 #include "gromacs/legacyheaders/macros.h"
50 #include "gromacs/legacyheaders/names.h"
51 #include "gromacs/legacyheaders/typedefs.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/topology/atomprop.h"
54 #include "gromacs/utility/cstringutil.h"
55 #include "gromacs/utility/fatalerror.h"
56 #include "gromacs/utility/futil.h"
57 #include "gromacs/utility/smalloc.h"
60 static void print_stat(rvec *x, int natoms, matrix box)
64 for (m = 0; (m < DIM); m++)
69 for (i = 0; (i < natoms); i++)
71 for (m = 0; m < DIM; m++)
73 xmin[m] = min(xmin[m], x[i][m]);
74 xmax[m] = max(xmax[m], x[i][m]);
77 for (m = 0; (m < DIM); m++)
79 fprintf(stderr, "DIM %d XMIN %8.3f XMAX %8.3f BOX %8.3f\n",
80 m, xmin[m], xmax[m], box[m][m]);
93 static void sort_molecule(t_atoms **atoms_solvt, rvec *x, rvec *v, real *r)
95 int atnr, i, j, moltp = 0, nrmoltypes, resi_o, resi_n, resnr;
97 t_atoms *atoms, *newatoms;
98 rvec *newx, *newv = NULL;
101 fprintf(stderr, "Sorting configuration\n");
103 atoms = *atoms_solvt;
105 /* copy each residue from *atoms to a molecule in *molecule */
108 for (i = 0; i < atoms->nr; i++)
110 if ( (i == 0) || (atoms->atom[i].resind != atoms->atom[i-1].resind) )
112 /* see if this was a molecule type we haven't had yet: */
114 for (j = 0; (j < nrmoltypes) && (moltp == NOTSET); j++)
116 /* cppcheck-suppress nullPointer
117 * moltypes is guaranteed to be allocated because otherwise
118 * nrmoltypes is 0. */
119 if (strcmp(*(atoms->resinfo[atoms->atom[i].resind].name), moltypes[j].name) == 0)
128 srenew(moltypes, nrmoltypes);
129 moltypes[moltp].name = *(atoms->resinfo[atoms->atom[i].resind].name);
131 while ((i+atnr < atoms->nr) &&
132 (atoms->atom[i].resind == atoms->atom[i+atnr].resind))
136 moltypes[moltp].natoms = atnr;
137 moltypes[moltp].nmol = 0;
139 moltypes[moltp].nmol++;
143 fprintf(stderr, "Found %d%s molecule type%s:\n",
144 nrmoltypes, nrmoltypes == 1 ? "" : " different", nrmoltypes == 1 ? "" : "s");
145 for (j = 0; j < nrmoltypes; j++)
149 moltypes[j].res0 = 0;
153 moltypes[j].res0 = moltypes[j-1].res0+moltypes[j-1].nmol;
155 fprintf(stderr, "%7s (%4d atoms): %5d residues\n",
156 moltypes[j].name, moltypes[j].natoms, moltypes[j].nmol);
159 /* if we have only 1 moleculetype, we don't have to sort */
162 /* find out which molecules should go where: */
163 moltypes[0].i = moltypes[0].i0 = 0;
164 for (j = 1; j < nrmoltypes; j++)
168 moltypes[j-1].i0+moltypes[j-1].natoms*moltypes[j-1].nmol;
171 /* now put them there: */
173 init_t_atoms(newatoms, atoms->nr, FALSE);
174 newatoms->nres = atoms->nres;
175 snew(newatoms->resinfo, atoms->nres);
176 snew(newx, atoms->nr);
179 snew(newv, atoms->nr);
181 snew(newr, atoms->nr);
186 for (moltp = 0; moltp < nrmoltypes; moltp++)
189 while (i < atoms->nr)
191 resi_o = atoms->atom[i].resind;
192 if (strcmp(*atoms->resinfo[resi_o].name, moltypes[moltp].name) == 0)
194 /* Copy the residue info */
195 newatoms->resinfo[resi_n] = atoms->resinfo[resi_o];
196 newatoms->resinfo[resi_n].nr = resnr;
197 /* Copy the atom info */
200 newatoms->atom[j] = atoms->atom[i];
201 newatoms->atomname[j] = atoms->atomname[i];
202 newatoms->atom[j].resind = resi_n;
203 copy_rvec(x[i], newx[j]);
206 copy_rvec(v[i], newv[j]);
212 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
213 /* Increase the new residue counters */
219 /* Skip this residue */
224 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
229 /* put them back into the original arrays and throw away temporary arrays */
230 sfree(atoms->atomname);
231 sfree(atoms->resinfo);
234 *atoms_solvt = newatoms;
235 for (i = 0; i < (*atoms_solvt)->nr; i++)
237 copy_rvec(newx[i], x[i]);
240 copy_rvec(newv[i], v[i]);
254 static void rm_res_pbc(t_atoms *atoms, rvec *x, matrix box)
256 int i, start, n, d, nat;
262 for (n = 0; n < atoms->nr; n++)
264 if (!is_hydrogen(*(atoms->atomname[n])))
269 if ( (n+1 == atoms->nr) ||
270 (atoms->atom[n+1].resind != atoms->atom[n].resind) )
272 /* if nat==0 we have only hydrogens in the solvent,
273 we take last coordinate as cg */
277 copy_rvec(x[n], xcg);
279 svmul(1.0/nat, xcg, xcg);
280 for (d = 0; d < DIM; d++)
284 for (i = start; i <= n; i++)
286 x[i][d] += box[d][d];
290 while (xcg[d] >= box[d][d])
292 for (i = start; i <= n; i++)
294 x[i][d] -= box[d][d];
306 /* Make a new configuration by adding boxes*/
307 static void make_new_conformation(t_atoms *atoms, rvec *x, rvec *v, real *r, matrix box, ivec n_box)
309 int i, ix, iy, iz, m, j, imol, offset;
313 nmol = n_box[XX]*n_box[YY]*n_box[ZZ];
316 fprintf(stderr, "Generating configuration\n");
318 for (ix = 0; (ix < n_box[XX]); ix++)
320 delta[XX] = ix*box[XX][XX];
321 for (iy = 0; (iy < n_box[YY]); iy++)
323 delta[YY] = iy*box[YY][YY];
324 for (iz = 0; (iz < n_box[ZZ]); iz++)
326 delta[ZZ] = iz*box[ZZ][ZZ];
327 offset = imol*atoms->nr;
328 for (i = 0; (i < atoms->nr); i++)
330 for (m = 0; (m < DIM); m++)
332 x[offset+i][m] = delta[m]+x[i][m];
336 for (m = 0; (m < DIM); m++)
338 v[offset+i][m] = v[i][m];
347 for (i = 1; (i < nmol); i++)
349 int offs = i*atoms->nr;
350 int offsres = i*atoms->nres;
351 for (j = 0; (j < atoms->nr); j++)
353 atoms->atomname[offs+j] = atoms->atomname[j];
354 atoms->atom[offs+j].resind = atoms->atom[j].resind + offsres;
355 atoms->resinfo[atoms->atom[offs+j].resind] =
356 atoms->resinfo[atoms->atom[j].resind];
357 atoms->resinfo[atoms->atom[offs+j].resind].nr += offsres;
362 for (i = 0; i < DIM; i++)
364 for (j = 0; j < DIM; j++)
366 box[j][i] *= n_box[j];
371 static void add_solv(const char *fn, t_atoms *atoms, rvec **x, rvec **v, real **exclusionDistances,
372 int ePBC, matrix box,
374 real defaultDistance, real scaleFactor, int *atoms_added,
375 int *residues_added, real rshell, int max_sol,
376 const output_env_t oenv)
380 char filename[STRLEN];
381 char title_solvt[STRLEN];
382 t_atoms *atoms_solvt;
383 rvec *x_solvt, *v_solvt = NULL;
384 real *exclusionDistances_solvt;
391 strncpy(filename, lfn, STRLEN);
395 get_stx_coordnum(filename, &natoms);
398 gmx_fatal(FARGS, "No solvent in %s, please check your input\n", filename);
400 snew(atoms_solvt, 1);
401 init_t_atoms(atoms_solvt, natoms, FALSE);
403 snew(x_solvt, atoms_solvt->nr);
406 snew(v_solvt, atoms_solvt->nr);
408 snew(exclusionDistances_solvt, atoms_solvt->nr);
409 snew(atoms_solvt->resinfo, atoms_solvt->nr);
410 snew(atoms_solvt->atomname, atoms_solvt->nr);
411 snew(atoms_solvt->atom, atoms_solvt->nr);
412 atoms_solvt->pdbinfo = NULL;
413 fprintf(stderr, "Reading solvent configuration%s\n",
414 v_solvt ? " and velocities" : "");
415 read_stx_conf(filename, title_solvt, atoms_solvt, x_solvt, v_solvt,
416 &ePBC_solvt, box_solvt);
417 fprintf(stderr, "\"%s\"\n", title_solvt);
418 fprintf(stderr, "solvent configuration contains %d atoms in %d residues\n",
419 atoms_solvt->nr, atoms_solvt->nres);
420 fprintf(stderr, "\n");
422 /* apply pbc for solvent configuration for whole molecules */
423 rm_res_pbc(atoms_solvt, x_solvt, box_solvt);
425 /* initialise distance arrays for solvent configuration */
426 exclusionDistances_solvt = makeExclusionDistances(atoms_solvt, aps, defaultDistance, scaleFactor);
428 /* calculate the box multiplication factors n_box[0...DIM] */
430 for (i = 0; (i < DIM); i++)
433 while (n_box[i]*box_solvt[i][i] < box[i][i])
439 fprintf(stderr, "Will generate new solvent configuration of %dx%dx%d boxes\n",
440 n_box[XX], n_box[YY], n_box[ZZ]);
442 /* realloc atoms_solvt for the new solvent configuration */
443 srenew(atoms_solvt->resinfo, atoms_solvt->nres*nmol);
444 srenew(atoms_solvt->atomname, atoms_solvt->nr*nmol);
445 srenew(atoms_solvt->atom, atoms_solvt->nr*nmol);
446 srenew(x_solvt, atoms_solvt->nr*nmol);
449 srenew(v_solvt, atoms_solvt->nr*nmol);
451 srenew(exclusionDistances_solvt, atoms_solvt->nr*nmol);
453 /* generate a new solvent configuration */
454 make_new_conformation(atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt, box_solvt, n_box);
457 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
461 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
463 /* Sort the solvent mixture, not the protein... */
464 sort_molecule(&atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt);
466 /* add the two configurations */
469 add_conf(atoms, x, v, exclusionDistances, TRUE, ePBC, box, FALSE,
470 atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt, TRUE, rshell, max_sol, oenv);
471 *atoms_added = atoms->nr-onr;
472 *residues_added = atoms->nres-onres;
475 sfree(exclusionDistances_solvt);
476 done_atom(atoms_solvt);
479 fprintf(stderr, "Generated solvent containing %d atoms in %d residues\n",
480 *atoms_added, *residues_added);
483 static void update_top(t_atoms *atoms, matrix box, int NFILE, t_filenm fnm[],
486 #define TEMP_FILENM "temp.top"
488 char buf[STRLEN], buf2[STRLEN], *temp;
489 const char *topinout;
491 gmx_bool bSystem, bMolecules, bSkip;
496 for (i = 0; (i < atoms->nres); i++)
498 /* calculate number of SOLvent molecules */
499 if ( (strcmp(*atoms->resinfo[i].name, "SOL") == 0) ||
500 (strcmp(*atoms->resinfo[i].name, "WAT") == 0) ||
501 (strcmp(*atoms->resinfo[i].name, "HOH") == 0) )
507 for (i = 0; (i < atoms->nr); i++)
509 gmx_atomprop_query(aps, epropMass,
510 *atoms->resinfo[atoms->atom[i].resind].name,
511 *atoms->atomname[i], &mm);
517 fprintf(stderr, "Volume : %10g (nm^3)\n", vol);
518 fprintf(stderr, "Density : %10g (g/l)\n",
519 (mtot*1e24)/(AVOGADRO*vol));
520 fprintf(stderr, "Number of SOL molecules: %5d \n\n", nsol);
522 /* open topology file and append sol molecules */
523 topinout = ftp2fn(efTOP, NFILE, fnm);
524 if (ftp2bSet(efTOP, NFILE, fnm) )
526 fprintf(stderr, "Processing topology\n");
527 fpin = gmx_ffopen(topinout, "r");
528 fpout = gmx_ffopen(TEMP_FILENM, "w");
530 bSystem = bMolecules = FALSE;
531 while (fgets(buf, STRLEN, fpin))
536 if ((temp = strchr(buf2, '\n')) != NULL)
544 if ((temp = strchr(buf2, '\n')) != NULL)
549 if (buf2[strlen(buf2)-1] == ']')
551 buf2[strlen(buf2)-1] = '\0';
554 bSystem = (gmx_strcasecmp(buf2, "system") == 0);
555 bMolecules = (gmx_strcasecmp(buf2, "molecules") == 0);
558 else if (bSystem && nsol && (buf[0] != ';') )
560 /* if sol present, append "in water" to system name */
562 if (buf2[0] && (!strstr(buf2, " water")) )
564 sprintf(buf, "%s in water\n", buf2);
570 /* check if this is a line with solvent molecules */
571 sscanf(buf, "%4095s", buf2);
572 if (strcmp(buf2, "SOL") == 0)
574 sscanf(buf, "%*4095s %20d", &i);
585 if ((temp = strchr(buf, '\n')) != NULL)
589 fprintf(stdout, "Removing line #%d '%s' from topology file (%s)\n",
590 line, buf, topinout);
594 fprintf(fpout, "%s", buf);
600 fprintf(stdout, "Adding line for %d solvent molecules to "
601 "topology file (%s)\n", nsol, topinout);
602 fprintf(fpout, "%-15s %5d\n", "SOL", nsol);
605 /* use gmx_ffopen to generate backup of topinout */
606 fpout = gmx_ffopen(topinout, "w");
608 rename(TEMP_FILENM, topinout);
613 int gmx_solvate(int argc, char *argv[])
615 const char *desc[] = {
616 "[THISMODULE] can do one of 2 things:[PAR]",
618 "1) Generate a box of solvent. Specify [TT]-cs[tt] and [TT]-box[tt].",
619 "Or specify [TT]-cs[tt] and [TT]-cp[tt] with a structure file with",
620 "a box, but without atoms.[PAR]",
622 "2) Solvate a solute configuration, e.g. a protein, in a bath of solvent ",
623 "molecules. Specify [TT]-cp[tt] (solute) and [TT]-cs[tt] (solvent). ",
624 "The box specified in the solute coordinate file ([TT]-cp[tt]) is used,",
625 "unless [TT]-box[tt] is set.",
626 "If you want the solute to be centered in the box,",
627 "the program [gmx-editconf] has sophisticated options",
628 "to change the box dimensions and center the solute.",
629 "Solvent molecules are removed from the box where the ",
630 "distance between any atom of the solute molecule(s) and any atom of ",
631 "the solvent molecule is less than the sum of the scaled van der Waals",
632 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
633 "Waals radii is read by the program, and the resulting radii scaled",
634 "by [TT]-scale[tt]. If radii are not found in the database, those"
635 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].[PAR]",
637 "The default solvent is Simple Point Charge water (SPC), with coordinates ",
638 "from [TT]$GMXLIB/spc216.gro[tt]. These coordinates can also be used",
639 "for other 3-site water models, since a short equibilibration will remove",
640 "the small differences between the models.",
641 "Other solvents are also supported, as well as mixed solvents. The",
642 "only restriction to solvent types is that a solvent molecule consists",
643 "of exactly one residue. The residue information in the coordinate",
644 "files is used, and should therefore be more or less consistent.",
645 "In practice this means that two subsequent solvent molecules in the ",
646 "solvent coordinate file should have different residue number.",
647 "The box of solute is built by stacking the coordinates read from",
648 "the coordinate file. This means that these coordinates should be ",
649 "equlibrated in periodic boundary conditions to ensure a good",
650 "alignment of molecules on the stacking interfaces.",
651 "The [TT]-maxsol[tt] option simply adds only the first [TT]-maxsol[tt]",
652 "solvent molecules and leaves out the rest that would have fitted",
653 "into the box. This can create a void that can cause problems later.",
654 "Choose your volume wisely.[PAR]",
656 "The program can optionally rotate the solute molecule to align the",
657 "longest molecule axis along a box edge. This way the amount of solvent",
658 "molecules necessary is reduced.",
659 "It should be kept in mind that this only works for",
660 "short simulations, as e.g. an alpha-helical peptide in solution can ",
661 "rotate over 90 degrees, within 500 ps. In general it is therefore ",
662 "better to make a more or less cubic box.[PAR]",
664 "Setting [TT]-shell[tt] larger than zero will place a layer of water of",
665 "the specified thickness (nm) around the solute. Hint: it is a good",
666 "idea to put the protein in the center of a box first (using [gmx-editconf]).",
669 "Finally, [THISMODULE] will optionally remove lines from your topology file in ",
670 "which a number of solvent molecules is already added, and adds a ",
671 "line with the total number of solvent molecules in your coordinate file."
674 const char *bugs[] = {
675 "Molecules must be whole in the initial configurations.",
679 gmx_bool bProt, bBox;
680 const char *conf_prot, *confout;
681 real *exclusionDistances = NULL;
684 /* protein configuration data */
687 rvec *x = NULL, *v = NULL;
691 /* other data types */
692 int atoms_added, residues_added;
695 { efSTX, "-cp", "protein", ffOPTRD },
696 { efSTX, "-cs", "spc216", ffLIBRD},
697 { efSTO, NULL, NULL, ffWRITE},
698 { efTOP, NULL, NULL, ffOPTRW},
700 #define NFILE asize(fnm)
702 static real defaultDistance = 0.105, r_shell = 0, scaleFactor = 0.57;
703 static rvec new_box = {0.0, 0.0, 0.0};
704 static gmx_bool bReadV = FALSE;
705 static int max_sol = 0;
708 { "-box", FALSE, etRVEC, {new_box},
709 "Box size (in nm)" },
710 { "-radius", FALSE, etREAL, {&defaultDistance},
711 "Default van der Waals distance"},
712 { "-scale", FALSE, etREAL, {&scaleFactor},
713 "Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water." },
714 { "-shell", FALSE, etREAL, {&r_shell},
715 "Thickness of optional water layer around solute" },
716 { "-maxsol", FALSE, etINT, {&max_sol},
717 "Maximum number of solvent molecules to add if they fit in the box. If zero (default) this is ignored" },
718 { "-vel", FALSE, etBOOL, {&bReadV},
719 "Keep velocities from input solute and solvent" },
722 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa,
723 asize(desc), desc, asize(bugs), bugs, &oenv))
728 const char *solventFileName = opt2fn("-cs", NFILE, fnm);
729 bProt = opt2bSet("-cp", NFILE, fnm);
730 bBox = opt2parg_bSet("-box", asize(pa), pa);
735 gmx_fatal(FARGS, "When no solute (-cp) is specified, "
736 "a box size (-box) must be specified");
739 aps = gmx_atomprop_init();
742 init_t_atoms(atoms, 0, FALSE);
745 /* Generate a solute configuration */
746 conf_prot = opt2fn("-cp", NFILE, fnm);
747 title = readConformation(conf_prot, atoms, &x,
748 bReadV ? &v : NULL, &ePBC, box);
749 exclusionDistances = makeExclusionDistances(atoms, aps, defaultDistance, scaleFactor);
753 fprintf(stderr, "Note: no velocities found\n");
757 fprintf(stderr, "Note: no atoms in %s\n", conf_prot);
765 box[XX][XX] = new_box[XX];
766 box[YY][YY] = new_box[YY];
767 box[ZZ][ZZ] = new_box[ZZ];
771 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
772 "or give explicit -box command line option");
775 add_solv(solventFileName, atoms, &x, v ? &v : NULL, &exclusionDistances, ePBC, box,
776 aps, defaultDistance, scaleFactor, &atoms_added, &residues_added, r_shell, max_sol,
779 /* write new configuration 1 to file confout */
780 confout = ftp2fn(efSTO, NFILE, fnm);
781 fprintf(stderr, "Writing generated configuration to %s\n", confout);
784 write_sto_conf(confout, title, atoms, x, v, ePBC, box);
785 /* print box sizes and box type to stderr */
786 fprintf(stderr, "%s\n", title);
790 write_sto_conf(confout, "Generated by gmx solvate", atoms, x, v, ePBC, box);
793 /* print size of generated configuration */
794 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
795 atoms->nr, atoms->nres);
796 update_top(atoms, box, NFILE, fnm, aps);
798 gmx_atomprop_destroy(aps);
799 sfree(exclusionDistances);