2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/utility/smalloc.h"
48 #include "gromacs/utility/cstringutil.h"
49 #include "gromacs/fileio/confio.h"
51 #include "gromacs/fileio/futil.h"
55 #include "gmx_fatal.h"
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/gmxlib/conformation-utilities.h"
59 #include "read-conformation.h"
60 #include "gromacs/fileio/pdbio.h"
64 static void print_stat(rvec *x, int natoms, matrix box)
68 for (m = 0; (m < DIM); m++)
73 for (i = 0; (i < natoms); i++)
75 for (m = 0; m < DIM; m++)
77 xmin[m] = min(xmin[m], x[i][m]);
78 xmax[m] = max(xmax[m], x[i][m]);
81 for (m = 0; (m < DIM); m++)
83 fprintf(stderr, "DIM %d XMIN %8.3f XMAX %8.3f BOX %8.3f\n",
84 m, xmin[m], xmax[m], box[m][m]);
97 static void sort_molecule(t_atoms **atoms_solvt, rvec *x, rvec *v, real *r)
99 int atnr, i, j, moltp = 0, nrmoltypes, resi_o, resi_n, resnr;
100 t_moltypes *moltypes;
101 t_atoms *atoms, *newatoms;
102 rvec *newx, *newv = NULL;
105 fprintf(stderr, "Sorting configuration\n");
107 atoms = *atoms_solvt;
109 /* copy each residue from *atoms to a molecule in *molecule */
112 for (i = 0; i < atoms->nr; i++)
114 if ( (i == 0) || (atoms->atom[i].resind != atoms->atom[i-1].resind) )
116 /* see if this was a molecule type we haven't had yet: */
118 for (j = 0; (j < nrmoltypes) && (moltp == NOTSET); j++)
120 if (strcmp(*(atoms->resinfo[atoms->atom[i].resind].name),
121 moltypes[j].name) == 0)
130 srenew(moltypes, nrmoltypes);
131 moltypes[moltp].name = *(atoms->resinfo[atoms->atom[i].resind].name);
133 while ((i+atnr < atoms->nr) &&
134 (atoms->atom[i].resind == atoms->atom[i+atnr].resind))
138 moltypes[moltp].natoms = atnr;
139 moltypes[moltp].nmol = 0;
141 moltypes[moltp].nmol++;
145 fprintf(stderr, "Found %d%s molecule type%s:\n",
146 nrmoltypes, nrmoltypes == 1 ? "" : " different", nrmoltypes == 1 ? "" : "s");
147 for (j = 0; j < nrmoltypes; j++)
151 moltypes[j].res0 = 0;
155 moltypes[j].res0 = moltypes[j-1].res0+moltypes[j-1].nmol;
157 fprintf(stderr, "%7s (%4d atoms): %5d residues\n",
158 moltypes[j].name, moltypes[j].natoms, moltypes[j].nmol);
161 /* if we have only 1 moleculetype, we don't have to sort */
164 /* find out which molecules should go where: */
165 moltypes[0].i = moltypes[0].i0 = 0;
166 for (j = 1; j < nrmoltypes; j++)
170 moltypes[j-1].i0+moltypes[j-1].natoms*moltypes[j-1].nmol;
173 /* now put them there: */
175 init_t_atoms(newatoms, atoms->nr, FALSE);
176 newatoms->nres = atoms->nres;
177 snew(newatoms->resinfo, atoms->nres);
178 snew(newx, atoms->nr);
181 snew(newv, atoms->nr);
183 snew(newr, atoms->nr);
188 for (moltp = 0; moltp < nrmoltypes; moltp++)
191 while (i < atoms->nr)
193 resi_o = atoms->atom[i].resind;
194 if (strcmp(*atoms->resinfo[resi_o].name, moltypes[moltp].name) == 0)
196 /* Copy the residue info */
197 newatoms->resinfo[resi_n] = atoms->resinfo[resi_o];
198 newatoms->resinfo[resi_n].nr = resnr;
199 /* Copy the atom info */
202 newatoms->atom[j] = atoms->atom[i];
203 newatoms->atomname[j] = atoms->atomname[i];
204 newatoms->atom[j].resind = resi_n;
205 copy_rvec(x[i], newx[j]);
208 copy_rvec(v[i], newv[j]);
214 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
215 /* Increase the new residue counters */
221 /* Skip this residue */
226 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
231 /* put them back into the original arrays and throw away temporary arrays */
232 sfree(atoms->atomname);
233 sfree(atoms->resinfo);
236 *atoms_solvt = newatoms;
237 for (i = 0; i < (*atoms_solvt)->nr; i++)
239 copy_rvec(newx[i], x[i]);
242 copy_rvec(newv[i], v[i]);
256 static void rm_res_pbc(t_atoms *atoms, rvec *x, matrix box)
258 int i, start, n, d, nat;
264 for (n = 0; n < atoms->nr; n++)
266 if (!is_hydrogen(*(atoms->atomname[n])))
271 if ( (n+1 == atoms->nr) ||
272 (atoms->atom[n+1].resind != atoms->atom[n].resind) )
274 /* if nat==0 we have only hydrogens in the solvent,
275 we take last coordinate as cg */
279 copy_rvec(x[n], xcg);
281 svmul(1.0/nat, xcg, xcg);
282 for (d = 0; d < DIM; d++)
286 for (i = start; i <= n; i++)
288 x[i][d] += box[d][d];
292 while (xcg[d] >= box[d][d])
294 for (i = start; i <= n; i++)
296 x[i][d] -= box[d][d];
308 /* Make a new configuration by adding boxes*/
309 static void make_new_conformation(t_atoms *atoms, rvec *x, rvec *v, real *r, matrix box, ivec n_box)
311 int i, ix, iy, iz, m, j, imol, offset;
315 nmol = n_box[XX]*n_box[YY]*n_box[ZZ];
318 fprintf(stderr, "Generating configuration\n");
320 for (ix = 0; (ix < n_box[XX]); ix++)
322 delta[XX] = ix*box[XX][XX];
323 for (iy = 0; (iy < n_box[YY]); iy++)
325 delta[YY] = iy*box[YY][YY];
326 for (iz = 0; (iz < n_box[ZZ]); iz++)
328 delta[ZZ] = iz*box[ZZ][ZZ];
329 offset = imol*atoms->nr;
330 for (i = 0; (i < atoms->nr); i++)
332 for (m = 0; (m < DIM); m++)
334 x[offset+i][m] = delta[m]+x[i][m];
338 for (m = 0; (m < DIM); m++)
340 v[offset+i][m] = v[i][m];
349 for (i = 1; (i < nmol); i++)
351 int offs = i*atoms->nr;
352 int offsres = i*atoms->nres;
353 for (j = 0; (j < atoms->nr); j++)
355 atoms->atomname[offs+j] = atoms->atomname[j];
356 atoms->atom[offs+j].resind = atoms->atom[j].resind + offsres;
357 atoms->resinfo[atoms->atom[offs+j].resind] =
358 atoms->resinfo[atoms->atom[j].resind];
359 atoms->resinfo[atoms->atom[offs+j].resind].nr += offsres;
364 for (i = 0; i < DIM; i++)
366 for (j = 0; j < DIM; j++)
368 box[j][i] *= n_box[j];
373 static void add_solv(const char *fn, t_atoms *atoms, rvec **x, rvec **v, real **exclusionDistances,
374 int ePBC, matrix box,
376 real defaultDistance, real scaleFactor, int *atoms_added,
377 int *residues_added, real rshell, int max_sol,
378 const output_env_t oenv)
382 char filename[STRLEN];
383 char title_solvt[STRLEN];
384 t_atoms *atoms_solvt;
385 rvec *x_solvt, *v_solvt = NULL;
386 real *exclusionDistances_solvt;
393 strncpy(filename, lfn, STRLEN);
397 get_stx_coordnum(filename, &natoms);
400 gmx_fatal(FARGS, "No solvent in %s, please check your input\n", filename);
402 snew(atoms_solvt, 1);
403 init_t_atoms(atoms_solvt, natoms, FALSE);
405 snew(x_solvt, atoms_solvt->nr);
408 snew(v_solvt, atoms_solvt->nr);
410 snew(exclusionDistances_solvt, atoms_solvt->nr);
411 snew(atoms_solvt->resinfo, atoms_solvt->nr);
412 snew(atoms_solvt->atomname, atoms_solvt->nr);
413 snew(atoms_solvt->atom, atoms_solvt->nr);
414 atoms_solvt->pdbinfo = NULL;
415 fprintf(stderr, "Reading solvent configuration%s\n",
416 v_solvt ? " and velocities" : "");
417 read_stx_conf(filename, title_solvt, atoms_solvt, x_solvt, v_solvt,
418 &ePBC_solvt, box_solvt);
419 fprintf(stderr, "\"%s\"\n", title_solvt);
420 fprintf(stderr, "solvent configuration contains %d atoms in %d residues\n",
421 atoms_solvt->nr, atoms_solvt->nres);
422 fprintf(stderr, "\n");
424 /* apply pbc for solvent configuration for whole molecules */
425 rm_res_pbc(atoms_solvt, x_solvt, box_solvt);
427 /* initialise distance arrays for solvent configuration */
428 exclusionDistances_solvt = makeExclusionDistances(atoms_solvt, aps, defaultDistance, scaleFactor);
430 /* calculate the box multiplication factors n_box[0...DIM] */
432 for (i = 0; (i < DIM); i++)
435 while (n_box[i]*box_solvt[i][i] < box[i][i])
441 fprintf(stderr, "Will generate new solvent configuration of %dx%dx%d boxes\n",
442 n_box[XX], n_box[YY], n_box[ZZ]);
444 /* realloc atoms_solvt for the new solvent configuration */
445 srenew(atoms_solvt->resinfo, atoms_solvt->nres*nmol);
446 srenew(atoms_solvt->atomname, atoms_solvt->nr*nmol);
447 srenew(atoms_solvt->atom, atoms_solvt->nr*nmol);
448 srenew(x_solvt, atoms_solvt->nr*nmol);
451 srenew(v_solvt, atoms_solvt->nr*nmol);
453 srenew(exclusionDistances_solvt, atoms_solvt->nr*nmol);
455 /* generate a new solvent configuration */
456 make_new_conformation(atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt, box_solvt, n_box);
459 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
463 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
465 /* Sort the solvent mixture, not the protein... */
466 sort_molecule(&atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt);
468 /* add the two configurations */
471 add_conf(atoms, x, v, exclusionDistances, TRUE, ePBC, box, FALSE,
472 atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt, TRUE, rshell, max_sol, oenv);
473 *atoms_added = atoms->nr-onr;
474 *residues_added = atoms->nres-onres;
477 sfree(exclusionDistances_solvt);
478 done_atom(atoms_solvt);
481 fprintf(stderr, "Generated solvent containing %d atoms in %d residues\n",
482 *atoms_added, *residues_added);
485 static void update_top(t_atoms *atoms, matrix box, int NFILE, t_filenm fnm[],
488 #define TEMP_FILENM "temp.top"
490 char buf[STRLEN], buf2[STRLEN], *temp;
491 const char *topinout;
493 gmx_bool bSystem, bMolecules, bSkip;
498 for (i = 0; (i < atoms->nres); i++)
500 /* calculate number of SOLvent molecules */
501 if ( (strcmp(*atoms->resinfo[i].name, "SOL") == 0) ||
502 (strcmp(*atoms->resinfo[i].name, "WAT") == 0) ||
503 (strcmp(*atoms->resinfo[i].name, "HOH") == 0) )
509 for (i = 0; (i < atoms->nr); i++)
511 gmx_atomprop_query(aps, epropMass,
512 *atoms->resinfo[atoms->atom[i].resind].name,
513 *atoms->atomname[i], &mm);
519 fprintf(stderr, "Volume : %10g (nm^3)\n", vol);
520 fprintf(stderr, "Density : %10g (g/l)\n",
521 (mtot*1e24)/(AVOGADRO*vol));
522 fprintf(stderr, "Number of SOL molecules: %5d \n\n", nsol);
524 /* open topology file and append sol molecules */
525 topinout = ftp2fn(efTOP, NFILE, fnm);
526 if (ftp2bSet(efTOP, NFILE, fnm) )
528 fprintf(stderr, "Processing topology\n");
529 fpin = gmx_ffopen(topinout, "r");
530 fpout = gmx_ffopen(TEMP_FILENM, "w");
532 bSystem = bMolecules = FALSE;
533 while (fgets(buf, STRLEN, fpin))
538 if ((temp = strchr(buf2, '\n')) != NULL)
546 if ((temp = strchr(buf2, '\n')) != NULL)
551 if (buf2[strlen(buf2)-1] == ']')
553 buf2[strlen(buf2)-1] = '\0';
556 bSystem = (gmx_strcasecmp(buf2, "system") == 0);
557 bMolecules = (gmx_strcasecmp(buf2, "molecules") == 0);
560 else if (bSystem && nsol && (buf[0] != ';') )
562 /* if sol present, append "in water" to system name */
564 if (buf2[0] && (!strstr(buf2, " water")) )
566 sprintf(buf, "%s in water\n", buf2);
572 /* check if this is a line with solvent molecules */
573 sscanf(buf, "%4095s", buf2);
574 if (strcmp(buf2, "SOL") == 0)
576 sscanf(buf, "%*4095s %20d", &i);
587 if ((temp = strchr(buf, '\n')) != NULL)
591 fprintf(stdout, "Removing line #%d '%s' from topology file (%s)\n",
592 line, buf, topinout);
596 fprintf(fpout, "%s", buf);
602 fprintf(stdout, "Adding line for %d solvent molecules to "
603 "topology file (%s)\n", nsol, topinout);
604 fprintf(fpout, "%-15s %5d\n", "SOL", nsol);
607 /* use gmx_ffopen to generate backup of topinout */
608 fpout = gmx_ffopen(topinout, "w");
610 rename(TEMP_FILENM, topinout);
615 int gmx_solvate(int argc, char *argv[])
617 const char *desc[] = {
618 "[THISMODULE] can do one of 2 things:[PAR]",
620 "1) Generate a box of solvent. Specify [TT]-cs[tt] and [TT]-box[tt].",
621 "Or specify [TT]-cs[tt] and [TT]-cp[tt] with a structure file with",
622 "a box, but without atoms.[PAR]",
624 "2) Solvate a solute configuration, e.g. a protein, in a bath of solvent ",
625 "molecules. Specify [TT]-cp[tt] (solute) and [TT]-cs[tt] (solvent). ",
626 "The box specified in the solute coordinate file ([TT]-cp[tt]) is used,",
627 "unless [TT]-box[tt] is set.",
628 "If you want the solute to be centered in the box,",
629 "the program [gmx-editconf] has sophisticated options",
630 "to change the box dimensions and center the solute.",
631 "Solvent molecules are removed from the box where the ",
632 "distance between any atom of the solute molecule(s) and any atom of ",
633 "the solvent molecule is less than the sum of the scaled van der Waals",
634 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
635 "Waals radii is read by the program, and the resulting radii scaled",
636 "by [TT]-scale[tt]. If radii are not found in the database, those"
637 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].[PAR]",
639 "The default solvent is Simple Point Charge water (SPC), with coordinates ",
640 "from [TT]$GMXLIB/spc216.gro[tt]. These coordinates can also be used",
641 "for other 3-site water models, since a short equibilibration will remove",
642 "the small differences between the models.",
643 "Other solvents are also supported, as well as mixed solvents. The",
644 "only restriction to solvent types is that a solvent molecule consists",
645 "of exactly one residue. The residue information in the coordinate",
646 "files is used, and should therefore be more or less consistent.",
647 "In practice this means that two subsequent solvent molecules in the ",
648 "solvent coordinate file should have different residue number.",
649 "The box of solute is built by stacking the coordinates read from",
650 "the coordinate file. This means that these coordinates should be ",
651 "equlibrated in periodic boundary conditions to ensure a good",
652 "alignment of molecules on the stacking interfaces.",
653 "The [TT]-maxsol[tt] option simply adds only the first [TT]-maxsol[tt]",
654 "solvent molecules and leaves out the rest that would have fitted",
655 "into the box. This can create a void that can cause problems later.",
656 "Choose your volume wisely.[PAR]",
658 "The program can optionally rotate the solute molecule to align the",
659 "longest molecule axis along a box edge. This way the amount of solvent",
660 "molecules necessary is reduced.",
661 "It should be kept in mind that this only works for",
662 "short simulations, as e.g. an alpha-helical peptide in solution can ",
663 "rotate over 90 degrees, within 500 ps. In general it is therefore ",
664 "better to make a more or less cubic box.[PAR]",
666 "Setting [TT]-shell[tt] larger than zero will place a layer of water of",
667 "the specified thickness (nm) around the solute. Hint: it is a good",
668 "idea to put the protein in the center of a box first (using [gmx-editconf]).",
671 "Finally, [THISMODULE] will optionally remove lines from your topology file in ",
672 "which a number of solvent molecules is already added, and adds a ",
673 "line with the total number of solvent molecules in your coordinate file."
676 const char *bugs[] = {
677 "Molecules must be whole in the initial configurations.",
681 gmx_bool bProt, bBox;
682 const char *conf_prot, *confout;
683 real *exclusionDistances = NULL;
686 /* protein configuration data */
689 rvec *x = NULL, *v = NULL;
693 /* other data types */
694 int atoms_added, residues_added;
697 { efSTX, "-cp", "protein", ffOPTRD },
698 { efSTX, "-cs", "spc216", ffLIBRD},
699 { efSTO, NULL, NULL, ffWRITE},
700 { efTOP, NULL, NULL, ffOPTRW},
702 #define NFILE asize(fnm)
704 static real defaultDistance = 0.105, r_shell = 0, scaleFactor = 0.57;
705 static rvec new_box = {0.0, 0.0, 0.0};
706 static gmx_bool bReadV = FALSE;
707 static int max_sol = 0;
710 { "-box", FALSE, etRVEC, {new_box},
711 "Box size (in nm)" },
712 { "-radius", FALSE, etREAL, {&defaultDistance},
713 "Default van der Waals distance"},
714 { "-scale", FALSE, etREAL, {&scaleFactor},
715 "Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water." },
716 { "-shell", FALSE, etREAL, {&r_shell},
717 "Thickness of optional water layer around solute" },
718 { "-maxsol", FALSE, etINT, {&max_sol},
719 "Maximum number of solvent molecules to add if they fit in the box. If zero (default) this is ignored" },
720 { "-vel", FALSE, etBOOL, {&bReadV},
721 "Keep velocities from input solute and solvent" },
724 if (!parse_common_args(&argc, argv, PCA_BE_NICE, NFILE, fnm, asize(pa), pa,
725 asize(desc), desc, asize(bugs), bugs, &oenv))
730 const char *solventFileName = opt2fn("-cs", NFILE, fnm);
731 bProt = opt2bSet("-cp", NFILE, fnm);
732 bBox = opt2parg_bSet("-box", asize(pa), pa);
737 gmx_fatal(FARGS, "When no solute (-cp) is specified, "
738 "a box size (-box) must be specified");
741 aps = gmx_atomprop_init();
744 init_t_atoms(atoms, 0, FALSE);
747 /* Generate a solute configuration */
748 conf_prot = opt2fn("-cp", NFILE, fnm);
749 title = readConformation(conf_prot, atoms, &x,
750 bReadV ? &v : NULL, &ePBC, box);
751 exclusionDistances = makeExclusionDistances(atoms, aps, defaultDistance, scaleFactor);
755 fprintf(stderr, "Note: no velocities found\n");
759 fprintf(stderr, "Note: no atoms in %s\n", conf_prot);
767 box[XX][XX] = new_box[XX];
768 box[YY][YY] = new_box[YY];
769 box[ZZ][ZZ] = new_box[ZZ];
773 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
774 "or give explicit -box command line option");
777 add_solv(solventFileName, atoms, &x, v ? &v : NULL, &exclusionDistances, ePBC, box,
778 aps, defaultDistance, scaleFactor, &atoms_added, &residues_added, r_shell, max_sol,
781 /* write new configuration 1 to file confout */
782 confout = ftp2fn(efSTO, NFILE, fnm);
783 fprintf(stderr, "Writing generated configuration to %s\n", confout);
786 write_sto_conf(confout, title, atoms, x, v, ePBC, box);
787 /* print box sizes and box type to stderr */
788 fprintf(stderr, "%s\n", title);
792 write_sto_conf(confout, "Generated by gmx solvate", atoms, x, v, ePBC, box);
795 /* print size of generated configuration */
796 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
797 atoms->nr, atoms->nres);
798 update_top(atoms, box, NFILE, fnm, aps);
800 gmx_atomprop_destroy(aps);
801 sfree(exclusionDistances);