2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "gromacs/utility/smalloc.h"
47 #include "gromacs/utility/cstringutil.h"
48 #include "gromacs/fileio/confio.h"
50 #include "gromacs/utility/futil.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/utility/fatalerror.h"
55 #include "gromacs/commandline/pargs.h"
56 #include "gromacs/gmxlib/conformation-utilities.h"
58 #include "read-conformation.h"
59 #include "gromacs/fileio/pdbio.h"
63 static void print_stat(rvec *x, int natoms, matrix box)
67 for (m = 0; (m < DIM); m++)
72 for (i = 0; (i < natoms); i++)
74 for (m = 0; m < DIM; m++)
76 xmin[m] = min(xmin[m], x[i][m]);
77 xmax[m] = max(xmax[m], x[i][m]);
80 for (m = 0; (m < DIM); m++)
82 fprintf(stderr, "DIM %d XMIN %8.3f XMAX %8.3f BOX %8.3f\n",
83 m, xmin[m], xmax[m], box[m][m]);
96 static void sort_molecule(t_atoms **atoms_solvt, rvec *x, rvec *v, real *r)
98 int atnr, i, j, moltp = 0, nrmoltypes, resi_o, resi_n, resnr;
100 t_atoms *atoms, *newatoms;
101 rvec *newx, *newv = NULL;
104 fprintf(stderr, "Sorting configuration\n");
106 atoms = *atoms_solvt;
108 /* copy each residue from *atoms to a molecule in *molecule */
111 for (i = 0; i < atoms->nr; i++)
113 if ( (i == 0) || (atoms->atom[i].resind != atoms->atom[i-1].resind) )
115 /* see if this was a molecule type we haven't had yet: */
117 for (j = 0; (j < nrmoltypes) && (moltp == NOTSET); j++)
119 /* cppcheck-suppress nullPointer
120 * moltypes is guaranteed to be allocated because otherwise
121 * nrmoltypes is 0. */
122 if (strcmp(*(atoms->resinfo[atoms->atom[i].resind].name), moltypes[j].name) == 0)
131 srenew(moltypes, nrmoltypes);
132 moltypes[moltp].name = *(atoms->resinfo[atoms->atom[i].resind].name);
134 while ((i+atnr < atoms->nr) &&
135 (atoms->atom[i].resind == atoms->atom[i+atnr].resind))
139 moltypes[moltp].natoms = atnr;
140 moltypes[moltp].nmol = 0;
142 moltypes[moltp].nmol++;
146 fprintf(stderr, "Found %d%s molecule type%s:\n",
147 nrmoltypes, nrmoltypes == 1 ? "" : " different", nrmoltypes == 1 ? "" : "s");
148 for (j = 0; j < nrmoltypes; j++)
152 moltypes[j].res0 = 0;
156 moltypes[j].res0 = moltypes[j-1].res0+moltypes[j-1].nmol;
158 fprintf(stderr, "%7s (%4d atoms): %5d residues\n",
159 moltypes[j].name, moltypes[j].natoms, moltypes[j].nmol);
162 /* if we have only 1 moleculetype, we don't have to sort */
165 /* find out which molecules should go where: */
166 moltypes[0].i = moltypes[0].i0 = 0;
167 for (j = 1; j < nrmoltypes; j++)
171 moltypes[j-1].i0+moltypes[j-1].natoms*moltypes[j-1].nmol;
174 /* now put them there: */
176 init_t_atoms(newatoms, atoms->nr, FALSE);
177 newatoms->nres = atoms->nres;
178 snew(newatoms->resinfo, atoms->nres);
179 snew(newx, atoms->nr);
182 snew(newv, atoms->nr);
184 snew(newr, atoms->nr);
189 for (moltp = 0; moltp < nrmoltypes; moltp++)
192 while (i < atoms->nr)
194 resi_o = atoms->atom[i].resind;
195 if (strcmp(*atoms->resinfo[resi_o].name, moltypes[moltp].name) == 0)
197 /* Copy the residue info */
198 newatoms->resinfo[resi_n] = atoms->resinfo[resi_o];
199 newatoms->resinfo[resi_n].nr = resnr;
200 /* Copy the atom info */
203 newatoms->atom[j] = atoms->atom[i];
204 newatoms->atomname[j] = atoms->atomname[i];
205 newatoms->atom[j].resind = resi_n;
206 copy_rvec(x[i], newx[j]);
209 copy_rvec(v[i], newv[j]);
215 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
216 /* Increase the new residue counters */
222 /* Skip this residue */
227 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
232 /* put them back into the original arrays and throw away temporary arrays */
233 sfree(atoms->atomname);
234 sfree(atoms->resinfo);
237 *atoms_solvt = newatoms;
238 for (i = 0; i < (*atoms_solvt)->nr; i++)
240 copy_rvec(newx[i], x[i]);
243 copy_rvec(newv[i], v[i]);
257 static void rm_res_pbc(t_atoms *atoms, rvec *x, matrix box)
259 int i, start, n, d, nat;
265 for (n = 0; n < atoms->nr; n++)
267 if (!is_hydrogen(*(atoms->atomname[n])))
272 if ( (n+1 == atoms->nr) ||
273 (atoms->atom[n+1].resind != atoms->atom[n].resind) )
275 /* if nat==0 we have only hydrogens in the solvent,
276 we take last coordinate as cg */
280 copy_rvec(x[n], xcg);
282 svmul(1.0/nat, xcg, xcg);
283 for (d = 0; d < DIM; d++)
287 for (i = start; i <= n; i++)
289 x[i][d] += box[d][d];
293 while (xcg[d] >= box[d][d])
295 for (i = start; i <= n; i++)
297 x[i][d] -= box[d][d];
309 /* Make a new configuration by adding boxes*/
310 static void make_new_conformation(t_atoms *atoms, rvec *x, rvec *v, real *r, matrix box, ivec n_box)
312 int i, ix, iy, iz, m, j, imol, offset;
316 nmol = n_box[XX]*n_box[YY]*n_box[ZZ];
319 fprintf(stderr, "Generating configuration\n");
321 for (ix = 0; (ix < n_box[XX]); ix++)
323 delta[XX] = ix*box[XX][XX];
324 for (iy = 0; (iy < n_box[YY]); iy++)
326 delta[YY] = iy*box[YY][YY];
327 for (iz = 0; (iz < n_box[ZZ]); iz++)
329 delta[ZZ] = iz*box[ZZ][ZZ];
330 offset = imol*atoms->nr;
331 for (i = 0; (i < atoms->nr); i++)
333 for (m = 0; (m < DIM); m++)
335 x[offset+i][m] = delta[m]+x[i][m];
339 for (m = 0; (m < DIM); m++)
341 v[offset+i][m] = v[i][m];
350 for (i = 1; (i < nmol); i++)
352 int offs = i*atoms->nr;
353 int offsres = i*atoms->nres;
354 for (j = 0; (j < atoms->nr); j++)
356 atoms->atomname[offs+j] = atoms->atomname[j];
357 atoms->atom[offs+j].resind = atoms->atom[j].resind + offsres;
358 atoms->resinfo[atoms->atom[offs+j].resind] =
359 atoms->resinfo[atoms->atom[j].resind];
360 atoms->resinfo[atoms->atom[offs+j].resind].nr += offsres;
365 for (i = 0; i < DIM; i++)
367 for (j = 0; j < DIM; j++)
369 box[j][i] *= n_box[j];
374 static void add_solv(const char *fn, t_atoms *atoms, rvec **x, rvec **v, real **exclusionDistances,
375 int ePBC, matrix box,
377 real defaultDistance, real scaleFactor, int *atoms_added,
378 int *residues_added, real rshell, int max_sol,
379 const output_env_t oenv)
383 char filename[STRLEN];
384 char title_solvt[STRLEN];
385 t_atoms *atoms_solvt;
386 rvec *x_solvt, *v_solvt = NULL;
387 real *exclusionDistances_solvt;
394 strncpy(filename, lfn, STRLEN);
398 get_stx_coordnum(filename, &natoms);
401 gmx_fatal(FARGS, "No solvent in %s, please check your input\n", filename);
403 snew(atoms_solvt, 1);
404 init_t_atoms(atoms_solvt, natoms, FALSE);
406 snew(x_solvt, atoms_solvt->nr);
409 snew(v_solvt, atoms_solvt->nr);
411 snew(exclusionDistances_solvt, atoms_solvt->nr);
412 snew(atoms_solvt->resinfo, atoms_solvt->nr);
413 snew(atoms_solvt->atomname, atoms_solvt->nr);
414 snew(atoms_solvt->atom, atoms_solvt->nr);
415 atoms_solvt->pdbinfo = NULL;
416 fprintf(stderr, "Reading solvent configuration%s\n",
417 v_solvt ? " and velocities" : "");
418 read_stx_conf(filename, title_solvt, atoms_solvt, x_solvt, v_solvt,
419 &ePBC_solvt, box_solvt);
420 fprintf(stderr, "\"%s\"\n", title_solvt);
421 fprintf(stderr, "solvent configuration contains %d atoms in %d residues\n",
422 atoms_solvt->nr, atoms_solvt->nres);
423 fprintf(stderr, "\n");
425 /* apply pbc for solvent configuration for whole molecules */
426 rm_res_pbc(atoms_solvt, x_solvt, box_solvt);
428 /* initialise distance arrays for solvent configuration */
429 exclusionDistances_solvt = makeExclusionDistances(atoms_solvt, aps, defaultDistance, scaleFactor);
431 /* calculate the box multiplication factors n_box[0...DIM] */
433 for (i = 0; (i < DIM); i++)
436 while (n_box[i]*box_solvt[i][i] < box[i][i])
442 fprintf(stderr, "Will generate new solvent configuration of %dx%dx%d boxes\n",
443 n_box[XX], n_box[YY], n_box[ZZ]);
445 /* realloc atoms_solvt for the new solvent configuration */
446 srenew(atoms_solvt->resinfo, atoms_solvt->nres*nmol);
447 srenew(atoms_solvt->atomname, atoms_solvt->nr*nmol);
448 srenew(atoms_solvt->atom, atoms_solvt->nr*nmol);
449 srenew(x_solvt, atoms_solvt->nr*nmol);
452 srenew(v_solvt, atoms_solvt->nr*nmol);
454 srenew(exclusionDistances_solvt, atoms_solvt->nr*nmol);
456 /* generate a new solvent configuration */
457 make_new_conformation(atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt, box_solvt, n_box);
460 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
464 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
466 /* Sort the solvent mixture, not the protein... */
467 sort_molecule(&atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt);
469 /* add the two configurations */
472 add_conf(atoms, x, v, exclusionDistances, TRUE, ePBC, box, FALSE,
473 atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt, TRUE, rshell, max_sol, oenv);
474 *atoms_added = atoms->nr-onr;
475 *residues_added = atoms->nres-onres;
478 sfree(exclusionDistances_solvt);
479 done_atom(atoms_solvt);
482 fprintf(stderr, "Generated solvent containing %d atoms in %d residues\n",
483 *atoms_added, *residues_added);
486 static void update_top(t_atoms *atoms, matrix box, int NFILE, t_filenm fnm[],
489 #define TEMP_FILENM "temp.top"
491 char buf[STRLEN], buf2[STRLEN], *temp;
492 const char *topinout;
494 gmx_bool bSystem, bMolecules, bSkip;
499 for (i = 0; (i < atoms->nres); i++)
501 /* calculate number of SOLvent molecules */
502 if ( (strcmp(*atoms->resinfo[i].name, "SOL") == 0) ||
503 (strcmp(*atoms->resinfo[i].name, "WAT") == 0) ||
504 (strcmp(*atoms->resinfo[i].name, "HOH") == 0) )
510 for (i = 0; (i < atoms->nr); i++)
512 gmx_atomprop_query(aps, epropMass,
513 *atoms->resinfo[atoms->atom[i].resind].name,
514 *atoms->atomname[i], &mm);
520 fprintf(stderr, "Volume : %10g (nm^3)\n", vol);
521 fprintf(stderr, "Density : %10g (g/l)\n",
522 (mtot*1e24)/(AVOGADRO*vol));
523 fprintf(stderr, "Number of SOL molecules: %5d \n\n", nsol);
525 /* open topology file and append sol molecules */
526 topinout = ftp2fn(efTOP, NFILE, fnm);
527 if (ftp2bSet(efTOP, NFILE, fnm) )
529 fprintf(stderr, "Processing topology\n");
530 fpin = gmx_ffopen(topinout, "r");
531 fpout = gmx_ffopen(TEMP_FILENM, "w");
533 bSystem = bMolecules = FALSE;
534 while (fgets(buf, STRLEN, fpin))
539 if ((temp = strchr(buf2, '\n')) != NULL)
547 if ((temp = strchr(buf2, '\n')) != NULL)
552 if (buf2[strlen(buf2)-1] == ']')
554 buf2[strlen(buf2)-1] = '\0';
557 bSystem = (gmx_strcasecmp(buf2, "system") == 0);
558 bMolecules = (gmx_strcasecmp(buf2, "molecules") == 0);
561 else if (bSystem && nsol && (buf[0] != ';') )
563 /* if sol present, append "in water" to system name */
565 if (buf2[0] && (!strstr(buf2, " water")) )
567 sprintf(buf, "%s in water\n", buf2);
573 /* check if this is a line with solvent molecules */
574 sscanf(buf, "%4095s", buf2);
575 if (strcmp(buf2, "SOL") == 0)
577 sscanf(buf, "%*4095s %20d", &i);
588 if ((temp = strchr(buf, '\n')) != NULL)
592 fprintf(stdout, "Removing line #%d '%s' from topology file (%s)\n",
593 line, buf, topinout);
597 fprintf(fpout, "%s", buf);
603 fprintf(stdout, "Adding line for %d solvent molecules to "
604 "topology file (%s)\n", nsol, topinout);
605 fprintf(fpout, "%-15s %5d\n", "SOL", nsol);
608 /* use gmx_ffopen to generate backup of topinout */
609 fpout = gmx_ffopen(topinout, "w");
611 rename(TEMP_FILENM, topinout);
616 int gmx_solvate(int argc, char *argv[])
618 const char *desc[] = {
619 "[THISMODULE] can do one of 2 things:[PAR]",
621 "1) Generate a box of solvent. Specify [TT]-cs[tt] and [TT]-box[tt].",
622 "Or specify [TT]-cs[tt] and [TT]-cp[tt] with a structure file with",
623 "a box, but without atoms.[PAR]",
625 "2) Solvate a solute configuration, e.g. a protein, in a bath of solvent ",
626 "molecules. Specify [TT]-cp[tt] (solute) and [TT]-cs[tt] (solvent). ",
627 "The box specified in the solute coordinate file ([TT]-cp[tt]) is used,",
628 "unless [TT]-box[tt] is set.",
629 "If you want the solute to be centered in the box,",
630 "the program [gmx-editconf] has sophisticated options",
631 "to change the box dimensions and center the solute.",
632 "Solvent molecules are removed from the box where the ",
633 "distance between any atom of the solute molecule(s) and any atom of ",
634 "the solvent molecule is less than the sum of the scaled van der Waals",
635 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
636 "Waals radii is read by the program, and the resulting radii scaled",
637 "by [TT]-scale[tt]. If radii are not found in the database, those"
638 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].[PAR]",
640 "The default solvent is Simple Point Charge water (SPC), with coordinates ",
641 "from [TT]$GMXLIB/spc216.gro[tt]. These coordinates can also be used",
642 "for other 3-site water models, since a short equibilibration will remove",
643 "the small differences between the models.",
644 "Other solvents are also supported, as well as mixed solvents. The",
645 "only restriction to solvent types is that a solvent molecule consists",
646 "of exactly one residue. The residue information in the coordinate",
647 "files is used, and should therefore be more or less consistent.",
648 "In practice this means that two subsequent solvent molecules in the ",
649 "solvent coordinate file should have different residue number.",
650 "The box of solute is built by stacking the coordinates read from",
651 "the coordinate file. This means that these coordinates should be ",
652 "equlibrated in periodic boundary conditions to ensure a good",
653 "alignment of molecules on the stacking interfaces.",
654 "The [TT]-maxsol[tt] option simply adds only the first [TT]-maxsol[tt]",
655 "solvent molecules and leaves out the rest that would have fitted",
656 "into the box. This can create a void that can cause problems later.",
657 "Choose your volume wisely.[PAR]",
659 "The program can optionally rotate the solute molecule to align the",
660 "longest molecule axis along a box edge. This way the amount of solvent",
661 "molecules necessary is reduced.",
662 "It should be kept in mind that this only works for",
663 "short simulations, as e.g. an alpha-helical peptide in solution can ",
664 "rotate over 90 degrees, within 500 ps. In general it is therefore ",
665 "better to make a more or less cubic box.[PAR]",
667 "Setting [TT]-shell[tt] larger than zero will place a layer of water of",
668 "the specified thickness (nm) around the solute. Hint: it is a good",
669 "idea to put the protein in the center of a box first (using [gmx-editconf]).",
672 "Finally, [THISMODULE] will optionally remove lines from your topology file in ",
673 "which a number of solvent molecules is already added, and adds a ",
674 "line with the total number of solvent molecules in your coordinate file."
677 const char *bugs[] = {
678 "Molecules must be whole in the initial configurations.",
682 gmx_bool bProt, bBox;
683 const char *conf_prot, *confout;
684 real *exclusionDistances = NULL;
687 /* protein configuration data */
690 rvec *x = NULL, *v = NULL;
694 /* other data types */
695 int atoms_added, residues_added;
698 { efSTX, "-cp", "protein", ffOPTRD },
699 { efSTX, "-cs", "spc216", ffLIBRD},
700 { efSTO, NULL, NULL, ffWRITE},
701 { efTOP, NULL, NULL, ffOPTRW},
703 #define NFILE asize(fnm)
705 static real defaultDistance = 0.105, r_shell = 0, scaleFactor = 0.57;
706 static rvec new_box = {0.0, 0.0, 0.0};
707 static gmx_bool bReadV = FALSE;
708 static int max_sol = 0;
711 { "-box", FALSE, etRVEC, {new_box},
712 "Box size (in nm)" },
713 { "-radius", FALSE, etREAL, {&defaultDistance},
714 "Default van der Waals distance"},
715 { "-scale", FALSE, etREAL, {&scaleFactor},
716 "Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water." },
717 { "-shell", FALSE, etREAL, {&r_shell},
718 "Thickness of optional water layer around solute" },
719 { "-maxsol", FALSE, etINT, {&max_sol},
720 "Maximum number of solvent molecules to add if they fit in the box. If zero (default) this is ignored" },
721 { "-vel", FALSE, etBOOL, {&bReadV},
722 "Keep velocities from input solute and solvent" },
725 if (!parse_common_args(&argc, argv, PCA_BE_NICE, NFILE, fnm, asize(pa), pa,
726 asize(desc), desc, asize(bugs), bugs, &oenv))
731 const char *solventFileName = opt2fn("-cs", NFILE, fnm);
732 bProt = opt2bSet("-cp", NFILE, fnm);
733 bBox = opt2parg_bSet("-box", asize(pa), pa);
738 gmx_fatal(FARGS, "When no solute (-cp) is specified, "
739 "a box size (-box) must be specified");
742 aps = gmx_atomprop_init();
745 init_t_atoms(atoms, 0, FALSE);
748 /* Generate a solute configuration */
749 conf_prot = opt2fn("-cp", NFILE, fnm);
750 title = readConformation(conf_prot, atoms, &x,
751 bReadV ? &v : NULL, &ePBC, box);
752 exclusionDistances = makeExclusionDistances(atoms, aps, defaultDistance, scaleFactor);
756 fprintf(stderr, "Note: no velocities found\n");
760 fprintf(stderr, "Note: no atoms in %s\n", conf_prot);
768 box[XX][XX] = new_box[XX];
769 box[YY][YY] = new_box[YY];
770 box[ZZ][ZZ] = new_box[ZZ];
774 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
775 "or give explicit -box command line option");
778 add_solv(solventFileName, atoms, &x, v ? &v : NULL, &exclusionDistances, ePBC, box,
779 aps, defaultDistance, scaleFactor, &atoms_added, &residues_added, r_shell, max_sol,
782 /* write new configuration 1 to file confout */
783 confout = ftp2fn(efSTO, NFILE, fnm);
784 fprintf(stderr, "Writing generated configuration to %s\n", confout);
787 write_sto_conf(confout, title, atoms, x, v, ePBC, box);
788 /* print box sizes and box type to stderr */
789 fprintf(stderr, "%s\n", title);
793 write_sto_conf(confout, "Generated by gmx solvate", atoms, x, v, ePBC, box);
796 /* print size of generated configuration */
797 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
798 atoms->nr, atoms->nres);
799 update_top(atoms, box, NFILE, fnm, aps);
801 gmx_atomprop_destroy(aps);
802 sfree(exclusionDistances);