2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/legacyheaders/typedefs.h"
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/legacyheaders/macros.h"
46 #include "gromacs/utility/futil.h"
47 #include "gromacs/legacyheaders/names.h"
48 #include "gromacs/gmxlib/conformation-utilities.h"
50 #include "read-conformation.h"
51 #include "gromacs/fileio/pdbio.h"
53 #include "gromacs/commandline/pargs.h"
54 #include "gromacs/math/vec.h"
55 #include "gromacs/topology/atomprop.h"
56 #include "gromacs/utility/cstringutil.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/smalloc.h"
61 static void print_stat(rvec *x, int natoms, matrix box)
65 for (m = 0; (m < DIM); m++)
70 for (i = 0; (i < natoms); i++)
72 for (m = 0; m < DIM; m++)
74 xmin[m] = min(xmin[m], x[i][m]);
75 xmax[m] = max(xmax[m], x[i][m]);
78 for (m = 0; (m < DIM); m++)
80 fprintf(stderr, "DIM %d XMIN %8.3f XMAX %8.3f BOX %8.3f\n",
81 m, xmin[m], xmax[m], box[m][m]);
94 static void sort_molecule(t_atoms **atoms_solvt, rvec *x, rvec *v, real *r)
96 int atnr, i, j, moltp = 0, nrmoltypes, resi_o, resi_n, resnr;
98 t_atoms *atoms, *newatoms;
99 rvec *newx, *newv = NULL;
102 fprintf(stderr, "Sorting configuration\n");
104 atoms = *atoms_solvt;
106 /* copy each residue from *atoms to a molecule in *molecule */
109 for (i = 0; i < atoms->nr; i++)
111 if ( (i == 0) || (atoms->atom[i].resind != atoms->atom[i-1].resind) )
113 /* see if this was a molecule type we haven't had yet: */
115 for (j = 0; (j < nrmoltypes) && (moltp == NOTSET); j++)
117 /* cppcheck-suppress nullPointer
118 * moltypes is guaranteed to be allocated because otherwise
119 * nrmoltypes is 0. */
120 if (strcmp(*(atoms->resinfo[atoms->atom[i].resind].name), moltypes[j].name) == 0)
129 srenew(moltypes, nrmoltypes);
130 moltypes[moltp].name = *(atoms->resinfo[atoms->atom[i].resind].name);
132 while ((i+atnr < atoms->nr) &&
133 (atoms->atom[i].resind == atoms->atom[i+atnr].resind))
137 moltypes[moltp].natoms = atnr;
138 moltypes[moltp].nmol = 0;
140 moltypes[moltp].nmol++;
144 fprintf(stderr, "Found %d%s molecule type%s:\n",
145 nrmoltypes, nrmoltypes == 1 ? "" : " different", nrmoltypes == 1 ? "" : "s");
146 for (j = 0; j < nrmoltypes; j++)
150 moltypes[j].res0 = 0;
154 moltypes[j].res0 = moltypes[j-1].res0+moltypes[j-1].nmol;
156 fprintf(stderr, "%7s (%4d atoms): %5d residues\n",
157 moltypes[j].name, moltypes[j].natoms, moltypes[j].nmol);
160 /* if we have only 1 moleculetype, we don't have to sort */
163 /* find out which molecules should go where: */
164 moltypes[0].i = moltypes[0].i0 = 0;
165 for (j = 1; j < nrmoltypes; j++)
169 moltypes[j-1].i0+moltypes[j-1].natoms*moltypes[j-1].nmol;
172 /* now put them there: */
174 init_t_atoms(newatoms, atoms->nr, FALSE);
175 newatoms->nres = atoms->nres;
176 snew(newatoms->resinfo, atoms->nres);
177 snew(newx, atoms->nr);
180 snew(newv, atoms->nr);
182 snew(newr, atoms->nr);
187 for (moltp = 0; moltp < nrmoltypes; moltp++)
190 while (i < atoms->nr)
192 resi_o = atoms->atom[i].resind;
193 if (strcmp(*atoms->resinfo[resi_o].name, moltypes[moltp].name) == 0)
195 /* Copy the residue info */
196 newatoms->resinfo[resi_n] = atoms->resinfo[resi_o];
197 newatoms->resinfo[resi_n].nr = resnr;
198 /* Copy the atom info */
201 newatoms->atom[j] = atoms->atom[i];
202 newatoms->atomname[j] = atoms->atomname[i];
203 newatoms->atom[j].resind = resi_n;
204 copy_rvec(x[i], newx[j]);
207 copy_rvec(v[i], newv[j]);
213 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
214 /* Increase the new residue counters */
220 /* Skip this residue */
225 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
230 /* put them back into the original arrays and throw away temporary arrays */
231 sfree(atoms->atomname);
232 sfree(atoms->resinfo);
235 *atoms_solvt = newatoms;
236 for (i = 0; i < (*atoms_solvt)->nr; i++)
238 copy_rvec(newx[i], x[i]);
241 copy_rvec(newv[i], v[i]);
255 static void rm_res_pbc(t_atoms *atoms, rvec *x, matrix box)
257 int i, start, n, d, nat;
263 for (n = 0; n < atoms->nr; n++)
265 if (!is_hydrogen(*(atoms->atomname[n])))
270 if ( (n+1 == atoms->nr) ||
271 (atoms->atom[n+1].resind != atoms->atom[n].resind) )
273 /* if nat==0 we have only hydrogens in the solvent,
274 we take last coordinate as cg */
278 copy_rvec(x[n], xcg);
280 svmul(1.0/nat, xcg, xcg);
281 for (d = 0; d < DIM; d++)
285 for (i = start; i <= n; i++)
287 x[i][d] += box[d][d];
291 while (xcg[d] >= box[d][d])
293 for (i = start; i <= n; i++)
295 x[i][d] -= box[d][d];
307 /* Make a new configuration by adding boxes*/
308 static void make_new_conformation(t_atoms *atoms, rvec *x, rvec *v, real *r, matrix box, ivec n_box)
310 int i, ix, iy, iz, m, j, imol, offset;
314 nmol = n_box[XX]*n_box[YY]*n_box[ZZ];
317 fprintf(stderr, "Generating configuration\n");
319 for (ix = 0; (ix < n_box[XX]); ix++)
321 delta[XX] = ix*box[XX][XX];
322 for (iy = 0; (iy < n_box[YY]); iy++)
324 delta[YY] = iy*box[YY][YY];
325 for (iz = 0; (iz < n_box[ZZ]); iz++)
327 delta[ZZ] = iz*box[ZZ][ZZ];
328 offset = imol*atoms->nr;
329 for (i = 0; (i < atoms->nr); i++)
331 for (m = 0; (m < DIM); m++)
333 x[offset+i][m] = delta[m]+x[i][m];
337 for (m = 0; (m < DIM); m++)
339 v[offset+i][m] = v[i][m];
348 for (i = 1; (i < nmol); i++)
350 int offs = i*atoms->nr;
351 int offsres = i*atoms->nres;
352 for (j = 0; (j < atoms->nr); j++)
354 atoms->atomname[offs+j] = atoms->atomname[j];
355 atoms->atom[offs+j].resind = atoms->atom[j].resind + offsres;
356 atoms->resinfo[atoms->atom[offs+j].resind] =
357 atoms->resinfo[atoms->atom[j].resind];
358 atoms->resinfo[atoms->atom[offs+j].resind].nr += offsres;
363 for (i = 0; i < DIM; i++)
365 for (j = 0; j < DIM; j++)
367 box[j][i] *= n_box[j];
372 static void add_solv(const char *fn, t_atoms *atoms, rvec **x, rvec **v, real **exclusionDistances,
373 int ePBC, matrix box,
375 real defaultDistance, real scaleFactor, int *atoms_added,
376 int *residues_added, real rshell, int max_sol,
377 const output_env_t oenv)
381 char filename[STRLEN];
382 char title_solvt[STRLEN];
383 t_atoms *atoms_solvt;
384 rvec *x_solvt, *v_solvt = NULL;
385 real *exclusionDistances_solvt;
392 strncpy(filename, lfn, STRLEN);
396 get_stx_coordnum(filename, &natoms);
399 gmx_fatal(FARGS, "No solvent in %s, please check your input\n", filename);
401 snew(atoms_solvt, 1);
402 init_t_atoms(atoms_solvt, natoms, FALSE);
404 snew(x_solvt, atoms_solvt->nr);
407 snew(v_solvt, atoms_solvt->nr);
409 snew(exclusionDistances_solvt, atoms_solvt->nr);
410 snew(atoms_solvt->resinfo, atoms_solvt->nr);
411 snew(atoms_solvt->atomname, atoms_solvt->nr);
412 snew(atoms_solvt->atom, atoms_solvt->nr);
413 atoms_solvt->pdbinfo = NULL;
414 fprintf(stderr, "Reading solvent configuration%s\n",
415 v_solvt ? " and velocities" : "");
416 read_stx_conf(filename, title_solvt, atoms_solvt, x_solvt, v_solvt,
417 &ePBC_solvt, box_solvt);
418 fprintf(stderr, "\"%s\"\n", title_solvt);
419 fprintf(stderr, "solvent configuration contains %d atoms in %d residues\n",
420 atoms_solvt->nr, atoms_solvt->nres);
421 fprintf(stderr, "\n");
423 /* apply pbc for solvent configuration for whole molecules */
424 rm_res_pbc(atoms_solvt, x_solvt, box_solvt);
426 /* initialise distance arrays for solvent configuration */
427 exclusionDistances_solvt = makeExclusionDistances(atoms_solvt, aps, defaultDistance, scaleFactor);
429 /* calculate the box multiplication factors n_box[0...DIM] */
431 for (i = 0; (i < DIM); i++)
434 while (n_box[i]*box_solvt[i][i] < box[i][i])
440 fprintf(stderr, "Will generate new solvent configuration of %dx%dx%d boxes\n",
441 n_box[XX], n_box[YY], n_box[ZZ]);
443 /* realloc atoms_solvt for the new solvent configuration */
444 srenew(atoms_solvt->resinfo, atoms_solvt->nres*nmol);
445 srenew(atoms_solvt->atomname, atoms_solvt->nr*nmol);
446 srenew(atoms_solvt->atom, atoms_solvt->nr*nmol);
447 srenew(x_solvt, atoms_solvt->nr*nmol);
450 srenew(v_solvt, atoms_solvt->nr*nmol);
452 srenew(exclusionDistances_solvt, atoms_solvt->nr*nmol);
454 /* generate a new solvent configuration */
455 make_new_conformation(atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt, box_solvt, n_box);
458 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
462 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
464 /* Sort the solvent mixture, not the protein... */
465 sort_molecule(&atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt);
467 /* add the two configurations */
470 add_conf(atoms, x, v, exclusionDistances, TRUE, ePBC, box, FALSE,
471 atoms_solvt, x_solvt, v_solvt, exclusionDistances_solvt, TRUE, rshell, max_sol, oenv);
472 *atoms_added = atoms->nr-onr;
473 *residues_added = atoms->nres-onres;
476 sfree(exclusionDistances_solvt);
477 done_atom(atoms_solvt);
480 fprintf(stderr, "Generated solvent containing %d atoms in %d residues\n",
481 *atoms_added, *residues_added);
484 static void update_top(t_atoms *atoms, matrix box, int NFILE, t_filenm fnm[],
487 #define TEMP_FILENM "temp.top"
489 char buf[STRLEN], buf2[STRLEN], *temp;
490 const char *topinout;
492 gmx_bool bSystem, bMolecules, bSkip;
497 for (i = 0; (i < atoms->nres); i++)
499 /* calculate number of SOLvent molecules */
500 if ( (strcmp(*atoms->resinfo[i].name, "SOL") == 0) ||
501 (strcmp(*atoms->resinfo[i].name, "WAT") == 0) ||
502 (strcmp(*atoms->resinfo[i].name, "HOH") == 0) )
508 for (i = 0; (i < atoms->nr); i++)
510 gmx_atomprop_query(aps, epropMass,
511 *atoms->resinfo[atoms->atom[i].resind].name,
512 *atoms->atomname[i], &mm);
518 fprintf(stderr, "Volume : %10g (nm^3)\n", vol);
519 fprintf(stderr, "Density : %10g (g/l)\n",
520 (mtot*1e24)/(AVOGADRO*vol));
521 fprintf(stderr, "Number of SOL molecules: %5d \n\n", nsol);
523 /* open topology file and append sol molecules */
524 topinout = ftp2fn(efTOP, NFILE, fnm);
525 if (ftp2bSet(efTOP, NFILE, fnm) )
527 fprintf(stderr, "Processing topology\n");
528 fpin = gmx_ffopen(topinout, "r");
529 fpout = gmx_ffopen(TEMP_FILENM, "w");
531 bSystem = bMolecules = FALSE;
532 while (fgets(buf, STRLEN, fpin))
537 if ((temp = strchr(buf2, '\n')) != NULL)
545 if ((temp = strchr(buf2, '\n')) != NULL)
550 if (buf2[strlen(buf2)-1] == ']')
552 buf2[strlen(buf2)-1] = '\0';
555 bSystem = (gmx_strcasecmp(buf2, "system") == 0);
556 bMolecules = (gmx_strcasecmp(buf2, "molecules") == 0);
559 else if (bSystem && nsol && (buf[0] != ';') )
561 /* if sol present, append "in water" to system name */
563 if (buf2[0] && (!strstr(buf2, " water")) )
565 sprintf(buf, "%s in water\n", buf2);
571 /* check if this is a line with solvent molecules */
572 sscanf(buf, "%4095s", buf2);
573 if (strcmp(buf2, "SOL") == 0)
575 sscanf(buf, "%*4095s %20d", &i);
586 if ((temp = strchr(buf, '\n')) != NULL)
590 fprintf(stdout, "Removing line #%d '%s' from topology file (%s)\n",
591 line, buf, topinout);
595 fprintf(fpout, "%s", buf);
601 fprintf(stdout, "Adding line for %d solvent molecules to "
602 "topology file (%s)\n", nsol, topinout);
603 fprintf(fpout, "%-15s %5d\n", "SOL", nsol);
606 /* use gmx_ffopen to generate backup of topinout */
607 fpout = gmx_ffopen(topinout, "w");
609 rename(TEMP_FILENM, topinout);
614 int gmx_solvate(int argc, char *argv[])
616 const char *desc[] = {
617 "[THISMODULE] can do one of 2 things:[PAR]",
619 "1) Generate a box of solvent. Specify [TT]-cs[tt] and [TT]-box[tt].",
620 "Or specify [TT]-cs[tt] and [TT]-cp[tt] with a structure file with",
621 "a box, but without atoms.[PAR]",
623 "2) Solvate a solute configuration, e.g. a protein, in a bath of solvent ",
624 "molecules. Specify [TT]-cp[tt] (solute) and [TT]-cs[tt] (solvent). ",
625 "The box specified in the solute coordinate file ([TT]-cp[tt]) is used,",
626 "unless [TT]-box[tt] is set.",
627 "If you want the solute to be centered in the box,",
628 "the program [gmx-editconf] has sophisticated options",
629 "to change the box dimensions and center the solute.",
630 "Solvent molecules are removed from the box where the ",
631 "distance between any atom of the solute molecule(s) and any atom of ",
632 "the solvent molecule is less than the sum of the scaled van der Waals",
633 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
634 "Waals radii is read by the program, and the resulting radii scaled",
635 "by [TT]-scale[tt]. If radii are not found in the database, those"
636 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].[PAR]",
638 "The default solvent is Simple Point Charge water (SPC), with coordinates ",
639 "from [TT]$GMXLIB/spc216.gro[tt]. These coordinates can also be used",
640 "for other 3-site water models, since a short equibilibration will remove",
641 "the small differences between the models.",
642 "Other solvents are also supported, as well as mixed solvents. The",
643 "only restriction to solvent types is that a solvent molecule consists",
644 "of exactly one residue. The residue information in the coordinate",
645 "files is used, and should therefore be more or less consistent.",
646 "In practice this means that two subsequent solvent molecules in the ",
647 "solvent coordinate file should have different residue number.",
648 "The box of solute is built by stacking the coordinates read from",
649 "the coordinate file. This means that these coordinates should be ",
650 "equlibrated in periodic boundary conditions to ensure a good",
651 "alignment of molecules on the stacking interfaces.",
652 "The [TT]-maxsol[tt] option simply adds only the first [TT]-maxsol[tt]",
653 "solvent molecules and leaves out the rest that would have fitted",
654 "into the box. This can create a void that can cause problems later.",
655 "Choose your volume wisely.[PAR]",
657 "The program can optionally rotate the solute molecule to align the",
658 "longest molecule axis along a box edge. This way the amount of solvent",
659 "molecules necessary is reduced.",
660 "It should be kept in mind that this only works for",
661 "short simulations, as e.g. an alpha-helical peptide in solution can ",
662 "rotate over 90 degrees, within 500 ps. In general it is therefore ",
663 "better to make a more or less cubic box.[PAR]",
665 "Setting [TT]-shell[tt] larger than zero will place a layer of water of",
666 "the specified thickness (nm) around the solute. Hint: it is a good",
667 "idea to put the protein in the center of a box first (using [gmx-editconf]).",
670 "Finally, [THISMODULE] will optionally remove lines from your topology file in ",
671 "which a number of solvent molecules is already added, and adds a ",
672 "line with the total number of solvent molecules in your coordinate file."
675 const char *bugs[] = {
676 "Molecules must be whole in the initial configurations.",
680 gmx_bool bProt, bBox;
681 const char *conf_prot, *confout;
682 real *exclusionDistances = NULL;
685 /* protein configuration data */
688 rvec *x = NULL, *v = NULL;
692 /* other data types */
693 int atoms_added, residues_added;
696 { efSTX, "-cp", "protein", ffOPTRD },
697 { efSTX, "-cs", "spc216", ffLIBRD},
698 { efSTO, NULL, NULL, ffWRITE},
699 { efTOP, NULL, NULL, ffOPTRW},
701 #define NFILE asize(fnm)
703 static real defaultDistance = 0.105, r_shell = 0, scaleFactor = 0.57;
704 static rvec new_box = {0.0, 0.0, 0.0};
705 static gmx_bool bReadV = FALSE;
706 static int max_sol = 0;
709 { "-box", FALSE, etRVEC, {new_box},
710 "Box size (in nm)" },
711 { "-radius", FALSE, etREAL, {&defaultDistance},
712 "Default van der Waals distance"},
713 { "-scale", FALSE, etREAL, {&scaleFactor},
714 "Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water." },
715 { "-shell", FALSE, etREAL, {&r_shell},
716 "Thickness of optional water layer around solute" },
717 { "-maxsol", FALSE, etINT, {&max_sol},
718 "Maximum number of solvent molecules to add if they fit in the box. If zero (default) this is ignored" },
719 { "-vel", FALSE, etBOOL, {&bReadV},
720 "Keep velocities from input solute and solvent" },
723 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa,
724 asize(desc), desc, asize(bugs), bugs, &oenv))
729 const char *solventFileName = opt2fn("-cs", NFILE, fnm);
730 bProt = opt2bSet("-cp", NFILE, fnm);
731 bBox = opt2parg_bSet("-box", asize(pa), pa);
736 gmx_fatal(FARGS, "When no solute (-cp) is specified, "
737 "a box size (-box) must be specified");
740 aps = gmx_atomprop_init();
743 init_t_atoms(atoms, 0, FALSE);
746 /* Generate a solute configuration */
747 conf_prot = opt2fn("-cp", NFILE, fnm);
748 title = readConformation(conf_prot, atoms, &x,
749 bReadV ? &v : NULL, &ePBC, box);
750 exclusionDistances = makeExclusionDistances(atoms, aps, defaultDistance, scaleFactor);
754 fprintf(stderr, "Note: no velocities found\n");
758 fprintf(stderr, "Note: no atoms in %s\n", conf_prot);
766 box[XX][XX] = new_box[XX];
767 box[YY][YY] = new_box[YY];
768 box[ZZ][ZZ] = new_box[ZZ];
772 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
773 "or give explicit -box command line option");
776 add_solv(solventFileName, atoms, &x, v ? &v : NULL, &exclusionDistances, ePBC, box,
777 aps, defaultDistance, scaleFactor, &atoms_added, &residues_added, r_shell, max_sol,
780 /* write new configuration 1 to file confout */
781 confout = ftp2fn(efSTO, NFILE, fnm);
782 fprintf(stderr, "Writing generated configuration to %s\n", confout);
785 write_sto_conf(confout, title, atoms, x, v, ePBC, box);
786 /* print box sizes and box type to stderr */
787 fprintf(stderr, "%s\n", title);
791 write_sto_conf(confout, "Generated by gmx solvate", atoms, x, v, ePBC, box);
794 /* print size of generated configuration */
795 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
796 atoms->nr, atoms->nres);
797 update_top(atoms, box, NFILE, fnm, aps);
799 gmx_atomprop_destroy(aps);
800 sfree(exclusionDistances);