2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/fileio/confio.h"
50 #include "gromacs/fileio/futil.h"
54 #include "gmx_fatal.h"
55 #include "gromacs/commandline/pargs.h"
56 #include "gromacs/gmxlib/conformation-utilities.h"
58 #include "read-conformation.h"
59 #include "gromacs/fileio/pdbio.h"
63 static void print_stat(rvec *x, int natoms, matrix box)
67 for (m = 0; (m < DIM); m++)
72 for (i = 0; (i < natoms); i++)
74 for (m = 0; m < DIM; m++)
76 xmin[m] = min(xmin[m], x[i][m]);
77 xmax[m] = max(xmax[m], x[i][m]);
80 for (m = 0; (m < DIM); m++)
82 fprintf(stderr, "DIM %d XMIN %8.3f XMAX %8.3f BOX %8.3f\n",
83 m, xmin[m], xmax[m], box[m][m]);
96 static void sort_molecule(t_atoms **atoms_solvt, rvec *x, rvec *v, real *r)
98 int atnr, i, j, moltp = 0, nrmoltypes, resi_o, resi_n, resnr;
100 t_atoms *atoms, *newatoms;
101 rvec *newx, *newv = NULL;
104 fprintf(stderr, "Sorting configuration\n");
106 atoms = *atoms_solvt;
108 /* copy each residue from *atoms to a molecule in *molecule */
111 for (i = 0; i < atoms->nr; i++)
113 if ( (i == 0) || (atoms->atom[i].resind != atoms->atom[i-1].resind) )
115 /* see if this was a molecule type we haven't had yet: */
117 for (j = 0; (j < nrmoltypes) && (moltp == NOTSET); j++)
119 if (strcmp(*(atoms->resinfo[atoms->atom[i].resind].name),
120 moltypes[j].name) == 0)
129 srenew(moltypes, nrmoltypes);
130 moltypes[moltp].name = *(atoms->resinfo[atoms->atom[i].resind].name);
132 while ((i+atnr < atoms->nr) &&
133 (atoms->atom[i].resind == atoms->atom[i+atnr].resind))
137 moltypes[moltp].natoms = atnr;
138 moltypes[moltp].nmol = 0;
140 moltypes[moltp].nmol++;
144 fprintf(stderr, "Found %d%s molecule type%s:\n",
145 nrmoltypes, nrmoltypes == 1 ? "" : " different", nrmoltypes == 1 ? "" : "s");
146 for (j = 0; j < nrmoltypes; j++)
150 moltypes[j].res0 = 0;
154 moltypes[j].res0 = moltypes[j-1].res0+moltypes[j-1].nmol;
156 fprintf(stderr, "%7s (%4d atoms): %5d residues\n",
157 moltypes[j].name, moltypes[j].natoms, moltypes[j].nmol);
160 /* if we have only 1 moleculetype, we don't have to sort */
163 /* find out which molecules should go where: */
164 moltypes[0].i = moltypes[0].i0 = 0;
165 for (j = 1; j < nrmoltypes; j++)
169 moltypes[j-1].i0+moltypes[j-1].natoms*moltypes[j-1].nmol;
172 /* now put them there: */
174 init_t_atoms(newatoms, atoms->nr, FALSE);
175 newatoms->nres = atoms->nres;
176 snew(newatoms->resinfo, atoms->nres);
177 snew(newx, atoms->nr);
180 snew(newv, atoms->nr);
182 snew(newr, atoms->nr);
187 for (moltp = 0; moltp < nrmoltypes; moltp++)
190 while (i < atoms->nr)
192 resi_o = atoms->atom[i].resind;
193 if (strcmp(*atoms->resinfo[resi_o].name, moltypes[moltp].name) == 0)
195 /* Copy the residue info */
196 newatoms->resinfo[resi_n] = atoms->resinfo[resi_o];
197 newatoms->resinfo[resi_n].nr = resnr;
198 /* Copy the atom info */
201 newatoms->atom[j] = atoms->atom[i];
202 newatoms->atomname[j] = atoms->atomname[i];
203 newatoms->atom[j].resind = resi_n;
204 copy_rvec(x[i], newx[j]);
207 copy_rvec(v[i], newv[j]);
213 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
214 /* Increase the new residue counters */
220 /* Skip this residue */
225 while (i < atoms->nr && atoms->atom[i].resind == resi_o);
230 /* put them back into the original arrays and throw away temporary arrays */
231 sfree(atoms->atomname);
232 sfree(atoms->resinfo);
235 *atoms_solvt = newatoms;
236 for (i = 0; i < (*atoms_solvt)->nr; i++)
238 copy_rvec(newx[i], x[i]);
241 copy_rvec(newv[i], v[i]);
255 static void rm_res_pbc(t_atoms *atoms, rvec *x, matrix box)
257 int i, start, n, d, nat;
263 for (n = 0; n < atoms->nr; n++)
265 if (!is_hydrogen(*(atoms->atomname[n])))
270 if ( (n+1 == atoms->nr) ||
271 (atoms->atom[n+1].resind != atoms->atom[n].resind) )
273 /* if nat==0 we have only hydrogens in the solvent,
274 we take last coordinate as cg */
278 copy_rvec(x[n], xcg);
280 svmul(1.0/nat, xcg, xcg);
281 for (d = 0; d < DIM; d++)
285 for (i = start; i <= n; i++)
287 x[i][d] += box[d][d];
291 while (xcg[d] >= box[d][d])
293 for (i = start; i <= n; i++)
295 x[i][d] -= box[d][d];
307 /* Make a new configuration by adding boxes*/
308 static void make_new_conformation(t_atoms *atoms, rvec *x, rvec *v, real *r, matrix box, ivec n_box)
310 int i, ix, iy, iz, m, j, imol, offset;
314 nmol = n_box[XX]*n_box[YY]*n_box[ZZ];
317 fprintf(stderr, "Generating configuration\n");
319 for (ix = 0; (ix < n_box[XX]); ix++)
321 delta[XX] = ix*box[XX][XX];
322 for (iy = 0; (iy < n_box[YY]); iy++)
324 delta[YY] = iy*box[YY][YY];
325 for (iz = 0; (iz < n_box[ZZ]); iz++)
327 delta[ZZ] = iz*box[ZZ][ZZ];
328 offset = imol*atoms->nr;
329 for (i = 0; (i < atoms->nr); i++)
331 for (m = 0; (m < DIM); m++)
333 x[offset+i][m] = delta[m]+x[i][m];
337 for (m = 0; (m < DIM); m++)
339 v[offset+i][m] = v[i][m];
348 for (i = 1; (i < nmol); i++)
350 int offs = i*atoms->nr;
351 int offsres = i*atoms->nres;
352 for (j = 0; (j < atoms->nr); j++)
354 atoms->atomname[offs+j] = atoms->atomname[j];
355 atoms->atom[offs+j].resind = atoms->atom[j].resind + offsres;
356 atoms->resinfo[atoms->atom[offs+j].resind] =
357 atoms->resinfo[atoms->atom[j].resind];
358 atoms->resinfo[atoms->atom[offs+j].resind].nr += offsres;
363 for (i = 0; i < DIM; i++)
365 for (j = 0; j < DIM; j++)
367 box[j][i] *= n_box[j];
372 static void add_solv(const char *fn, t_atoms *atoms, rvec **x, rvec **v, real **r,
373 int ePBC, matrix box,
375 real r_distance, real r_scale, int *atoms_added,
376 int *residues_added, real rshell, int max_sol,
377 const output_env_t oenv)
381 char filename[STRLEN];
382 char title_solvt[STRLEN];
383 t_atoms *atoms_solvt;
384 rvec *x_solvt, *v_solvt = NULL;
392 strncpy(filename, lfn, STRLEN);
394 snew(atoms_solvt, 1);
395 get_stx_coordnum(filename, &(atoms_solvt->nr));
396 if (atoms_solvt->nr == 0)
398 gmx_fatal(FARGS, "No solvent in %s, please check your input\n", filename);
400 snew(x_solvt, atoms_solvt->nr);
403 snew(v_solvt, atoms_solvt->nr);
405 snew(r_solvt, atoms_solvt->nr);
406 snew(atoms_solvt->resinfo, atoms_solvt->nr);
407 snew(atoms_solvt->atomname, atoms_solvt->nr);
408 snew(atoms_solvt->atom, atoms_solvt->nr);
409 atoms_solvt->pdbinfo = NULL;
410 fprintf(stderr, "Reading solvent configuration%s\n",
411 v_solvt ? " and velocities" : "");
412 read_stx_conf(filename, title_solvt, atoms_solvt, x_solvt, v_solvt,
413 &ePBC_solvt, box_solvt);
414 fprintf(stderr, "\"%s\"\n", title_solvt);
415 fprintf(stderr, "solvent configuration contains %d atoms in %d residues\n",
416 atoms_solvt->nr, atoms_solvt->nres);
417 fprintf(stderr, "\n");
419 /* apply pbc for solvent configuration for whole molecules */
420 rm_res_pbc(atoms_solvt, x_solvt, box_solvt);
422 /* initialise van der waals arrays for solvent configuration */
423 mk_vdw(atoms_solvt, r_solvt, aps, r_distance, r_scale);
425 /* calculate the box multiplication factors n_box[0...DIM] */
427 for (i = 0; (i < DIM); i++)
430 while (n_box[i]*box_solvt[i][i] < box[i][i])
436 fprintf(stderr, "Will generate new solvent configuration of %dx%dx%d boxes\n",
437 n_box[XX], n_box[YY], n_box[ZZ]);
439 /* realloc atoms_solvt for the new solvent configuration */
440 srenew(atoms_solvt->resinfo, atoms_solvt->nres*nmol);
441 srenew(atoms_solvt->atomname, atoms_solvt->nr*nmol);
442 srenew(atoms_solvt->atom, atoms_solvt->nr*nmol);
443 srenew(x_solvt, atoms_solvt->nr*nmol);
446 srenew(v_solvt, atoms_solvt->nr*nmol);
448 srenew(r_solvt, atoms_solvt->nr*nmol);
450 /* generate a new solvent configuration */
451 make_new_conformation(atoms_solvt, x_solvt, v_solvt, r_solvt, box_solvt, n_box);
454 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
458 print_stat(x_solvt, atoms_solvt->nr, box_solvt);
460 /* Sort the solvent mixture, not the protein... */
461 sort_molecule(&atoms_solvt, x_solvt, v_solvt, r_solvt);
463 /* add the two configurations */
466 add_conf(atoms, x, v, r, TRUE, ePBC, box, FALSE,
467 atoms_solvt, x_solvt, v_solvt, r_solvt, TRUE, rshell, max_sol, oenv);
468 *atoms_added = atoms->nr-onr;
469 *residues_added = atoms->nres-onres;
474 fprintf(stderr, "Generated solvent containing %d atoms in %d residues\n",
475 *atoms_added, *residues_added);
478 static void update_top(t_atoms *atoms, matrix box, int NFILE, t_filenm fnm[],
481 #define TEMP_FILENM "temp.top"
483 char buf[STRLEN], buf2[STRLEN], *temp;
484 const char *topinout;
486 gmx_bool bSystem, bMolecules, bSkip;
491 for (i = 0; (i < atoms->nres); i++)
493 /* calculate number of SOLvent molecules */
494 if ( (strcmp(*atoms->resinfo[i].name, "SOL") == 0) ||
495 (strcmp(*atoms->resinfo[i].name, "WAT") == 0) ||
496 (strcmp(*atoms->resinfo[i].name, "HOH") == 0) )
502 for (i = 0; (i < atoms->nr); i++)
504 gmx_atomprop_query(aps, epropMass,
505 *atoms->resinfo[atoms->atom[i].resind].name,
506 *atoms->atomname[i], &mm);
512 fprintf(stderr, "Volume : %10g (nm^3)\n", vol);
513 fprintf(stderr, "Density : %10g (g/l)\n",
514 (mtot*1e24)/(AVOGADRO*vol));
515 fprintf(stderr, "Number of SOL molecules: %5d \n\n", nsol);
517 /* open topology file and append sol molecules */
518 topinout = ftp2fn(efTOP, NFILE, fnm);
519 if (ftp2bSet(efTOP, NFILE, fnm) )
521 fprintf(stderr, "Processing topology\n");
522 fpin = ffopen(topinout, "r");
523 fpout = ffopen(TEMP_FILENM, "w");
525 bSystem = bMolecules = FALSE;
526 while (fgets(buf, STRLEN, fpin))
531 if ((temp = strchr(buf2, '\n')) != NULL)
539 if ((temp = strchr(buf2, '\n')) != NULL)
544 if (buf2[strlen(buf2)-1] == ']')
546 buf2[strlen(buf2)-1] = '\0';
549 bSystem = (gmx_strcasecmp(buf2, "system") == 0);
550 bMolecules = (gmx_strcasecmp(buf2, "molecules") == 0);
553 else if (bSystem && nsol && (buf[0] != ';') )
555 /* if sol present, append "in water" to system name */
557 if (buf2[0] && (!strstr(buf2, " water")) )
559 sprintf(buf, "%s in water\n", buf2);
565 /* check if this is a line with solvent molecules */
566 sscanf(buf, "%4095s", buf2);
567 if (strcmp(buf2, "SOL") == 0)
569 sscanf(buf, "%*4095s %20d", &i);
580 if ((temp = strchr(buf, '\n')) != NULL)
584 fprintf(stdout, "Removing line #%d '%s' from topology file (%s)\n",
585 line, buf, topinout);
589 fprintf(fpout, "%s", buf);
595 fprintf(stdout, "Adding line for %d solvent molecules to "
596 "topology file (%s)\n", nsol, topinout);
597 fprintf(fpout, "%-15s %5d\n", "SOL", nsol);
600 /* use ffopen to generate backup of topinout */
601 fpout = ffopen(topinout, "w");
603 rename(TEMP_FILENM, topinout);
608 int gmx_solvate(int argc, char *argv[])
610 const char *desc[] = {
611 "[THISMODULE] can do one of 2 things:[PAR]",
613 "1) Generate a box of solvent. Specify [TT]-cs[tt] and [TT]-box[tt].",
614 "Or specify [TT]-cs[tt] and [TT]-cp[tt] with a structure file with",
615 "a box, but without atoms.[PAR]",
617 "2) Solvate a solute configuration, e.g. a protein, in a bath of solvent ",
618 "molecules. Specify [TT]-cp[tt] (solute) and [TT]-cs[tt] (solvent). ",
619 "The box specified in the solute coordinate file ([TT]-cp[tt]) is used,",
620 "unless [TT]-box[tt] is set.",
621 "If you want the solute to be centered in the box,",
622 "the program [gmx-editconf] has sophisticated options",
623 "to change the box dimensions and center the solute.",
624 "Solvent molecules are removed from the box where the ",
625 "distance between any atom of the solute molecule(s) and any atom of ",
626 "the solvent molecule is less than the sum of the van der Waals radii of ",
627 "both atoms. A database ([TT]vdwradii.dat[tt]) of van der Waals radii is ",
628 "read by the program, and atoms not in the database are ",
629 "assigned a default distance [TT]-vdwd[tt].",
630 "Note that this option will also influence the distances between",
631 "solvent molecules if they contain atoms that are not in the database.",
634 "The default solvent is Simple Point Charge water (SPC), with coordinates ",
635 "from [TT]$GMXLIB/spc216.gro[tt]. These coordinates can also be used",
636 "for other 3-site water models, since a short equibilibration will remove",
637 "the small differences between the models.",
638 "Other solvents are also supported, as well as mixed solvents. The",
639 "only restriction to solvent types is that a solvent molecule consists",
640 "of exactly one residue. The residue information in the coordinate",
641 "files is used, and should therefore be more or less consistent.",
642 "In practice this means that two subsequent solvent molecules in the ",
643 "solvent coordinate file should have different residue number.",
644 "The box of solute is built by stacking the coordinates read from",
645 "the coordinate file. This means that these coordinates should be ",
646 "equlibrated in periodic boundary conditions to ensure a good",
647 "alignment of molecules on the stacking interfaces.",
648 "The [TT]-maxsol[tt] option simply adds only the first [TT]-maxsol[tt]",
649 "solvent molecules and leaves out the rest that would have fitted",
650 "into the box. This can create a void that can cause problems later.",
651 "Choose your volume wisely.[PAR]",
653 "The program can optionally rotate the solute molecule to align the",
654 "longest molecule axis along a box edge. This way the amount of solvent",
655 "molecules necessary is reduced.",
656 "It should be kept in mind that this only works for",
657 "short simulations, as e.g. an alpha-helical peptide in solution can ",
658 "rotate over 90 degrees, within 500 ps. In general it is therefore ",
659 "better to make a more or less cubic box.[PAR]",
661 "Setting [TT]-shell[tt] larger than zero will place a layer of water of",
662 "the specified thickness (nm) around the solute. Hint: it is a good",
663 "idea to put the protein in the center of a box first (using [gmx-editconf]).",
666 "Finally, [THISMODULE] will optionally remove lines from your topology file in ",
667 "which a number of solvent molecules is already added, and adds a ",
668 "line with the total number of solvent molecules in your coordinate file."
671 const char *bugs[] = {
672 "Molecules must be whole in the initial configurations.",
676 gmx_bool bProt, bBox;
677 const char *conf_prot, *confout;
681 /* protein configuration data */
688 /* other data types */
689 int atoms_added, residues_added;
692 { efSTX, "-cp", "protein", ffOPTRD },
693 { efSTX, "-cs", "spc216", ffLIBRD},
694 { efSTO, NULL, NULL, ffWRITE},
695 { efTOP, NULL, NULL, ffOPTRW},
697 #define NFILE asize(fnm)
699 static real r_distance = 0.105, r_shell = 0, r_scale = 0.57;
700 static rvec new_box = {0.0, 0.0, 0.0};
701 static gmx_bool bReadV = FALSE;
702 static int max_sol = 0;
705 { "-box", FALSE, etRVEC, {new_box},
706 "Box size (in nm)" },
707 { "-vdwd", FALSE, etREAL, {&r_distance},
708 "Default van der Waals distance"},
709 { "-vdwscale", FALSE, etREAL, {&r_scale},
710 "HIDDENScale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water." },
711 { "-shell", FALSE, etREAL, {&r_shell},
712 "Thickness of optional water layer around solute" },
713 { "-maxsol", FALSE, etINT, {&max_sol},
714 "Maximum number of solvent molecules to add if they fit in the box. If zero (default) this is ignored" },
715 { "-vel", FALSE, etBOOL, {&bReadV},
716 "Keep velocities from input solute and solvent" },
719 if (!parse_common_args(&argc, argv, PCA_BE_NICE, NFILE, fnm, asize(pa), pa,
720 asize(desc), desc, asize(bugs), bugs, &oenv))
725 const char *solventFileName = opt2fn("-cs", NFILE, fnm);
726 bProt = opt2bSet("-cp", NFILE, fnm);
727 bBox = opt2parg_bSet("-box", asize(pa), pa);
732 gmx_fatal(FARGS, "When no solute (-cp) is specified, "
733 "a box size (-box) must be specified");
736 aps = gmx_atomprop_init();
740 /* Generate a solute configuration */
741 conf_prot = opt2fn("-cp", NFILE, fnm);
742 title = read_conformation(conf_prot, &atoms, &x, bReadV ? &v : NULL, &r, &ePBC, box,
743 aps, r_distance, r_scale);
746 fprintf(stderr, "Note: no velocities found\n");
750 fprintf(stderr, "Note: no atoms in %s\n", conf_prot);
758 atoms.resinfo = NULL;
759 atoms.atomname = NULL;
761 atoms.pdbinfo = NULL;
769 box[XX][XX] = new_box[XX];
770 box[YY][YY] = new_box[YY];
771 box[ZZ][ZZ] = new_box[ZZ];
775 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
776 "or give explicit -box command line option");
779 add_solv(solventFileName, &atoms, &x, v ? &v : NULL, &r, ePBC, box,
780 aps, r_distance, r_scale, &atoms_added, &residues_added, r_shell, max_sol,
783 /* write new configuration 1 to file confout */
784 confout = ftp2fn(efSTO, NFILE, fnm);
785 fprintf(stderr, "Writing generated configuration to %s\n", confout);
788 write_sto_conf(confout, title, &atoms, x, v, ePBC, box);
789 /* print box sizes and box type to stderr */
790 fprintf(stderr, "%s\n", title);
794 write_sto_conf(confout, "Generated by gmx solvate", &atoms, x, v, ePBC, box);
797 /* print size of generated configuration */
798 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
799 atoms.nr, atoms.nres);
800 update_top(&atoms, box, NFILE, fnm, aps);
802 gmx_atomprop_destroy(aps);