2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, The GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
49 #include "gromacs/commandline/pargs.h"
50 #include "gromacs/fileio/confio.h"
51 #include "gromacs/fileio/gmxfio.h"
52 #include "gromacs/fileio/pdbio.h"
53 #include "gromacs/fileio/tngio.h"
54 #include "gromacs/fileio/trxio.h"
55 #include "gromacs/fileio/xtcio.h"
56 #include "gromacs/fileio/xvgr.h"
57 #include "gromacs/math/vec.h"
58 #include "gromacs/mdtypes/md_enums.h"
59 #include "gromacs/topology/index.h"
60 #include "gromacs/trajectory/trajectoryframe.h"
61 #include "gromacs/utility/arrayref.h"
62 #include "gromacs/utility/arraysize.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/futil.h"
65 #include "gromacs/utility/smalloc.h"
66 #include "gromacs/utility/stringutil.h"
68 #define TIME_EXPLICIT 0
69 #define TIME_CONTINUE 1
74 #define FLAGS (TRX_READ_X | TRX_READ_V | TRX_READ_F)
76 static void scan_trj_files(gmx::ArrayRef<const std::string> files,
80 const gmx_output_env_t* oenv)
82 /* Check start time of all files */
88 for (gmx::index i = 0; i < files.ssize(); i++)
90 ok = read_first_frame(oenv, &status, files[i].c_str(), &fr, FLAGS);
94 gmx_fatal(FARGS, "\nCouldn't read frame from file.");
98 readtime[i] = fr.time;
103 fprintf(stderr, "\nWARNING: Couldn't find a time in the frame.\n");
114 if (natoms != fr.natoms)
116 gmx_fatal(FARGS, "\nDifferent numbers of atoms (%d/%d) in files", natoms, fr.natoms);
121 if (fr.natoms <= imax)
123 gmx_fatal(FARGS, "\nNot enough atoms (%d) for index group (%d)", fr.natoms, imax);
127 ok = read_next_frame(oenv, status, &fr);
130 timestep[i] = fr.time - readtime[i];
151 fprintf(stderr, "\n");
154 static void sort_files(gmx::ArrayRef<std::string> files, real* settime)
156 for (gmx::index i = 0; i < files.ssize(); i++)
158 gmx::index minidx = i;
159 for (gmx::index j = i + 1; j < files.ssize(); j++)
161 if (settime[j] < settime[minidx])
168 real timeswap = settime[i];
169 settime[i] = settime[minidx];
170 settime[minidx] = timeswap;
171 std::swap(files[i], files[minidx]);
176 static void edit_files(gmx::ArrayRef<std::string> files,
183 const gmx_output_env_t* oenv)
186 char inputstring[STRLEN], *chptr;
188 auto timeUnit = output_env_get_time_unit(oenv);
192 "\n\nEnter the new start time (%s) for each file.\n"
193 "There are two special options, both disable sorting:\n\n"
194 "c (continue) - The start time is taken from the end\n"
195 "of the previous file. Use it when your continuation run\n"
196 "restarts with t=0.\n\n"
197 "l (last) - The time in this file will be changed the\n"
198 "same amount as in the previous. Use it when the time in the\n"
199 "new run continues from the end of the previous one,\n"
200 "since this takes possible overlap into account.\n\n",
204 " File Current start (%s) New start (%s)\n"
205 "---------------------------------------------------------\n",
209 for (gmx::index i = 0; i < files.ssize(); i++)
214 output_env_conv_time(oenv, readtime[i]),
219 if (nullptr == fgets(inputstring, STRLEN - 1, stdin))
221 gmx_fatal(FARGS, "Error reading user input");
224 inputstring[std::strlen(inputstring) - 1] = 0;
226 if (inputstring[0] == 'c' || inputstring[0] == 'C')
228 cont_type[i] = TIME_CONTINUE;
231 settime[i] = FLT_MAX;
233 else if (inputstring[0] == 'l' || inputstring[0] == 'L')
235 cont_type[i] = TIME_LAST;
238 settime[i] = FLT_MAX;
242 settime[i] = strtod(inputstring, &chptr) * output_env_get_time_invfactor(oenv);
243 if (chptr == inputstring)
246 "'%s' not recognized as a floating point number, 'c' or 'l'. "
252 cont_type[i] = TIME_EXPLICIT;
258 if (cont_type[0] != TIME_EXPLICIT)
260 cont_type[0] = TIME_EXPLICIT;
266 for (gmx::index i = 0; i < files.ssize(); i++)
268 settime[i] = readtime[i];
273 fprintf(stderr, "Sorting disabled.\n");
277 sort_files(files, settime);
279 /* Write out the new order and start times */
281 "\nSummary of files and start times used:\n\n"
282 " File Start time Time step\n"
283 "---------------------------------------------------------\n");
284 for (gmx::index i = 0; i < files.ssize(); i++)
286 switch (cont_type[i])
290 "%25s %10.3f %s %10.3f %s",
292 output_env_conv_time(oenv, settime[i]),
294 output_env_conv_time(oenv, timestep[i]),
296 if (i > 0 && cont_type[i - 1] == TIME_EXPLICIT && settime[i] == settime[i - 1])
298 fprintf(stderr, " WARNING: same Start time as previous");
300 fprintf(stderr, "\n");
303 fprintf(stderr, "%25s Continue from last file\n", files[i].c_str());
306 fprintf(stderr, "%25s Change by same amount as last file\n", files[i].c_str());
310 fprintf(stderr, "\n");
312 settime[files.size()] = FLT_MAX;
313 cont_type[files.size()] = TIME_EXPLICIT;
314 readtime[files.size()] = FLT_MAX;
317 static void do_demux(gmx::ArrayRef<const std::string> inFiles,
318 gmx::ArrayRef<const std::string> outFiles,
326 const gmx_output_env_t* oenv)
329 t_trxstatus **fp_in, **fp_out;
330 gmx_bool bCont, *bSet;
331 real t, first_time = 0;
334 snew(fp_in, inFiles.size());
335 snew(trx, inFiles.size());
336 snew(bSet, inFiles.size());
339 for (gmx::index i = 0; i < inFiles.ssize(); i++)
341 read_first_frame(oenv, &(fp_in[i]), inFiles[i].c_str(), &(trx[i]), TRX_NEED_X);
344 natoms = trx[i].natoms;
345 first_time = trx[i].time;
347 else if (natoms != trx[i].natoms)
350 "Trajectory file %s has %d atoms while previous trajs had %d atoms",
359 else if (t != trx[i].time)
362 "Trajectory file %s has time %f while previous trajs had time %f",
369 snew(fp_out, inFiles.size());
370 for (gmx::index i = 0; i < inFiles.ssize(); i++)
372 fp_out[i] = open_trx(outFiles[i].c_str(), "w");
375 if (std::round(time[k] - t) != 0)
377 gmx_fatal(FARGS, "First time in demuxing table does not match trajectories");
381 while ((k + 1 < nval) && ((trx[0].time - time[k + 1]) > dt_remd * 0.1))
387 fprintf(debug, "trx[0].time = %g, time[k] = %g\n", trx[0].time, time[k]);
389 for (gmx::index i = 0; i < inFiles.ssize(); i++)
393 for (gmx::index i = 0; i < inFiles.ssize(); i++)
395 int j = gmx::roundToInt(value[i][k]);
396 range_check(j, 0, inFiles.size());
399 gmx_fatal(FARGS, "Demuxing the same replica %d twice at time %f", j, trx[0].time);
403 if (dt == 0 || bRmod(trx[i].time, first_time, dt))
407 write_trxframe_indexed(fp_out[j], &trx[i], isize, index, nullptr);
411 write_trxframe(fp_out[j], &trx[i], nullptr);
417 for (gmx::index i = 0; i < inFiles.ssize(); i++)
419 bCont = bCont && read_next_frame(oenv, fp_in[i], &trx[i]);
423 for (gmx::index i = 0; i < inFiles.ssize(); i++)
426 close_trx(fp_out[i]);
430 int gmx_trjcat(int argc, char* argv[])
432 const char* desc[] = {
433 "[THISMODULE] concatenates several input trajectory files in sorted order. ",
434 "In case of double time frames the one in the later file is used. ",
435 "By specifying [TT]-settime[tt] you will be asked for the start time ",
436 "of each file. The input files are taken from the command line, ",
437 "such that a command like [TT]gmx trjcat -f *.trr -o fixed.trr[tt] should do ",
438 "the trick. Using [TT]-cat[tt], you can simply paste several files ",
439 "together without removal of frames with identical time stamps.[PAR]",
440 "One important option is inferred when the output file is amongst the",
441 "input files. In that case that particular file will be appended to",
442 "which implies you do not need to store double the amount of data.",
443 "Obviously the file to append to has to be the one with lowest starting",
444 "time since one can only append at the end of a file.[PAR]",
445 "If the [TT]-demux[tt] option is given, the N trajectories that are",
446 "read, are written in another order as specified in the [REF].xvg[ref] file.",
447 "The [REF].xvg[ref] file should contain something like::",
452 "The first number is the time, and subsequent numbers point to",
453 "trajectory indices.",
454 "The frames corresponding to the numbers present at the first line",
455 "are collected into the output trajectory. If the number of frames in",
456 "the trajectory does not match that in the [REF].xvg[ref] file then the program",
457 "tries to be smart. Beware."
459 static gmx_bool bCat = FALSE;
460 static gmx_bool bSort = TRUE;
461 static gmx_bool bKeepLast = FALSE;
462 static gmx_bool bKeepLastAppend = FALSE;
463 static gmx_bool bOverwrite = FALSE;
464 static gmx_bool bSetTime = FALSE;
465 static gmx_bool bDeMux;
466 static real begin = -1;
467 static real end = -1;
471 { "-b", FALSE, etTIME, { &begin }, "First time to use (%t)" },
472 { "-e", FALSE, etTIME, { &end }, "Last time to use (%t)" },
473 { "-dt", FALSE, etTIME, { &dt }, "Only write frame when t MOD dt = first time (%t)" },
474 { "-settime", FALSE, etBOOL, { &bSetTime }, "Change starting time interactively" },
475 { "-sort", FALSE, etBOOL, { &bSort }, "Sort trajectory files (not frames)" },
480 "Keep overlapping frames at end of trajectory" },
485 "Overwrite overlapping frames during appending" },
486 { "-cat", FALSE, etBOOL, { &bCat }, "Do not discard double time frames" }
488 #define npargs asize(pa)
489 int ftpin, i, frame, frame_out;
490 t_trxstatus * status, *trxout = nullptr;
492 t_trxframe fr, frout;
494 gmx_bool bNewFile, bIndex, bWrite;
496 real * readtime, *timest, *settime;
497 real first_time = 0, lasttime = 0, last_ok_t = -1, timestep;
498 gmx_bool lastTimeSet = FALSE;
499 real last_frame_time, searchtime;
501 int * index = nullptr, imax;
503 real ** val = nullptr, *t = nullptr, dt_remd;
504 int n, nset, ftpout = -1, prevEndStep = 0, filetype;
506 gmx_output_env_t* oenv;
507 t_filenm fnm[] = { { efTRX, "-f", nullptr, ffRDMULT },
508 { efTRO, "-o", nullptr, ffWRMULT },
509 { efNDX, "-n", "index", ffOPTRD },
510 { efXVG, "-demux", "remd", ffOPTRD } };
512 #define NFILE asize(fnm)
514 if (!parse_common_args(
515 &argc, argv, PCA_TIME_UNIT, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
520 "Note that major changes are planned in future for "
521 "trjcat, to improve usability and utility.");
523 auto timeUnit = output_env_get_time_unit(oenv);
525 bIndex = ftp2bSet(efNDX, NFILE, fnm);
526 bDeMux = ftp2bSet(efXVG, NFILE, fnm);
527 bSort = bSort && !bDeMux;
532 printf("Select group for output\n");
533 rd_index(ftp2fn(efNDX, NFILE, fnm), 1, &isize, &index, &grpname);
536 for (i = 1; i < isize; i++)
538 imax = std::max(imax, index[i]);
545 val = read_xvg_time(opt2fn("-demux", NFILE, fnm),
547 opt2parg_bSet("-b", npargs, pa),
549 opt2parg_bSet("-e", npargs, pa),
556 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt_remd);
559 fprintf(debug, "Dump of replica_index.xvg\n");
560 for (i = 0; (i < n); i++)
562 fprintf(debug, "%10g", t[i]);
563 for (j = 0; (j < nset); j++)
565 fprintf(debug, " %3d", static_cast<int>(std::round(val[j][i])));
567 fprintf(debug, "\n");
572 gmx::ArrayRef<const std::string> inFiles = opt2fns("-f", NFILE, fnm);
575 gmx_fatal(FARGS, "No input files!");
578 if (bDeMux && ssize(inFiles) != nset)
580 gmx_fatal(FARGS, "You have specified %td files and %d entries in the demux table", inFiles.ssize(), nset);
583 ftpin = fn2ftp(inFiles[0].c_str());
585 if (ftpin != efTRR && ftpin != efXTC && ftpin != efTNG)
587 gmx_fatal(FARGS, "gmx trjcat can only handle binary trajectory formats (trr, xtc, tng)");
590 for (const std::string& inFile : inFiles)
592 if (ftpin != fn2ftp(inFile.c_str()))
594 gmx_fatal(FARGS, "All input files must be of the same (trr, xtc or tng) format");
598 gmx::ArrayRef<const std::string> outFiles = opt2fns("-o", NFILE, fnm);
599 if (outFiles.empty())
601 gmx_fatal(FARGS, "No output files!");
603 if ((outFiles.size() > 1) && !bDeMux)
606 "Don't know what to do with more than 1 output file if not demultiplexing");
608 else if (bDeMux && ssize(outFiles) != nset && outFiles.size() != 1)
611 "Number of output files should be 1 or %d (#input files), not %td",
617 auto outFilesDemux = gmx::copyOf(outFiles);
618 if (gmx::ssize(outFilesDemux) != nset)
620 std::string name = outFilesDemux[0];
621 outFilesDemux.resize(nset);
622 for (i = 0; (i < nset); i++)
624 outFilesDemux[i] = gmx::formatString("%d_%s", i, name.c_str());
627 do_demux(inFiles, outFilesDemux, n, val, t, dt_remd, isize, index, dt, oenv);
631 snew(readtime, inFiles.size() + 1);
632 snew(timest, inFiles.size() + 1);
633 scan_trj_files(inFiles, readtime, timest, imax, oenv);
635 snew(settime, inFiles.size() + 1);
636 snew(cont_type, inFiles.size() + 1);
637 auto inFilesEdited = gmx::copyOf(inFiles);
638 edit_files(inFilesEdited, readtime, timest, settime, cont_type, bSetTime, bSort, oenv);
640 /* Check whether the output file is amongst the input files
641 * This has to be done after sorting etc.
643 const char* out_file = outFiles[0].c_str();
644 ftpout = fn2ftp(out_file);
646 for (size_t i = 0; i < inFilesEdited.size() && n_append == -1; i++)
648 if (std::strcmp(inFilesEdited[i].c_str(), out_file) == 0)
655 fprintf(stderr, "Will append to %s rather than creating a new file\n", out_file);
657 else if (n_append != -1)
660 "Can only append to the first file which is %s (not %s)",
661 inFilesEdited[0].c_str(),
665 /* Not checking input format, could be dangerous :-) */
666 /* Not checking output format, equally dangerous :-) */
670 /* the default is not to change the time at all,
671 * but this is overridden by the edit_files routine
681 gmx_fatal(FARGS, "When writing TNG the input file format must also be TNG");
685 trxout = trjtools_gmx_prepare_tng_writing(out_file,
688 inFilesEdited[0].c_str(),
691 gmx::arrayRefFromArray(index, isize),
696 trxout = trjtools_gmx_prepare_tng_writing(
697 out_file, 'w', nullptr, inFilesEdited[0].c_str(), -1, nullptr, {}, nullptr);
702 trxout = open_trx(out_file, "w");
704 std::memset(&frout, 0, sizeof(frout));
710 if (!read_first_frame(oenv, &status, out_file, &fr, FLAGS))
712 gmx_fatal(FARGS, "Reading first frame from %s", out_file);
715 stfio = trx_get_fileio(status);
716 if (!bKeepLast && !bOverwrite)
719 "\n\nWARNING: Appending without -overwrite implies -keeplast "
720 "between the first two files. \n"
721 "If the trajectories have an overlap and have not been written binary \n"
722 "reproducible this will produce an incorrect trajectory!\n\n");
724 filetype = gmx_fio_getftp(stfio);
725 /* Fails if last frame is incomplete
726 * We can't do anything about it without overwriting
728 if (filetype == efXTC || filetype == efTNG)
730 lasttime = trx_get_time_of_final_frame(status);
735 while (read_next_frame(oenv, status, &fr)) {}
739 bKeepLastAppend = TRUE;
741 trxout = open_trx(out_file, "a");
745 if (gmx_fio_getftp(stfio) != efXTC)
747 gmx_fatal(FARGS, "Overwrite only supported for XTC.");
749 last_frame_time = trx_get_time_of_final_frame(status);
751 /* xtc_seek_time broken for trajectories containing only 1 or 2 frames
752 * or when seek time = 0 */
753 if (inFilesEdited.size() > 1 && settime[1] < last_frame_time + timest[0] * 0.5)
755 /* Jump to one time-frame before the start of next
757 searchtime = settime[1] - timest[0] * 1.25;
761 searchtime = last_frame_time;
763 if (xtc_seek_time(stfio, searchtime, fr.natoms, TRUE))
765 gmx_fatal(FARGS, "Error seeking to append position.");
767 read_next_frame(oenv, status, &fr);
768 if (std::abs(searchtime - fr.time) > timest[0] * 0.5)
770 gmx_fatal(FARGS, "Error seeking: attempted to seek to %f but got %f.", searchtime, fr.time);
774 fpos = gmx_fio_ftell(stfio);
776 trxout = open_trx(out_file, "r+");
777 if (gmx_fio_seek(trx_get_fileio(trxout), fpos))
779 gmx_fatal(FARGS, "Error seeking to append position.");
784 printf("\n Will append after %f \n", lasttime);
788 /* Lets stitch up some files */
789 timestep = timest[0];
790 for (size_t i = n_append + 1; i < inFilesEdited.size(); i++)
794 /* set the next time from the last frame in previous file */
797 /* When writing TNG the step determine which frame to write. Use an
798 * offset to be able to increase steps properly when changing files. */
801 prevEndStep = frout.step;
806 if (cont_type[i] == TIME_CONTINUE)
809 begin += 0.5 * timestep;
810 settime[i] = frout.time;
811 cont_type[i] = TIME_EXPLICIT;
813 else if (cont_type[i] == TIME_LAST)
816 begin += 0.5 * timestep;
818 /* Or, if the time in the next part should be changed by the
819 * same amount, start at half a timestep from the last time
820 * so we dont repeat frames.
822 /* I don't understand the comment above, but for all the cases
823 * I tried the code seems to work properly. B. Hess 2008-4-2.
826 /* Or, if time is set explicitly, we check for overlap/gap */
827 if (cont_type[i] == TIME_EXPLICIT)
829 if (i < inFilesEdited.size() && frout.time < settime[i] - 1.5 * timestep)
832 "WARNING: Frames around t=%f %s have a different "
833 "spacing than the rest,\n"
834 "might be a gap or overlap that couldn't be corrected "
836 output_env_conv_time(oenv, frout.time),
842 /* if we don't have a timestep in the current file, use the old one */
845 timestep = timest[i];
847 read_first_frame(oenv, &status, inFilesEdited[i].c_str(), &fr, FLAGS);
851 fprintf(stderr, "\nWARNING: Couldn't find a time in the frame.\n");
854 if (cont_type[i] == TIME_EXPLICIT)
856 t_corr = settime[i] - fr.time;
858 /* t_corr is the amount we want to change the time.
859 * If the user has chosen not to change the time for
860 * this part of the trajectory t_corr remains at
861 * the value it had in the last part, changing this
862 * by the same amount.
863 * If no value was given for the first trajectory part
864 * we let the time start at zero, see the edit_files routine.
875 printf("lasttime %g\n", lasttime);
879 /* copy the input frame to the output frame */
881 /* set the new time by adding the correct calculated above */
882 frout.time += t_corr;
885 frout.step += prevEndStep;
887 /* quit if we have reached the end of what should be written */
888 if ((end > 0) && (frout.time > end + GMX_REAL_EPS))
890 i = inFilesEdited.size();
894 /* determine if we should write this frame (dt is handled elsewhere) */
895 if (bCat) /* write all frames of all files */
899 else if (bKeepLast || (bKeepLastAppend && i == 1))
900 /* write till last frame of this traj
901 and skip first frame(s) of next traj */
903 bWrite = (frout.time > lasttime + 0.5 * timestep);
905 else /* write till first frame of next traj */
907 bWrite = (frout.time < settime[i + 1] - 0.5 * timestep);
910 if (bWrite && (frout.time >= begin))
915 first_time = frout.time;
917 lasttime = frout.time;
919 if (dt == 0 || bRmod(frout.time, first_time, dt))
922 last_ok_t = frout.time;
926 "\nContinue writing frames from %s t=%g %s, "
928 inFilesEdited[i].c_str(),
929 output_env_conv_time(oenv, frout.time),
937 write_trxframe_indexed(trxout, &frout, isize, index, nullptr);
941 write_trxframe(trxout, &frout, nullptr);
943 if (((frame % 10) == 0) || (frame < 10))
946 " -> frame %6d time %8.3f %s \r",
948 output_env_conv_time(oenv, frout.time),
954 } while (read_next_frame(oenv, status, &fr));
963 "\nLast frame written was %d, time %f %s\n",
965 output_env_conv_time(oenv, last_ok_t),