From 214499dc4df201d09f11fd565d24eefd06f5649f Mon Sep 17 00:00:00 2001 From: Teemu Murtola Date: Sun, 3 Aug 2014 07:16:54 +0300 Subject: [PATCH] Check frame atom count before evaluating selections Several tools work even if passed a trajectory that only contains a subset of the atoms specified in the topology. The selections work as well, provided that the selections don't need to access other atoms. It may be a bit unclear what the selections need for evaluation, though. Now the selections report an error if the trajectory does not contain the necessary atoms; previously, it would most likely just crash. I'm not 100% sure that the check catches all possible cases, but at least for the majority of the cases it will provide significantly better feedback. Currently, it is only possible to pass the first N atoms, as supporting anything else would require significant changes elsewhere as well, but the check itself would be straightforward to adapt if the implementation gets otherwise done (just replace the std::max operations with unions). Change-Id: I5d9bf68567ba8634bb81b82a6833c0d398a40bdb --- src/gromacs/selection/compiler.cpp | 64 +++++++++++++++++-- src/gromacs/selection/indexutil.cpp | 14 ++++ src/gromacs/selection/indexutil.h | 9 ++- src/gromacs/selection/poscalc.cpp | 21 ++++++ src/gromacs/selection/poscalc.h | 7 ++ .../selection/selectioncollection-impl.h | 2 + src/gromacs/selection/selectioncollection.cpp | 11 +++- src/gromacs/selection/selmethod.h | 10 +++ .../selection/tests/selectioncollection.cpp | 31 ++++++++- 9 files changed, 160 insertions(+), 9 deletions(-) diff --git a/src/gromacs/selection/compiler.cpp b/src/gromacs/selection/compiler.cpp index 58beeea69a..fc968633d1 100644 --- a/src/gromacs/selection/compiler.cpp +++ b/src/gromacs/selection/compiler.cpp @@ -1601,7 +1601,7 @@ init_item_minmax_groups(const SelectionTreeElementPointer &sel) * \param[in,out] sc Selection collection data. * * The evaluation group of each \ref SEL_ROOT element corresponding to a - * selection in \p sc is set to NULL. The evaluation grop for \ref SEL_ROOT + * selection in \p sc is set to NULL. The evaluation group for \ref SEL_ROOT * elements corresponding to subexpressions that need full evaluation is set * to \c sc->gall. */ @@ -1976,8 +1976,7 @@ evaluate_boolean_static_part(gmx_sel_evaluate_t *data, { child->cdata->evaluate = &_gmx_sel_evaluate_static; /* The cgrp has only been allocated if it originated from an - * external index group. In that case, we need special handling - * to preserve the name of the group and to not leak memory. + * external index group. * If cgrp has been set in make_static(), it is not allocated, * and hence we can overwrite it safely. */ if (child->u.cgrp.nalloc_index > 0) @@ -2439,6 +2438,54 @@ init_root_item(const SelectionTreeElementPointer &root, } +/******************************************************************** + * REQUIRED ATOMS ANALYSIS + ********************************************************************/ + +/*! \brief + * Finds the highest atom index required to evaluate a selection subtree. + * + * \param[in] sel Root of the selection subtree to process. + * \param[in,out] maxAtomIndex The highest atom index required to evaluate the + * subtree. The existing value is never decreased, so multiple calls with + * the same parameter will compute the maximum over several subtrees. + * + * For evaluation that starts from a \ref SEL_ROOT element with a fixed group, + * children will never extend the evaluation group except for method parameter + * evaluation (which have their own root element), so it is sufficient to check + * the root. However, children of \ref SEL_EXPRESSION elements (i.e., the + * method parameters) may have been independently evaluated to a static group + * that no longer has a separate root, so those need to be checked as well. + * + * Position calculations are not considered here, but are analyzed through the + * position calculation collection in the main compilation method. + */ +static void +init_required_atoms(const SelectionTreeElementPointer &sel, int *maxAtomIndex) +{ + // Process children. + if (sel->type != SEL_SUBEXPRREF) + { + SelectionTreeElementPointer child = sel->child; + while (child) + { + init_required_atoms(child, maxAtomIndex); + child = child->next; + } + } + + if (sel->type == SEL_ROOT + || (sel->type == SEL_CONST && sel->v.type == GROUP_VALUE)) + { + if (sel->u.cgrp.isize > 0) + { + *maxAtomIndex = + std::max(*maxAtomIndex, gmx_ana_index_get_max_index(&sel->u.cgrp)); + } + } +} + + /******************************************************************** * FINAL SUBEXPRESSION OPTIMIZATION ********************************************************************/ @@ -2832,9 +2879,10 @@ SelectionCompiler::compile(SelectionCollection *coll) coll->printTree(stderr, false); } - /* Initialize evaluation groups, position calculations for methods, perform - * some final optimization, and free the memory allocated for the - * compilation. */ + // Initialize evaluation groups, maximum atom index needed for evaluation, + // position calculations for methods, perform some final optimization, and + // free the memory allocated for the compilation. + coll->impl_->maxAtomIndex_ = 0; /* By default, use whole residues/molecules. */ flags = POS_COMPLWHOLE; PositionCalculationCollection::typeFromEnum(coll->impl_->rpost_.c_str(), @@ -2844,10 +2892,14 @@ SelectionCompiler::compile(SelectionCollection *coll) { init_root_item(item, &sc->gall); postprocess_item_subexpressions(item); + init_required_atoms(item, &coll->impl_->maxAtomIndex_); init_item_comg(item, &sc->pcc, post, flags); free_item_compilerdata(item); item = item->next; } + coll->impl_->maxAtomIndex_ = + std::max(coll->impl_->maxAtomIndex_, + sc->pcc.getHighestRequiredAtomIndex()); /* Allocate memory for the evaluation memory pool. */ _gmx_sel_mempool_reserve(sc->mempool, 0); diff --git a/src/gromacs/selection/indexutil.cpp b/src/gromacs/selection/indexutil.cpp index 3ed8c8783e..ad377e9b61 100644 --- a/src/gromacs/selection/indexutil.cpp +++ b/src/gromacs/selection/indexutil.cpp @@ -44,6 +44,7 @@ #include #include +#include #include #include @@ -420,6 +421,19 @@ gmx_ana_index_dump(FILE *fp, gmx_ana_index_t *g, int maxn) fprintf(fp, "\n"); } +int +gmx_ana_index_get_max_index(gmx_ana_index_t *g) +{ + if (g->isize == 0) + { + return 0; + } + else + { + return *std::max_element(g->index, g->index + g->isize); + } +} + /*! * \param[in] g Index group to check. * \returns true if the index group is sorted and has no duplicates, diff --git a/src/gromacs/selection/indexutil.h b/src/gromacs/selection/indexutil.h index 4575879009..bdbaaf0076 100644 --- a/src/gromacs/selection/indexutil.h +++ b/src/gromacs/selection/indexutil.h @@ -236,10 +236,17 @@ gmx_ana_index_copy(gmx_ana_index_t *dest, gmx_ana_index_t *src, bool bAlloc); void gmx_ana_index_dump(FILE *fp, gmx_ana_index_t *g, int maxn); +/*! \brief + * Returns maximum atom index that appears in an index group. + * + * \param[in] g Index group to query. + * \returns Largest atom index that appears in \p g, or zero if \p g is empty. + */ +int +gmx_ana_index_get_max_index(gmx_ana_index_t *g); /** Checks whether an index group is sorted. */ bool gmx_ana_index_check_sorted(gmx_ana_index_t *g); - /*! \brief * Checks whether an index group has atoms from a defined range. * diff --git a/src/gromacs/selection/poscalc.cpp b/src/gromacs/selection/poscalc.cpp index 72f00e7756..61a8977ba8 100644 --- a/src/gromacs/selection/poscalc.cpp +++ b/src/gromacs/selection/poscalc.cpp @@ -61,6 +61,8 @@ */ #include +#include + #include "gromacs/fileio/trx.h" #include "gromacs/math/vec.h" #include "gromacs/selection/centerofmass.h" @@ -557,6 +559,25 @@ PositionCalculationCollection::createCalculationFromEnum(const char *post, int f return impl_->createCalculation(type, cflags); } +int PositionCalculationCollection::getHighestRequiredAtomIndex() const +{ + int result = 0; + gmx_ana_poscalc_t *pc = impl_->first_; + while (pc) + { + // Calculations with a base just copy positions from the base, so + // those do not need to be considered in the check. + if (!pc->sbase) + { + gmx_ana_index_t g; + gmx_ana_index_set(&g, pc->b.nra, pc->b.a, 0); + result = std::max(result, gmx_ana_index_get_max_index(&g)); + } + pc = pc->next; + } + return result; +} + void PositionCalculationCollection::initEvaluation() { if (impl_->bInit_) diff --git a/src/gromacs/selection/poscalc.h b/src/gromacs/selection/poscalc.h index fead217d1b..47806069b1 100644 --- a/src/gromacs/selection/poscalc.h +++ b/src/gromacs/selection/poscalc.h @@ -279,6 +279,13 @@ class PositionCalculationCollection */ gmx_ana_poscalc_t *createCalculationFromEnum(const char *post, int flags); + /*! \brief + * Computes the highest atom index required to evaluate this collection. + * + * Does not throw. + */ + int getHighestRequiredAtomIndex() const; + /*! \brief * Initializes evaluation for a position calculation collection. * diff --git a/src/gromacs/selection/selectioncollection-impl.h b/src/gromacs/selection/selectioncollection-impl.h index a0af8d0659..a478143880 100644 --- a/src/gromacs/selection/selectioncollection-impl.h +++ b/src/gromacs/selection/selectioncollection-impl.h @@ -160,6 +160,8 @@ class SelectionCollection::Impl std::string rpost_; //! Default output position type for selections. std::string spost_; + //! Largest atom index needed by the selections for evaluation. + int maxAtomIndex_; /*! \brief * Debugging level for the collection. * diff --git a/src/gromacs/selection/selectioncollection.cpp b/src/gromacs/selection/selectioncollection.cpp index 2d766bcdf4..9ac55bcc02 100644 --- a/src/gromacs/selection/selectioncollection.cpp +++ b/src/gromacs/selection/selectioncollection.cpp @@ -51,6 +51,7 @@ #include "gromacs/legacyheaders/oenv.h" +#include "gromacs/fileio/trx.h" #include "gromacs/onlinehelp/helpmanager.h" #include "gromacs/onlinehelp/helpwritercontext.h" #include "gromacs/options/basicoptions.h" @@ -84,7 +85,7 @@ namespace gmx */ SelectionCollection::Impl::Impl() - : debugLevel_(0), bExternalGroupsSet_(false), grps_(NULL) + : maxAtomIndex_(0), debugLevel_(0), bExternalGroupsSet_(false), grps_(NULL) { sc_.nvars = 0; sc_.varstrs = NULL; @@ -800,6 +801,14 @@ SelectionCollection::compile() void SelectionCollection::evaluate(t_trxframe *fr, t_pbc *pbc) { + if (fr->natoms <= impl_->maxAtomIndex_) + { + std::string message = formatString( + "Trajectory has less atoms (%d) than what is required for " + "evaluating the provided selections (atoms up to index %d " + "are required).", fr->natoms, impl_->maxAtomIndex_ + 1); + GMX_THROW(InconsistentInputError(message)); + } impl_->sc_.pcc.initFrame(); SelectionEvaluator evaluator; diff --git a/src/gromacs/selection/selmethod.h b/src/gromacs/selection/selmethod.h index 538c94db17..bd3d79be30 100644 --- a/src/gromacs/selection/selmethod.h +++ b/src/gromacs/selection/selmethod.h @@ -556,6 +556,11 @@ typedef void (*sel_framefunc)(t_topology *top, t_trxframe *fr, t_pbc *pbc, * For \ref STR_VALUE methods, the pointers stored in \p out->s are discarded * without freeing; it is the responsibility of this function to provide * pointers that can be discarded without memory leaks. + * + * If the method accesses \p fr outside the index group specified in \p g or + * what it receives from its parameters, it must check that \p fr actually + * contains such an atom in case the \p fr has been loaded from a trajectory + * that only contains a subset of the system. */ typedef void (*sel_updatefunc)(t_topology *top, t_trxframe *fr, t_pbc *pbc, gmx_ana_index_t *g, gmx_ana_selvalue_t *out, @@ -584,6 +589,11 @@ typedef void (*sel_updatefunc)(t_topology *top, t_trxframe *fr, t_pbc *pbc, * For \ref STR_VALUE methods, the pointers stored in \p out->s are discarded * without freeing; it is the responsibility of this function to provide * pointers that can be discarded without memory leaks. + * + * If the method accesses \p fr outside the atoms referenced in \p pos or + * what it receives from its parameters, it must check that \p fr actually + * contains such an atom in case the \p fr has been loaded from a trajectory + * that only contains a subset of the system. */ typedef void (*sel_updatefunc_pos)(t_topology *top, t_trxframe *fr, t_pbc *pbc, gmx_ana_pos_t *pos, gmx_ana_selvalue_t *out, diff --git a/src/gromacs/selection/tests/selectioncollection.cpp b/src/gromacs/selection/tests/selectioncollection.cpp index 713394e26c..143385fd6d 100644 --- a/src/gromacs/selection/tests/selectioncollection.cpp +++ b/src/gromacs/selection/tests/selectioncollection.cpp @@ -41,6 +41,7 @@ */ #include +#include "gromacs/fileio/trx.h" #include "gromacs/options/basicoptions.h" #include "gromacs/options/options.h" #include "gromacs/selection/indexutil.h" @@ -583,7 +584,35 @@ TEST_F(SelectionCollectionTest, RecoversFromInvalidPermutation3) EXPECT_THROW_GMX(sc_.evaluate(frame_, NULL), gmx::InconsistentInputError); } -// TODO: Tests for evaluation errors +TEST_F(SelectionCollectionTest, HandlesFramesWithTooSmallAtomSubsets) +{ + ASSERT_NO_THROW_GMX(sc_.parseFromString("atomnr 3 to 10")); + ASSERT_NO_FATAL_FAILURE(loadTopology("simple.gro")); + ASSERT_NO_THROW_GMX(sc_.compile()); + frame_->natoms = 8; + EXPECT_THROW_GMX(sc_.evaluate(frame_, NULL), gmx::InconsistentInputError); +} + +TEST_F(SelectionCollectionTest, HandlesFramesWithTooSmallAtomSubsets2) +{ + // Evaluating the positions will require atoms 1-9. + ASSERT_NO_THROW_GMX(sc_.parseFromString("whole_res_com of atomnr 2 5 7")); + ASSERT_NO_FATAL_FAILURE(loadTopology("simple.gro")); + ASSERT_NO_THROW_GMX(sc_.compile()); + frame_->natoms = 8; + EXPECT_THROW_GMX(sc_.evaluate(frame_, NULL), gmx::InconsistentInputError); +} + +TEST_F(SelectionCollectionTest, HandlesFramesWithTooSmallAtomSubsets3) +{ + ASSERT_NO_THROW_GMX(sc_.parseFromString("mindistance from atomnr 1 to 5 < 2")); + ASSERT_NO_FATAL_FAILURE(loadTopology("simple.gro")); + ASSERT_NO_THROW_GMX(sc_.compile()); + frame_->natoms = 10; + EXPECT_THROW_GMX(sc_.evaluate(frame_, NULL), gmx::InconsistentInputError); +} + +// TODO: Tests for more evaluation errors /******************************************************************** -- 2.22.0