2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements functions in selhelp.h.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_selection
50 #include <boost/scoped_ptr.hpp>
52 #include "gromacs/onlinehelp/helptopic.h"
53 #include "gromacs/onlinehelp/helpwritercontext.h"
54 #include "gromacs/utility/exceptions.h"
55 #include "gromacs/utility/file.h"
56 #include "gromacs/utility/stringutil.h"
58 #include "selmethod.h"
66 static const char name[];
67 static const char title[];
68 static const char *const text[];
71 const char CommonHelpText::name[] = "selections";
72 const char CommonHelpText::title[] =
73 "Selection syntax and usage";
74 const char *const CommonHelpText::text[] = {
75 "Selections are used to select atoms/molecules/residues for analysis.",
76 "In contrast to traditional index files, selections can be dynamic, i.e.,",
77 "select different atoms for different trajectory frames.[PAR]",
79 "Each analysis tool requires a different number of selections and the",
80 "selections are interpreted differently. The general idea is still the",
81 "same: each selection evaluates to a set of positions, where a position",
82 "can be an atom position or center-of-mass or center-of-geometry of",
83 "a set of atoms. The tool then uses these positions for its analysis to",
84 "allow very flexible processing. Some analysis tools may have limitations",
85 "on the types of selections allowed.[PAR]",
87 "To get started with selections, run, e.g., [TT][PROGRAM] select[tt]",
88 "without specifying selections on the command-line and use the interactive",
89 "prompt to try out different selections.",
90 "This tool provides output options that allow one to see what is actually",
91 "selected by the given selections, and the interactive prompt reports",
92 "syntax errors immediately, allowing one to try again.",
93 "The subtopics listed below give more details on different aspects of",
/*! \brief
 * Static texts for the "arithmetic" selection help subtopic.
 *
 * Used in this file as the template argument of SimpleHelpTopic.
 */
struct ArithmeticHelpText
{
    //! Name of the help topic.
    static const char        name[];
    //! Title of the help topic.
    static const char        title[];
    //! Body of the help topic; consecutive entries form the running text.
    static const char *const text[];
};

const char        ArithmeticHelpText::name[]  = "arithmetic";
const char        ArithmeticHelpText::title[] = "Arithmetic expressions in selections";
const char *const ArithmeticHelpText::text[]  = {
    "Basic arithmetic evaluation is supported for numeric expressions.",
    "Supported operations are addition, subtraction, negation, multiplication,",
    "division, and exponentiation (using ^).",
    "Result of a division by zero or other illegal operations is undefined.",
};
114 struct CmdLineHelpText
116 static const char name[];
117 static const char title[];
118 static const char *const text[];
121 const char CmdLineHelpText::name[] = "cmdline";
122 const char CmdLineHelpText::title[] =
123 "Specifying selections from command line";
124 const char *const CmdLineHelpText::text[] = {
125 "If no selections are provided on the command line, you are prompted to",
126 "type the selections interactively (a pipe can also be used to provide",
127 "the selections in this case for most tools). While this works well for",
128 "testing, it is easier to provide the selections from the command line",
129 "if they are complex or for scripting.[PAR]",
131 "Each tool has different command-line arguments for specifying selections",
132 "(listed by [TT][PROGRAM] help <tool>[tt]).",
133 "You can either pass a single string containing all selections (separated",
134 "by semicolons), or multiple strings, each containing one selection.",
135 "Note that you need to quote the selections to protect them from the",
138 "If you set a selection command-line argument, but do not provide any",
139 "selections, you are prompted to type the selections for that argument",
140 "interactively. This is useful if that selection argument is optional,",
141 "in which case it is not normally prompted for.[PAR]",
143 "To provide selections from a file, use [TT]-sf file.dat[tt] in the place",
144 "of the selection for a selection argument (e.g.,",
145 "[TT]-select -sf file.dat[tt]). In general, the [TT]-sf[tt] argument reads",
146 "selections from the provided file and assigns them to selection arguments",
147 "that have been specified up to that point, but for which no selections",
148 "have been provided.",
149 "As a special case, [TT]-sf[tt] provided on its own, without preceding",
150 "selection arguments, assigns the selections to all (yet unset) required",
151 "selections (i.e., those that would be prompted interactively if no",
152 "selections are provided on the command line).[PAR]",
154 "To use groups from a traditional index file, use argument [TT]-n[tt]",
155 "to provide a file. See the \"syntax\" subtopic for how to use them.",
156 "If this option is not provided, default groups are generated.",
157 "The default groups are generated by reading selections from a file",
158 "[TT]defselection.dat[tt]. If such a file is found in the current",
159 "directory, it is used instead of the one provided by default.[PAR]",
161 "Depending on the tool, two additional command-line arguments may be",
162 "available to control the behavior:[BR]",
163 "1. [TT]-seltype[tt] can be used to specify the default type of",
164 "positions to calculate for each selection.[BR]",
165 "2. [TT]-selrpos[tt] can be used to specify the default type of",
166 "positions used in selecting atoms by coordinates.[BR]",
167 "See the \"positions\" subtopic for more information on these options.",
170 struct EvaluationHelpText
172 static const char name[];
173 static const char title[];
174 static const char *const text[];
177 const char EvaluationHelpText::name[] = "evaluation";
178 const char EvaluationHelpText::title[] =
179 "Selection evaluation and optimization";
180 const char *const EvaluationHelpText::text[] = {
181 "Boolean evaluation proceeds from left to right and is short-circuiting",
182 "i.e., as soon as it is known whether an atom will be selected, the",
183 "remaining expressions are not evaluated at all.",
184 "This can be used to optimize the selections: you should write the",
185 "most restrictive and/or the most inexpensive expressions first in",
186 "boolean expressions.",
187 "The relative ordering between dynamic and static expressions does not",
188 "matter: all static expressions are evaluated only once, before the first",
189 "frame, and the result becomes the leftmost expression.[PAR]",
191 "Another point for optimization is in common subexpressions: they are not",
192 "automatically recognized, but can be manually optimized by the use of",
193 "variables. This can have a big impact on the performance of complex",
194 "selections, in particular if you define several index groups like this:",
195 " [TT]rdist = distance from com of resnr 1 to 5;[tt][BR]",
196 " [TT]resname RES and rdist < 2;[tt][BR]",
197 " [TT]resname RES and rdist < 4;[tt][BR]",
198 " [TT]resname RES and rdist < 6;[tt][BR]",
199 "Without the variable assignment, the distances would be evaluated three",
200 "times, although they are exactly the same within each selection.",
201 "Anything assigned into a variable becomes a common subexpression that",
202 "is evaluated only once during a frame.",
203 "Currently, in some cases the use of variables can actually lead to a small",
204 "performance loss because of the checks necessary to determine for which",
205 "atoms the expression has already been evaluated, but this should not be",
209 struct ExamplesHelpText
211 static const char name[];
212 static const char title[];
213 static const char *const text[];
216 const char ExamplesHelpText::name[] = "examples";
217 const char ExamplesHelpText::title[] =
218 "Selection examples";
219 const char *const ExamplesHelpText::text[] = {
220 // TODO: Once there are more tools available, use examples that invoke
221 // tools and explain what the selections do in those tools.
222 "Below, examples of increasingly complex selections are given.[PAR]",
224 "Selection of all water oxygens:[BR]",
225 " resname SOL and name OW",
228 "Centers of mass of residues 1 to 5 and 10:[BR]",
229 " res_com of resnr 1 to 5 10",
232 "All atoms farther than 1 nm of a fixed position:[BR]",
233 " not within 1 of [1.2, 3.1, 2.4]",
236 "All atoms of a residue LIG within 0.5 nm of a protein (with a custom name):[BR]",
237 " \"Close to protein\" resname LIG and within 0.5 of group \"Protein\"",
240 "All protein residues that have at least one atom within 0.5 nm of a residue LIG:[BR]",
241 " group \"Protein\" and same residue as within 0.5 of resname LIG",
244 "All RES residues whose COM is between 2 and 4 nm from the COM of all of them:[BR]",
245 " rdist = res_com distance from com of resname RES[BR]",
246 " resname RES and rdist >= 2 and rdist <= 4",
249 "Selection like C1 C2 C2 C3 C3 C4 ... C8 C9 (e.g., for g_bond):[BR]",
250 " name \"C[1-8]\" merge name \"C[2-9]\"",
/*! \brief
 * Static texts for the "keywords" selection help subtopic.
 *
 * Used in this file as the template argument of CompositeHelpTopic for the
 * keyword list topic.
 */
struct KeywordsHelpText
{
    //! Name of the help topic.
    static const char        name[];
    //! Title of the help topic.
    static const char        title[];
    //! Introductory text of the help topic; consecutive entries form the running text.
    static const char *const text[];
};

const char        KeywordsHelpText::name[]  = "keywords";
const char        KeywordsHelpText::title[] = "Selection keywords";
const char *const KeywordsHelpText::text[]  = {
    "The following selection keywords are currently available.",
    "For keywords marked with a star, additional help is available through",
    "a subtopic KEYWORD, where KEYWORD is the name of the keyword.",
};
269 struct LimitationsHelpText
271 static const char name[];
272 static const char title[];
273 static const char *const text[];
276 const char LimitationsHelpText::name[] = "limitations";
277 const char LimitationsHelpText::title[] =
278 "Selection limitations";
279 const char *const LimitationsHelpText::text[] = {
280 "Some analysis programs may require a special structure for the input",
281 "selections (e.g., [TT]gmx angle[tt] requires the index group to be made",
282 "of groups of three or four atoms).",
283 "For such programs, it is up to the user to provide a proper selection",
284 "expression that always returns such positions.",
287 "Due to technical reasons, having a negative value as the first value in",
288 "expressions like[BR]",
289 "[TT]charge -1 to -0.7[tt][BR]",
290 "results in a syntax error. A workaround is to write[BR]",
291 "[TT]charge {-1 to -0.7}[tt][BR]",
294 "When [TT]name[tt] selection keyword is used together with PDB input",
295 "files, the behavior may be unintuitive. When Gromacs reads in a PDB",
296 "file, 4 character atom names that start with a digit are transformed",
297 "such that, e.g., 1HG2 becomes HG21, and the latter is what is matched",
298 "by the [TT]name[tt] keyword. Use [TT]pdbname[tt] to match the atom name",
299 "as it appears in the input PDB file.",
302 struct PositionsHelpText
304 static const char name[];
305 static const char title[];
306 static const char *const text[];
309 const char PositionsHelpText::name[] = "positions";
310 const char PositionsHelpText::title[] =
311 "Specifying positions in selections";
312 const char *const PositionsHelpText::text[] = {
313 "Possible ways of specifying positions in selections are:[PAR]",
315 "1. A constant position can be defined as [TT][XX, YY, ZZ][tt], where",
316 "[TT]XX[tt], [TT]YY[tt] and [TT]ZZ[tt] are real numbers.[PAR]",
318 "2. [TT]com of ATOM_EXPR [pbc][tt] or [TT]cog of ATOM_EXPR [pbc][tt]",
319 "calculate the center of mass/geometry of [TT]ATOM_EXPR[tt]. If",
320 "[TT]pbc[tt] is specified, the center is calculated iteratively to try",
321 "to deal with cases where [TT]ATOM_EXPR[tt] wraps around periodic",
322 "boundary conditions.[PAR]",
324 "3. [TT]POSTYPE of ATOM_EXPR[tt] calculates the specified positions for",
325 "the atoms in [TT]ATOM_EXPR[tt].",
326 "[TT]POSTYPE[tt] can be [TT]atom[tt], [TT]res_com[tt], [TT]res_cog[tt],",
327 "[TT]mol_com[tt] or [TT]mol_cog[tt], with an optional prefix [TT]whole_[tt],",
328 "[TT]part_[tt] or [TT]dyn_[tt].",
329 "[TT]whole_[tt] calculates the centers for the whole residue/molecule,",
330 "even if only part of it is selected.",
331 "[TT]part_[tt] prefix calculates the centers for the selected atoms, but",
332 "uses always the same atoms for the same residue/molecule. The used atoms",
333 "are determined from the largest group allowed by the selection.",
334 "[TT]dyn_[tt] calculates the centers strictly only for the selected atoms.",
335 "If no prefix is specified, whole selections default to [TT]part_[tt] and",
336 "other places default to [TT]whole_[tt].",
337 "The latter is often desirable to select the same molecules in different",
338 "tools, while the first is a compromise between speed ([TT]dyn_[tt]",
339 "positions can be slower to evaluate than [TT]part_[tt]) and intuitive",
342 "4. [TT]ATOM_EXPR[tt], when given for whole selections, is handled as 3.",
343 "above, using the position type from the command-line argument",
344 "[TT]-seltype[tt].[PAR]",
346 "Selection keywords that select atoms based on their positions, such as",
347 "[TT]dist from[tt], use by default the positions defined by the",
348 "[TT]-selrpos[tt] command-line option.",
349 "This can be overridden by prepending a [TT]POSTYPE[tt] specifier to the",
350 "keyword. For example, [TT]res_com dist from POS[tt] evaluates the",
351 "residue center of mass distances. In the example, all atoms of a residue",
352 "are either selected or not, based on the single distance calculated.",
355 struct SyntaxHelpText
357 static const char name[];
358 static const char title[];
359 static const char *const text[];
362 const char SyntaxHelpText::name[] = "syntax";
363 const char SyntaxHelpText::title[] =
365 const char *const SyntaxHelpText::text[] = {
366 "A set of selections consists of one or more selections, separated by",
367 "semicolons. Each selection defines a set of positions for the analysis.",
368 "Each selection can also be preceded by a string that gives a name for",
369 "the selection for use in, e.g., graph legends.",
370 "If no name is provided, the string used for the selection is used",
371 "automatically as the name.[PAR]",
373 "For interactive input, the syntax is slightly altered: line breaks can",
374 "also be used to separate selections. \\ followed by a line break can",
375 "be used to continue a line if necessary.",
376 "Notice that the above only applies to real interactive input,",
377 "not if you provide the selections, e.g., from a pipe.[PAR]",
379 "It is possible to use variables to store selection expressions.",
380 "A variable is defined with the following syntax:[BR]",
381 "[TT]VARNAME = EXPR ;[tt][BR]",
382 "where [TT]EXPR[tt] is any valid selection expression.",
383 "After this, [TT]VARNAME[tt] can be used anywhere where [TT]EXPR[tt]",
384 "would be valid.[PAR]",
386 "Selections are composed of three main types of expressions, those that",
387 "define atoms ([TT]ATOM_EXPR[tt]s), those that define positions",
388 "([TT]POS_EXPR[tt]s), and those that evaluate to numeric values",
389 "([TT]NUM_EXPR[tt]s). Each selection should be a [TT]POS_EXPR[tt]",
390 "or a [TT]ATOM_EXPR[tt] (the latter is automatically converted to",
391 "positions). The basic rules are as follows:[BR]",
392 "1. An expression like [TT]NUM_EXPR1 < NUM_EXPR2[tt] evaluates to an",
393 "[TT]ATOM_EXPR[tt] that selects all the atoms for which the comparison",
395 "2. Atom expressions can be combined with boolean operations such as",
396 "[TT]not ATOM_EXPR[tt], [TT]ATOM_EXPR and ATOM_EXPR[tt], or",
397 "[TT]ATOM_EXPR or ATOM_EXPR[tt]. Parentheses can be used to alter the",
398 "evaluation order.[BR]",
399 "3. [TT]ATOM_EXPR[tt] expressions can be converted into [TT]POS_EXPR[tt]",
400 "expressions in various ways, see the \"positions\" subtopic for more",
403 "Some keywords select atoms based on string values such as the atom name.",
404 "For these keywords, it is possible to use wildcards ([TT]name \"C*\"[tt])",
405 "or regular expressions (e.g., [TT]resname \"R[AB]\"[tt]).",
406 "The match type is automatically guessed from the string: if it contains",
407 "other characters than letters, numbers, '*', or '?', it is interpreted",
408 "as a regular expression.",
409 "To force the matching to use literal string matching, use",
410 "[TT]name = \"C*\"[tt] to match a literal C*.",
411 "To force other type of matching, use '?' or '~' in place of '=' to force",
412 "wildcard or regular expression matching, respectively.[PAR]",
414 "Strings that contain non-alphanumeric characters should be enclosed in",
415 "double quotes as in the examples. For other strings, the quotes are",
416 "optional, but if the value conflicts with a reserved keyword, a syntax",
417 "error will occur. If your strings contain uppercase letters, this should",
420 "Index groups provided with the [TT]-n[tt] command-line option or",
421 "generated by default can be accessed with [TT]group NR[tt] or",
422 "[TT]group NAME[tt], where [TT]NR[tt] is a zero-based index of the group",
423 "and [TT]NAME[tt] is part of the name of the desired group.",
424 "The keyword [TT]group[tt] is optional if the whole selection is",
425 "provided from an index group.",
426 "To see a list of available groups in the interactive mode, press enter",
427 "in the beginning of a line.",
439 * Help topic implementation for an individual selection method.
441 * \ingroup module_selection
443 class KeywordDetailsHelpTopic : public AbstractSimpleHelpTopic
446 //! Initialize help topic for the given selection method.
447 KeywordDetailsHelpTopic(const std::string &name,
448 const gmx_ana_selmethod_t &method)
449 : name_(name), method_(method)
453 virtual const char *name() const
455 return name_.c_str();
457 virtual const char *title() const
463 virtual std::string helpText() const
465 return concatenateStrings(method_.help.help, method_.help.nlhelp);
470 const gmx_ana_selmethod_t &method_;
472 GMX_DISALLOW_COPY_AND_ASSIGN(KeywordDetailsHelpTopic);
476 * Custom help topic for printing a list of selection keywords.
478 * \ingroup module_selection
480 class KeywordsHelpTopic : public CompositeHelpTopic<KeywordsHelpText>
485 virtual void writeHelp(const HelpWriterContext &context) const;
489 * Container for known selection methods.
491 * The first item in the pair is the name of the selection method, and
492 * the second points to the static data structure that describes the
494 * The name in the first item may differ from the name of the static
495 * data structure if an alias is defined for that method.
497 typedef std::vector<std::pair<std::string,
498 const gmx_ana_selmethod_t *> >
502 * Prints a brief list of keywords (selection methods) available.
504 * \param[in] context Context for printing the help.
505 * \param[in] type Only methods that return this type are printed.
506 * \param[in] bModifiers If false, \ref SMETH_MODIFIER methods are
507 * excluded, otherwise only them are printed.
509 void printKeywordList(const HelpWriterContext &context,
510 e_selvalue_t type, bool bModifiers) const;
515 KeywordsHelpTopic::KeywordsHelpTopic()
517 // TODO: This is not a very elegant way of getting the list of selection
518 // methods, but this needs to be rewritten in any case if/when #652 is
520 boost::scoped_ptr<SelectionParserSymbolTable> symtab(
521 new SelectionParserSymbolTable);
522 gmx_ana_selmethod_register_defaults(symtab.get());
524 SelectionParserSymbolIterator symbol
525 = symtab->beginIterator(SelectionParserSymbol::MethodSymbol);
526 while (symbol != symtab->endIterator())
528 const std::string &symname = symbol->name();
529 const gmx_ana_selmethod_t *method = symbol->methodValue();
530 methods_.push_back(std::make_pair(std::string(symname), method));
531 if (method->help.nlhelp > 0 && method->help.help != NULL)
533 addSubTopic(HelpTopicPointer(
534 new KeywordDetailsHelpTopic(symname, *method)));
540 void KeywordsHelpTopic::writeHelp(const HelpWriterContext &context) const
542 if (context.outputFormat() != eHelpOutputFormat_Console)
544 GMX_THROW(NotImplementedError(
545 "Selection help is not implemented for this output format"));
547 // TODO: The markup here is not really appropriate, and printKeywordList()
548 // still prints raw text, but these are waiting for discussion of the
549 // markup format in #969.
550 writeBasicHelpTopic(context, *this, helpText());
551 context.writeTextBlock("[BR]");
553 // Print the list of keywords
554 context.writeTextBlock(
555 "Keywords that select atoms by an integer property:[BR]"
556 "(use in expressions or like \"atomnr 1 to 5 7 9\")[BR]");
557 printKeywordList(context, INT_VALUE, false);
558 context.writeTextBlock("[BR]");
560 context.writeTextBlock(
561 "Keywords that select atoms by a numeric property:[BR]"
562 "(use in expressions or like \"occupancy 0.5 to 1\")[BR]");
563 printKeywordList(context, REAL_VALUE, false);
564 context.writeTextBlock("[BR]");
566 context.writeTextBlock(
567 "Keywords that select atoms by a string property:[BR]"
568 "(use like \"name PATTERN [PATTERN] ...\")[BR]");
569 printKeywordList(context, STR_VALUE, false);
570 context.writeTextBlock("[BR]");
572 context.writeTextBlock(
573 "Additional keywords that directly select atoms:[BR]");
574 printKeywordList(context, GROUP_VALUE, false);
575 context.writeTextBlock("[BR]");
577 context.writeTextBlock(
578 "Keywords that directly evaluate to positions:[BR]"
579 "(see also \"positions\" subtopic)[BR]");
580 printKeywordList(context, POS_VALUE, false);
581 context.writeTextBlock("[BR]");
583 context.writeTextBlock("Additional keywords:[BR]");
584 printKeywordList(context, POS_VALUE, true);
585 printKeywordList(context, NO_VALUE, true);
588 void KeywordsHelpTopic::printKeywordList(const HelpWriterContext &context,
590 bool bModifiers) const
592 File &file = context.outputFile();
593 MethodList::const_iterator iter;
594 for (iter = methods_.begin(); iter != methods_.end(); ++iter)
596 const gmx_ana_selmethod_t &method = *iter->second;
597 bool bIsModifier = (method.flags & SMETH_MODIFIER) != 0;
598 if (method.type == type && bModifiers == bIsModifier)
600 bool bHasHelp = (method.help.nlhelp > 0 && method.help.help != NULL);
601 file.writeString(formatString(" %c ", bHasHelp ? '*' : ' '));
602 if (method.help.syntax != NULL)
604 file.writeLine(method.help.syntax);
608 std::string symname = iter->first;
609 if (symname != method.name)
611 symname.append(formatString(" (synonym for %s)", method.name));
613 file.writeLine(symname);
622 HelpTopicPointer createSelectionHelpTopic()
624 CompositeHelpTopicPointer root(new CompositeHelpTopic<CommonHelpText>);
625 root->registerSubTopic<SimpleHelpTopic<ArithmeticHelpText> >();
626 root->registerSubTopic<SimpleHelpTopic<CmdLineHelpText> >();
627 root->registerSubTopic<SimpleHelpTopic<EvaluationHelpText> >();
628 root->registerSubTopic<SimpleHelpTopic<ExamplesHelpText> >();
629 root->registerSubTopic<KeywordsHelpTopic>();
630 root->registerSubTopic<SimpleHelpTopic<LimitationsHelpText> >();
631 root->registerSubTopic<SimpleHelpTopic<PositionsHelpText> >();
632 root->registerSubTopic<SimpleHelpTopic<SyntaxHelpText> >();