2 # This file is part of the GROMACS molecular simulation package.
4 # Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 # and including many others, as listed in the AUTHORS file in the
7 # top-level source directory and at http://www.gromacs.org.
9 # GROMACS is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU Lesser General Public License
11 # as published by the Free Software Foundation; either version 2.1
12 # of the License, or (at your option) any later version.
14 # GROMACS is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # Lesser General Public License for more details.
19 # You should have received a copy of the GNU Lesser General Public
20 # License along with GROMACS; if not, see
21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 # If you want to redistribute modifications to GROMACS, please
25 # consider that scientific software is very special. Version
26 # control is crucial - bugs must be traceable. We will be happy to
27 # consider code for inclusion in the official distribution, but
28 # derived work must not be called official GROMACS. Details are found
29 # in the README & COPYING files - if they are missing, get the
30 # official version at http://www.gromacs.org.
32 # To help us fund GROMACS development, we humbly ask that you cite
33 # the research papers on the package. Check out http://www.gromacs.org.
36 Provide command line operation.
39 __all__ = ['commandline_operation']
48 from gmxapi import exceptions
49 from gmxapi import logger as root_logger
50 from gmxapi.datamodel import NDArray
51 from gmxapi.operation import OutputCollectionDescription
# Module-level logger: a child of the package root logger named 'commandline'.
54 logger = root_logger.getChild('commandline')
# Emit an INFO record naming this module each time the module body is executed.
55 logger.info('Importing {}'.format(__name__))
# Read the ``gmxconfig.json`` resource of the ``gmxapi`` package and parse it
# with json.load(). The functools.lru_cache() decorator memoizes the
# zero-argument call, so a successful result is reused by later callers.
# NOTE(review): this listing omits original lines 61, 64-65 and 69-71 (the
# embedded numbering jumps 60 -> 62, 63 -> 66 and 68 -> 72). Presumably they
# hold the docstring terminator and the statement that returns ``config``, as
# implied by the ``-> dict`` annotation -- confirm against the upstream file.
58 @functools.lru_cache()
59 def _config() -> dict:
60 """Get the GROMACS configuration detected during installation.
62 If this appears to be a useful function, it may become part of the regular
63 interface, but it is currently unadvertised.
66 from importlib.resources import open_text
67 with open_text('gmxapi', 'gmxconfig.json') as textfile:
68 config = json.load(textfile)
# Look up the 'gmx_executable' entry of _config() and convert it to an absolute
# pathlib.Path. The visible failure path raises FeatureNotAvailableError.
# The result is memoized by functools.lru_cache().
# NOTE(review): original lines 76 and 78-79 are missing from this listing (the
# embedded numbering jumps 75 -> 77 -> 80). Presumably they guard against a
# missing entry and return the path before the ``raise`` is reached -- confirm
# against the upstream file before relying on the exact conditions.
72 @functools.lru_cache()
73 def cli_executable() -> pathlib.Path:
74 """Report the installed GROMACS command line executable."""
75 path = _config().get('gmx_executable', None)
77 path = pathlib.Path(os.path.abspath(path))
80 raise exceptions.FeatureNotAvailableError('GROMACS installation unavailable.')
# Look up the 'gmx_bindir' entry of _config() and convert it to an absolute
# pathlib.Path. The visible failure path raises FeatureNotAvailableError.
# The result is memoized by functools.lru_cache().
# NOTE(review): original lines 87 and 89-90 are missing from this listing (the
# embedded numbering jumps 86 -> 88 -> 91). Presumably they guard against a
# missing entry and return the path before the ``raise`` is reached -- confirm
# against the upstream file before relying on the exact conditions.
83 @functools.lru_cache()
84 def cli_bindir() -> pathlib.Path:
85 """Report the installed GROMACS binary directory."""
86 path = _config().get('gmx_bindir', None)
88 path = pathlib.Path(os.path.abspath(path))
91 raise exceptions.FeatureNotAvailableError('GROMACS installation unavailable.')
94 # Create an Operation that consumes a list and a boolean to produce a string and an integer.
96 # Wrap the defined function using a decorator that
97 # * strips the `output` parameter from the signature
98 # * provides `output` publishing proxy to the inner function and
99 # * produces a result with attributes for
100 # * file: mapping of output flags to output filenames
101 # * stdout: process STDOUT
102 # * stderr: process STDERR
103 # * returncode: integer return code of wrapped command
105 # Note that the existence of the 'file' output map is expressed here, but
106 # the keys of the map are not implicit or set by the wrapped function.
107 # For the map to be non-empty, it must be defined before the resulting helper
108 # function is called.
110 # TODO: Operation returns the output object when called with the shorter signature.
@gmx.function_wrapper(output={'stdout': str,
                              'stderr': str,
                              'returncode': int})
def cli(command: NDArray, shell: bool, output: OutputCollectionDescription, stdin: str = ''):
    """Execute a command line program in a subprocess.

    Configure an executable in a subprocess. Executes when run in an execution
    Context, as part of a work graph or via gmx.run(). Runs in the current
    working directory.

    Shell processing is not enabled, but can be considered for a future version.
    This means that shell expansions such as environment variables, globbing (`*`),
    and other special symbols (like `~` for home directory) are not available.
    This allows a simpler and more robust implementation, as well as a better
    ability to uniquely identify the effects of a command line operation. If you
    think this disallows important use cases, please let us know.

    Arguments:
        command: a tuple (or list) to be the subprocess arguments, including `executable`
        output: publishing proxy provided by the wrapper; receives `stdout`,
            `stderr` and `returncode`
        shell: unused (provides forward-compatibility); a true value is rejected
        stdin (str): String input to send to STDIN (terminal input) of the executable.

    Multi-line text sent to *stdin* should be joined into a single string
    (e.g. ``'\\n'.join(list_of_strings) + '\\n'``).
    If multiple strings are provided to *stdin*, gmxapi will assume an ensemble,
    and will run one operation for each provided string.

    Only string input (:py:func:`str`) to *stdin* is currently supported.
    If you have a use case that requires streaming input or binary input,
    please open an issue or contact the author(s).

    The elements of *command* are added to the command line with standard Python
    iteration, so you should use a tuple or list even if you have only one element.
    A bare string (or bytes) is treated as a one-element command rather than
    being split into characters. Note that ``("asdf")`` is just a parenthesized
    string; a one-element tuple is written ``("asdf",)``.

    The first element is resolved with :py:func:`shutil.which`, first on the
    default search path and then in the directory reported by `cli_bindir()`.

    Example:
        Execute a command named `exe` that takes a flagged option for file name
        (stored in a local Python variable `my_filename`) and an `origin` flag
        that uses the next three arguments to define a vector.

            >>> my_filename = "somefilename"
            >>> result = cli(('exe', '--origin', '1.0', '2.0', '3.0', '-f', my_filename), shell=False)
            >>> assert hasattr(result.output, 'stdout')
            >>> assert hasattr(result.output, 'stderr')
            >>> assert hasattr(result.output, 'returncode')

    Returns:
        A data structure with attributes for each of the results `stdout`, `stderr`, and `returncode`

    Result object attributes:
        * `stdout`: A string mapping from process STDOUT.
        * `stderr`: A string mapping from process STDERR; it will be the
          error output (if any) if the process failed.
        * `returncode`: return code of the subprocess.

    Raises:
        exceptions.UsageError: if *shell* is true.
        exceptions.ValueError: if *command* is empty, or its first element is
            not found or not executable.

    """
    # In the operation implementation, we expect the `shell` parameter to be intercepted by the
    # wrapper and set to False.
    if shell:
        raise exceptions.UsageError("Operation does not support shell processing.")

    # An empty string means "no input": with input=None, subprocess.run() does
    # not create a pipe for the child's STDIN.
    if stdin == '':
        stdin = None

    # Normalize to a mutable list so that the executable can be replaced by its
    # resolved path. A bare string is a single argument, not a sequence of characters.
    if isinstance(command, (str, bytes)):
        command = [command]
    command = list(command)
    if not command:
        # Fail with the package's own error type instead of an IndexError below.
        raise exceptions.ValueError('No command provided.')

    executable = shutil.which(command[0])
    if executable is None:
        executable = shutil.which(command[0], path=str(cli_bindir()))
    if executable is None:
        raise exceptions.ValueError('"{}" is not found or not executable.'.format(command[0]))
    command[0] = executable

    # TODO: (FR9) Can OS input/output filehandles be a responsibility of
    #  the code providing 'resources'?

    logger.debug('executing subprocess')
    try:
        # check=True is what makes a non-zero exit status surface as
        # CalledProcessError, which carries the captured output and return code.
        completed_process = subprocess.run(command,
                                           shell=False,
                                           input=stdin,
                                           check=True,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE,
                                           encoding='utf-8',
                                           universal_newlines=True)
    except subprocess.CalledProcessError as e:
        logger.info("commandline operation had non-zero return status "
                    "when calling {}".format(e.cmd))
        stdout = e.stdout
        stderr = e.stderr
        returncode = e.returncode
    else:
        returncode = completed_process.returncode
        stdout = completed_process.stdout
        stderr = completed_process.stderr
        # TODO: Resource management code should manage a safe data object for `output`.
        # Each stream is tested on its own; the STDOUT branch must not depend on STDERR.
        for label, text in (('STDOUT', stdout), ('STDERR', stderr)):
            logger.debug('{}:'.format(label))
            if text is not None:
                for line in text.split('\n'):
                    logger.debug(line)
            else:
                logger.debug('{} is empty'.format(label))

    # Publish outputs.
    output.stdout = stdout
    output.stderr = stderr
    output.returncode = returncode
242 # TODO: (FR4) Make this a formal operation to properly handle gmxapi data dependencies.
243 # The consumer of this operation has an NDArray input. filemap may contain gmxapi data flow
244 # aspects that we want the framework to handle for us.
# For every (flag, value) pair of `filemap`, extend `result` with the flag
# followed by its value(s) using gmx.join_arrays. Values that are not a list,
# tuple or NDArray get special handling first (see the branches below).
# NOTE(review): original lines 261, 267, 272-273 and 275-277 are missing from
# this listing (see the jumps in the embedded numbering). `result` is read on
# line 274 without a visible initialization, and the bodies of the first `if`
# branch, any trailing `else`, and the final return are not shown -- confirm
# against the upstream file.
245 def filemap_to_flag_list(filemap: dict = None):
246 """Convert a map of command line flags and filenames to a list of command line arguments.
248 Used to map inputs and outputs of command line tools to and from gmxapi data handles.
249 User provides mappings of flags and filenames so that gmxapi can construct an
250 executable command line.
252 Primary use case is implicit. commandline_operation() instantiates this operation based on
253 user input, and sends the output to cli()
256 filemap: key-value map of command line flags and filename arguments
259 list of strings and/or gmxapi data references
262 if filemap is not None:
263 for key, value in filemap.items():
264 # Note that the value may be a string, a list, an ndarray, or a future
265 if not isinstance(value, (list, tuple, NDArray)):
266 if hasattr(value, 'result') and value.dtype == NDArray:
268 elif hasattr(value, 'result') and value.dtype != NDArray:
269 # TODO: Fix this ugly hack when we have proper Future slicing and can make NDArray futures.
270 result_function = value.result
# The original bound method is captured as a default argument, so the
# replacement still calls it after `value.result` is rebound; the new
# callable wraps the original result in a one-element list.
271 value.result = lambda function=result_function: [function()]
274 result = gmx.join_arrays(front=result, back=gmx.join_arrays(front=[key], back=value))
278 # TODO: (FR4) Use generating function or decorator that can validate kwargs?
279 # TODO: (FR4) Outputs need to be fully formed and typed in the object returned
280 # from the helper (decorated function).
281 def commandline_operation(executable=None,
283 input_files: dict = None,
284 output_files: dict = None,
287 """Helper function to define a new operation that executes a subprocess in gmxapi data flow.
289 Define a new Operation for a particular executable and input/output parameter set.
290 Generate a chain of operations to process the named key word arguments and handle
291 input/output data dependencies.
294 executable: name of an executable on the path
295 arguments: list of positional arguments to insert at ``argv[1]``
296 input_files: mapping of command-line flags to input file names
297 output_files: mapping of command-line flags to output file names
298 stdin (str): String input to send to STDIN (terminal input) of the executable (optional).
300 Multi-line text sent to *stdin* should be joined into a single string.
303 commandline_operation(..., stdin='\\n'.join(list_of_strings) + '\\n')
305 If multiple strings are provided to *stdin*, gmxapi will assume an ensemble,
306 and will run one operation for each provided string.
308 Only string input (:py:func:`str`) to *stdin* is currently supported.
309 If you have a use case that requires streaming input or binary input,
310 please open an issue or contact the author(s).
313 The output node of the resulting operation handle contains
315 * ``file``: the mapping of CLI flags to filename strings resulting from the ``output_files`` kwarg
316 * ``stdout``: A string mapping from process STDOUT.
317 * ``stderr``: A string mapping from process STDERR; it will be the
318 error output (if any) if the process failed.
319 * ``returncode``: return code of the subprocess.
323 # Implementation details: When used in a script, this function returns an
324 # instance of an operation. However, because of the dynamic specification of
325 # inputs and outputs, each invocation may have the overhead of defining new
326 # types to express the data flow topology, regardless of the executable.
327 # If this overhead is problematic, consider exposing the intermediate step
328 # at which the Operation is fully specified to facilitate reuse.
331 # 1. Define a new operation with outputs from `cli()` plus `file` from `output_files`
333 # output_files is essentially passed through, but we need assurance that results
334 # will not be published until the rest of the operation has run (i.e. the cli() executable.)
336 # Warning: decorating a local function like this is counter to the notion of Operations
337 # as portable (importable, serializable/deserializable). The big picture here needs
338 # some more consideration.
339 # TODO: (NOW) Distinguish portable Operations from relocatable Futures.
340 # There is nothing antithetical about objects implementing gmxapi data interfaces
341 # that are only resolvable by a certain Context as long as that Context can convey
342 # the results to another Context upon request. Re-instantiating Operations is
343 # only one way of relocating Futures. In this case, though, the dynamic creation of
344 # merged_ops doesn't seem right, and commandline_operation should probably be
345 # a proper Operation.
347 # TODO: (FR4+) Characterize the `file` dictionary key type:
348 # explicitly sequences rather than maybe-string/maybe-sequence-of-strings
349 @gmx.function_wrapper(output={'stdout': str,
353 def merged_ops(stdout: str = None,
355 returncode: int = None,
357 output: OutputCollectionDescription = None):
358 assert stdout is not None
359 assert stderr is not None
360 assert returncode is not None
361 assert file is not None
362 assert output is not None
363 output.returncode = returncode
364 output.stdout = stdout
365 output.stderr = stderr
372 # 2. Prepare data flow.
374 if input_files is None:
376 if output_files is None:
378 if isinstance(arguments, (str, bytes)):
379 arguments = [arguments]
380 command = gmx.concatenate_lists([[executable],
382 filemap_to_flag_list(input_files),
383 filemap_to_flag_list(output_files)])
384 shell = gmx.make_constant(False)
385 cli_args = {'command': command,
387 cli_args.update(**kwargs)
388 if stdin is not None:
389 cli_args['stdin'] = str(stdin)
392 # 3. Merge operations
394 # Note: Without a `label` argument, repeated calls to cli(**cli_args) should
395 # produce references to the same unique resource. Creating this handle
396 # separately should not be necessary, but we've got a way to go until we have the
397 # fingerprinting and Context resource management we need for that.
398 # TODO: ``label`` kwarg
399 # TODO: input fingerprinting
400 cli_result = cli(**cli_args)
401 merged_result = merged_ops(stdout=cli_result.output.stdout,
402 stderr=cli_result.output.stderr,
403 returncode=cli_result.output.returncode,
407 # Return an object with an OutputCollection granting access to outputs of
408 # cli() and of output_files (as "file")