python_packaging/src/gmxapi/commandline.py

   1 #
   2 # This file is part of the GROMACS molecular simulation package.
   3 #
   4 # Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6 # and including many others, as listed in the AUTHORS file in the
   7 # top-level source directory and at http://www.gromacs.org.
   8 #
   9 # GROMACS is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU Lesser General Public License
  11 # as published by the Free Software Foundation; either version 2.1
  12 # of the License, or (at your option) any later version.
  13 #
  14 # GROMACS is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # Lesser General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU Lesser General Public
  20 # License along with GROMACS; if not, see
  21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23 #
  24 # If you want to redistribute modifications to GROMACS, please
  25 # consider that scientific software is very special. Version
  26 # control is crucial - bugs must be traceable. We will be happy to
  27 # consider code for inclusion in the official distribution, but
  28 # derived work must not be called official GROMACS. Details are found
  29 # in the README & COPYING files - if they are missing, get the
  30 # official version at http://www.gromacs.org.
  31 #
  32 # To help us fund GROMACS development, we humbly ask that you cite
  33 # the research papers on the package. Check out http://www.gromacs.org.
  34
  35 """
  36 Provide command line operation.
  37 """
  38
  39 __all__ = ['commandline_operation']
  40
  41 import functools
  42 import os
  43 import pathlib
  44 import shutil
  45 import subprocess
  46
  47 import gmxapi as gmx
  48 from gmxapi import exceptions
  49 from gmxapi import logger as root_logger
  50 from gmxapi.datamodel import NDArray
  51 from gmxapi.operation import OutputCollectionDescription
  52
  53 # Module-level logger
  54 logger = root_logger.getChild('commandline')
  55 logger.info('Importing {}'.format(__name__))
  56
  57
  58 @functools.lru_cache()
  59 def _config() -> dict:
  60     """Get the GROMACS configuration detected during installation.
  61
  62     If this appears to be a useful function, it may become part of the regular
  63     interface, but it is currently unadvertised.
  64     """
  65     import json
  66     from importlib.resources import open_text
  67     with open_text('gmxapi', 'gmxconfig.json') as textfile:
  68         config = json.load(textfile)
  69     return config
  70
  71
  72 @functools.lru_cache()
  73 def cli_executable() -> pathlib.Path:
  74     """Report the installed GROMACS command line executable."""
  75     path = _config().get('gmx_executable', None)
  76     if path is not None:
  77         path = pathlib.Path(os.path.abspath(path))
  78         if path.is_file():
  79             return path
  80     raise exceptions.FeatureNotAvailableError('GROMACS installation unavailable.')
  81
  82
  83 @functools.lru_cache()
  84 def cli_bindir() -> pathlib.Path:
  85     """Report the installed GROMACS binary directory."""
  86     path = _config().get('gmx_bindir', None)
  87     if path is not None:
  88         path = pathlib.Path(os.path.abspath(path))
  89         if path.is_dir():
  90             return path
  91     raise exceptions.FeatureNotAvailableError('GROMACS installation unavailable.')
  92
  93
  94 # Create an Operation that consumes a list and a boolean to produce a string and an integer.
  95 #
  96 # Wrap the defined function using a decorator that
  97 #    * strips the `output` parameter from the signature
  98 #    * provides `output` publishing proxy to the inner function and
  99 #    * produce a result with attributes for
 100 #       * file: mapping of output flags to output filenames
 101 #       * stdout: process STDOUT
#       * stderr: process STDERR
 103 #       * returncode: integer return code of wrapped command
 104 #
 105 # Note that the existence of the 'file' output map is expressed here, but
 106 # the keys of the map are not implicit or set by the wrapped function.
 107 # For the map to be non-empty, it must be defined before the resulting helper
 108 # function is called.
 109 #
 110 # TODO: Operation returns the output object when called with the shorter signature.
 111 #
 112 @gmx.function_wrapper(output={'stdout': str,
 113                               'stderr': str,
 114                               'returncode': int})
 115 def cli(command: NDArray, shell: bool, output: OutputCollectionDescription, stdin: str = ''):
 116     """Execute a command line program in a subprocess.
 117
 118     Configure an executable in a subprocess. Executes when run in an execution
 119     Context, as part of a work graph or via gmx.run(). Runs in the current
 120     working directory.
 121
 122     Shell processing is not enabled, but can be considered for a future version.
 123     This means that shell expansions such as environment variables, globbing (`*`),
 124     and other special symbols (like `~` for home directory) are not available.
 125     This allows a simpler and more robust implementation, as well as a better
 126     ability to uniquely identify the effects of a command line operation. If you
 127     think this disallows important use cases, please let us know.
 128
 129     Arguments:
 130          command: a tuple (or list) to be the subprocess arguments, including `executable`
 131          output: mapping of command line flags to output filename arguments
 132          shell: unused (provides forward-compatibility)
 133          stdin (str): String input to send to STDIN (terminal input) of the executable.
 134
 135     Multi-line text sent to *stdin* should be joined into a single string
 136     (e.g. ``'\n'.join(list_of_strings) + '\n'``).
 137     If multiple strings are provided to *stdin*, gmxapi will assume an ensemble,
 138     and will run one operation for each provided string.
 139
 140     Only string input (:py:func:str) to *stdin* is currently supported.
 141     If you have a use case that requires streaming input or binary input,
 142     please open an issue or contact the author(s).
 143
 144     Arguments are iteratively added to the command line with standard Python
 145     iteration, so you should use a tuple or list even if you have only one parameter.
 146     I.e. If you provide a string with `arguments="asdf"` then it will be passed as
 147     `... "a" "s" "d" "f"`. To pass a single string argument, `arguments=("asdf")`
 148     or `arguments=["asdf"]`.
 149
 150     `input` and `output` should be a dictionary with string keys, where the keys
 151     name command line "flags" or options.
 152
 153     Example:
 154         Execute a command named `exe` that takes a flagged option for file name
 155         (stored in a local Python variable `my_filename`) and an `origin` flag
 156         that uses the next three arguments to define a vector.
 157
 158             >>> my_filename = "somefilename"
 159             >>> result = cli(('exe', '--origin', 1.0, 2.0, 3.0, '-f', my_filename), shell=False)
 160             >>> assert hasattr(result, 'file')
 161             >>> assert hasattr(result, 'stdout')
 162             >>> assert hasattr(result, 'stderr')
 163             >>> assert hasattr(result, 'returncode')
 164
 165     Returns:
 166         A data structure with attributes for each of the results `file`, `stdout`, `stderr`, and `returncode`
 167
 168     Result object attributes:
 169         * `file`: the mapping of CLI flags to filename strings resulting from the `output` kwarg
 170         * `stdout`: A string mapping from process STDOUT.
 171         * `stderr`: A string mapping from process STDERR; it will be the
 172                     error output (if any) if the process failed.
 173         * `returncode`: return code of the subprocess.
 174
 175     """
 176     # In the operation implementation, we expect the `shell` parameter to be intercepted by the
 177     # wrapper and set to False.
 178     if shell:
 179         raise exceptions.UsageError("Operation does not support shell processing.")
 180
 181     if stdin == '':
 182         stdin = None
 183
 184     if isinstance(command, (str, bytes)):
 185         command = [command]
 186     command = list([arg for arg in command])
 187
 188     executable = shutil.which(command[0])
 189     if executable is None:
 190         executable = shutil.which(command[0], path=str(cli_bindir()))
 191     if executable is None:
 192         raise exceptions.ValueError('"{}" is not found or not executable.'.format(command[0]))
 193     command[0] = executable
 194
 195     # TODO: (FR9) Can OS input/output filehandles be a responsibility of
 196     #  the code providing 'resources'?
 197
 198     stdout = ''
 199     stderr = ''
 200     logger.debug('executing subprocess')
 201     try:
 202         completed_process = subprocess.run(command,
 203                                            shell=shell,
 204                                            input=stdin,
 205                                            check=True,
 206                                            stdout=subprocess.PIPE,
 207                                            stderr=subprocess.PIPE,
 208                                            encoding='utf-8',
 209                                            universal_newlines=True
 210                                            )
 211         returncode = completed_process.returncode
 212         # TODO: Resource management code should manage a safe data object for `output`.
 213         logger.debug('STDOUT:')
 214         if completed_process.stderr is not None:
 215             for line in completed_process.stdout.split('\n'):
 216                 logger.debug(line)
 217         else:
 218             logger.debug('STDOUT is empty')
 219         logger.debug('STDERR:')
 220         if completed_process.stderr is not None:
 221             for line in completed_process.stderr.split('\n'):
 222                 logger.debug(line)
 223         else:
 224             logger.debug('STDERR is empty')
 225
 226         stdout = completed_process.stdout
 227         stderr = completed_process.stderr
 228
 229     except subprocess.CalledProcessError as e:
 230         logger.info("commandline operation had non-zero return status"
 231                     "when calling {}".format(e.cmd))
 232         stdout = e.stdout
 233         stderr = e.stderr
 234         returncode = e.returncode
 235
 236     # Publish outputs.
 237     output.stdout = stdout
 238     output.stderr = stderr
 239     output.returncode = returncode
 240
 241
 242 # TODO: (FR4) Make this a formal operation to properly handle gmxapi data dependencies.
 243 #  The consumer of this operation has an NDArray input. filemap may contain gmxapi data flow
 244 #  aspects that we want the framework to handle for us.
 245 def filemap_to_flag_list(filemap: dict = None):
 246     """Convert a map of command line flags and filenames to a list of command line arguments.
 247
 248     Used to map inputs and outputs of command line tools to and from gmxapi data handles.
 249     User provides mappings of flags and filenames so that gmxapi can construct an
 250     executable command line.
 251
 252     Primary use case is implicit. commandline_operation() instantiates this operation based on
 253     user input, and sends the output to cli()
 254
 255     Arguments:
 256         filemap: key-value map of command line flags and filename arguments
 257
 258     Returns:
 259         list of strings and/or gmxapi data references
 260     """
 261     result = []
 262     if filemap is not None:
 263         for key, value in filemap.items():
 264             # Note that the value may be a string, a list, an ndarray, or a future
 265             if not isinstance(value, (list, tuple, NDArray)):
 266                 if hasattr(value, 'result') and value.dtype == NDArray:
 267                     pass
 268                 elif hasattr(value, 'result') and value.dtype != NDArray:
 269                     # TODO: Fix this ugly hack when we have proper Future slicing and can make NDArray futures.
 270                     result_function = value.result
 271                     value.result = lambda function=result_function: [function()]
 272                 else:
 273                     value = [value]
 274             result = gmx.join_arrays(front=result, back=gmx.join_arrays(front=[key], back=value))
 275     return result
 276
 277
 278 # TODO: (FR4) Use generating function or decorator that can validate kwargs?
 279 # TODO: (FR4) Outputs need to be fully formed and typed in the object returned
 280 #  from the helper (decorated function).
 281 def commandline_operation(executable=None,
 282                           arguments=(),
 283                           input_files: dict = None,
 284                           output_files: dict = None,
 285                           stdin: str = None,
 286                           **kwargs):
 287     """Helper function to define a new operation that executes a subprocess in gmxapi data flow.
 288
 289     Define a new Operation for a particular executable and input/output parameter set.
 290     Generate a chain of operations to process the named key word arguments and handle
 291     input/output data dependencies.
 292
 293     Arguments:
 294         executable: name of an executable on the path
 295         arguments: list of positional arguments to insert at ``argv[1]``
 296         input_files: mapping of command-line flags to input file names
 297         output_files: mapping of command-line flags to output file names
 298         stdin (str): String input to send to STDIN (terminal input) of the executable (optional).
 299
 300     Multi-line text sent to *stdin* should be joined into a single string.
 301     E.g.::
 302
 303         commandline_operation(..., stdin='\\n'.join(list_of_strings) + '\\n')
 304
 305     If multiple strings are provided to *stdin*, gmxapi will assume an ensemble,
 306     and will run one operation for each provided string.
 307
 308     Only string input (:py:func:`str`) to *stdin* is currently supported.
 309     If you have a use case that requires streaming input or binary input,
 310     please open an issue or contact the author(s).
 311
 312     Output:
 313         The output node of the resulting operation handle contains
 314
 315         * ``file``: the mapping of CLI flags to filename strings resulting from the ``output_files`` kwarg
 316         * ``stdout``: A string mapping from process STDOUT.
 317         * ``stderr``: A string mapping from process STDERR; it will be the
 318                       error output (if any) if the process failed.
 319         * ``returncode``: return code of the subprocess.
 320
 321     """
 322
 323     # Implementation details: When used in a script, this function returns an
 324     # instance of an operation. However, because of the dynamic specification of
 325     # inputs and outputs, each invocation may have the overhead of defining new
 326     # types to express the data flow topology, regardless of the executable.
 327     # If this overhead is problematic, consider exposing the intermediate step
 328     # at which the Operation is fully specified to facilitate reuse.
 329
 330     ##
 331     # 1. Define a new operation with outputs from `cli()` plus `file` from `output_files`
 332
 333     # output_files is essentially passed through, but we need assurance that results
 334     # will not be published until the rest of the operation has run (i.e. the cli() executable.)
 335
 336     # Warning: decorating a local function like this is counter to the notion of Operations
 337     # as portable (importable, serializable/deserializable). The big picture here needs
 338     # some more consideration.
 339     # TODO: (NOW) Distinguish portable Operations from relocatable Futures.
 340     # There is nothing antithetical about objects implementing gmxapi data interfaces
 341     # that are only resolvable by a certain Context as long as that Context can convey
 342     # the results to another Context upon request. Re-instantiating Operations is
 343     # only one way of relocating Futures. In this case, though, the dynamic creation of
 344     # merged_ops doesn't seem right, and commandline_operation should probably be
 345     # a proper Operation.
 346     #
 347     # TODO: (FR4+) Characterize the `file` dictionary key type:
 348     #  explicitly sequences rather than maybe-string/maybe-sequence-of-strings
 349     @gmx.function_wrapper(output={'stdout': str,
 350                                   'stderr': str,
 351                                   'returncode': int,
 352                                   'file': dict})
 353     def merged_ops(stdout: str = None,
 354                    stderr: str = None,
 355                    returncode: int = None,
 356                    file: dict = None,
 357                    output: OutputCollectionDescription = None):
 358         assert stdout is not None
 359         assert stderr is not None
 360         assert returncode is not None
 361         assert file is not None
 362         assert output is not None
 363         output.returncode = returncode
 364         output.stdout = stdout
 365         output.stderr = stderr
 366         if returncode == 0:
 367             output.file = file
 368         else:
 369             output.file = {}
 370
 371     ##
 372     # 2. Prepare data flow.
 373
 374     if input_files is None:
 375         input_files = {}
 376     if output_files is None:
 377         output_files = {}
 378     if isinstance(arguments, (str, bytes)):
 379         arguments = [arguments]
 380     command = gmx.concatenate_lists([[executable],
 381                                      arguments,
 382                                      filemap_to_flag_list(input_files),
 383                                      filemap_to_flag_list(output_files)])
 384     shell = gmx.make_constant(False)
 385     cli_args = {'command': command,
 386                 'shell': shell}
 387     cli_args.update(**kwargs)
 388     if stdin is not None:
 389         cli_args['stdin'] = str(stdin)
 390
 391     ##
 392     # 3. Merge operations
 393     #
 394     # Note: Without a `label` argument, repeated calls to cli(**cli_args) should
 395     # produce references to the same unique resource. Creating this handle
 396     # separately should not be necessary, but we've got a way to go until we have the
 397     # fingerprinting and Context resource management we need for that.
 398     # TODO: ``label`` kwarg
 399     # TODO: input fingerprinting
 400     cli_result = cli(**cli_args)
 401     merged_result = merged_ops(stdout=cli_result.output.stdout,
 402                                stderr=cli_result.output.stderr,
 403                                returncode=cli_result.output.returncode,
 404                                file=output_files,
 405                                **kwargs)
 406
 407     # Return an object with an OutputCollection granting access to outputs of
 408     # cli() and of output_files (as "file")
 409     return merged_result