Update CI containers to oneAPI 2021.4.0
[alexxy/gromacs.git] admin/containers/scripted_gmx_docker_builds.py
index c3f3e770888a3f244a570620b056cdfaa40155f8..08e16d1c171e3f9c5e6d53d93c8b00d6fc15ccdd 100755
@@ -53,6 +53,7 @@ Authors:
     * Eric Irrgang <ericirrgang@gmail.com>
     * Joe Jordan <e.jjordan12@gmail.com>
     * Mark Abraham <mark.j.abraham@gmail.com>
+    * Gaurav Garg <gaugarg@nvidia.com>
 
 Usage::
 
@@ -67,6 +68,7 @@ See Also:
 
 import argparse
 import collections
+import collections.abc
 import typing
 from distutils.version import StrictVersion
 
@@ -107,15 +109,26 @@ _opencl_extra_packages = [
     'ocl-icd-libopencl1',
     'ocl-icd-opencl-dev',
     'opencl-headers',
+]
+
+_rocm_extra_packages = [
     # The following require
-    #             apt_keys=['http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key'],
-    #             apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main']
+    #             apt_keys=['http://repo.radeon.com/rocm/rocm.gpg.key'],
+    #             apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/4.0.1/ xenial main']
+    'clinfo',
+    'hipfft',
     'libelf1',
+    'rocfft',
     'rocm-opencl',
     'rocm-dev',
-    'clinfo'
 ]
 
+# Extra packages needed to install the Intel Compute Runtime
+_intel_compute_runtime_extra_packages = ['intel-opencl-icd',
+                                         'intel-level-zero-gpu',
+                                         'level-zero',
+                                         'libmfx1']
+
 # Extra packages needed to build Python installations from source.
 _python_extra_packages = ['build-essential',
                           'ca-certificates',
@@ -148,7 +161,6 @@ _docs_extra_packages = ['autoconf',
                         'help2man',
                         'imagemagick',
                         'libtool',
-                        'linkchecker',
                         'mscgen',
                         'm4',
                         'openssh-client',
@@ -156,7 +168,8 @@ _docs_extra_packages = ['autoconf',
                         'texlive-latex-base',
                         'texlive-latex-extra',
                         'texlive-fonts-recommended',
-                        'texlive-fonts-extra']
+                        'texlive-fonts-extra',
+                        'tex-gyre']
 
 # Parse command line arguments
 parser = argparse.ArgumentParser(description='GROMACS CI image creation script',
@@ -168,6 +181,7 @@ parser.add_argument('--format', type=str, default='docker',
 
 
 def base_image_tag(args) -> str:
+    """Generate *image* for hpccm.baseimage()."""
     # Check if we use CUDA images or plain linux images
     if args.cuda is not None:
         cuda_version_tag = 'nvidia/cuda:' + args.cuda + '-devel'
@@ -189,27 +203,66 @@ def base_image_tag(args) -> str:
     return base_image_tag
 
 
+def hpccm_distro_name(args) -> str:
+    """Generate *_distro* for hpccm.baseimage().
+
+    Convert the linux distribution variables into something that hpccm
+    understands.
+
+    The same format is used by the lower level hpccm.config.set_linux_distro().
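+
+    For example, ``--ubuntu 20.04`` maps to ``'ubuntu20'``.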
+    """
+    if args.centos is not None:
+        name_mapping = {'7': 'centos7',
+                        '8': 'centos8'}
+        if args.centos in name_mapping:
+            hpccm_name = name_mapping[args.centos]
+        else:
+            raise RuntimeError('Logic error: unsupported CentOS distribution selected.')
+    elif args.ubuntu is not None:
+        name_mapping = {'20.04': 'ubuntu20',
+                        '18.04': 'ubuntu18',
+                        '16.04': 'ubuntu16'}
+        if args.ubuntu in name_mapping:
+            hpccm_name = name_mapping[args.ubuntu]
+        else:
+            raise RuntimeError('Logic error: unsupported Ubuntu distribution selected.')
+    else:
+        raise RuntimeError('Logic error: no Linux distribution selected.')
+    return hpccm_name
+
+
 def get_llvm_packages(args) -> typing.Iterable[str]:
     # If we use the package version of LLVM, we need to install extra packages for it.
     if (args.llvm is not None) and (args.tsan is None):
-        return ['libomp-dev',
-                'libomp5',
-                'clang-format-' + str(args.llvm),
-                'clang-tidy-' + str(args.llvm)]
+        packages = [f'libomp-{args.llvm}-dev',
+                    f'libomp5-{args.llvm}',
+                    'clang-format-' + str(args.llvm),
+                    'clang-tidy-' + str(args.llvm)]
+        if args.hipsycl is not None:
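+            # hipSYCL is built against LLVM/Clang, so it also needs the matching
+            # development headers and the lld linker.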
+            packages += [f'llvm-{args.llvm}-dev',
+                         f'libclang-{args.llvm}-dev',
+                         f'lld-{args.llvm}']
+        return packages
     else:
         return []
 
-def get_opencl_packages(args) -> typing.Iterable[str]:
+
+def get_opencl_packages(args) -> typing.List[str]:
     if (args.doxygen is None) and (args.oneapi is None):
         return _opencl_extra_packages
     else:
         return []
 
+
+def get_rocm_packages(args) -> typing.List[str]:
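+    """Return the ROCm runtime and development packages when --rocm is given."""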
+    if args.rocm is None:
+        return []
+    else:
+        return _rocm_extra_packages
+
+
 def get_compiler(args, compiler_build_stage: hpccm.Stage = None) -> bb_base:
     # Compiler
-    if args.icc is not None:
-        raise RuntimeError('Intel compiler toolchain recipe not implemented yet')
-
     if args.llvm is not None:
         # Build our own version instead to get TSAN + OMP
         if args.tsan is not None:
@@ -219,15 +272,18 @@ def get_compiler(args, compiler_build_stage: hpccm.Stage = None) -> bb_base:
                 raise RuntimeError('No TSAN compiler build stage!')
         # Build the default compiler if we don't need special support
         else:
-            compiler = hpccm.building_blocks.llvm(extra_repository=True, version=args.llvm)
+            # Currently the focal apt repositories do not contain
+            # LLVM higher than 11, so we work around that. This will
+            # need further work when we start supporting Ubuntu 22.04.
+            compiler = hpccm.building_blocks.llvm(version=args.llvm, upstream=int(args.llvm) > 11)
 
     elif args.oneapi is not None:
         if compiler_build_stage is not None:
             compiler = compiler_build_stage.runtime(_from='oneapi')
             # Prepare the toolchain (needed only for builds done within the Dockerfile, e.g.
             # OpenMPI builds, which don't currently work for other reasons)
-            oneapi_toolchain = hpccm.toolchain(CC='/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc',
-                                               CXX='/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icpc')
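+            # The classic icc/icpc drivers are replaced here by the
+            # LLVM-based icx/icpx drivers.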
+            oneapi_toolchain = hpccm.toolchain(CC=f'/opt/intel/oneapi/compiler/{args.oneapi}/linux/bin/intel64/icx',
+                                               CXX=f'/opt/intel/oneapi/compiler/{args.oneapi}/linux/bin/intel64/icpx')
             setattr(compiler, 'toolchain', oneapi_toolchain)
 
         else:
@@ -242,18 +298,42 @@ def get_compiler(args, compiler_build_stage: hpccm.Stage = None) -> bb_base:
     return compiler
 
 
-def get_mpi(args, compiler):
+def get_gdrcopy(args, compiler):
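+    """Return a GDRCopy building block for CUDA builds, or None.
+
+    GDRCopy provides low-latency GPU memory copies that the UCX build can use.
+    """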
+    if args.cuda is not None:
+        if hasattr(compiler, 'toolchain'):
+            # Version last updated June 7, 2021
+            return hpccm.building_blocks.gdrcopy(toolchain=compiler.toolchain, version="2.2")
+        else:
+            raise RuntimeError('compiler is not an HPCCM compiler building block!')
+    else:
+        return None
+
+
+def get_ucx(args, compiler, gdrcopy):
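+    """Return a UCX building block for CUDA builds, or None.
+
+    UCX is built with CUDA support (and GDRCopy support, when available) so
+    that the OpenMPI build can use it for GPU-aware communication.
+    """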
+    if args.cuda is not None:
+        if hasattr(compiler, 'toolchain'):
+            use_gdrcopy = (gdrcopy is not None)
+            # Version last updated June 7, 2021
+            return hpccm.building_blocks.ucx(toolchain=compiler.toolchain, gdrcopy=use_gdrcopy, version="1.10.1",
+                                             cuda=True)
+        else:
+            raise RuntimeError('compiler is not an HPCCM compiler building block!')
+    else:
+        return None
+
+
+def get_mpi(args, compiler, ucx):
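+    """Add MPI to the image.
+
+    OpenMPI is built from source, with CUDA and UCX support when available.
+    """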
     # If needed, add MPI to the image
     if args.mpi is not None:
         if args.mpi == 'openmpi':
-            use_cuda = False
-            if args.cuda is not None:
-                use_cuda = True
-
             if hasattr(compiler, 'toolchain'):
                 if args.oneapi is not None:
                     raise RuntimeError('oneAPI building OpenMPI is not supported')
-                return hpccm.building_blocks.openmpi(toolchain=compiler.toolchain, cuda=use_cuda, infiniband=False)
+                use_cuda = (args.cuda is not None)
+                use_ucx = (ucx is not None)
+                # Version last updated June 7, 2021
+                return hpccm.building_blocks.openmpi(toolchain=compiler.toolchain, version="4.1.1", cuda=use_cuda,
+                                                     ucx=use_ucx, infiniband=False)
             else:
                 raise RuntimeError('compiler is not an HPCCM compiler building block!')
 
@@ -280,6 +360,56 @@ def get_clfft(args):
         return None
 
 
+def get_heffte(args):
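+    """Return a building block for the requested heFFTe commit, or None.
+
+    heFFTe (a distributed-memory FFT library) is built with its CUDA backend enabled.
+    """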
+    if args.heffte is not None:
+        return hpccm.building_blocks.generic_cmake(
+            cmake_opts=['-D CMAKE_BUILD_TYPE=Release',
+                        '-D CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda',
+                        '-D Heffte_ENABLE_CUDA=ON',
+                        '-D Heffte_ENABLE_FFTW=OFF',
+                        '-D BUILD_SHARED_LIBS=ON'],
+            repository='https://bitbucket.org/icl/heffte.git',
+            prefix='/usr/local', recursive=True, commit=args.heffte, directory='heffte')
+    else:
+        return None
+
+
+def get_hipsycl(args):
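+    """Return a building block for the requested hipSYCL commit, or None.
+
+    hipSYCL is built with the ROCm backend, plus the CUDA backend when
+    --cuda is also given.
+    """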
+    if args.hipsycl is None:
+        return None
+    if args.llvm is None:
+        raise RuntimeError('Cannot build hipSYCL without LLVM')
+
+    if args.rocm is None:
+        raise RuntimeError('hipSYCL requires the ROCm packages')
+
+    cmake_opts = ['-DLLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm',
+                  '-DCMAKE_PREFIX_PATH=/opt/rocm/lib/cmake',
+                  '-DWITH_ROCM_BACKEND=ON']
+    if args.cuda is not None:
+        cmake_opts += ['-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda',
+                       '-DWITH_CUDA_BACKEND=ON']
+
+    postinstall = [
+        # https://github.com/illuhad/hipSYCL/issues/361#issuecomment-718943645
+        'for f in /opt/rocm/amdgcn/bitcode/*.bc; do ln -s "$f" "/opt/rocm/lib/$(basename $f .bc).amdgcn.bc"; done'
+    ]
+    if args.cuda is not None:
+        postinstall += [
+            # https://github.com/illuhad/hipSYCL/issues/410#issuecomment-743301929
+            f'sed s/_OPENMP/__OPENMP_NVPTX__/ -i /usr/lib/llvm-{args.llvm}/lib/clang/*/include/__clang_cuda_complex_builtins.h',
+            # Not needed unless we're building with CUDA 11.x, but no harm in doing so always
+            'ln -s /usr/local/cuda/compat/* /usr/local/cuda/lib64/'
+        ]
+
+    return hpccm.building_blocks.generic_cmake(
+        repository='https://github.com/illuhad/hipSYCL.git',
+        directory='/var/tmp/hipSYCL',
+        prefix='/usr/local', recursive=True, commit=args.hipsycl,
+        cmake_opts=['-DCMAKE_BUILD_TYPE=Release', *cmake_opts],
+        postinstall=postinstall)
+
+
 def add_tsan_compiler_build_stage(input_args, output_stages: typing.Mapping[str, hpccm.Stage]):
     """Isolate the expensive TSAN preparation stage.
 
@@ -290,8 +420,12 @@ def add_tsan_compiler_build_stage(input_args, output_stages: typing.Mapping[str,
     """
     if not isinstance(output_stages, collections.abc.MutableMapping):
         raise RuntimeError('Need output_stages container.')
+    if 'compiler_build' in output_stages:
+        raise RuntimeError('"compiler_build" output stage is already present.')
     tsan_stage = hpccm.Stage()
-    tsan_stage += hpccm.primitives.baseimage(image=base_image_tag(input_args), _as='tsan')
+    tsan_stage += hpccm.primitives.baseimage(image=base_image_tag(input_args),
+                                             _distro=hpccm_distro_name(input_args),
+                                             _as='tsan')
 
     tsan_stage += hpccm.building_blocks.packages(ospackages=['git', 'ca-certificates', 'build-essential', 'cmake'])
     # CMake will get duplicated later, but this is an expensive image, and it isn't worth optimizing
@@ -303,16 +437,20 @@ def add_tsan_compiler_build_stage(input_args, output_stages: typing.Mapping[str,
         repository='https://github.com/llvm/llvm-project.git',
         directory='/var/tmp/llvm-project/llvm/',
         prefix='/usr/local', recursive=True, branch=compiler_branch,
-        cmake_opts=['-D CMAKE_BUILD_TYPE=Release', '-D LLVM_ENABLE_PROJECTS="clang;openmp;clang-tools-extra;compiler-rt;lld"',
+        cmake_opts=['-D CMAKE_BUILD_TYPE=Release',
+                    '-D LLVM_ENABLE_PROJECTS="clang;openmp;clang-tools-extra;compiler-rt;lld"',
                     '-D LIBOMP_TSAN_SUPPORT=on'],
         postinstall=['ln -s /usr/local/bin/clang++ /usr/local/bin/clang++-' + str(input_args.llvm),
                      'ln -s /usr/local/bin/clang-format /usr/local/bin/clang-format-' + str(input_args.llvm),
                      'ln -s /usr/local/bin/clang-tidy /usr/local/bin/clang-tidy-' + str(input_args.llvm),
-                     'ln -s /usr/local/share/clang/run-clang-tidy.py /usr/local/bin/run-clang-tidy-' + str(input_args.llvm) + '.py',
-                     'ln -s /usr/local/bin/run-clang-tidy-' + str(input_args.llvm) + '.py /usr/local/bin/run-clang-tidy-' + str(input_args.llvm),
+                     'ln -s /usr/local/share/clang/run-clang-tidy.py /usr/local/bin/run-clang-tidy-'
+                     + str(input_args.llvm) + '.py',
+                     'ln -s /usr/local/bin/run-clang-tidy-'
+                     + str(input_args.llvm) + '.py /usr/local/bin/run-clang-tidy-' + str(input_args.llvm),
                      'ln -s /usr/local/libexec/c++-analyzer /usr/local/bin/c++-analyzer-' + str(input_args.llvm)])
     output_stages['compiler_build'] = tsan_stage
 
+
 def oneapi_runtime(_from='0'):
     oneapi_runtime_stage = hpccm.Stage()
     oneapi_runtime_stage += hpccm.primitives.copy(_from='oneapi-build',
@@ -320,20 +458,22 @@ def oneapi_runtime(_from='0'):
                                                          "/etc/bash.bashrc": "/etc/bash.bashrc"})
     return oneapi_runtime_stage
 
+
 def add_oneapi_compiler_build_stage(input_args, output_stages: typing.Mapping[str, hpccm.Stage]):
     """Isolate the oneAPI preparation stage.
 
     This stage is isolated so that its installed components are minimized in the
     final image (chiefly /opt/intel) and its environment setup script can be
     sourced. This also helps with rebuild time and final image size.
-
-    Note that the ICC compiler inside oneAPI on linux also needs
-    gcc to build other components and provide libstdc++.
     """
     if not isinstance(output_stages, collections.abc.MutableMapping):
         raise RuntimeError('Need output_stages container.')
+    if 'compiler_build' in output_stages:
+        raise RuntimeError('"compiler_build" output stage is already present.')
     oneapi_stage = hpccm.Stage()
-    oneapi_stage += hpccm.primitives.baseimage(image=base_image_tag(input_args), _as='oneapi-build')
+    oneapi_stage += hpccm.primitives.baseimage(image=base_image_tag(input_args),
+                                               _distro=hpccm_distro_name(input_args),
+                                               _as='oneapi-build')
 
     version = str(input_args.oneapi)
 
@@ -343,19 +483,22 @@ def add_oneapi_compiler_build_stage(input_args, output_stages: typing.Mapping[st
         apt_keys=['https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB'],
         apt_repositories=['deb https://apt.repos.intel.com/oneapi all main'],
         # Add minimal packages (not the whole HPC toolkit!)
-        ospackages=['intel-oneapi-dpcpp-cpp-{}'.format(version),
-            'intel-oneapi-openmp-{}'.format(version),
-            'intel-oneapi-mkl-{}'.format(version),
-            'intel-oneapi-mkl-devel-{}'.format(version)]
+        ospackages=[f'intel-oneapi-dpcpp-cpp-{version}',
+                    f'intel-oneapi-openmp-{version}',
+                    f'intel-oneapi-mkl-{version}',
+                    f'intel-oneapi-mkl-devel-{version}']
     )
     # Ensure that all bash shells on the final container will have access to oneAPI
     oneapi_stage += hpccm.primitives.shell(
-            commands=['echo "source /opt/intel/oneapi/setvars.sh" >> /etc/bash.bashrc']
-            )
+        commands=['echo "source /opt/intel/oneapi/setvars.sh" >> /etc/bash.bashrc',
+                  'unlink /opt/intel/oneapi/compiler/latest',
+                  f'ln -sf /opt/intel/oneapi/compiler/{version} /opt/intel/oneapi/compiler/latest']
+    )
     setattr(oneapi_stage, 'runtime', oneapi_runtime)
 
     output_stages['compiler_build'] = oneapi_stage
 
+
 def prepare_venv(version: StrictVersion) -> typing.Sequence[str]:
     """Get shell commands to set up the venv for the requested Python version."""
     major = version.version[0]
@@ -363,36 +506,31 @@ def prepare_venv(version: StrictVersion) -> typing.Sequence[str]:
 
     pyenv = '$HOME/.pyenv/bin/pyenv'
 
-    py_ver = '{}.{}'.format(major, minor)
-    venv_path = '$HOME/venv/py{}'.format(py_ver)
-    commands = ['$({pyenv} prefix `{pyenv} whence python{py_ver}`)/bin/python -m venv {path}'.format(
-        pyenv=pyenv,
-        py_ver=py_ver,
-        path=venv_path
-    )]
-
-    commands.append('{path}/bin/python -m pip install --upgrade pip setuptools'.format(
-        path=venv_path
-    ))
+    py_ver = f'{major}.{minor}'
+    venv_path = f'$HOME/venv/py{py_ver}'
+    commands = [f'$({pyenv} prefix `{pyenv} whence python{py_ver}`)/bin/python -m venv {venv_path}']
+
+    commands.append(f'{venv_path}/bin/python -m pip install --upgrade pip setuptools')
     # Install dependencies for building and testing gmxapi Python package.
     # WARNING: Please keep this list synchronized with python_packaging/requirements-test.txt
     # TODO: Get requirements.txt from an input argument.
-    commands.append("""{path}/bin/python -m pip install --upgrade \
-            'cmake>=3.13' \
+    commands.append(f"""{venv_path}/bin/python -m pip install --upgrade \
+            'breathe' \
+            'cmake>=3.16.3' \
             'flake8>=3.7.7' \
+            'gcovr>=4.2' \
             'mpi4py>=3.0.3' \
             'networkx>=2.0' \
             'numpy>=1' \
             'pip>=10.1' \
+            'pybind11>2.6' \
+            'Pygments>=2.2.0' \
             'pytest>=3.9' \
             'setuptools>=42' \
-            'scikit-build>=0.10'""".format(path=venv_path))
-
-    # TODO: Remove 'importlib_resources' dependency when Python >=3.7 is required.
-    if minor == 6:
-        commands.append("""{path}/bin/python -m pip install --upgrade \
-                'importlib_resources'""".format(path=venv_path))
-
+            'scikit-build>=0.10' \
+            'Sphinx>=1.6.3' \
+            'sphinxcontrib-plantuml>=0.14' \
+            'wheel'""")
     return commands
 
 
@@ -417,16 +555,28 @@ def add_python_stages(building_blocks: typing.Mapping[str, bb_base],
     # copy is a bit slow and wastes local Docker image space for each filesystem
     # layer.
     pyenv_stage = hpccm.Stage()
-    pyenv_stage += hpccm.primitives.baseimage(image=base_image_tag(input_args), _as='pyenv')
+    pyenv_stage += hpccm.primitives.baseimage(image=base_image_tag(input_args),
+                                              _distro=hpccm_distro_name(input_args),
+                                              _as='pyenv')
     pyenv_stage += building_blocks['compiler']
+    if building_blocks['gdrcopy'] is not None:
+        pyenv_stage += building_blocks['gdrcopy']
+    if building_blocks['ucx'] is not None:
+        pyenv_stage += building_blocks['ucx']
     pyenv_stage += building_blocks['mpi']
     pyenv_stage += hpccm.building_blocks.packages(ospackages=_python_extra_packages)
 
     for version in [StrictVersion(py_ver) for py_ver in sorted(input_args.venvs)]:
         stage_name = 'py' + str(version)
         stage = hpccm.Stage()
-        stage += hpccm.primitives.baseimage(image=base_image_tag(input_args), _as=stage_name)
+        stage += hpccm.primitives.baseimage(image=base_image_tag(input_args),
+                                            _distro=hpccm_distro_name(input_args),
+                                            _as=stage_name)
         stage += building_blocks['compiler']
+        if building_blocks['gdrcopy'] is not None:
+            stage += building_blocks['gdrcopy']
+        if building_blocks['ucx'] is not None:
+            stage += building_blocks['ucx']
         stage += building_blocks['mpi']
         stage += hpccm.building_blocks.packages(ospackages=_python_extra_packages)
 
@@ -438,9 +588,7 @@ def add_python_stages(building_blocks: typing.Mapping[str, bb_base],
             """echo 'eval "$(pyenv init -)"' >> $HOME/.bashrc""",
             """echo 'eval "$(pyenv virtualenv-init -)"' >> $HOME/.bashrc"""])
         pyenv = '$HOME/.pyenv/bin/pyenv'
-        commands = ['PYTHON_CONFIGURE_OPTS="--enable-shared" {pyenv} install -s {version}'.format(
-            pyenv=pyenv,
-            version=str(version))]
+        commands = [f'PYTHON_CONFIGURE_OPTS="--enable-shared" {pyenv} install -s {version}']
         stage += hpccm.primitives.shell(commands=commands)
 
         commands = prepare_venv(version)
@@ -471,9 +619,12 @@ def add_documentation_dependencies(input_args,
     """Add appropriate layers according to doxygen input arguments."""
     if input_args.doxygen is None:
         return
+    # Always clone the same version of linkchecker (latest release as of June 1, 2021)
+    output_stages['main'] += hpccm.building_blocks.pip(pip='pip3', packages=[
+        'git+https://github.com/linkchecker/linkchecker.git@v10.0.1'])
     output_stages['main'] += hpccm.primitives.shell(
-        commands=['sed -i \'/\"XPS\"/d;/\"PDF\"/d;/\"PS\"/d;/\"EPS\"/d;/disable ghostscript format types/d\' /etc/ImageMagick-6/policy.xml'])
-    output_stages['main'] += hpccm.building_blocks.pip(pip='pip3', packages=['sphinx==1.6.1', 'gcovr'])
+        commands=[
+            'sed -i \'/\"XPS\"/d;/\"PDF\"/d;/\"PS\"/d;/\"EPS\"/d;/disable ghostscript format types/d\' /etc/ImageMagick-6/policy.xml'])
     if input_args.doxygen == '1.8.5':
         doxygen_commit = 'ed4ed873ab0e7f15116e2052119a6729d4589f7a'
         output_stages['main'] += hpccm.building_blocks.generic_autotools(
@@ -491,17 +642,14 @@ def add_documentation_dependencies(input_args,
                 '--static'])
     else:
         version = input_args.doxygen
-        archive_name = 'doxygen-{}.linux.bin.tar.gz'.format(version)
-        archive_url = 'https://sourceforge.net/projects/doxygen/files/rel-{}/{}'.format(
-            version,
-            archive_name
-        )
-        binary_path = 'doxygen-{}/bin/doxygen'.format(version)
+        archive_name = f'doxygen-{version}.linux.bin.tar.gz'
+        archive_url = f'https://sourceforge.net/projects/doxygen/files/rel-{version}/{archive_name}'
+        binary_path = f'doxygen-{version}/bin/doxygen'
         commands = [
             'mkdir doxygen && cd doxygen',
-            'wget {}'.format(archive_url),
-            'tar xf {} {}'.format(archive_name, binary_path),
-            'cp {} /usr/local/bin/'.format(binary_path),
+            f'wget {archive_url}',
+            f'tar xf {archive_name} {binary_path}',
+            f'cp {binary_path} /usr/local/bin/',
             'cd .. && rm -rf doxygen'
         ]
         output_stages['main'] += hpccm.primitives.shell(commands=commands)
@@ -535,28 +683,59 @@ def build_stages(args) -> typing.Iterable[hpccm.Stage]:
 
     # These are the most expensive and most reusable layers, so we put them first.
     building_blocks['compiler'] = get_compiler(args, compiler_build_stage=stages.get('compiler_build'))
-    building_blocks['mpi'] = get_mpi(args, building_blocks['compiler'])
+    building_blocks['gdrcopy'] = get_gdrcopy(args, building_blocks['compiler'])
+    building_blocks['ucx'] = get_ucx(args, building_blocks['compiler'], building_blocks['gdrcopy'])
+    building_blocks['mpi'] = get_mpi(args, building_blocks['compiler'], building_blocks['ucx'])
     for i, cmake in enumerate(args.cmake):
         building_blocks['cmake' + str(i)] = hpccm.building_blocks.cmake(
             eula=True,
-            prefix='/usr/local/cmake-{}'.format(cmake),
+            prefix=f'/usr/local/cmake-{cmake}',
             version=cmake)
 
     # Install additional packages early in the build to optimize Docker build layer cache.
-    os_packages = list(get_llvm_packages(args)) + get_opencl_packages(args)
+    os_packages = list(get_llvm_packages(args)) + get_opencl_packages(args) + get_rocm_packages(args)
     if args.doxygen is not None:
         os_packages += _docs_extra_packages
     if args.oneapi is not None:
         os_packages += ['lsb-release']
-    building_blocks['extra_packages'] = hpccm.building_blocks.packages(
+    if args.hipsycl is not None:
+        os_packages += ['libboost-fiber-dev']
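+    # Collect the package building blocks in a list, since the apt repository
+    # set-up now differs per GPU backend.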
+    building_blocks['extra_packages'] = []
+    if args.intel_compute_runtime:
+        building_blocks['extra_packages'] += hpccm.building_blocks.packages(
+            apt_keys=['https://repositories.intel.com/graphics/intel-graphics.key'],
+            apt_repositories=['deb [arch=amd64] https://repositories.intel.com/graphics/ubuntu focal main']
+        )
+        os_packages += _intel_compute_runtime_extra_packages
+    if args.rocm is not None:
+        building_blocks['extra_packages'] += hpccm.building_blocks.packages(
+            apt_keys=['http://repo.radeon.com/rocm/rocm.gpg.key'],
+            apt_repositories=[f'deb [arch=amd64] http://repo.radeon.com/rocm/apt/{args.rocm}/ xenial main']
+        )
+    building_blocks['extra_packages'] += hpccm.building_blocks.packages(
         ospackages=os_packages,
-        apt_ppas=['ppa:intel-opencl/intel-opencl'],
-        apt_keys=['http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key'],
-        apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main']
-    )
+        apt_ppas=['ppa:intel-opencl/intel-opencl'])
+
+    if args.cuda is not None and args.llvm is not None:
+        # Hack to tell clang what version of CUDA we're using
+        # based on https://github.com/llvm/llvm-project/blob/1fdec59bffc11ae37eb51a1b9869f0696bfd5312/clang/lib/Driver/ToolChains/Cuda.cpp#L43
+        cuda_version_split = args.cuda.split('.')
+        # LLVM requires the version in x.y.z format, while args.cuda can be either x.y or x.y.z
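+        # e.g. args.cuda == '11.0' yields cuda_version_str == '11.0.0'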
+        cuda_version_str = '{}.{}.{}'.format(
+            cuda_version_split[0],
+            cuda_version_split[1],
+            cuda_version_split[2] if len(cuda_version_split) > 2 else 0
+        )
+        building_blocks['cuda-clang-workaround'] = hpccm.primitives.shell(commands=[
+            f'echo "CUDA Version {cuda_version_str}" > /usr/local/cuda/version.txt'
+        ])
 
     building_blocks['clfft'] = get_clfft(args)
 
+    building_blocks['heffte'] = get_heffte(args)
+
+    building_blocks['hipSYCL'] = get_hipsycl(args)
+
     # Add Python environments to MPI images, only, so we don't have to worry
     # about whether to install mpi4py.
     if args.mpi is not None and len(args.venvs) > 0:
@@ -571,9 +750,7 @@ def build_stages(args) -> typing.Iterable[hpccm.Stage]:
             stages['main'] += bb
 
     # We always add Python3 and Pip
-    stages['main'] += hpccm.building_blocks.python(python3=True, python2=False, devel=True)
-    stages['main'] += hpccm.building_blocks.pip(upgrade=True, pip='pip3',
-                                                packages=['pytest', 'networkx', 'numpy'])
+    stages['main'] += hpccm.building_blocks.python(python3=True, python2=False)
 
     # Add documentation requirements (doxygen and sphinx + misc).
     if args.doxygen is not None: