diff --git a/docs/building_blocks.md b/docs/building_blocks.md index 9837b58..b9aa583 100644 --- a/docs/building_blocks.md +++ b/docs/building_blocks.md @@ -3349,8 +3349,11 @@ directory should be added dynamic linker cache. If False, then `LD_LIBRARY_PATH` is modified to include the NVSHMEM library directory. The default value is False. -- __mpi__: Flag to specify the path to the MPI installation. The -default is empty, i.e., do not build NVSHMEM with MPI support. +- __mpi__: Flag to enable MPI support. If True, enables MPI and relies +on CMake's FindMPI to locate the installation. If a string, uses +the value as the MPI installation path (MPI_HOME). If False, +MPI support is explicitly disabled. The default is True, matching +the upstream NVSHMEM CMake default. - __ospackages__: List of OS packages to install prior to building. The default values are `make` and `wget`. diff --git a/hpccm/building_blocks/nvshmem.py b/hpccm/building_blocks/nvshmem.py index 85e8180..dbb889c 100644 --- a/hpccm/building_blocks/nvshmem.py +++ b/hpccm/building_blocks/nvshmem.py @@ -24,6 +24,8 @@ import os import posixpath +from packaging.version import Version + import hpccm.templates.downloader import hpccm.templates.envvars import hpccm.templates.ldconfig @@ -71,8 +73,11 @@ class nvshmem(bb_base, hpccm.templates.downloader, hpccm.templates.envvars, `LD_LIBRARY_PATH` is modified to include the NVSHMEM library directory. The default value is False. - mpi: Flag to specify the path to the MPI installation. The - default is empty, i.e., do not build NVSHMEM with MPI support. + mpi: Flag to enable MPI support. If True, enables MPI and relies + on CMake's FindMPI to locate the installation. If a string, uses + the value as the MPI installation path (MPI_HOME). If False, + MPI support is explicitly disabled. The default is True, matching + the upstream NVSHMEM CMake default. ospackages: List of OS packages to install prior to building. The default values are `make` and `wget`. 
@@ -104,7 +109,7 @@ def __init__(self, **kwargs): self.__cmake_opts = kwargs.pop('cmake_opts', []) self.__cuda = kwargs.pop('cuda', '/usr/local/cuda') self.__gdrcopy = kwargs.pop('gdrcopy', None) - self.__mpi = kwargs.pop('mpi', None) + self.__mpi = kwargs.pop('mpi', True) self.__ospackages = kwargs.pop('ospackages', ['make', 'wget']) self.__prefix = kwargs.pop('prefix', '/usr/local/nvshmem') self.__shmem = kwargs.pop('shmem', None) @@ -115,6 +120,12 @@ def __init__(self, **kwargs): self.__download() kwargs['url'] = self.url + # GitHub release tarballs are named after the tag (v3.6.5-0.tar.gz), but the + # top-level directory is nvshmem-3.6.5-0, without the tag's leading 'v'. + if (kwargs.get('directory') is None and self.url + and 'github.com/NVIDIA/nvshmem' in self.url): + kwargs['directory'] = 'nvshmem-{0}'.format(self.__version.lstrip('v')) + # Setup the environment variables self.environment_variables['CPATH'] = '{}:$CPATH'.format( posixpath.join(self.__prefix, 'include')) @@ -134,6 +145,18 @@ def __init__(self, **kwargs): # Set the build options self.__configure() + # NVSHMEM's CMake configure step (find_package(CUDAToolkit) and + # several find_library calls) needs to be able to dlopen CUDA + # runtime libraries, so prepend cuda/lib64 to LD_LIBRARY_PATH for + # the build environment whenever a CUDA installation is known. 
+ if self.__cuda: + be = dict(kwargs.get('build_environment') or {}) + cuda_lib = posixpath.join(self.__cuda, 'lib64') + existing = be.get('LD_LIBRARY_PATH', '') + if cuda_lib not in existing.split(':'): + be['LD_LIBRARY_PATH'] = '{}:{}'.format(cuda_lib, existing).rstrip(':') + kwargs['build_environment'] = be + self.__bb = generic_cmake( cmake_opts=self.__cmake_opts, comment=False, @@ -161,20 +184,31 @@ def __configure(self): self.__cmake_opts.append('-DGDRCOPY_HOME={}'.format(self.__gdrcopy)) if self.__mpi: - self.__cmake_opts.append('-DNVSHMEM_MPI_SUPPORT=1') - self.__cmake_opts.append('-DMPI_HOME={}'.format(self.__mpi)) - #else: - # self.__cmake_opts.append('-DNVSHMEM_MPI_SUPPORT=0') + self.__cmake_opts.append('-DNVSHMEM_MPI_SUPPORT=ON') + if isinstance(self.__mpi, str): + self.__cmake_opts.append('-DMPI_HOME={}'.format(self.__mpi)) + else: + # NVSHMEM 3.4.5+ defaults NVSHMEM_MPI_SUPPORT to ON, so an + # explicit OFF is required when MPI is disabled (mpi=False). + self.__cmake_opts.append('-DNVSHMEM_MPI_SUPPORT=OFF') if self.__shmem: self.__cmake_opts.append('-DNVSHMEM_SHMEM_SUPPORT=1') self.__cmake_opts.append('-DSHMEM_HOME={}'.format(self.__shmem)) + # First NVSHMEM version published as a GitHub release tarball + __github_min_version = Version('3.4.5') + def __download(self): """Set download source based on user parameters""" if not self.package and not self.repository and not self.url: - self.url = 'https://developer.download.nvidia.com/compute/redist/nvshmem/{0}/source/nvshmem_src_{1}.txz'.format(self.__version.split('-')[0], self.__version) + v = Version(self.__version.split('-')[0]) + if v >= self.__github_min_version: + tag = self.__version if self.__version.startswith('v') else 'v{}'.format(self.__version) + self.url = 'https://github.com/NVIDIA/nvshmem/archive/refs/tags/{}.tar.gz'.format(tag) + else: + self.url = 'https://developer.download.nvidia.com/compute/redist/nvshmem/{0}/source/nvshmem_src_{1}.txz'.format(self.__version.split('-')[0], self.__version) def 
runtime(self, _from='0'): """Generate the set of instructions to install the runtime specific diff --git a/test/.test_nvshmem.py.swp b/test/.test_nvshmem.py.swp new file mode 100644 index 0000000..1c0ca93 Binary files /dev/null and b/test/.test_nvshmem.py.swp differ diff --git a/test/test_nvshmem.py b/test/test_nvshmem.py index ea63830..b4f2eae 100644 --- a/test/test_nvshmem.py +++ b/test/test_nvshmem.py @@ -45,7 +45,7 @@ def test_defaults_ubuntu(self): rm -rf /var/lib/apt/lists/* RUN mkdir -p /var/tmp && wget -q -nc -P /var/tmp https://developer.download.nvidia.com/compute/redist/nvshmem/2.9.0/source/nvshmem_src_2.9.0-2.txz && \ mkdir -p /var/tmp && tar -x -f /var/tmp/nvshmem_src_2.9.0-2.txz -C /var/tmp -J && \ - mkdir -p /var/tmp/nvshmem_src_2.9.0-2/build && cd /var/tmp/nvshmem_src_2.9.0-2/build && cmake -DCMAKE_INSTALL_PREFIX=/usr/local/nvshmem -DNVSHMEM_BUILD_EXAMPLES=OFF -DNVSHMEM_BUILD_PACKAGES=OFF -DNVSHMEM_BUILD_DEB_PACKAGES=OFF -DNVSHMEM_BUILD_RPM_PACKAGES=OFF -DCUDA_HOME=/usr/local/cuda /var/tmp/nvshmem_src_2.9.0-2 && \ + mkdir -p /var/tmp/nvshmem_src_2.9.0-2/build && cd /var/tmp/nvshmem_src_2.9.0-2/build && LD_LIBRARY_PATH=/usr/local/cuda/lib64 cmake -DCMAKE_INSTALL_PREFIX=/usr/local/nvshmem -DNVSHMEM_BUILD_EXAMPLES=OFF -DNVSHMEM_BUILD_PACKAGES=OFF -DNVSHMEM_BUILD_DEB_PACKAGES=OFF -DNVSHMEM_BUILD_RPM_PACKAGES=OFF -DCUDA_HOME=/usr/local/cuda -DNVSHMEM_MPI_SUPPORT=ON /var/tmp/nvshmem_src_2.9.0-2 && \ cmake --build /var/tmp/nvshmem_src_2.9.0-2/build --target all -- -j$(nproc) && \ cmake --build /var/tmp/nvshmem_src_2.9.0-2/build --target install -- -j$(nproc) && \ rm -rf /var/tmp/nvshmem_src_2.9.0-2 /var/tmp/nvshmem_src_2.9.0-2.txz @@ -57,8 +57,8 @@ def test_defaults_ubuntu(self): @ubuntu @docker def test_package_ubuntu(self): - """nvshmem source package""" - n = nvshmem(package='nvshmem_src_2.9.0-2.tar.xz') + """nvshmem source package, MPI support explicitly disabled""" + n = nvshmem(package='nvshmem_src_2.9.0-2.tar.xz', mpi=False) self.assertEqual(str(n), 
r'''# NVSHMEM RUN apt-get update -y && \ @@ -68,7 +68,7 @@ def test_package_ubuntu(self): rm -rf /var/lib/apt/lists/* COPY nvshmem_src_2.9.0-2.tar.xz /var/tmp/nvshmem_src_2.9.0-2.tar.xz RUN mkdir -p /var/tmp && tar -x -f /var/tmp/nvshmem_src_2.9.0-2.tar.xz -C /var/tmp -J && \ - mkdir -p /var/tmp/nvshmem_src_2.9.0-2/build && cd /var/tmp/nvshmem_src_2.9.0-2/build && cmake -DCMAKE_INSTALL_PREFIX=/usr/local/nvshmem -DNVSHMEM_BUILD_EXAMPLES=OFF -DNVSHMEM_BUILD_PACKAGES=OFF -DNVSHMEM_BUILD_DEB_PACKAGES=OFF -DNVSHMEM_BUILD_RPM_PACKAGES=OFF -DCUDA_HOME=/usr/local/cuda /var/tmp/nvshmem_src_2.9.0-2 && \ + mkdir -p /var/tmp/nvshmem_src_2.9.0-2/build && cd /var/tmp/nvshmem_src_2.9.0-2/build && LD_LIBRARY_PATH=/usr/local/cuda/lib64 cmake -DCMAKE_INSTALL_PREFIX=/usr/local/nvshmem -DNVSHMEM_BUILD_EXAMPLES=OFF -DNVSHMEM_BUILD_PACKAGES=OFF -DNVSHMEM_BUILD_DEB_PACKAGES=OFF -DNVSHMEM_BUILD_RPM_PACKAGES=OFF -DCUDA_HOME=/usr/local/cuda -DNVSHMEM_MPI_SUPPORT=OFF /var/tmp/nvshmem_src_2.9.0-2 && \ cmake --build /var/tmp/nvshmem_src_2.9.0-2/build --target all -- -j$(nproc) && \ cmake --build /var/tmp/nvshmem_src_2.9.0-2/build --target install -- -j$(nproc) && \ rm -rf /var/tmp/nvshmem_src_2.9.0-2 /var/tmp/nvshmem_src_2.9.0-2.tar.xz @@ -95,7 +95,7 @@ def test_cmake_options_centos(self): rm -rf /var/cache/yum/* RUN mkdir -p /var/tmp && wget -q -nc -P /var/tmp https://developer.download.nvidia.com/compute/redist/nvshmem/2.9.0/source/nvshmem_src_2.9.0-2.txz && \ mkdir -p /var/tmp && tar -x -f /var/tmp/nvshmem_src_2.9.0-2.txz -C /var/tmp -J && \ - mkdir -p /var/tmp/nvshmem_src_2.9.0-2/build && cd /var/tmp/nvshmem_src_2.9.0-2/build && cmake -DCMAKE_INSTALL_PREFIX=/usr/local/nvshmem -DNVSHMEM_USE_NCCL=1 -DNVSHMEM_UCX_SUPPORT=1 -DNVSHMEM_BUILD_EXAMPLES=OFF -DNVSHMEM_BUILD_PACKAGES=OFF -DNVSHMEM_BUILD_DEB_PACKAGES=OFF -DNVSHMEM_BUILD_RPM_PACKAGES=OFF -DCUDA_HOME=/usr/local/cuda -DGDRCOPY_HOME=/usr/local/gdrcopy -DNVSHMEM_MPI_SUPPORT=1 -DMPI_HOME=/usr/local/openmpi -DNVSHMEM_SHMEM_SUPPORT=1 
-DSHMEM_HOME=/usr/local/openmpi /var/tmp/nvshmem_src_2.9.0-2 && \ + mkdir -p /var/tmp/nvshmem_src_2.9.0-2/build && cd /var/tmp/nvshmem_src_2.9.0-2/build && LD_LIBRARY_PATH=/usr/local/cuda/lib64 cmake -DCMAKE_INSTALL_PREFIX=/usr/local/nvshmem -DNVSHMEM_USE_NCCL=1 -DNVSHMEM_UCX_SUPPORT=1 -DNVSHMEM_BUILD_EXAMPLES=OFF -DNVSHMEM_BUILD_PACKAGES=OFF -DNVSHMEM_BUILD_DEB_PACKAGES=OFF -DNVSHMEM_BUILD_RPM_PACKAGES=OFF -DCUDA_HOME=/usr/local/cuda -DGDRCOPY_HOME=/usr/local/gdrcopy -DNVSHMEM_MPI_SUPPORT=ON -DMPI_HOME=/usr/local/openmpi -DNVSHMEM_SHMEM_SUPPORT=1 -DSHMEM_HOME=/usr/local/openmpi /var/tmp/nvshmem_src_2.9.0-2 && \ cmake --build /var/tmp/nvshmem_src_2.9.0-2/build --target all -- -j$(nproc) && \ cmake --build /var/tmp/nvshmem_src_2.9.0-2/build --target install -- -j$(nproc) && \ rm -rf /var/tmp/nvshmem_src_2.9.0-2 /var/tmp/nvshmem_src_2.9.0-2.txz @@ -104,6 +104,29 @@ def test_cmake_options_centos(self): LIBRARY_PATH=/usr/local/nvshmem/lib:$LIBRARY_PATH \ PATH=/usr/local/nvshmem/bin:$PATH''') + @ubuntu + @docker + def test_github_release_345_mpi(self): + """nvshmem 3.4.5 fetched from a GitHub release tarball with an explicit MPI path""" + n = nvshmem(mpi='/usr/local/openmpi', version='3.4.5-0') + self.assertEqual(str(n), +r'''# NVSHMEM 3.4.5-0 +RUN apt-get update -y && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + make \ + wget && \ + rm -rf /var/lib/apt/lists/* +RUN mkdir -p /var/tmp && wget -q -nc -P /var/tmp https://github.com/NVIDIA/nvshmem/archive/refs/tags/v3.4.5-0.tar.gz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/v3.4.5-0.tar.gz -C /var/tmp -z && \ + mkdir -p /var/tmp/nvshmem-3.4.5-0/build && cd /var/tmp/nvshmem-3.4.5-0/build && LD_LIBRARY_PATH=/usr/local/cuda/lib64 cmake -DCMAKE_INSTALL_PREFIX=/usr/local/nvshmem -DNVSHMEM_BUILD_EXAMPLES=OFF -DNVSHMEM_BUILD_PACKAGES=OFF -DNVSHMEM_BUILD_DEB_PACKAGES=OFF -DNVSHMEM_BUILD_RPM_PACKAGES=OFF -DCUDA_HOME=/usr/local/cuda -DNVSHMEM_MPI_SUPPORT=ON -DMPI_HOME=/usr/local/openmpi 
/var/tmp/nvshmem-3.4.5-0 && \ + cmake --build /var/tmp/nvshmem-3.4.5-0/build --target all -- -j$(nproc) && \ + cmake --build /var/tmp/nvshmem-3.4.5-0/build --target install -- -j$(nproc) && \ + rm -rf /var/tmp/nvshmem-3.4.5-0 /var/tmp/v3.4.5-0.tar.gz +ENV CPATH=/usr/local/nvshmem/include:$CPATH \ + LD_LIBRARY_PATH=/usr/local/nvshmem/lib:$LD_LIBRARY_PATH \ + LIBRARY_PATH=/usr/local/nvshmem/lib:$LIBRARY_PATH \ + PATH=/usr/local/nvshmem/bin:$PATH''') + @ubuntu @docker def test_runtime(self):