From 19e4343963aa54394f0e485afda6103983f8045e Mon Sep 17 00:00:00 2001
From: Billy Suh
Date: Sat, 14 Dec 2024 23:24:12 +0000
Subject: [PATCH] libnvshmem 3.1.7

---
 recipes/libnvshmem/build-libnvshmem.sh     |  14 ++
 recipes/libnvshmem/conda_build_config.yaml |   3 +
 recipes/libnvshmem/meta.yaml               | 199 +++++++++++++++++++++
 recipes/nvshmem/meta.yaml                  |  39 ++++
 4 files changed, 255 insertions(+)
 create mode 100644 recipes/libnvshmem/build-libnvshmem.sh
 create mode 100644 recipes/libnvshmem/conda_build_config.yaml
 create mode 100644 recipes/libnvshmem/meta.yaml
 create mode 100644 recipes/nvshmem/meta.yaml

diff --git a/recipes/libnvshmem/build-libnvshmem.sh b/recipes/libnvshmem/build-libnvshmem.sh
new file mode 100644
index 0000000000000..153aac019e80a
--- /dev/null
+++ b/recipes/libnvshmem/build-libnvshmem.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $PREFIX/lib/cmake/nvshmem/
+
+cp -rv bin $PREFIX/
+cp -rv include/ $PREFIX/
+cp -rv lib/cmake/ $PREFIX/lib/
+cp -rv lib/libnvshmem_host.so $PREFIX/lib
+cp -rv lib/nvshmem_bootstrap*.so $PREFIX/lib
+cp -rv lib/nvshmem_transport*.so $PREFIX/lib
+cp -rv share/ $PREFIX/
+
diff --git a/recipes/libnvshmem/conda_build_config.yaml b/recipes/libnvshmem/conda_build_config.yaml
new file mode 100644
index 0000000000000..a54dd8973663a
--- /dev/null
+++ b/recipes/libnvshmem/conda_build_config.yaml
@@ -0,0 +1,3 @@
+# https://docs.nvidia.com/deeplearning/cudnn/v9.3.0/reference/support-matrix.html#cpu-architecture-and-os-requirements
+c_stdlib_version:  # [linux]
+  - 2.28  # [linux]
diff --git a/recipes/libnvshmem/meta.yaml b/recipes/libnvshmem/meta.yaml
new file mode 100644
index 0000000000000..ef818759c1fe2
--- /dev/null
+++ b/recipes/libnvshmem/meta.yaml
@@ -0,0 +1,199 @@
+{% set version = "3.1.7" %}
+{% set platform = "linux-x86_64" %}  # [linux64]
+{% set platform = "linux-sbsa" %}  # [aarch64]
+{% set extension = "tar.xz" %}
+
+{% set soname = version.split(".")[0] %}
+{% set cuda_major = environ.get("cuda_compiler_version", "12.6").split(".")[0] %}
+
+package:
+  name: libnvshmem-split
+  version: {{ version }}
+
+source:
+  url: https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/{{ platform }}/libnvshmem-{{ platform }}-{{ version }}_cuda{{ cuda_major }}-archive.{{ extension }}
+  sha256: 4e03e40d16770a5bdbcefeb7f18579bdfc07aaa7b09a4825a7ab9b5122f69567  # [linux64 and (cuda_compiler_version or "").startswith("11")]
+  sha256: 128c62dd19926f58d1ff69290917dd50f3fb1690567f52d3965cd8fbfb9b2412  # [linux64 and (cuda_compiler_version or "").startswith("12")]
+  sha256: 03e4404e888609bc98f496453896acc34c9ca7bbc39cc32b41d677bcaab71c49  # [aarch64 and (cuda_compiler_version or "").startswith("12")]
+
+build:
+  number: 0
+  skip: true  # [not (linux64 or aarch64)]
+  skip: true  # [cuda_compiler_version in (None, "None") or (aarch64 and (cuda_compiler_version or "").startswith("11"))]
+  error_overlinking: false
+
+outputs:
+  - name: libnvshmem{{ soname }}
+    build:
+      missing_dso_whitelist:
+        - "*libcuda.so*"  # driver
+      run_exports:
+        - {{ pin_subpackage("libnvshmem" ~ soname, max_pin="x") }}
+      script: |
+        cp -rv lib $PREFIX/
+    files:
+      - lib/libnvshmem_host.so.*
+      - lib/nvshmem_bootstrap*.so.*
+      - lib/nvshmem_transport*.so.*
+    requirements:
+      build:
+        - {{ compiler('c') }}
+        - {{ compiler('cxx') }}
+        - {{ compiler('cuda') }}
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+        - {{ stdlib("c") }}
+      host:
+        - cuda-version {{ cuda_compiler_version }}
+        - cuda-cudart-dev  # [(cuda_compiler_version or "").startswith("12")]
+        - cudatoolkit  # [(cuda_compiler_version or "").startswith("11")]
+        - openmpi 5.0.6
+      run:
+        - {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }}
+        - libfabric
+        - libpciaccess
+        - openmpi 5.0.6
+        - rdma-core
+        - ucx
+      run_constrained:
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+    test:
+      commands:
+        - test -L $PREFIX/lib/libnvshmem_host.so.{{ soname }}
+        - test -f $PREFIX/lib/libnvshmem_host.so.{{ version }}
+        - test -L $PREFIX/lib/nvshmem_bootstrap_mpi.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_mpi.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmi.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_pmi.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmi2.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_pmi2.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmix.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_pmix.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_shmem.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_shmem.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_uid.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_uid.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_ibdevx.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_ibdevx.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_ibgda.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_ibgda.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_ibrc.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_ibrc.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_libfabric.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_libfabric.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_ucx.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_ucx.so.{{ soname }}.0.0
+
+  - name: libnvshmem-dev
+    build:
+      missing_dso_whitelist:
+        - "*libcuda.so*"  # driver
+        - "*libnvshmem_host.so.{{ soname }}"  # avoids: ERROR (libnvshmem-dev,bin/perftest/device/pt-to-pt/shmem_put_bw): lib/libnvshmem_host.so.3 not found in any packages. We DO test for its existence though.
+      run_exports:
+        - {{ pin_subpackage("libnvshmem" ~ soname, max_pin="x") }}
+      script: build-libnvshmem.sh
+      script_interpreter: /bin/bash
+    requirements:
+      build:
+        - {{ compiler("c") }}
+        - {{ compiler("cxx") }}
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+        - {{ stdlib("c") }}
+      host:
+        - cuda-version {{ cuda_compiler_version }}
+        - cuda-cudart-dev  # [(cuda_compiler_version or "").startswith("12")]
+        - cudatoolkit  # [(cuda_compiler_version or "").startswith("11")]
+        - libpciaccess
+        - openmpi 5.0.6
+      run:
+        - {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }}
+        - {{ pin_subpackage("libnvshmem" ~ soname, exact=True) }}
+        - cudatoolkit  # [(cuda_compiler_version or "").startswith("11")]
+        - openmpi 5.0.6
+      run_constrained:
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+    test:
+      commands:
+        - test -f $PREFIX/bin/hydra_pmi_proxy
+        - test -f $PREFIX/bin/nvshmem-info
+        - test -f $PREFIX/bin/nvshmrun.hydra
+        - test -f $PREFIX/bin/hydra_nameserver
+        - test -f $PREFIX/bin/nvshmrun
+        - test -f $PREFIX/bin/hydra_persist
+        - test -f $PREFIX/bin/examples/collective-launch
+        - test -f $PREFIX/bin/examples/on-stream
+        - test -f $PREFIX/bin/perftest/device/pt-to-pt/shmem_p_latency
+        - test -f $PREFIX/bin/perftest/device/coll/reduction_latency
+        - test -f $PREFIX/bin/perftest/host/init/malloc
+        - test -f $PREFIX/bin/perftest/host/pt-to-pt/bw
+        - test -f $PREFIX/bin/perftest/host/coll/broadcast_on_stream
+        - test -f $PREFIX/include/bootstrap_device_host/nvshmem_uniqueid.h
+        - test -f $PREFIX/include/device/nvshmem_defines.h
+        - test -f $PREFIX/include/device_host/nvshmem_types.h
+        - test -f $PREFIX/include/host/nvshmem_api.h
+        - test -f $PREFIX/include/non_abi/device/coll/defines.cuh
+        - test -f $PREFIX/include/device_host_transport/nvshmem_constants.h
+        - test -f $PREFIX/include/nvshmem.h
+        - test -f $PREFIX/include/nvshmemx.h
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMConfig.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMDeviceTargets-release.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMDeviceTargets.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMTargets-release.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMTargets.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMVersion.cmake
+        - test -L $PREFIX/lib/libnvshmem_host.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_mpi.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmi.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmi2.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmix.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_shmem.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_uid.so
+        - test -L $PREFIX/lib/nvshmem_transport_ibdevx.so
+        - test -L $PREFIX/lib/nvshmem_transport_ibgda.so
+        - test -L $PREFIX/lib/nvshmem_transport_ibrc.so
+        - test -L $PREFIX/lib/nvshmem_transport_libfabric.so
+        - test -L $PREFIX/lib/nvshmem_transport_ucx.so
+
+  - name: libnvshmem-static
+    build:
+      script: |
+        cp -rv lib/*.a $PREFIX/lib/
+    files:
+      - lib/libnvshmem*.a
+    requirements:
+      build:
+        - {{ compiler("c") }}
+        - {{ compiler("cxx") }}
+        - {{ stdlib("c") }}
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+      host:
+        - cuda-version {{ cuda_compiler_version }}
+        - openmpi 5.0.6
+        - rdma-core
+        - ucx
+      run:
+        - {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }}
+        - openmpi 5.0.6
+        - rdma-core
+        - ucx
+      run_constrained:
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+    test:
+      commands:
+        - test -f $PREFIX/lib/libnvshmem_device.a
+        - test -f $PREFIX/lib/libnvshmem.a
+
+about:
+  home: https://docs.nvidia.com/nvshmem/index.html
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  license_file: LICENSE
+  license_url: https://docs.nvidia.com/nvshmem/api/sla.html
+  summary: NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs.
+  description: |
+    NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs.
+    NVSHMEM can significantly reduce communication and coordination overheads by allowing programmers to perform these operations from within CUDA kernels and on CUDA streams.
+  doc_url: https://docs.nvidia.com/nvshmem/api/index.html
+
+extra:
+  feedstock-name: libnvshmem
+  recipe-maintainers:
+    - conda-forge/cuda
diff --git a/recipes/nvshmem/meta.yaml b/recipes/nvshmem/meta.yaml
new file mode 100644
index 0000000000000..cf5a262d4d02e
--- /dev/null
+++ b/recipes/nvshmem/meta.yaml
@@ -0,0 +1,39 @@
+{% set version = "3.1.7" %}
+{% set soname = version.split(".")[0] %}
+
+package:
+  name: nvshmem
+  version: {{ version }}
+
+source:
+  url: https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/LICENSE.txt
+  sha256: 43a87c0ff94ce3196011ff75e17fbee96933c9e1d511557659ece8a326f95e8f
+
+build:
+  number: 0
+  noarch: generic
+
+requirements:
+  run:
+    - libnvshmem{{ soname }} {{ version }}
+    - libnvshmem-dev {{ version }}
+    - libnvshmem-static {{ version }}
+
+test:
+  commands:
+    - exit 0
+
+about:
+  home: https://developer.nvidia.com/cuda-toolkit
+  license_file: LICENSE.txt
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  license_url: https://docs.nvidia.com/cuda/eula/index.html
+  summary: Meta-package containing all NVIDIA NVSHMEM packages, not intended for use in a conda-build context.
+  description: |
+    NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs.
+    NVSHMEM can significantly reduce communication and coordination overheads by allowing programmers to perform these operations from within CUDA kernels and on CUDA streams.
+  doc_url: https://docs.nvidia.com/nvshmem/api/index.html
+
+extra:
+  recipe-maintainers:
+    - conda-forge/cuda