Skip to content

basic structure for tool to encode shares #473

basic structure for tool to encode shares

basic structure for tool to encode shares #473

Workflow file for this run

# Workflow display name.
name: Rust GPU Tests

# Trigger: pull request events (default activity types).
on:
  pull_request:

# Runs are grouped by workflow name plus the PR head label, falling back to
# the head ref and then the plain ref. A newer run in the same group cancels
# the one still in progress.
concurrency:
  group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}"
  cancel-in-progress: true
jobs:
  # End-to-end test job. Runs on the runner labelled `gpu`, installs the CUDA
  # toolkit and NCCL, then runs the `e2e` cargo tests three ways: plain, with
  # the `otp_encrypt` feature, and under compute-sanitizer memcheck.
  e2e:
    runs-on: gpu
    # NCCL settings shared by every test invocation in this job. Declared once
    # at job level so the three test steps cannot drift apart. Values are
    # quoted so they stay strings.
    env:
      NCCL_P2P_LEVEL: LOC
      NCCL_NET: Socket
      NCCL_P2P_DIRECT_DISABLE: "1"
      NCCL_SHM_DISABLE: "1"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Diagnostics: fail early if no GPU is visible, and log filesystem usage.
      - name: Validate presence of GPU devices
        run: nvidia-smi
      - name: Check shared memory size
        run: df -h

      - name: Install OpenSSL && pkg-config
        run: sudo apt-get update && sudo apt-get install -y pkg-config libssl-dev

      # Runs unconditionally: no step in this job caches the CUDA/NCCL
      # packages, so there is no cache hit to gate the install on.
      - name: Install CUDA and NCCL dependencies
        env:
          DEBIAN_FRONTEND: noninteractive
        run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get install -y cuda-toolkit-12-2 cuda-command-line-tools-12-2 libnccl2 libnccl-dev

      # Diagnostics: log where the NVRTC and CUDA driver libraries ended up.
      - name: Find libs
        run: find /usr -name "libnvrtc*" && find /usr -name libcuda.so

      # Restore cargo's registry/git caches and the build directory. The key
      # is exact on the Cargo.lock hash, with a per-OS prefix as fallback.
      - name: Cache Rust build
        uses: actions/cache@v4
        id: cache-rust
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: rust-build-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            rust-build-${{ runner.os }}-

      - name: Install Rust nightly
        uses: dtolnay/rust-toolchain@master
        with:
          toolchain: nightly

      - name: E2E Tests
        run: cargo test --release e2e
        shell: bash

      - name: E2E Tests w/ OTP encryption
        run: cargo test --release e2e --features otp_encrypt
        shell: bash

      # Builds the e2e test binary without running it and exports its path as
      # the step output `TEST_NAME`, consumed by the compute-sanitizer step.
      - name: Grab e2e executable name
        id: build-e2e
        shell: bash
        run: |
          # Capture cargo's output so a build failure is printed, not swallowed.
          build_log="$(cargo --color=never test --release e2e --no-run 2>&1)" || {
            echo "${build_log}"
            echo "::error::cargo failed to build the e2e test executable"
            exit 1
          }
          # cargo reports the binary as: Executable tests/e2e.rs (<path>)
          test_bin="$(printf '%s\n' "${build_log}" | grep "Executable tests/e2e.rs" | sed "s/.*(\(.*\))/\1/" || true)"
          if [ -z "${test_bin}" ]; then
            echo "${build_log}"
            echo "::error::could not find the e2e test executable in cargo's output"
            exit 1
          fi
          echo "TEST_NAME=${test_bin}" >> "$GITHUB_OUTPUT"

      # The binary path is passed through the environment and quoted rather
      # than interpolated into the script text.
      - name: E2E Tests w/ compute-sanitizer
        shell: bash
        env:
          E2E_TEST_BIN: ${{ steps.build-e2e.outputs.TEST_NAME }}
        run: /usr/local/cuda-12.2/bin/compute-sanitizer --tool=memcheck "$E2E_TEST_BIN" --nocapture