Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
b72e6ed
Update
vmoens Dec 12, 2025
2aa2f9c
Update
vmoens Dec 12, 2025
693f3e9
Update
vmoens Dec 12, 2025
4bc19f9
Update
vmoens Dec 12, 2025
7f4124b
Update
vmoens Dec 12, 2025
051ebd6
Update
vmoens Dec 12, 2025
34dec7a
Update
vmoens Dec 12, 2025
14a1f56
Update
vmoens Dec 12, 2025
77b0337
Update
vmoens Dec 12, 2025
3834732
Update
vmoens Dec 12, 2025
85a0718
Update
vmoens Dec 12, 2025
cb6d148
Update
vmoens Dec 12, 2025
2e96673
Update
vmoens Dec 12, 2025
f6568e6
Update
vmoens Dec 12, 2025
0aad8f0
Update
vmoens Dec 12, 2025
a24bd14
Update
vmoens Dec 12, 2025
844f22f
Update
vmoens Dec 12, 2025
04f724c
Update
vmoens Dec 15, 2025
ed35d1a
Update
vmoens Dec 15, 2025
ae09156
Update
vmoens Dec 15, 2025
421afb9
Update
vmoens Dec 15, 2025
d888a05
Update
vmoens Dec 15, 2025
fb56f5b
Update
vmoens Dec 15, 2025
7b602bc
Update
vmoens Dec 15, 2025
bd5f2cb
Update
vmoens Dec 15, 2025
b3c8935
Update
vmoens Dec 16, 2025
678eb83
Update
vmoens Dec 16, 2025
ea505d8
Update
vmoens Dec 16, 2025
35d324b
Update
vmoens Dec 16, 2025
1f7dd64
Update
vmoens Dec 16, 2025
e3e7ac6
Update
vmoens Dec 16, 2025
948e7c4
Update
vmoens Dec 16, 2025
1944599
Update
vmoens Dec 16, 2025
2c919f5
Update
vmoens Dec 16, 2025
e34b474
Update
vmoens Dec 16, 2025
0b82563
Update
vmoens Dec 16, 2025
7189fb9
Update
vmoens Dec 17, 2025
d51342a
Update
vmoens Dec 17, 2025
ec96627
Update
vmoens Dec 17, 2025
e6dcb0d
Update
vmoens Dec 17, 2025
4c06a56
Update
vmoens Dec 17, 2025
c643083
Update
vmoens Dec 17, 2025
fe8fdef
Update
vmoens Dec 18, 2025
3f3d7af
Update
vmoens Dec 18, 2025
ad9e57a
Update
vmoens Dec 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/unittest/linux_sota/scripts/environment.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
channels:
- pytorch
- defaults
- pytorch
dependencies:
- pip
- protobuf
Expand Down
3 changes: 0 additions & 3 deletions .github/unittest/linux_sota/scripts/post_process.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#!/usr/bin/env bash

set -e

eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env
185 changes: 61 additions & 124 deletions .github/unittest/linux_sota/scripts/run_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ set -v
# ==================================================================================== #
# ================================ Init ============================================== #


export DEBIAN_FRONTEND=noninteractive
export TZ="${TZ:-Etc/UTC}"
ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime || true
Expand All @@ -17,10 +16,10 @@ apt-get install -y --no-install-recommends tzdata
dpkg-reconfigure -f noninteractive tzdata || true

apt-get upgrade -y
apt-get install -y vim git wget cmake
apt-get install -y vim git wget cmake curl

apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libosmesa6-dev
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2
apt-get install -y libglfw3 libosmesa6 libglew-dev libosmesa6-dev
apt-get install -y libglvnd0 libgl1 libglx0 libglx-mesa0 libegl1 libgles2
apt-get install -y g++ gcc patchelf

this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
Expand All @@ -34,152 +33,94 @@ cp $this_dir/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json
# Avoid error: "fatal: unsafe repository"
git config --global --add safe.directory '*'
root_dir="$(git rev-parse --show-toplevel)"
conda_dir="${root_dir}/conda"
env_dir="${root_dir}/env"
lib_dir="${env_dir}/lib"
env_dir="${root_dir}/venv"

cd "${root_dir}"

case "$(uname -s)" in
Darwin*) os=MacOSX;;
*) os=Linux
esac

# 1. Install conda at ./conda
if [ ! -d "${conda_dir}" ]; then
printf "* Installing conda\n"
wget -O miniconda.sh "http://repo.continuum.io/miniconda/Miniconda3-latest-${os}-x86_64.sh"
bash ./miniconda.sh -b -f -p "${conda_dir}"
fi
eval "$(${conda_dir}/bin/conda shell.bash hook)"
# Install uv
curl -LsSf https://astral.sh/uv/install.sh | sh
export PATH="$HOME/.local/bin:$PATH"

# 2. Create test environment at ./env
printf "python: ${PYTHON_VERSION}\n"
if [ ! -d "${env_dir}" ]; then
printf "* Creating a test environment\n"
conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
fi
conda activate "${env_dir}"

# Verify we have CPython, not PyPy
python_impl=$(python -c "import platform; print(platform.python_implementation())")
if [ "$python_impl" != "CPython" ]; then
echo "ERROR: Expected CPython but got $python_impl"
echo "Python executable: $(which python)"
echo "Python version: $(python --version)"
exit 1
fi
printf "* Verified Python implementation: %s\n" "$python_impl"

# 3. Install mujoco
printf "* Installing mujoco and related\n"
mkdir -p $root_dir/.mujoco
cd $root_dir/.mujoco/
#wget https://github.com/deepmind/mujoco/releases/download/2.1.1/mujoco-2.1.1-linux-x86_64.tar.gz
#tar -xf mujoco-2.1.1-linux-x86_64.tar.gz
wget https://mujoco.org/download/mujoco210-linux-x86_64.tar.gz
tar -xf mujoco210-linux-x86_64.tar.gz
cd "${root_dir}"
# Create venv with uv
printf "* Creating venv with Python ${PYTHON_VERSION}\n"
uv venv --python "${PYTHON_VERSION}" "${env_dir}"
source "${env_dir}/bin/activate"

# 4. Install Conda dependencies
printf "* Installing dependencies (except PyTorch)\n"
# Add python version to environment.yml if not already present (idempotent)
if ! grep -q "python=${PYTHON_VERSION}" "${this_dir}/environment.yml"; then
echo " - python=${PYTHON_VERSION}" >> "${this_dir}/environment.yml"
fi
cat "${this_dir}/environment.yml"
# Verify CPython
python -c "import sys; assert sys.implementation.name == 'cpython', f'Expected CPython, got {sys.implementation.name}'"

export MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210
#export MJLIB_PATH=$root_dir/.mujoco/mujoco-2.1.1/lib/libmujoco.so.2.1.1
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$root_dir/.mujoco/mujoco210/bin
# Set environment variables
export SDL_VIDEODRIVER=dummy
export MUJOCO_GL=egl
export PYOPENGL_PLATFORM=egl
export LAZY_LEGACY_OP=False
export COMPOSITE_LP_AGGREGATE=0
export MAX_IDLE_COUNT=1000
export DISPLAY=:99
export BATCHED_PIPE_TIMEOUT=60
export TOKENIZERS_PARALLELISM=true

conda env config vars set \
MAX_IDLE_COUNT=1000 \
MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210 \
DISPLAY=:99 \
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$root_dir/.mujoco/mujoco210/bin \
SDL_VIDEODRIVER=dummy \
MUJOCO_GL=egl \
PYOPENGL_PLATFORM=egl \
BATCHED_PIPE_TIMEOUT=60 \
TOKENIZERS_PARALLELISM=true

pip install pip --upgrade

conda env update --file "${this_dir}/environment.yml" --prune

conda deactivate
conda activate "${env_dir}"

# install d4rl
pip install free-mujoco-py
pip install git+https://github.com/Farama-Foundation/d4rl@master#egg=d4rl

# TODO: move this down -- will break torchrl installation
conda install -y -c conda-forge libstdcxx-ng=12
## find libstdc - search in the env's lib directory first, then fall back to conda packages
STDC_LOC=$(find "${env_dir}/lib" -name "libstdc++.so.6" 2>/dev/null | head -1)
if [ -z "$STDC_LOC" ]; then
# Fall back to searching in conda packages for libstdcxx-ng specifically
STDC_LOC=$(find conda/pkgs -path "*libstdcxx*" -name "libstdc++.so.6" 2>/dev/null | head -1)
fi
if [ -z "$STDC_LOC" ]; then
echo "WARNING: Could not find libstdc++.so.6, skipping LD_PRELOAD"
conda env config vars set \
MAX_IDLE_COUNT=1000 \
TOKENIZERS_PARALLELISM=true
else
echo "Found libstdc++ at: $STDC_LOC"
conda env config vars set \
MAX_IDLE_COUNT=1000 \
LD_PRELOAD=${STDC_LOC} TOKENIZERS_PARALLELISM=true
fi

# Reactivate environment to apply the new env vars
conda deactivate
conda activate "${env_dir}"

# compile mujoco-py (bc it's done at runtime for whatever reason someone thought it was a good idea)
python -c """import gym;import d4rl"""

# install ale-py: manylinux names are broken for CentOS so we need to manually download and
# rename them
# ==================================================================================== #
# ================================ Install dependencies ============================== #

printf "* Installing dependencies\n"

# Install base dependencies
uv pip install \
hypothesis \
future \
cloudpickle \
pygame \
"moviepy<2.0.0" \
tqdm \
pytest \
pytest-cov \
pytest-mock \
pytest-instafail \
pytest-rerunfailures \
expecttest \
pybind11 \
pyyaml \
scipy \
hydra-core \
"imageio==2.26.0" \
dm_control \
"mujoco<3.3.6" \
mlflow \
av \
coverage \
vmas \
transformers \
"minari[hdf5,create]"

# Install gymnasium with atari and mujoco support
uv pip install "gymnasium[atari,mujoco]>=1.1.0"

# ============================================================================================ #
# ================================ PyTorch & TorchRL ========================================= #


if [[ ${#CU_VERSION} -eq 4 ]]; then
CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}"
elif [[ ${#CU_VERSION} -eq 5 ]]; then
CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}"
fi
echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"

# submodules
git submodule sync && git submodule update --init --recursive

pip3 install ale-py -U
pip3 install "gym[atari,accept-rom-license]" "gymnasium>=1.1.0" -U

printf "Installing PyTorch with %s\n" "${CU_VERSION}"
if [[ "$TORCH_VERSION" == "nightly" ]]; then
if [ "${CU_VERSION:-}" == cpu ] ; then
pip3 install --pre torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/nightly/cpu -U
uv pip install --pre torch torchvision "numpy==1.26.4" --index-url https://download.pytorch.org/whl/nightly/cpu
else
pip3 install --pre torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
uv pip install --pre torch torchvision "numpy==1.26.4" --index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
fi
elif [[ "$TORCH_VERSION" == "stable" ]]; then
if [ "${CU_VERSION:-}" == cpu ] ; then
pip3 install torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/cpu
if [ "${CU_VERSION:-}" == cpu ] ; then
uv pip install torch torchvision "numpy==1.26.4" --index-url https://download.pytorch.org/whl/cpu
else
pip3 install torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/$CU_VERSION
uv pip install torch torchvision "numpy==1.26.4" --index-url https://download.pytorch.org/whl/$CU_VERSION
fi
else
printf "Failed to install pytorch"
Expand All @@ -189,23 +130,19 @@ fi
# smoke test
python -c "import functorch"

## install snapshot
#pip install git+https://github.com/pytorch/torchsnapshot

# install tensordict
if [[ "$RELEASE" == 0 ]]; then
pip3 install git+https://github.com/pytorch/tensordict.git
uv pip install git+https://github.com/pytorch/tensordict.git
else
pip3 install tensordict
uv pip install tensordict
fi

printf "* Installing torchrl\n"
python -m pip install -e . --no-build-isolation
uv pip install -e . --no-build-isolation

# ==================================================================================== #
# ================================ Run tests ========================================= #


bash ${this_dir}/run_test.sh

# ==================================================================================== #
Expand Down
24 changes: 5 additions & 19 deletions .github/unittest/linux_sota/scripts/test_sota.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,6 @@
), "Composite LP must be set to False. Run this test with COMPOSITE_LP_AGGREGATE=0"

commands = {
"dt": """python sota-implementations/decision_transformer/dt.py \
optim.pretrain_gradient_steps=55 \
optim.updates_per_episode=3 \
optim.warmup_steps=10 \
logger.backend= \
env.backend=gymnasium \
env.name=HalfCheetah-v4
""",
"online_dt": """python sota-implementations/decision_transformer/online_dt.py \
optim.pretrain_gradient_steps=55 \
optim.updates_per_episode=3 \
optim.warmup_steps=10 \
env.backend=gymnasium \
logger.backend=
""",
"td3_bc": """python sota-implementations/td3_bc/td3_bc.py \
optim.gradient_steps=55 \
logger.backend=
Expand All @@ -39,7 +24,7 @@
collector.frames_per_batch=20 \
collector.num_workers=1 \
logger.backend= \
env.backend=gym \
env.backend=gymnasium \
logger.test_interval=10
""",
"ppo_mujoco": """python sota-implementations/ppo/ppo_mujoco.py \
Expand All @@ -57,7 +42,7 @@
loss.mini_batch_size=20 \
loss.ppo_epochs=2 \
logger.backend= \
env.backend=gym \
env.backend=gymnasium \
logger.test_interval=10
""",
"ddpg": """python sota-implementations/ddpg/ddpg.py \
Expand All @@ -84,7 +69,7 @@
collector.frames_per_batch=20 \
loss.mini_batch_size=20 \
logger.backend= \
env.backend=gym \
env.backend=gymnasium \
logger.test_interval=40
""",
"dqn_atari": """python sota-implementations/dqn/dqn_atari.py \
Expand All @@ -94,7 +79,7 @@
buffer.batch_size=10 \
loss.num_updates=1 \
logger.backend= \
env.backend=gym \
env.backend=gymnasium \
buffer.buffer_size=120
""",
"discrete_cql_online": """python sota-implementations/cql/discrete_cql_online.py \
Expand Down Expand Up @@ -301,6 +286,7 @@
collector.frames_per_batch=200 \
env.n_parallel_envs=1 \
optimization.optim_steps_per_batch=1 \
optimization.compile=False \
logger.video=False \
logger.backend=csv \
replay_buffer.buffer_size=120 \
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test-linux-sota.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ jobs:
tests:
strategy:
matrix:
python_version: ["3.9"]
cuda_arch_version: ["12.8"]
python_version: ["3.10"]
cuda_arch_version: ["13.0"]
fail-fast: false
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
repository: pytorch/rl
docker-image: "nvidia/cuda:12.2.0-devel-ubuntu22.04"
docker-image: "nvidia/cuda:13.0.2-cudnn-devel-ubuntu24.04"
gpu-arch-type: cuda
gpu-arch-version: ${{ matrix.cuda_arch_version }}
timeout: 90
Expand Down
2 changes: 1 addition & 1 deletion sota-implementations/a2c/config_atari.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Environment
env:
env_name: PongNoFrameskip-v4
env_name: ALE/Pong-v5
backend: gymnasium
num_envs: 16

Expand Down
2 changes: 1 addition & 1 deletion sota-implementations/dqn/config_atari.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ device: null

# Environment
env:
env_name: PongNoFrameskip-v4
env_name: ALE/Pong-v5
backend: gymnasium

# collector
Expand Down
5 changes: 5 additions & 0 deletions sota-implementations/dreamer/dreamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ def main(cfg: DictConfig): # noqa: F821
grayscale=cfg.env.grayscale,
image_size=cfg.env.image_size,
use_autocast=cfg.optimization.use_autocast,
compile=(
{"backend": cfg.optimization.compile_backend}
if cfg.optimization.compile
else False
),
)

# Training loop
Expand Down
Loading
Loading