diff --git a/.github/workflows/perf-bench.yml b/.github/workflows/perf-bench.yml index 3657862f2f..1ec03708f9 100644 --- a/.github/workflows/perf-bench.yml +++ b/.github/workflows/perf-bench.yml @@ -32,7 +32,7 @@ jobs: - name: Install data-juicer working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system -e .\[all\]' + docker compose exec ray-head bash -c 'uv pip install --system -e .\[all\]' - name: Clean dataset cache working-directory: dj-${{ github.run_id }}/.github/workflows/docker diff --git a/.github/workflows/unit-test-partial.yml b/.github/workflows/unit-test-partial.yml index 89fb8a732b..aeb3cf8f89 100644 --- a/.github/workflows/unit-test-partial.yml +++ b/.github/workflows/unit-test-partial.yml @@ -31,12 +31,12 @@ jobs: - name: Install data-juicer working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system -e .\[all\]' + docker compose exec ray-head bash -c 'uv pip install --system -e .\[all\]' - name: Print Pip Dependency Tree working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system pipdeptree' + docker compose exec ray-head bash -c 'uv pip install --system pipdeptree' docker compose exec ray-head bash -c 'pipdeptree' - name: Clean dataset cache @@ -90,8 +90,8 @@ jobs: - name: Install data-juicer working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system -e .\[all\]' - docker compose exec ray-worker bash -c '/root/.local/bin/uv pip install --system -e .\[all\]' + docker compose exec ray-head bash -c 'uv pip install --system -e .\[all\]' + docker compose exec ray-worker bash -c 'uv pip install --system -e .\[all\]' - name: Clean dataset cache working-directory: dj-${{ 
github.run_id }}/.github/workflows/docker @@ -140,7 +140,7 @@ jobs: - name: Install coverage working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system coverage' + docker compose exec ray-head bash -c 'uv pip install --system coverage' - name: Download Coverage Report Standalone uses: actions/download-artifact@v4 diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index cb3526c9d7..f6bd879547 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -32,13 +32,13 @@ jobs: - name: Install data-juicer working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system -e .\[all\]' - docker compose exec ray-worker bash -c '/root/.local/bin/uv pip install --system -e .\[all\]' + docker compose exec ray-head bash -c 'uv pip install --system -e .\[all\]' + docker compose exec ray-worker bash -c 'uv pip install --system -e .\[all\]' - name: Print Pip Dependency Tree working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system pipdeptree' + docker compose exec ray-head bash -c 'uv pip install --system pipdeptree' docker compose exec ray-head bash -c 'pipdeptree' - name: Clean dataset cache @@ -87,8 +87,8 @@ jobs: - name: Install data-juicer working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system -e .\[all\]' - docker compose exec ray-worker bash -c '/root/.local/bin/uv pip install --system -e .\[all\]' + docker compose exec ray-head bash -c 'uv pip install --system -e .\[all\]' + docker compose exec ray-worker bash -c 'uv pip install --system -e .\[all\]' - name: Clean dataset cache working-directory: dj-${{ github.run_id 
}}/.github/workflows/docker @@ -139,7 +139,7 @@ jobs: - name: Install coverage working-directory: dj-${{ github.run_id }}/.github/workflows/docker run: | - docker compose exec ray-head bash -c '/root/.local/bin/uv pip install --system coverage' + docker compose exec ray-head bash -c 'uv pip install --system coverage' - name: Download Coverage Report Standalone uses: actions/download-artifact@v4 diff --git a/Dockerfile b/Dockerfile index eae1995c1e..71122e2efd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,45 +1,96 @@ # The data-juicer image includes all open-source contents of data-juicer, # and it will be installed in editable mode. -FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 +FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 -# change to aliyun source +# avoid hanging on interactive installation (NOTE: set via ENV, so it also persists into containers run from this image) +ENV DEBIAN_FRONTEND=noninteractive + +# use the aliyun apt mirrors for faster download in China; NOTE(review): Ubuntu 24.04 images appear to keep their apt sources in /etc/apt/sources.list.d/ubuntu.sources - confirm these sed edits still take effect RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list \ && sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list -# install python 3.10 -RUN DEBIAN_FRONTEND=noninteractive apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y git curl vim wget python3.10 libpython3.10-dev python3-pip libgl1-mesa-glx libglib2.0-0 \ - && ln -sf /usr/bin/python3.10 /usr/bin/python3 \ - && ln -sf /usr/bin/python3.10 /usr/bin/python \ - && apt-get autoclean && rm -rf /var/lib/apt/lists/* \ - && pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple +# install some basic system dependencies +RUN apt-get update && apt-get install -y \ + git curl vim wget aria2 openssh-server gnupg build-essential cmake gfortran \ + ffmpeg libsm6 libxext6 libgl1 libglx-mesa0 libglib2.0-0 libosmesa6-dev \ + freeglut3-dev libglfw3-dev libgles2-mesa-dev vulkan-tools \ + libopenblas-dev liblapack-dev postgresql postgresql-contrib libpq-dev \ + software-properties-common \ + && rm -rf /var/lib/apt/lists/* + +# install Git LFS +RUN curl -s
https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | /bin/bash \ + && apt-get install -y git-lfs && git lfs install + +# install gcc-11 and g++-11 and register them as the /usr/bin/gcc and /usr/bin/g++ alternatives (priority 200) +RUN apt-get update && \ + apt-get install -y gcc-11 g++-11 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 200 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 200 + +# set up Vulkan for NVIDIA: expose all devices and driver capabilities, then fetch the ICD, implicit-layer and glvnd EGL vendor JSON files +ENV NVIDIA_VISIBLE_DEVICES=all NVIDIA_DRIVER_CAPABILITIES=all VK_DRIVER_FILES=/etc/vulkan/icd.d/nvidia_icd.json +RUN mkdir -p /etc/vulkan/icd.d /etc/vulkan/implicit_layer.d /usr/share/glvnd/egl_vendor.d +RUN wget https://pai-vision-data-sh.oss-cn-shanghai.aliyuncs.com/aigc-data/isaac/nb10/nvidia_icd.json -O /etc/vulkan/icd.d/nvidia_icd.json +RUN wget https://pai-vision-data-sh.oss-cn-shanghai.aliyuncs.com/aigc-data/isaac/nb10/nvidia_layers.json -O /etc/vulkan/implicit_layer.d/nvidia_layers.json +RUN wget https://pai-vision-data-sh.oss-cn-shanghai.aliyuncs.com/aigc-data/isaac/nb10/10_nvidia.json -O /usr/share/glvnd/egl_vendor.d/10_nvidia.json +RUN wget https://pai-vision-data-sh.oss-cn-shanghai.aliyuncs.com/aigc-data/isaac/nb10/50_mesa.json -O /usr/share/glvnd/egl_vendor.d/50_mesa.json -# install 3rd-party system dependencies -RUN DEBIAN_FRONTEND=noninteractive apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libsm6 libxext6 software-properties-common build-essential cmake gfortran libopenblas-dev liblapack-dev postgresql postgresql-contrib libpq-dev +# install Python 3.11 +RUN add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y python3.11 python3.11-dev python3.11-venv python3.11-distutils && \ + # set the default Python + update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \ + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \ + # install pip + curl https://bootstrap.pypa.io/get-pip.py | python3.11 && \ + pip install --upgrade pip -# prepare
the java env +# install uv +RUN pip install uv -i https://pypi.tuna.tsinghua.edu.cn/simple + +# install java WORKDIR /opt -# download jdk -RUN wget https://aka.ms/download-jdk/microsoft-jdk-17.0.9-linux-x64.tar.gz -O jdk.tar.gz \ +RUN wget https://aka.ms/download-jdk/microsoft-jdk-17.0.9-linux-x64.tar.gz -O jdk.tar.gz \ && tar -xzf jdk.tar.gz \ && rm -rf jdk.tar.gz \ && mv jdk-17.0.9+8 jdk - -# set the environment variable ENV JAVA_HOME=/opt/jdk +ENV PATH=$JAVA_HOME/bin:$PATH -WORKDIR /data-juicer +# install Isaac Sim 5.1.0 using the NVIDIA package index as an extra index, with UV_HTTP_TIMEOUT raised to 300 +ENV UV_HTTP_TIMEOUT=300 +RUN uv pip install isaacsim[all,extscache]==5.1.0 --extra-index-url https://pypi.nvidia.com --system -# install uv -RUN pip install uv -i https://pypi.tuna.tsinghua.edu.cn/simple +# install Isaac Lab 2.3 (EULA variables are set first, presumably so the installer does not prompt - confirm) +ENV ACCEPT_EULA=Y +ENV OMNI_KIT_ACCEPT_EULA=Y +RUN mkdir -p /third-party +RUN uv pip install usd-core --system +# clone and install Isaac Lab +RUN cd /tmp && git clone https://github.com/isaac-sim/IsaacLab.git isaaclab && mv /tmp/isaaclab /third-party/isaaclab \ + && cd /third-party/isaaclab \ + && git checkout v2.3.0 \ + && ./isaaclab.sh --install + +# set env vars for Isaac Lab +ENV ISAACLAB_ROOT_PATH=/third-party/isaaclab ISAACLAB_VERSION=2.3.0 -# install requirements which need to be installed from source +# overwrite Isaac Lab's utils/assets.py with a customized copy downloaded from OSS +RUN wget https://pai-vision-data-sh.oss-cn-shanghai.aliyuncs.com/aigc-data/isaac/assets.py -O /third-party/isaaclab/source/isaaclab/isaaclab/utils/assets.py + +WORKDIR /data-juicer + +# install basic dependencies for Data-Juicer RUN uv pip install --upgrade setuptools==69.5.1 setuptools_scm -i https://pypi.tuna.tsinghua.edu.cn/simple --system \ - && uv pip install http://dail-wlcb.oss-cn-wulanchabu.aliyuncs.com/data_juicer/recognize-anything-main.zip -i https://pypi.tuna.tsinghua.edu.cn/simple --system + && uv pip install git+https://github.com/datajuicer/recognize-anything.git -i https://pypi.tuna.tsinghua.edu.cn/simple --system -# install data-juicer then +# copy source code
and install COPY . . RUN uv pip install -v -e .[all] -i https://pypi.tuna.tsinghua.edu.cn/simple --system \ && python -c "import nltk; nltk.download('punkt_tab'); nltk.download('punkt'); nltk.download('averaged_perceptron_tagger'); nltk.download('averaged_perceptron_tagger_eng')" + +# final entrypoint configuration: run bash by default +CMD ["/bin/bash"]