diff --git a/colab_start.ipynb b/colab_start.ipynb
new file mode 100644
index 00000000..58aae354
--- /dev/null
+++ b/colab_start.ipynb
@@ -0,0 +1,229 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# SAM 3D Objects - Colab Setup\n",
+    "\n",
+    "This notebook sets up the environment and runs a demo inference on Google Colab (a T4 GPU runtime is recommended).\n",
+    "\n",
+    "**Note:** If you encounter build errors with PyTorch3D or other libraries, make sure you are using a GPU runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 1. Clone the repository\n",
+    "import os\n",
+    "if not os.path.exists(\"sam-3d-objects\"):\n",
+    "    !git clone https://github.com/facebookresearch/sam-3d-objects.git\n",
+    "    %cd sam-3d-objects\n",
+    "else:\n",
+    "    %cd sam-3d-objects\n",
+    "    !git pull\n",
+    "\n",
+    "# 2. Install Dependencies\n",
+    "print(\"Installing dependencies... This may take a few minutes.\")\n",
+    "\n",
+    "# Install general requirements first (excluding pytorch3d)\n",
+    "!pip install -r requirements_colab.txt\n",
+    "\n",
+    "# 3. Install PyTorch3D (Binary Wheel)\n",
+    "# Building PyTorch3D from source on Colab is slow and error-prone. We use pre-built wheels.\n",
+    "import torch\n",
+    "import sys\n",
+    "\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "    print(\"PyTorch3D is already installed.\")\n",
+    "except ImportError:\n",
+    "    print(\"Installing PyTorch3D...\")\n",
+    "    pyt_version_str = torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "    cuda_version_str = torch.version.cuda.replace(\".\", \"\")\n",
+    "\n",
+    "    if \"2.4\" in torch.__version__ or \"2.5\" in torch.__version__:\n",
+    "        # Use the closest available pre-built wheel for recent Colab torch builds.\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html\n",
+    "    else:\n",
+    "        version_str = \"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            cuda_version_str,\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "\n",
+    "    # A failed `!pip` call does not raise a Python exception, so verify the install by\n",
+    "    # checking for the package and fall back to a (slow) source build if no wheel matched.\n",
+    "    import importlib, importlib.util\n",
+    "    importlib.invalidate_caches()\n",
+    "    if importlib.util.find_spec(\"pytorch3d\") is None:\n",
+    "        print(\"Binary wheel not found, falling back to source install (slow)...\")\n",
+    "        !pip install \"git+https://github.com/facebookresearch/pytorch3d.git@stable\"\n",
+    "\n",
+    "# 4. Install the package in editable mode and patch Hydra\n",
+    "!pip install -e .\n",
+    "!python patching/hydra\n",
+    "\n",
+    "print(\"Installation complete. PLEASE RESTART THE RUNTIME (Runtime > Restart session) if you see import errors!\")"
+   ]
+  },
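+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check (an illustrative addition, not part of the original setup):\n",
+    "# confirms the GPU is visible and that PyTorch3D imports after the install cell above.\n",
+    "import torch\n",
+    "print(\"CUDA available:\", torch.cuda.is_available())\n",
+    "if torch.cuda.is_available():\n",
+    "    print(\"GPU:\", torch.cuda.get_device_name(0))\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "    print(\"PyTorch3D version:\", pytorch3d.__version__)\n",
+    "except ImportError as e:\n",
+    "    print(\"PyTorch3D import failed - restart the runtime and re-run the install cell:\", e)"
+   ]
+  },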
\n", + "Option 1: Hugging Face (Requires Token, Full Model)\n", + "Option 2: Direct Download (Public Link - User Requested, may be partial)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import shutil\n", + "from huggingface_hub import snapshot_download\n", + "\n", + "def download_weights_hf(token):\n", + " tag = \"hf\"\n", + " download_dir = f\"checkpoints/{tag}-download\"\n", + " target_dir = f\"checkpoints/{tag}\"\n", + " \n", + " if os.path.exists(target_dir) and os.path.exists(os.path.join(target_dir, \"pipeline.yaml\")):\n", + " print(f\"Checkpoints already exist at {target_dir}\")\n", + " return\n", + " \n", + " print(\"Downloading model weights from Hugging Face...\")\n", + " try:\n", + " snapshot_download(\n", + " repo_id=\"facebook/sam-3d-objects\",\n", + " repo_type=\"model\",\n", + " local_dir=download_dir,\n", + " max_workers=1,\n", + " token=token\n", + " )\n", + " \n", + " source = os.path.join(download_dir, \"checkpoints\")\n", + " if os.path.exists(source):\n", + " if os.path.exists(target_dir):\n", + " shutil.rmtree(target_dir)\n", + " shutil.move(source, target_dir)\n", + " shutil.rmtree(download_dir)\n", + " print(\"Download complete and files moved.\")\n", + " else:\n", + " if os.path.exists(target_dir):\n", + " shutil.rmtree(target_dir)\n", + " shutil.move(download_dir, target_dir)\n", + " print(\"Download complete (fallback structure).\")\n", + " \n", + " except Exception as e:\n", + " print(f\"Error downloading weights from HF: {e}\")\n", + "\n", + "def download_weights_public():\n", + " # User requested link for public SAM weights\n", + " url = \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth\"\n", + " target_dir = \"checkpoints/hf\"\n", + " target_file = os.path.join(target_dir, \"sam_vit_h_4b8939.pth\")\n", + " \n", + " if not os.path.exists(target_dir):\n", + " os.makedirs(target_dir)\n", + " \n", + " print(f\"Downloading public SAM weights from {url}...\")\n", + " !wget -O {target_file} {url}\n", + " print(\"Download complete.\")\n", + " print(\"WARNING: This is only the SAM checkpoint. The full SAM-3D-Objects pipeline likely requires 'pipeline.yaml' and other weights from the gated HF repo.\")\n", + "\n", + "# Try to get token\n", + "token = None\n", + "try:\n", + " from google.colab import userdata\n", + " token = userdata.get('HF_TOKEN')\n", + "except:\n", + " pass\n", + "\n", + "# If no token, ask or fallback\n", + "if not token:\n", + " print(\"No HF_TOKEN found in secrets.\")\n", + " choice = input(\"Enter '1' to provide HF Token (Recommended), '2' to use Public SAM Link (May be incomplete): \")\n", + " if choice == '1':\n", + " from getpass import getpass\n", + " token = getpass(\"Enter Hugging Face Token: \")\n", + " download_weights_hf(token)\n", + " else:\n", + " download_weights_public()\n", + "else:\n", + " print(\"Using HF_TOKEN from secrets.\")\n", + " download_weights_hf(token)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Minimal Inference Code\n", + "import sys\n", + "import os\n", + "sys.path.append(\"notebook\")\n", + "from inference import Inference, load_image, load_single_mask\n", + "import torch\n", + "\n", + "if not torch.cuda.is_available():\n", + " print(\"Warning: CUDA is not available. 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal Inference Code\n",
+    "import sys\n",
+    "import os\n",
+    "sys.path.append(\"notebook\")\n",
+    "from inference import Inference, load_image, load_single_mask\n",
+    "import torch\n",
+    "\n",
+    "if not torch.cuda.is_available():\n",
+    "    print(\"Warning: CUDA is not available. Inference will be slow or fail.\")\n",
+    "\n",
+    "tag = \"hf\"\n",
+    "config_path = f\"checkpoints/{tag}/pipeline.yaml\"\n",
+    "\n",
+    "if not os.path.exists(config_path):\n",
+    "    print(f\"Error: Config not found at {config_path}. Did you download the full model from Hugging Face?\")\n",
+    "else:\n",
+    "    # Load model\n",
+    "    print(\"Loading model...\")\n",
+    "    inference = Inference(config_path, compile=False)\n",
+    "\n",
+    "    # Load dummy image/mask (using one from repo)\n",
+    "    image_path = \"notebook/images/shutterstock_stylish_kidsroom_1640806567/image.png\"\n",
+    "    mask_folder = \"notebook/images/shutterstock_stylish_kidsroom_1640806567\"\n",
+    "\n",
+    "    print(f\"Processing image: {image_path}\")\n",
+    "    if os.path.exists(image_path):\n",
+    "        image = load_image(image_path)\n",
+    "        mask = load_single_mask(mask_folder, index=14)\n",
+    "\n",
+    "        # Run model\n",
+    "        output = inference(image, mask, seed=42)\n",
+    "\n",
+    "        # Save output\n",
+    "        output[\"gs\"].save_ply(\"splat.ply\")\n",
+    "        print(\"Success! Output saved to splat.ply\")\n",
+    "    else:\n",
+    "        print(\"Test image not found.\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebook/inference.py b/notebook/inference.py
index 94e99a7f..7c0f9f83 100644
--- a/notebook/inference.py
+++ b/notebook/inference.py
@@ -2,7 +2,7 @@
 import os
 
 # not ideal to put that here
-os.environ["CUDA_HOME"] = os.environ["CONDA_PREFIX"]
+os.environ["CUDA_HOME"] = os.environ.get("CONDA_PREFIX", "/usr/local/cuda")
 os.environ["LIDRA_SKIP_INIT"] = "true"
 
 import sys
diff --git a/requirements_colab.txt b/requirements_colab.txt
new file mode 100644
index 00000000..ffadbb12
--- /dev/null
+++ b/requirements_colab.txt
@@ -0,0 +1,26 @@
+numpy
+Pillow
+opencv-python
+matplotlib
+seaborn
+gradio
+omegaconf
+hydra-core
+timm
+h5py
+scikit-image
+einops-exts
+transformers
+accelerate
+bitsandbytes
+gdown
+ninja
+kaolin==0.17.0
+gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@2323de5905d5e90e035f792fe65bad0fedd413e7
+xformers
+spconv-cu121
+open3d
+pandas
+scipy
+MoGe @ git+https://github.com/microsoft/MoGe.git@a8c37341bc0325ca99b9d57981cc3bb2bd3e255b
+cuda-python