diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh
index f6b9ad51a26..adb1256303f 100755
--- a/.github/scripts/setup-env.sh
+++ b/.github/scripts/setup-env.sh
@@ -35,7 +35,6 @@ conda create \
 conda activate ci
 conda install --quiet --yes libjpeg-turbo -c pytorch
 pip install --progress-bar=off --upgrade setuptools==72.1.0
-conda install libavif -c conda-forge --yes
 
 # See https://github.com/pytorch/vision/issues/6790
 if [[ "${PYTHON_VERSION}" != "3.11" ]]; then
diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh
index cae827d73f6..9b1f93b5abe 100644
--- a/packaging/pre_build_script.sh
+++ b/packaging/pre_build_script.sh
@@ -36,7 +36,5 @@ else
   pip install auditwheel
 fi
 
-conda install libavif -c conda-forge -y
-
 pip install numpy pyyaml future ninja
 pip install --upgrade setuptools==72.1.0
diff --git a/packaging/torchvision/meta.yaml b/packaging/torchvision/meta.yaml
index e16c99afe9f..a847328a77e 100644
--- a/packaging/torchvision/meta.yaml
+++ b/packaging/torchvision/meta.yaml
@@ -12,7 +12,6 @@ requirements:
     - libpng
     - libjpeg-turbo
     - libwebp
-    - libavif  # -c conda-forge
     - ffmpeg >=4.2.2, <5.0.0  # [linux]
 
   host:
@@ -31,7 +30,6 @@ requirements:
     - ffmpeg >=4.2.2, <5.0.0  # [linux]
     - libjpeg-turbo
     - libwebp
-    - libavif  # -c conda-forge
     - pillow >=5.3.0, !=8.3.*
     - pytorch-mutex 1.0 {{ build_variant }}  # [not osx ]
     {{ environ.get('CONDA_PYTORCH_CONSTRAINT', 'pytorch') }}
diff --git a/setup.py b/setup.py
index 26760923dff..7f383b82ec4 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@
 USE_PNG = os.getenv("TORCHVISION_USE_PNG", "1") == "1"
 USE_JPEG = os.getenv("TORCHVISION_USE_JPEG", "1") == "1"
 USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
-USE_AVIF = os.getenv("TORCHVISION_USE_AVIF", "1") == "1"
+USE_AVIF = os.getenv("TORCHVISION_USE_AVIF", "0") == "1"  # TODO: enable by default once libavif is a build dep again
 USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
 NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
 # Note: the GPU video decoding stuff used to be called "video codec", which
diff --git a/test/smoke_test.py b/test/smoke_test.py
index bb95d8010f8..f98d019bea5 100644
--- a/test/smoke_test.py
+++ b/test/smoke_test.py
@@ -30,9 +30,6 @@ def smoke_test_torchvision_read_decode() -> None:
     img_webp = read_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.webp"))
     if img_webp.shape != (3, 100, 100):
         raise RuntimeError(f"Unexpected shape of img_webp: {img_webp.shape}")
-    img_avif = read_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif"))
-    if img_avif.shape != (3, 100, 100):
-        raise RuntimeError(f"Unexpected shape of img_avif: {img_avif.shape}")
 
 
 def smoke_test_torchvision_decode_jpeg(device: str = "cpu"):
diff --git a/test/test_image.py b/test/test_image.py
index 6969f32e52c..5b0da3481ab 100644
--- a/test/test_image.py
+++ b/test/test_image.py
@@ -14,7 +14,7 @@
 from common_utils import assert_equal, cpu_and_cuda, IN_OSS_CI, needs_cuda
 from PIL import __version__ as PILLOW_VERSION, Image, ImageOps, ImageSequence
 from torchvision.io.image import (
-    decode_avif,
+    _decode_avif,
     decode_gif,
     decode_image,
     decode_jpeg,
@@ -863,7 +863,7 @@ def test_decode_gif(tmpdir, name, scripted):
             torch.testing.assert_close(tv_frame, pil_frame, atol=0, rtol=0)
 
 
-@pytest.mark.parametrize("decode_fun", (decode_gif, decode_webp, decode_avif))
+@pytest.mark.parametrize("decode_fun", (decode_gif, decode_webp))
 def test_decode_gif_webp_errors(decode_fun):
     encoded_data = torch.randint(0, 256, (100,), dtype=torch.uint8)
     with pytest.raises(RuntimeError, match="Input tensor must be 1-dimensional"):
@@ -876,8 +876,6 @@ def test_decode_gif_webp_errors(decode_fun):
         expected_match = re.escape("DGifOpenFileName() failed - 103")
     elif decode_fun is decode_webp:
         expected_match = "WebPDecodeRGB failed."
-    else:
-        expected_match = "avifDecoderParse failed: BMFF parsing failed"
     with pytest.raises(RuntimeError, match=expected_match):
         decode_fun(encoded_data)
 
@@ -893,7 +891,8 @@ def test_decode_webp(decode_fun, scripted):
     assert img[None].is_contiguous(memory_format=torch.channels_last)
 
 
-@pytest.mark.parametrize("decode_fun", (decode_avif, decode_image))
+@pytest.mark.xfail(reason="AVIF support not enabled yet.")
+@pytest.mark.parametrize("decode_fun", (_decode_avif, decode_image))
 @pytest.mark.parametrize("scripted", (False, True))
 def test_decode_avif(decode_fun, scripted):
     encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif")))
diff --git a/torchvision/io/__init__.py b/torchvision/io/__init__.py
index 85063af9572..08a0d6d62b7 100644
--- a/torchvision/io/__init__.py
+++ b/torchvision/io/__init__.py
@@ -22,7 +22,6 @@
     VideoMetaData,
 )
 from .image import (
-    decode_avif,
     decode_gif,
     decode_image,
     decode_jpeg,
@@ -64,7 +63,6 @@
     "decode_png",
     "decode_webp",
     "decode_gif",
-    "decode_avif",
     "encode_jpeg",
     "encode_png",
     "read_file",
diff --git a/torchvision/io/image.py b/torchvision/io/image.py
index efdeed33c66..6d4613f703b 100644
--- a/torchvision/io/image.py
+++ b/torchvision/io/image.py
@@ -384,7 +384,7 @@ def decode_webp(
     return torch.ops.image.decode_webp(input)
 
 
-def decode_avif(
+def _decode_avif(
     input: torch.Tensor,
 ) -> torch.Tensor:
     if not torch.jit.is_scripting() and not torch.jit.is_tracing():