Revert "[BE] Better compress flash attention binaries (#1744)" (#1751)

imoneoi · web-flow · commit 24f0957be6cf · 2025-07-22T10:36:36.000-07:00
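This reverts commit 8ba246f, removing the `-Xfatbin`, `-compress-all`, and `-compress-mode=size` flags from the compiler flag lists in `hopper/setup.py` and `setup.py`.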
diff --git a/hopper/setup.py b/hopper/setup.py
@@ -524,9 +524,6 @@ def nvcc_threads_args():
         "-DCUTLASS_ENABLE_GDC_FOR_SM90",  # For PDL
         "-DCUTLASS_DEBUG_TRACE_LEVEL=0",  # Can toggle for debugging
         "-DNDEBUG",  # Important, otherwise performance is severely impacted
-        "-Xfatbin",  # compress all binary sections
-        "-compress-all",
-        "-compress-mode=size",  # compress with CUDA fatbin more aggressively
     ]
     if get_platform() == "win_amd64":
         nvcc_flags.extend(
diff --git a/setup.py b/setup.py
@@ -206,9 +206,6 @@ def validate_and_update_archs(archs):
     "--expt-relaxed-constexpr",
     "--expt-extended-lambda",
     "--use_fast_math",
-    "-Xfatbin",
-    "-compress-all",
-    "-compress-mode=size",
     # "--ptxas-options=-v",
     # "--ptxas-options=-O2",
     # "-lineinfo",