Changes from all commits · 21 commits
72 changes: 35 additions & 37 deletions .github/workflows/ascend-build-and-test.yml
@@ -12,8 +12,15 @@ concurrency:

jobs:
ascend-build-and-test:
- runs-on: ascend
+ runs-on: flagtree-ascend
+ if: ${{ github.repository == 'FlagTree/flagtree' || github.repository == 'flagos-ai/flagtree' }}
steps:
+ - name: Setup environment
+ shell: bash
+ run: |
+ source ~/env.sh
+ env | grep -E '^(http_proxy|https_proxy|all_proxy|no_proxy)=' >> $GITHUB_ENV || true

- name: Checkout code (attempt 1)
id: checkout1
uses: actions/checkout@v6
@@ -60,49 +67,40 @@ jobs:
shell: bash
run: |
set -x
pip uninstall -y triton
export FLAGTREE_BACKEND=ascend
- source ~/env.sh
cd python
- MAX_JOBS=32 python3 -m pip install . --no-build-isolation
+ MAX_JOBS=32 python3 -m pip install . --no-build-isolation -vvv

- name: FlagTree Test on Ascend
if: steps.check_files.outputs.only_docs_changed != 'true'
shell: bash
run: |
set -x
source /usr/local/Ascend/ascend-toolkit/set_env.sh
- python3 third_party/tests/ascend/vector-add.py
- python3 third_party/ascend/examples/tutorials/01-vector-add.py
- python3 third_party/ascend/examples/tutorials/02-fused-softmax.py
- python3 third_party/ascend/examples/tutorials/03-layer-norm.py
- python3 third_party/ascend/examples/tutorials/04-fused-attention.py
- python3 third_party/ascend/examples/tutorials/06-demo-autotune.py
- python3 third_party/ascend/examples/tutorials/07-profiler.py
- python3 third_party/ascend/examples/tutorials/09-gather.py
- python3 third_party/ascend/examples/tutorials/10-gather_sorted.py
- python3 third_party/ascend/examples/tutorials/11-rab_time.py
- python3 third_party/ascend/examples/tutorials/13-matrix-multiplication-optimized.py
- python3 third_party/ascend/examples/tutorials/13-matrix-multiplication-optimized-flagtree.py
- python3 third_party/ascend/examples/tutorials/14-accuracy-comparison.py
- python3 python/test/ops/01_vector_add/01_vector_add.py
- python3 python/test/ops/abs/abs.py
- python3 python/test/ops/addmm/addmm.py
- python3 python/test/ops/addmm/addmm_ascend.py
- python3 python/test/ops/amax/amax.py
- python3 python/test/ops/amax/amax_ascend_perf.py
- python3 python/test/ops/apply_rotary_pos_emb/apply_rotary_pos_emb.py
- python3 python/test/ops/apply_rotary_pos_emb/apply_rotary_pos_emb_ascend.py
- python3 python/test/ops/argmin/argmin.py
- python3 python/test/ops/argmin/argmin_ascend_perf.py
- python3 python/test/ops/bmm/bmm_ascend.py
- python3 python/test/ops/cumsum/cumsum.py
- python3 python/test/ops/min_dim/min_dim.py
- python3 python/test/ops/min_dim/min_dim_ascend_perf.py
- python3 python/test/ops/sum_dim/sum_dim.py
- python3 python/test/ops/varmean/var_mean_ascend.py
- python3 -m pytest third_party/ascend/examples/pytest_ut --ignore=third_party/ascend/examples/pytest_ut/test_index_select.py \
- --ignore=third_party/ascend/examples/pytest_ut/test_linearize_permute.py \
- --ignore=third_party/ascend/examples/pytest_ut/test_logical_and.py \
- --ignore=third_party/ascend/examples/pytest_ut/test_logical_or.py \
- --ignore=third_party/ascend/examples/pytest_ut/test_triton_unified_attention.py
+ # tutorials
+ pushd third_party/ascend/tutorials
+ python3 01-vector-add.py
+ python3 02-fused-softmax.py
+ python3 03-layer-norm.py
+ python3 04-fused-attention.py
+ python3 06-demo-autotune.py
+ python3 07-profiler.py
+ python3 08-demo-libentry.py
+ python3 09-gather.py
+ python3 10-gather_sorted.py
+ python3 11-rab_time.py
+ python3 12-hstu_attention.py
+ python3 13-matrix-multiplication-optimized.py
+ python3 14-accuracy-comparison.py
+ python3 15-embedding_gather_demo.py
+ popd
+ # pytest_ut
+ pushd third_party/ascend/unittest/pytest_ut
+ python3 -m pytest . \
+ --ignore=test_index_select.py \
+ --ignore=test_linearize_permute.py \
+ --ignore=test_logical_and.py \
+ --ignore=test_logical_or.py \
+ --ignore=test_triton_unified_attention.py
+ popd
5 changes: 4 additions & 1 deletion .github/workflows/nv-build-and-test.yml
@@ -111,7 +111,10 @@ jobs:
shell: bash
run: |
set -x
- python3.11 -m pytest -s python/test/unit
+ python3.11 -m pytest -s python/test/unit \
+ --ignore=python/test/unit/test_debug.py \
+ --ignore=python/test/unit/test_debug_dump.py \
+ --ignore=python/test/unit/tools/test_disasm.py
if [ -d "python/test/operators" ]; then
python3.11 -m pytest -s python/test/operators
fi
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
hooks:
- id: ruff
files: '^python/.*'
args: ["--fix", "--line-length", "120"]
args: ["--fix", "--line-length", "120", "--per-file-ignores", "*/__init__.py:E402"]
stages: [pre-commit, pre-push, manual]
exclude: |
(?x)(
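Review note: the new --per-file-ignores entry tells ruff to skip E402 ("module level import not at top of file") in every __init__.py it checks under python/. This pairs with the python/triton/__init__.py change at the end of this diff, where spec_path(__path__) must execute before the remaining imports. A minimal sketch of the import ordering the ignore now permits; package and helper names here are hypothetical, for illustration only:

# __init__.py of a hypothetical package: a side-effecting call runs before
# the later imports, which is exactly what ruff E402 would normally flag.
from mypkg.runtime import register_backend_paths

register_backend_paths(__path__)  # executable statement ahead of the next import

from mypkg.language import core  # E402 would fire here; now ignored in __init__.py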
21 changes: 13 additions & 8 deletions CMakeLists.txt
@@ -35,8 +35,16 @@ elseif(FLAGTREE_BACKEND STREQUAL "ascend")
set(CMAKE_CXX_COMPILER clang++)
set(FLIR_BUILD_INCUBATED ON CACHE BOOL "Build FLIR incubated passes" FORCE)
set(LLVM_MAJOR_VERSION_21_COMPATIBLE ON CACHE BOOL "AscendNPU-IR build with llvm major version 21 or later" FORCE)
add_definitions(-D__LLVM_MAJOR_VERSION_21_COMPATIBLE__)
add_compile_options("-Wno-deprecated-declarations")
add_compile_options("-Wno-error=deprecated-declarations")
# AscendNPU-IR
set(BISHENGIR_ENABLE_A5_UNPUBLISHED_FEATURES ON)
set(BISHENGIR_BUILD_STANDALONE_IR_ONLY ON)
set(ASCENDNPU_IR_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/ascend/AscendNPU-IR)
set(ASCENDNPU_IR_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/third_party/ascend/AscendNPU-IR)
include_directories(${ASCENDNPU_IR_SRC_DIR}/bishengir/include)
include_directories(${ASCENDNPU_IR_BINARY_DIR}/bishengir/include)
else()
set(FLIR_BUILD_INCUBATED OFF CACHE BOOL "Build FLIR incubated passes" FORCE)
endif()
@@ -153,13 +161,6 @@ function(add_triton_object name)
INTERFACE $<TARGET_OBJECTS:${name}>
)

- if (FLAGTREE_BACKEND STREQUAL "ascend")
- set(ASCENDNPU_IR_SRC_DIR ${PROJECT_SOURCE_DIR}/third_party/ascendnpu-ir)
- set(ASCENDNPU_IR_BINARY_DIR ${PROJECT_BINARY_DIR}/third_party/ascendnpu-ir)
- include_directories(${ASCENDNPU_IR_SRC_DIR}/bishengir/include)
- include_directories(${ASCENDNPU_IR_BINARY_DIR}/bishengir/include)
- endif()

#add_library(${name} OBJECT ${ARG_UNPARSED_ARGUMENTS})
if(ARG_DEPENDS)
add_dependencies(${name} ${ARG_DEPENDS})
@@ -225,6 +226,10 @@ if (NOT WIN32 AND NOT APPLE)
link_libraries(stdc++fs)
endif()

+ # AscendNPU-IR
+ if(FLAGTREE_BACKEND STREQUAL "ascend")
+ add_subdirectory(${ASCENDNPU_IR_SRC_DIR} ${ASCENDNPU_IR_BINARY_DIR})
+ endif()

# -----

@@ -410,7 +415,7 @@ if(TRITON_BUILD_PYTHON_MODULE)
elseif(FLAGTREE_BACKEND STREQUAL "ascend")
set(PYTHON_ROOT_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/python/src)
include_directories(${PYTHON_ROOT_SRC_PATH})
- add_library(triton SHARED ${PYTHON_ROOT_SRC_PATH}/main.cc
+ add_library(triton SHARED ${PYTHON_SRC_PATH}/main.cc
${PYTHON_SRC_PATH}/ir.cc
${PYTHON_ROOT_SRC_PATH}/passes.cc
${PYTHON_ROOT_SRC_PATH}/interpreter.cc
2 changes: 2 additions & 0 deletions bin/RegisterTritonDialects.h
@@ -47,7 +47,9 @@ namespace test {
void registerTestAliasPass();
void registerTestAlignmentPass();
void registerTestAllocationPass();
+ #ifdef __NVIDIA__
void registerTestMembarPass();
+ #endif
} // namespace test
} // namespace mlir

10 changes: 4 additions & 6 deletions include/triton/Dialect/Triton/IR/Dialect.h
@@ -13,16 +13,14 @@
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "triton/Dialect/Triton/IR/Dialect.h.inc"
#include "triton/Dialect/Triton/IR/OpsEnums.h.inc"
#include "triton/Dialect/Triton/IR/Traits.h"
#include "triton/Dialect/Triton/IR/Types.h"

#if __has_include("flagtree_spec.h")
#include "flagtree_spec.h"
#endif
#if __has_include("triton/Dialect/Triton/IR/OpInterfaces.h")
#include "triton/Dialect/Triton/IR/OpInterfaces.h"
#endif

#include "triton/Dialect/Triton/IR/OpsEnums.h.inc"
#include "triton/Dialect/Triton/IR/Traits.h"
#include "triton/Dialect/Triton/IR/Types.h"

#define GET_OP_CLASSES
#include "triton/Dialect/Triton/IR/Ops.h.inc"
8 changes: 4 additions & 4 deletions lib/Dialect/Triton/IR/Ops.cpp
@@ -822,7 +822,6 @@ OpFoldResult AdvanceOp::fold(FoldAdaptor adaptor) {
// https://github.com/llvm/llvm-project/blob/main/mlir/lib/Dialect/Func/IR/FuncOps.cpp
// We could revert it back once MLIR has a better inliner interface.
//-- FuncOp --
- #ifndef FLAGTREE_SPEC_Dialect_Triton_IR_Ops_build
void FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name,
FunctionType type, ArrayRef<NamedAttribute> attrs,
ArrayRef<DictionaryAttr> argAttrs) {
@@ -835,11 +834,14 @@ void FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name,
if (argAttrs.empty())
return;
assert(type.getNumInputs() == argAttrs.size());
+ #if LLVM_VERSION_MAJOR < 21
function_interface_impl::addArgAndResultAttrs(
+ #else // triton_v3.3.x
+ call_interface_impl::addArgAndResultAttrs(
+ #endif
builder, state, argAttrs, /*resultAttrs=*/std::nullopt,
getArgAttrsAttrName(state.name), getResAttrsAttrName(state.name));
}
- #endif

ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) {
auto buildFuncType =
@@ -918,7 +920,6 @@ LogicalResult ReturnOp::verify() {
}

// -- JoinOp --
- #ifndef FLAGTREE_SPEC_Dialect_Triton_IR_Ops_inferReturnTypes
LogicalResult
JoinOp::inferReturnTypes(MLIRContext *context, std::optional<Location> location,
ValueRange operands, DictionaryAttr attributes,
@@ -950,7 +951,6 @@ JoinOp::inferReturnTypes(MLIRContext *context, std::optional<Location> location,
RankedTensorType::get(retShape, srcTy.getElementType(), retEnc));
return success();
}
- #endif

// -- SplitOp --
LogicalResult SplitOp::inferReturnTypes(
8 changes: 6 additions & 2 deletions python/setup.py
@@ -693,8 +693,12 @@ def get_packages():
"triton/backends",
"triton/tools",
]
- if helper.flagtree_backend:
- packages.append(f"triton/language/extra/{helper.get_device_name()}")
+ if helper.flagtree_backend and helper.flagtree_backend in helper.configs.language_extra_backends:
+ if helper.flagtree_backend == "ascend":
+ packages.append("triton/language/extra/cann")
+ packages.append("triton/language/extra/kernels")
+ else:
+ packages.append(f"triton/language/extra/{helper.get_device_name()}")
packages += helper.get_extra_packages()
packages += get_language_extra_packages()
packages += [f'triton/backends/{backend.name}' for backend in backends]
1 change: 1 addition & 0 deletions python/setup_tools/utils/__init__.py
@@ -11,6 +11,7 @@
dst_path=os.path.join(flagtree_configs.flagtree_submodule_dir, "triton_shared")),
"flir":
tools.Module(name="flir", url="https://github.com/FlagTree/flir.git",
commit_id="ascend-0129",
dst_path=os.path.join(flagtree_configs.flagtree_submodule_dir, "flir")),
}

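Review note: the flir module is now pinned to the ascend-0129 ref instead of tracking the default branch. The DownloadManager that consumes commit_id is not part of this diff; the sketch below shows only the generic clone-then-checkout pattern such a pin implies (hypothetical helper, real git CLI calls):

import subprocess

# Hypothetical helper, not the repo's DownloadManager: clone a module's
# repository, then detach at the pinned ref so builds are reproducible.
def fetch_pinned(url, dst_path, commit_id=None):
    subprocess.run(["git", "clone", url, dst_path], check=True)
    if commit_id:
        # commit_id may be a SHA, tag, or branch name such as "ascend-0129".
        subprocess.run(["git", "-C", dst_path, "checkout", commit_id], check=True)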
8 changes: 5 additions & 3 deletions python/setup_tools/utils/ascend.py
@@ -3,14 +3,16 @@

downloader = DownloadManager()

submodules = (Module(name="ascendnpu-ir", url="https://gitcode.com/Ascend/AscendNPU-IR.git",
commit_id="04045a06ec7c9592b17de659307d5debe7be590a",
dst_path=os.path.join(flagtree_configs.flagtree_submodule_dir, "ascendnpu-ir")), )
submodules = (Module(name="AscendNPU-IR", url="https://gitcode.com/qq_42979146/AscendNPU-IR.git",
commit_id="5a3921f871",
dst_path=os.path.join(flagtree_configs.flagtree_submodule_dir, "ascend/AscendNPU-IR")), )


def precompile_hook_flir(*args, **kargs):
default_backends = kargs["default_backends"]
default_backends_list = list(default_backends)
+ if 'nvidia' in default_backends:
+ default_backends_list.remove('nvidia')
if 'amd' in default_backends:
default_backends_list.remove('amd')
default_backends_list.append('flir')
36 changes: 0 additions & 36 deletions python/setup_tools/utils/src/ascend/CMakeLists.txt

This file was deleted.

4 changes: 2 additions & 2 deletions python/setup_tools/utils/tools.py
@@ -19,8 +19,8 @@ def _get_flagtree_root() -> str:
class FlagtreeConfigs:
default_backends: tuple = ("nvidia", "amd")
plugin_backends: tuple = ("ascend", "aipu", "tsingmicro")
- use_cuda_toolkit_backends: tuple = ('aipu', )
- language_extra_backends: tuple = ('xpu', 'mthreads', "cambricon")
+ use_cuda_toolkit_backends: tuple = ("aipu", )
+ language_extra_backends: tuple = ("xpu", "mthreads", "cambricon", "ascend")
ext_sourcedir: str = "triton/_C/"
flagtree_root_dir: str = field(default_factory=_get_flagtree_root)
flagtree_backend: str = field(default_factory=lambda: os.environ.get("FLAGTREE_BACKEND"))
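Review note: appending "ascend" to language_extra_backends is what activates the new branch in python/setup.py shown earlier, so Ascend builds package triton/language/extra/cann and triton/language/extra/kernels instead of a single device-named directory. A simplified, self-contained sketch of how the tuple and the packaging branch compose (stand-in function, not the real helper object):

# Simplified stand-in for the helper/config interplay in this diff.
language_extra_backends = ("xpu", "mthreads", "cambricon", "ascend")

def extra_language_packages(flagtree_backend, device_name):
    # Mirrors the get_packages() branch in python/setup.py above.
    if not flagtree_backend or flagtree_backend not in language_extra_backends:
        return []
    if flagtree_backend == "ascend":
        # Ascend contributes two namespaces rather than one device-named package.
        return ["triton/language/extra/cann", "triton/language/extra/kernels"]
    return ["triton/language/extra/" + device_name]

assert extra_language_packages("ascend", "npu") == [
    "triton/language/extra/cann",
    "triton/language/extra/kernels",
]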
6 changes: 0 additions & 6 deletions python/test/unit/test_debug.py
@@ -4,8 +4,6 @@
import triton.language as tl
import triton


- @pytest.mark.skip(reason="flagtree")
@pytest.mark.parametrize('cond, opt_flag, env_var', [
(cond, opt_flag, env_var) for cond in [True, False] \
for opt_flag in [True, False] \
@@ -30,7 +28,6 @@ def _kernel(COND: tl.constexpr):
getattr(torch, device).synchronize()


- @pytest.mark.skip(reason="flagtree")
@pytest.mark.parametrize("cond", [False, True])
def test_static_assert(cond):

@@ -64,7 +61,6 @@ def _test_overflow(x, y, x_dtype, y_dtype, debug, should_overflow, tri_func, ref
# integer overflow sanitization


- @pytest.mark.skip(reason="flagtree")
@pytest.mark.parametrize("x, y, x_dtype, y_dtype, debug, should_overflow", [
(-2**31, -1, 'int32', 'int32', False, False),
(-2**31, -1, 'int32', 'int32', True, True),
@@ -89,7 +85,6 @@ def _kernel_add(X, Y, Z):
# mul overflow


- @pytest.mark.skip(reason="flagtree")
@pytest.mark.parametrize("x, y, x_dtype, y_dtype, debug, should_overflow", [
(2**30, 4, 'int32', 'int32', False, False),
(2**30, 4, 'int32', 'int32', True, True),
@@ -111,7 +106,6 @@ def _kernel_mul(X, Y, Z):
# sub overflow


- @pytest.mark.skip(reason="flagtree")
@pytest.mark.parametrize("x, y, x_dtype, y_dtype, debug, should_overflow", [
(-2**31, 1, 'int32', 'int32', False, False),
(-2**31, 1, 'int32', 'int32', True, True),
2 changes: 0 additions & 2 deletions python/test/unit/test_debug_dump.py
@@ -16,8 +16,6 @@ def enable_dump_context(pass_name="1"):


def test_fn_dump(capfd, device, fresh_triton_cache):
- return # TODO: flagtree

N = 1024
src = torch.zeros(N, device=device)

1 change: 0 additions & 1 deletion python/test/unit/tools/test_disasm.py
@@ -5,7 +5,6 @@
import triton.language as tl


- @pytest.mark.skip(reason="flagtree")
def test_disam_cubin():
if not triton.runtime.driver.active.get_current_target().backend == "cuda":
pytest.skip("Test requires CUDA.")
4 changes: 4 additions & 0 deletions python/triton/__init__.py
@@ -1,3 +1,7 @@
+ # flagtree backend path specialization
+ from triton.runtime.driver import spec_path
+
+ spec_path(__path__)
"""isort:skip_file"""
__version__ = '3.2.0'

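Review note: only the call site of spec_path is visible here; its implementation in triton/runtime/driver is not part of this diff. Running it before the remaining imports (with the matching E402 ignore in .pre-commit-config.yaml) suggests it adjusts the package search path before submodules resolve. A purely illustrative sketch of what such a hook might look like; this is an assumption, not the actual FlagTree code:

import os

# Illustrative only, NOT the real spec_path: prepend a backend-specific
# directory to the package __path__ so FLAGTREE_BACKEND can shadow default
# submodules when they are imported later in __init__.py.
def spec_path(pkg_path):
    backend = os.environ.get("FLAGTREE_BACKEND")
    if not backend:
        return
    for base in list(pkg_path):
        candidate = os.path.join(base, "backends", backend, "overrides")
        if os.path.isdir(candidate):
            pkg_path.insert(0, candidate)  # searched before the default entries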