diff --git a/.github/scripts/bench/bench_op.py b/.github/scripts/bench/bench_op.py index a0d99b2fa..7bbce06e9 100644 --- a/.github/scripts/bench/bench_op.py +++ b/.github/scripts/bench/bench_op.py @@ -14,7 +14,8 @@ def bench_matmul_f16(params: str, *args, **kwargs) -> float: c = hidet.ops.matmul(a, b) g = hidet.trace_from(c, inputs=[a, b]) g = hidet.graph.optimize(g) - return g.latency() + g = g.cuda_graph() + return bench_torch_model(lambda: g.run_async(), []) def bench_batch_matmul(params: str, *args, **kwargs) -> float: # Default to benchmarking f32 for now, though this op can run other dtypes @@ -26,7 +27,8 @@ def bench_batch_matmul(params: str, *args, **kwargs) -> float: c = hidet.ops.matmul(a, b) g = hidet.trace_from(c, inputs=[a, b]) g = hidet.graph.optimize(g) - return g.latency() + g = g.cuda_graph() + return bench_torch_model(lambda: g.run_async(), []) def bench_conv2d(params: str, *args, **kwargs) -> float: x_shape, w_shape = params.split(',') @@ -37,7 +39,8 @@ def bench_conv2d(params: str, *args, **kwargs) -> float: o = hidet.ops.conv2d(x, w) g = hidet.trace_from(o, inputs=[x, w]) g = hidet.graph.optimize(g) - return g.latency() + g = g.cuda_graph() + return bench_torch_model(lambda: g.run_async(), []) def bench_conv2d_gemm_f16(params: str, *args, **kwargs) -> float: x_shape, w_shape = params.split(',') @@ -48,7 +51,8 @@ def bench_conv2d_gemm_f16(params: str, *args, **kwargs) -> float: o = hidet.ops.conv2d(x, w) g = hidet.trace_from(o, inputs=[x, w]) g = hidet.graph.optimize(g) - return g.latency() + g = g.cuda_graph() + return bench_torch_model(lambda: g.run_async(), []) def bench_attn(params: str, *args, **kwargs) -> float: bs, seqlen, nhead, hdim = [int(s) for s in params.split('x')] @@ -61,7 +65,8 @@ def bench_attn(params: str, *args, **kwargs) -> float: o = hidet.ops.attention(q, k, v) g = hidet.trace_from(o, inputs=[q, k, v]) g = hidet.graph.optimize(g) - return g.latency() + g = g.cuda_graph() + return bench_torch_model(lambda: g.run_async(), []) def bench_attn_mask_add(params: str, *args, **kwargs) -> float: bs, seqlen, nhead, hdim = [int(s) for s in params.split('x')] @@ -76,7 +81,8 @@ def bench_attn_mask_add(params: str, *args, **kwargs) -> float: o = hidet.ops.attention(q, k, v, mask=mask) g = hidet.trace_from(o, inputs=[q, k, v, mask]) g = hidet.graph.optimize(g) - return g.latency() + g = g.cuda_graph() + return bench_torch_model(lambda: g.run_async(), []) def bench_reduce(params: str, *args, **kwargs) -> float: x_shape, axis = params.split(',', maxsplit=1) @@ -88,7 +94,8 @@ def bench_reduce(params: str, *args, **kwargs) -> float: o = hidet.ops.sum(x, dims=axis) g = hidet.trace_from(o, inputs=[x]) g = hidet.graph.optimize(g) - return g.latency() + g = g.cuda_graph() + return bench_torch_model(lambda: g.run_async(), []) bench_func_map = { 'matmul_f16': bench_matmul_f16, diff --git a/.github/scripts/bench/bench_utils.py b/.github/scripts/bench/bench_utils.py index 09cf862a8..3921eea7a 100644 --- a/.github/scripts/bench/bench_utils.py +++ b/.github/scripts/bench/bench_utils.py @@ -35,9 +35,10 @@ def bench_torch_model(model, torch_inputs, bench_iters=100, warmup_iters=10): return latency def enable_compile_server(enable=True): - hidet.option.compile_server.addr(os.environ.get('CI_CS_HOSTNAME')) - hidet.option.compile_server.port(int(os.environ.get('CI_CS_PORT'))) - hidet.option.compile_server.username(os.environ.get('CI_CS_USERNAME')) - hidet.option.compile_server.password(os.environ.get('CI_CS_PASSWORD')) - hidet.option.compile_server.repo(os.environ.get('REPO_NAME').strip(), os.environ.get('REPO_BRANCH').strip()) - hidet.option.compile_server.enable(flag=enable) \ No newline at end of file + if os.environ.get('CI_CS_HOSTNAME'): + hidet.option.compile_server.addr(os.environ.get('CI_CS_HOSTNAME')) + hidet.option.compile_server.port(int(os.environ.get('CI_CS_PORT'))) + hidet.option.compile_server.username(os.environ.get('CI_CS_USERNAME')) + hidet.option.compile_server.password(os.environ.get('CI_CS_PASSWORD')) + hidet.option.compile_server.repo(os.environ.get('REPO_NAME').strip(), os.environ.get('REPO_BRANCH').strip()) + hidet.option.compile_server.enable(flag=enable) \ No newline at end of file diff --git a/.github/workflows/launch.yaml b/.github/workflows/launch.yaml new file mode 100644 index 000000000..15663bf63 --- /dev/null +++ b/.github/workflows/launch.yaml @@ -0,0 +1,37 @@ +name: Launch CI + + +on: + workflow_dispatch: + inputs: + shutdown_instances: + description: 'Shut down GPU instances when finished.' + required: true + type: boolean + default: true + issue_comment: + types: [created] + + +jobs: + trigger: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'issue_comment' && github.event.issue.pull_request != '' && + contains(fromJSON('["MEMBER", "OWNER", "COLLABORATOR"]'), github.event.comment.author_association) && + contains(github.event.comment.body, '$hidet-ci launch') + runs-on: ubuntu-latest + steps: + - name: Trigger workflow in internal repository + run: | + curl -L \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GH_PAT }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/${{ secrets.REPO_NAME }}/actions/workflows/regression.yaml/dispatches \ + -d "{\"ref\": \"main\", \"inputs\": {\"shutdown_instances\": \"${{ env.SHUTDOWN }}\", \"source_repo\": \"${{ env.SOURCE_REPO }}\", \"source_ref\": \"${{ env.SOURCE_REF }}\"}}" + env: + SHUTDOWN: ${{ github.event_name == 'workflow_dispatch' && inputs.shutdown_instances || !contains(github.event.comment.body, '--keep') }} + SOURCE_REPO: ${{ github.repository }} + SOURCE_REF: ${{ github.event_name == 'workflow_dispatch' && github.ref_name || github.event.issue.number }} diff --git a/.github/workflows/regression.yaml b/.github/workflows/regression.yaml index 4d4729c29..210ec3d30 100644 --- a/.github/workflows/regression.yaml +++ b/.github/workflows/regression.yaml @@ -8,16 +8,19 @@ on: required: true type: boolean default: true - issue_comment: - types: [created] + source_repo: + description: 'Source Repository Name. E.g, hidet-org/hidet' + required: true + type: string + default: 'this' + source_ref: + description: 'Source repository ref (Branch name or PR number).' + required: true + type: string + default: 'this' jobs: start_instances: - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'issue_comment' && github.event.issue.pull_request != '' && - contains(fromJSON('["MEMBER", "OWNER", "COLLABORATOR"]'), github.event.comment.author_association) && - contains(github.event.comment.body, '$hidet-ci launch') runs-on: ubuntu-latest outputs: started_instances: ${{ steps.run_py_script.outputs.started_instances }} @@ -61,13 +64,20 @@ jobs: container: image: nvcr.io/nvidia/pytorch:23.10-py3 options: --gpus all + outputs: + commit_time: ${{ steps.get_commit_info.outputs.commit_time }} + commit_author: ${{ steps.get_commit_info.outputs.commit_author }} + commit_sha: ${{ steps.get_commit_info.outputs.commit_sha }} steps: - name: Checkout repo uses: actions/checkout@v4 with: + repository: | + ${{ inputs.source_repo == 'this' && github.repository || + inputs.source_repo }} ref: | - ${{ github.event_name == 'workflow_dispatch' && github.ref_name || - format('refs/pull/{0}/head', github.event.issue.number) }} + ${{ inputs.source_repo == 'this' && github.ref_name || + format('refs/pull/{0}/head', inputs.source_ref) }} path: hidet - name: Checkout models @@ -114,10 +124,8 @@ jobs: python hidet/.github/scripts/run_tests.py env: HW_CONFIG: ${{ matrix.hw_configs }} - REPO_NAME: ${{ github.repository }} - REPO_BRANCH: | - ${{ github.event_name == 'workflow_dispatch' && github.ref_name || - format('pull/{0}', github.event.issue.number) }} + REPO_NAME: ${{ inputs.source_repo == 'this' && github.repository || inputs.source_repo }} + REPO_BRANCH: ${{ inputs.source_repo == 'this' && github.ref_name || format('pull/{0}', inputs.source_ref) }} CI_CS_HOSTNAME: ${{ secrets.CI_CS_HOSTNAME }} CI_CS_PORT: ${{ secrets.CI_CS_PORT }} CI_CS_USERNAME: ${{ secrets.CI_CS_USERNAME }} @@ -130,6 +138,17 @@ jobs: name: run_configs_${{ matrix.hw_configs }} path: run_configs.json retention-days: 1 + + - name: Retrieve commit properties + id: get_commit_info + run: | + cd hidet + COMMIT_TIME=$(git log -1 --format=%cd --date=format:'%Y-%m-%d %H:%M:%S') + COMMIT_AUTHOR=$(git log -1 --format=%an) + COMMIT_SHA=$(git log -1 --format=%H) + echo "commit_time=$COMMIT_TIME" >> $GITHUB_OUTPUT + echo "commit_author=$COMMIT_AUTHOR" >> $GITHUB_OUTPUT + echo "commit_sha=$COMMIT_SHA" >> $GITHUB_OUTPUT upload_results: runs-on: ubuntu-latest @@ -143,26 +162,15 @@ jobs: - name: Download run configs uses: actions/download-artifact@v3 - - - name: Setup ENV - run: | - COMMIT_TIME=$(git log -1 --format=%cd --date=format:'%Y-%m-%d %H:%M:%S') - COMMIT_AUTHOR=$(git log -1 --format=%an) - COMMIT_SHA=$(git log -1 --format=%H) - echo "COMMIT_TIME=$COMMIT_TIME" >> $GITHUB_ENV - echo "COMMIT_AUTHOR=$COMMIT_AUTHOR" >> $GITHUB_ENV - echo "COMMIT_SHA=$COMMIT_SHA" >> $GITHUB_ENV - name: Run main Python script run: python ./.github/scripts/upload_results.py env: - REPO_NAME: ${{ github.repository }} - REPO_BRANCH: | - ${{ github.event_name == 'workflow_dispatch' && github.ref_name || - format('pull/{0}', github.event.issue.number) }} - COMMIT_SHA: ${{ env.COMMIT_SHA }} - COMMIT_TIME: ${{ env.COMMIT_TIME }} - COMMIT_AUTHOR: ${{ env.COMMIT_AUTHOR }} + REPO_NAME: ${{ inputs.source_repo == 'this' && github.repository || inputs.source_repo }} + REPO_BRANCH: ${{ inputs.source_repo == 'this' && github.ref_name || format('pull/{0}', inputs.source_ref) }} + COMMIT_SHA: ${{ needs.run_tests.outputs.commit_sha }} + COMMIT_TIME: ${{ needs.run_tests.outputs.commit_time }} + COMMIT_AUTHOR: ${{ needs.run_tests.outputs.commit_author }} HW_CONFIGS: ${{ needs.start_instances.outputs.hw_configs }} CI_DB_HOSTNAME: ${{ secrets.CI_DB_HOSTNAME }} CI_DB_PORT: ${{ secrets.CI_DB_PORT }} @@ -170,10 +178,7 @@ jobs: CI_DB_PASSWORD: ${{ secrets.CI_DB_PASSWORD }} stop_instances: - if: | - github.event_name == 'workflow_dispatch' && inputs.shutdown_instances || - github.event_name == 'issue_comment' && github.event.issue.pull_request != '' && - !contains(github.event.comment.body, '--keep') + if: inputs.shutdown_instances runs-on: ubuntu-latest needs: [start_instances, run_tests] steps: diff --git a/python/hidet/graph/ops/fusion/apply_prologue_epilogue.py b/python/hidet/graph/ops/fusion/apply_prologue_epilogue.py index 10d9c6fca..2b2a7f74e 100644 --- a/python/hidet/graph/ops/fusion/apply_prologue_epilogue.py +++ b/python/hidet/graph/ops/fusion/apply_prologue_epilogue.py @@ -771,7 +771,6 @@ def process_module(self, ir_module: IRModule) -> IRModule: try: rewriter = PrologueEpilogueFuseRewriter(self.fused_task, prologues, epilogues, tensor_map, marks) ir_module = rewriter.rewrite(ir_module) - print('success') return ir_module except CanNotFuseError: pass diff --git a/python/hidet/ir/schedulers/cuda/scheduler.py b/python/hidet/ir/schedulers/cuda/scheduler.py index cae20ab74..558b56aa0 100644 --- a/python/hidet/ir/schedulers/cuda/scheduler.py +++ b/python/hidet/ir/schedulers/cuda/scheduler.py @@ -35,7 +35,13 @@ def schedule_grid_compute(self, node: GridCompute, tensor_map: Dict[TensorNode, grid_dim: Expr = (prod(node.shape) + block_dim - 1) // block_dim if self.task is not None: - name = f'{self.task.name}_compute_{node.name}' + from hidet.graph.ops.fusion.fused_operator import FusedTask + + if isinstance(self.task, FusedTask): + fused_name = self.task.attrs['fused_ops'].replace(' ', '_') + name = f'fused_{fused_name}_{node.name}' + else: + name = f'{self.task.name}_{node.name}' else: name = f'compute_{node.name}'