
Commit d33093e
Merge branch 'hidet-org:main' into main
2 parents ef57171 + 873d3a1
BolinSNLHM committed Jan 10, 2024
Showing 91 changed files with 4,014 additions and 548 deletions.
43 changes: 26 additions & 17 deletions .github/scripts/run_tests.py
@@ -2,9 +2,8 @@
import json
import subprocess
import pathlib
import numpy as np
import tqdm
from db_utils import get_db_conn
import argparse
from tabulate import tabulate

external_models = ['llama-7b', 'gpt2']

@@ -21,15 +20,7 @@ def run_command(cmd):
raise RuntimeError(f'Command {cmd} failed with return code {ret}.')
return stdout

def get_bench_cmd(run_type, run_id, run_name, run_param_name, dtype):
# Get the name of the benchmark script from DB
conn = get_db_conn()
cursor = conn.cursor()
query = f'SELECT runfile FROM {run_type} WHERE id = {run_id}'
cursor.execute(query)
runfile = cursor.fetchall()[0][0]
cursor.close()
conn.close()
def get_bench_cmd(run_type, run_id, run_name, runfile, run_param_name, dtype):
if run_name in external_models:
runfile = './models/bench/' + runfile
else:
@@ -38,29 +29,47 @@ def get_bench_cmd(run_type, run_id, run_name, run_param_name, dtype):
return cmd

if __name__ == '__main__':
fh = open('run_configs.json')
parser = argparse.ArgumentParser(prog='Run Benchmarks')
parser.add_argument(
'--print',
action='store_true',
default=False,
help='Print results'
)
parser.add_argument(
'--configs',
type=str,
default='run_configs.json',
help='Specify configurations file to use for benchmarking'
)
args = parser.parse_args()
configs_file = args.configs
fh = open(configs_file)
run_configs = json.load(fh)
fh.close()
hw_config = os.environ.get('HW_CONFIG')
print('hw:', hw_config)
for run_config in run_configs:
# Append hardware_config column
run_config['hardware_config'] = hw_config
# Extract configurations
run_type = run_config['type']
run_id = run_config['id']
run_name = run_config['name']
runfile = run_config['runfile']
run_param_id = run_config['param_id']
run_param_name = run_config['param_name']
run_dtype_id = run_config['dtype_id']
run_dtype_name = run_config['dtype_name']
cmd = get_bench_cmd(run_type, run_id, run_name, run_param_name, run_dtype_name)
cmd = get_bench_cmd(run_type, run_id, run_name, runfile, run_param_name, run_dtype_name)
outputs = run_command(cmd)
if outputs:
# The second-to-last line of every benchmark script's stdout is the latency (the last line is empty)
latency = float(outputs.split('\n')[-2])
run_config['latency'] = latency
else:
run_config['latency'] = 999.99
with open('run_configs.json', 'w') as fh:
json.dump(run_configs, fh)
with open(configs_file, 'w') as fh:
json.dump(run_configs, fh)

if args.print:
print(tabulate(run_configs, headers="keys"))
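For context, below is a minimal sketch of how one entry of the configurations file flows through this script. The field values and the sample stdout are hypothetical; the real entries are written by start_instances.py, and the exact benchmark command string is not shown in this diff.

```python
# Hypothetical run_configs.json entry; real entries are generated by start_instances.py.
run_config = {
    'type': 'model', 'id': 1, 'name': 'resnet50', 'runfile': 'bench_resnet.py',
    'param_id': 1, 'param_name': 'bs=1', 'dtype_id': 1, 'dtype_name': 'float16',
}

# External models resolve their runfile under ./models/bench/, as in get_bench_cmd.
external_models = ['llama-7b', 'gpt2']
runfile = run_config['runfile']
if run_config['name'] in external_models:
    runfile = './models/bench/' + runfile

# Each benchmark script prints the latency as the second-to-last line of stdout
# (the last line is empty); a sentinel of 999.99 is recorded when there is no output.
stdout = 'benchmark log line\n7.81\n'
run_config['latency'] = float(stdout.split('\n')[-2]) if stdout else 999.99
print(runfile, run_config['latency'])
```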
31 changes: 23 additions & 8 deletions .github/scripts/start_instances.py
@@ -30,7 +30,7 @@ def run_command(cmd):
hw_config_ids = [s for s in hw_config_ids.split(',') if s]

instances = []
# Fetch list of (cloud_provider_id, instance_id) tuples from DB
# Fetch list of (cloud_provider_id, instance_id) tuples from DB and add them to the list of instances to launch
for hw_config_id in hw_config_ids:
query = (
'SELECT cloud_provider_id, instance_id, hardware_config.name as hw_config FROM cloud_instance '
@@ -43,12 +43,22 @@ def run_command(cmd):
raise ValueError(f'Instance with hardware config id {hw_config_id} does not exist.')
instances.append(rows[0])

# Fetch the compile server instance ID from DB and add it to list of instances to launch
query = (
f'SELECT cloud_provider_id, instance_id, 0 FROM compile_server WHERE org = \'{repo_org}\' LIMIT 1'
)
cursor.execute(query)
rows = cursor.fetchall()
if len(rows) == 0:
raise ValueError(f'No compile server found in DB.')
instances.append(rows[0])

# Store a json containing all the required model/OPs (and inputs) for this regression run
# This json will be uploaded as an artifact, and will be filled in by subsequent jobs
# For now, we run all model/input combinations by default
run_configs = []
query = (
'SELECT model.id as model_id, model.name as model_name, input_parameter.id as param_id, '
'SELECT model.id as model_id, model.name as model_name, model.runfile as runfile, input_parameter.id as param_id, '
'input_parameter.parameter as param_name, dtype.id as dtype_id, dtype.name as dtype_name '
'FROM model JOIN model_input_parameter ON '
'model.id = model_input_parameter.model_id JOIN input_parameter ON '
@@ -57,13 +67,13 @@ def run_command(cmd):
cursor.execute(query)
rows = cursor.fetchall()
for row in rows:
model_id, model_name, param_id, param_name, dtype_id, dtype_name = row
run_configs.append({'type': 'model', 'id': int(model_id), 'name': model_name,
model_id, model_name, model_runfile, param_id, param_name, dtype_id, dtype_name = row
run_configs.append({'type': 'model', 'id': int(model_id), 'name': model_name, 'runfile': model_runfile,
'param_id': int(param_id), 'param_name': param_name,
'dtype_id': int(dtype_id), 'dtype_name': dtype_name,
})
query = (
'SELECT operator.id as operator_id, operator.name as operator_name, input_parameter.id as param_id, '
'SELECT operator.id as operator_id, operator.name as operator_name, operator.runfile as runfile, input_parameter.id as param_id, '
'input_parameter.parameter as param_name, dtype.id as dtype_id, dtype.name as dtype_name '
'FROM operator JOIN operator_input_parameter ON '
'operator.id = operator_input_parameter.operator_id JOIN input_parameter ON '
@@ -72,8 +82,8 @@ def run_command(cmd):
cursor.execute(query)
rows = cursor.fetchall()
for row in rows:
op_id, op_name, param_id, param_name, dtype_id, dtype_name = row
run_configs.append({'type': 'operator', 'id': int(op_id), 'name': op_name,
op_id, op_name, op_runfile, param_id, param_name, dtype_id, dtype_name = row
run_configs.append({'type': 'operator', 'id': int(op_id), 'name': op_name, 'runfile': op_runfile,
'param_id': int(param_id), 'param_name': param_name,
'dtype_id': int(dtype_id), 'dtype_name': dtype_name,
})
@@ -89,6 +99,8 @@ def run_command(cmd):
cloud_provider_id, instance_id, _ = instance
if cloud_provider_id == 1: # AWS
cmd = ['aws', 'ec2', 'start-instances', '--instance-ids', instance_id]
elif cloud_provider_id == 2: # Always on, no need to launch. Do Nothing.
cmd = ['true']
else:
raise ValueError(f'Unknown cloud provider id: {cloud_provider_id}')
output = run_command(cmd)
@@ -108,6 +120,8 @@ def run_command(cmd):
raise RuntimeError(f'Failed to check status for {instance_id} on cloud provider {cloud_provider_id}.')
if output.stdout.count('ok') >= 2:
started = True
elif cloud_provider_id == 2: # Always on, no need to launch. Do Nothing.
started = True
else:
raise ValueError(f'Unknown cloud provider id: {cloud_provider_id}')

@@ -126,7 +140,8 @@ def run_command(cmd):
hw_configs = []
for instance in instances:
_, _, hw_config = instance
hw_configs.append(hw_config)
if hw_config != 0:
hw_configs.append(hw_config)
hw_config_json_str = json.dumps(hw_configs)
with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
print(f'hw_configs={hw_config_json_str}', file=fh)
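A rough sketch of the start-and-poll flow these changes extend is shown below. Provider id 1 (AWS) and id 2 (an always-on machine) follow the diff above; the exact AWS status command is elided from this diff, so the use of `describe-instance-status` here is an assumption, and the polling interval is illustrative.

```python
import subprocess
import time

def start_and_wait(cloud_provider_id: int, instance_id: str) -> None:
    # Provider 1 is AWS; provider 2 is an always-on machine that needs no launch.
    if cloud_provider_id == 1:
        subprocess.run(['aws', 'ec2', 'start-instances', '--instance-ids', instance_id],
                       check=True)
    elif cloud_provider_id == 2:
        return  # Always on, nothing to start.
    else:
        raise ValueError(f'Unknown cloud provider id: {cloud_provider_id}')

    # Poll until the instance reports two 'ok' status checks, as in the script above.
    # Assumed status command; the real command is not shown in the diff.
    while True:
        out = subprocess.run(['aws', 'ec2', 'describe-instance-status',
                              '--instance-ids', instance_id],
                             capture_output=True, text=True, check=True)
        if out.stdout.count('ok') >= 2:
            return
        time.sleep(30)
```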
4 changes: 4 additions & 0 deletions .github/scripts/stop_instances.py
@@ -19,6 +19,8 @@ def run_command(cmd):
instance_id = ids[1]
if cloud_provider_id == 1: # AWS
cmd = ['aws', 'ec2', 'stop-instances', '--instance-ids', instance_id]
elif cloud_provider_id == 2: # Always on, no need to stop. Do Nothing.
cmd = ['true']
else:
raise ValueError(f'Unknown cloud provider id: {cloud_provider_id}')
output = run_command(cmd)
@@ -42,5 +44,7 @@ def run_command(cmd):
# An instance still running would contain its id in the status.
if instance_id not in output.stdout:
stopped = True
elif cloud_provider_id == 2: # Always on, no need to stop. Do Nothing.
stopped = True
else:
raise ValueError(f'Unknown cloud provider id: {cloud_provider_id}')
45 changes: 25 additions & 20 deletions .github/workflows/regression.yaml
@@ -11,20 +11,6 @@ on:
issue_comment:
types: [created]

env:
CI_DB_HOSTNAME: ${{ secrets.CI_DB_HOSTNAME }}
CI_DB_PORT: ${{ secrets.CI_DB_PORT }}
CI_DB_USERNAME: ${{ secrets.CI_DB_USERNAME }}
CI_DB_PASSWORD: ${{ secrets.CI_DB_PASSWORD }}
CI_CS_HOSTNAME: ${{ secrets.CI_CS_HOSTNAME }}
CI_CS_PORT: ${{ secrets.CI_CS_PORT }}
CI_CS_USERNAME: ${{ secrets.CI_CS_USERNAME }}
CI_CS_PASSWORD: ${{ secrets.CI_CS_PASSWORD }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
start_instances:
if: |
@@ -47,9 +33,16 @@
id: run_py_script
run: timeout 900 python ./.github/scripts/start_instances.py
env:
# TODO: Allow launching only specified GPU instances
HW_CONFIG: all
REPO_NAME: ${{ github.repository }}
# TODO: Allow launching only specified GPU instances
CI_DB_HOSTNAME: ${{ secrets.CI_DB_HOSTNAME }}
CI_DB_PORT: ${{ secrets.CI_DB_PORT }}
CI_DB_USERNAME: ${{ secrets.CI_DB_USERNAME }}
CI_DB_PASSWORD: ${{ secrets.CI_DB_PASSWORD }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1

- name: Upload run configs
uses: actions/upload-artifact@v3
@@ -110,6 +103,10 @@ jobs:
uses: actions/download-artifact@v3
with:
name: run_configs

- name: Clear cache
run: |
hidet cache clear --all
- name: Run tests
timeout-minutes: 2880
@@ -121,6 +118,11 @@
REPO_BRANCH: |
${{ github.event_name == 'workflow_dispatch' && github.ref_name ||
format('pull/{0}', github.event.issue.number) }}
CI_CS_HOSTNAME: ${{ secrets.CI_CS_HOSTNAME }}
CI_CS_PORT: ${{ secrets.CI_CS_PORT }}
CI_CS_USERNAME: ${{ secrets.CI_CS_USERNAME }}
CI_CS_PASSWORD: ${{ secrets.CI_CS_PASSWORD }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}

- name: Upload run configs
uses: actions/upload-artifact@v3
@@ -135,10 +137,6 @@
steps:
- name: Checkout repo
uses: actions/checkout@v4
with:
ref: |
${{ github.event_name == 'workflow_dispatch' && github.ref_name ||
format('refs/pull/{0}/head', github.event.issue.number) }}

- name: Install dependencies
run: pip install mysql-connector-python
@@ -166,6 +164,10 @@ jobs:
COMMIT_TIME: ${{ env.COMMIT_TIME }}
COMMIT_AUTHOR: ${{ env.COMMIT_AUTHOR }}
HW_CONFIGS: ${{ needs.start_instances.outputs.hw_configs }}
CI_DB_HOSTNAME: ${{ secrets.CI_DB_HOSTNAME }}
CI_DB_PORT: ${{ secrets.CI_DB_PORT }}
CI_DB_USERNAME: ${{ secrets.CI_DB_USERNAME }}
CI_DB_PASSWORD: ${{ secrets.CI_DB_PASSWORD }}

stop_instances:
if: |
@@ -181,4 +183,7 @@
- name: Run main Python script
run: timeout 900 python ./.github/scripts/stop_instances.py
env:
STARTED_INSTANCES: ${{ needs.start_instances.outputs.started_instances }}
STARTED_INSTANCES: ${{ needs.start_instances.outputs.started_instances }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
9 changes: 9 additions & 0 deletions .gitignore
@@ -204,3 +204,12 @@ build-release

# intermediate files
/gallery/**/*.json

# hidet model files
*.hidet

# lock files
*.lock

# experiments folder
/experiments
8 changes: 5 additions & 3 deletions CMakeLists.txt
@@ -17,8 +17,10 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")

# add hidet_runtime target
add_library(hidet_runtime SHARED
src/hidet/runtime/cuda_context.cpp
src/hidet/runtime/cpu_context.cpp
src/hidet/runtime/cuda/context.cpp
src/hidet/runtime/cuda/cublas.cpp
src/hidet/runtime/cuda/cuda.cpp
src/hidet/runtime/cpu/context.cpp
src/hidet/runtime/callbacks.cpp
src/hidet/runtime/logging.cpp
src/hidet/runtime/symbols.cpp
@@ -28,7 +30,7 @@ set_target_properties(hidet_runtime PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_

# add hidet target
add_library(hidet SHARED
src/hidet/packedfunc.cpp
src/hidet/empty.cpp # empty source file
)
target_include_directories(hidet PRIVATE ${CMAKE_SOURCE_DIR}/include)
set_target_properties(hidet PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
3 changes: 2 additions & 1 deletion apps/compile_server/resources/compilation.py
@@ -56,7 +56,8 @@ def clone_github_repo(owner: str, repo: str, version: str) -> str:
branches = repo.git.branch("--all").split()
# If local branch already exists, delete it as we prepare to do a new fresh checkout
# This is because the local branch might be divergent with remote, so we just discard it
if version in branches:
# The exception is the main branch, since it should never diverge
if version in branches and version != 'main':
repo.git.checkout('main')
repo.git.branch('-D', version)
if 'pull/' in version:
2 changes: 2 additions & 0 deletions docs/source/how-to-guides/add-new-operator/index.rst
@@ -1,6 +1,8 @@
Add New Operator
================



Hidet is designed to be extensible. It is easy to add new operators to Hidet. There are two ways to add and schedule
an operator.

1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -36,6 +36,7 @@ Hidet is an open-source DNN inference framework, it features
:maxdepth: 1
:caption: Developer Guide

gallery/developer-guides/add-torch-operator-mapping
how-to-guides/add-new-operator/index
gallery/developer-guides/add-operator-resolve-rule
gallery/developer-guides/add-subgraph-rewrite-rule
4 changes: 4 additions & 0 deletions docs/source/python_api/data_types.rst
@@ -1,6 +1,9 @@
hidet.dtypes
============

Hidet supports the following primitive data types, which can be used as the ``dtype`` parameter of functions like
:func:`hidet.zeros` and :func:`hidet.ones`:

.. data:: hidet.uint8
.. data:: hidet.uint16
.. data:: hidet.uint32
@@ -14,3 +17,4 @@ hidet.dtypes
.. data:: hidet.float64
.. data:: hidet.bfloat16
.. data:: hidet.tfloat32
.. data:: hidet.boolean
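A minimal usage sketch for these dtypes, assuming a working hidet installation (the shapes and dtype choices are illustrative):

```python
import hidet

# Create tensors with an explicit primitive dtype, as documented above.
a = hidet.zeros([2, 3], dtype=hidet.float16)
b = hidet.ones([2, 3], dtype=hidet.int32)
print(a.dtype, b.dtype)
```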
@@ -2,5 +2,6 @@ hidet.drivers
-------------

.. automodule:: hidet.drivers
:members:
:autosummary:
:members:
:imported-members:
:autosummary:
8 changes: 8 additions & 0 deletions docs/source/python_api/ffi/index.rst
@@ -0,0 +1,8 @@
hidet.ffi
---------

.. automodule:: hidet.ffi
:members:
:imported-members:
:autosummary:

2 changes: 2 additions & 0 deletions docs/source/python_api/index.rst
@@ -15,8 +15,10 @@ Python API
cuda
tensor
data_types
drivers
ops/index
graph/index
runtime/index
ffi/index
utils/index
testing/index
1 change: 1 addition & 0 deletions docs/source/python_api/option.rst
@@ -4,3 +4,4 @@ hidet.option
.. automodule:: hidet.option
:members:
:autosummary:
:member-order: groupwise
